diff --git a/docs/changes.txt b/docs/changes.txt
index a914583f5..115489cb5 100644
--- a/docs/changes.txt
+++ b/docs/changes.txt
@@ -13,10 +13,11 @@
 ## Features
 ##
 
-- Added support to building Universal macOS binary on Apple Silicon
-- Added support to use --debug-mode in attack-mode 9 (Association Attack)
 - Added guess data to --status-json output
 - Added hex format for --separator option
+- Added new backend support for Metal, the OpenCL replacement API on Apple
+- Added support to building Universal macOS binary on Apple Silicon
+- Added support to use --debug-mode in attack-mode 9 (Association Attack)
 
 ##
 ## Bugs
diff --git a/docs/credits.txt b/docs/credits.txt
index 268719f2c..bb0f95a7d 100644
--- a/docs/credits.txt
+++ b/docs/credits.txt
@@ -21,6 +21,7 @@ Gabriele "matrix" Gristina (@gm4tr1x)
 * Multiple kernel modules
 * Compressed wordlist feature
 * OpenCL Info feature
+* Apple Metal Runtime API feature
 * Apple macOS port
 * Apple Silicon support
 * Universal binary on Apple Silicon
diff --git a/include/backend.h b/include/backend.h
index c7e7de0a1..2f2a58f9c 100644
--- a/include/backend.h
+++ b/include/backend.h
@@ -40,43 +40,51 @@ int backend_session_update_mp (hashcat_ctx_t *hashcat_ctx);
 int backend_session_update_mp_rl (hashcat_ctx_t *hashcat_ctx, const u32 css_cnt_l, const u32 css_cnt_r);
 
 void generate_source_kernel_filename (const bool slow_candidates, const u32 attack_exec, const u32 attack_kern, const u32 kern_type, const u32 opti_type, char *shared_dir, char *source_file);
-void generate_cached_kernel_filename (const bool slow_candidates, const u32 attack_exec, const u32 attack_kern, const u32 kern_type, const u32 opti_type, char *cache_dir, const char *device_name_chksum, char *cached_file);
+void generate_cached_kernel_filename (const bool slow_candidates, const u32 attack_exec, const u32 attack_kern, const u32 kern_type, const u32 opti_type, char *cache_dir, const char *device_name_chksum, char *cached_file, bool is_metal);
 
 void generate_source_kernel_shared_filename (char *shared_dir, char *source_file);
-void generate_cached_kernel_shared_filename (char *cache_dir, const char *device_name_chksum, char *cached_file);
+void generate_cached_kernel_shared_filename (char *cache_dir, const char *device_name_chksum, char *cached_file, bool is_metal);
 
 void generate_source_kernel_mp_filename (const u32 opti_type, const u64 opts_type, char *shared_dir, char *source_file);
-void generate_cached_kernel_mp_filename (const u32 opti_type, const u64 opts_type, char *cache_dir, const char *device_name_chksum, char *cached_file);
+void generate_cached_kernel_mp_filename (const u32 opti_type, const u64 opts_type, char *cache_dir, const char *device_name_chksum, char *cached_file, bool is_metal);
 
 void generate_source_kernel_amp_filename (const u32 attack_kern, char *shared_dir, char *source_file);
-void generate_cached_kernel_amp_filename (const u32 attack_kern, char *cache_dir, const char *device_name_chksum, char *cached_file);
-
-int gidd_to_pw_t (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 gidd, pw_t *pw);
-
-int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u32 highest_pw_len, const u64 pws_pos, const u64 pws_cnt, const u32 fast_iteration, const u32 salt_pos);
-
-int run_cuda_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 num);
-int run_cuda_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 num);
-int run_cuda_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 offset, const u8 value, const u64 size);
-int run_cuda_kernel_memset32 (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 offset, const u32 value, const u64 size);
-int run_cuda_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 size);
-
-int run_hip_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, hipDeviceptr_t buf, const u64 num);
-int run_hip_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, hipDeviceptr_t buf, const u64 num);
-int run_hip_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, hipDeviceptr_t buf, const u64 offset, const u8 value, const u64 size);
-int run_hip_kernel_memset32 (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, hipDeviceptr_t buf, const u64 offset, const u32 value, const u64 size);
-int run_hip_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, hipDeviceptr_t buf, const u64 size);
-
-int run_opencl_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 num);
-int run_opencl_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 num);
-int run_opencl_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 offset, const u8 value, const u64 size);
-int run_opencl_kernel_memset32 (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 offset, const u32 value, const u64 size);
-int run_opencl_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 size);
-
-int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u32 kern_run, const u64 pws_pos, const u64 num, const u32 event_update, const u32 iteration);
-int run_kernel_mp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u32 kern_run, const u64 num);
-int run_kernel_tm (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param);
-int run_kernel_amp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 num);
-int run_kernel_decompress (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 num);
-int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 pws_cnt);
-int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 pws_pos, const u64 pws_cnt);
+void generate_cached_kernel_amp_filename (const u32 attack_kern, char *cache_dir, const char *device_name_chksum, char *cached_file, bool is_metal);
+
+int gidd_to_pw_t (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 gidd, pw_t *pw);
+
+int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u32 highest_pw_len, const u64 pws_pos, const u64 pws_cnt, const u32 fast_iteration, const u32 salt_pos);
+
+int run_cuda_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 num);
+int run_cuda_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 num);
+int run_cuda_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 offset, const u8 value, const u64 size);
+int run_cuda_kernel_memset32 (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 offset, const u32 value, const u64 size);
+int run_cuda_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 size);
+
+int run_hip_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, hipDeviceptr_t buf, const u64 num);
+int run_hip_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, hipDeviceptr_t buf, const u64 num);
+int run_hip_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, hipDeviceptr_t buf, const u64 offset, const u8 value, const u64 size);
+int run_hip_kernel_memset32 (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, hipDeviceptr_t buf, const u64 offset, const u32 value, const u64 size);
+int run_hip_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, hipDeviceptr_t buf, const u64 size);
+
+#if defined (__APPLE__)
+int run_metal_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, mtl_mem buf, const u64 num);
+int run_metal_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, mtl_mem buf, const u64 num);
+int run_metal_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, mtl_mem buf, const u64 offset, const u8 value, const u64 size);
+int run_metal_kernel_memset32 (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, mtl_mem buf, const u64 offset, const u32 value, const u64 size);
+int run_metal_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, mtl_mem buf, const u64 size);
+#endif
+
+int run_opencl_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 num);
+int run_opencl_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 num);
+int run_opencl_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 offset, const u8 value, const u64 size);
+int run_opencl_kernel_memset32 (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 offset, const u32 value, const u64 size);
+int run_opencl_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 size);
+
+int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u32 kern_run, const u64 pws_pos, const u64 num, const u32 event_update, const u32 iteration);
+int run_kernel_mp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u32 kern_run, const u64 num);
+int run_kernel_tm (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param);
+int run_kernel_amp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 num);
+int run_kernel_decompress (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 num);
+int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 pws_cnt);
+int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 pws_pos, const u64 pws_cnt);
 
 void *hook12_thread (void *p);
 void *hook23_thread (void *p);
diff --git a/include/ext_metal.h b/include/ext_metal.h
new file mode 100644
index 000000000..1bb1425f8
--- /dev/null
+++ b/include/ext_metal.h
@@ -0,0 +1,118 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+#ifndef _EXT_METAL_H
+#define _EXT_METAL_H
+
+#if defined (__APPLE__)
+
+#include
+#include
+
+#define mtl_device_id id
+#define mtl_command_queue id
+#define mtl_function id
+#define mtl_pipeline id
+#define mtl_mem id
+#define mtl_library id
+#define mtl_command_buffer id
+#define mtl_command_encoder id
+#define mtl_blit_command_encoder id
+#define mtl_compute_command_encoder id
+
+typedef enum metalDeviceAttribute
+{
+  MTL_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 1,
+  MTL_DEVICE_ATTRIBUTE_UNIFIED_MEMORY,
+  MTL_DEVICE_ATTRIBUTE_WARP_SIZE,
+  MTL_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
+  MTL_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR,
+  MTL_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK,
+  MTL_DEVICE_ATTRIBUTE_CLOCK_RATE,
+  MTL_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK,
+  MTL_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY,
+  MTL_DEVICE_ATTRIBUTE_MAX_TRANSFER_RATE,
+  MTL_DEVICE_ATTRIBUTE_HEADLESS,
+  MTL_DEVICE_ATTRIBUTE_LOW_POWER,
+  MTL_DEVICE_ATTRIBUTE_REMOVABLE,
+  MTL_DEVICE_ATTRIBUTE_REGISTRY_ID,
+  MTL_DEVICE_ATTRIBUTE_PHYSICAL_LOCATION,
+  MTL_DEVICE_ATTRIBUTE_LOCATION_NUMBER,
+
+} metalDeviceAttribute_t;
+
+typedef enum metalDeviceLocation
+{
+  // MTLDeviceLocationBuiltIn
+  // The GPU is built into the device
+  MTL_DEVICE_LOCATION_BUILTIN = 0,
+
+  // MTLDeviceLocationSlot
+  // The GPU is connected to a slot inside the computer
+  MTL_DEVICE_LOCATION_SLOT = 1,
+
+  // MTLDeviceLocationExternal
+  // The GPU is connected via an external interface, such as Thunderbolt
+  MTL_DEVICE_LOCATION_EXTERNAL = 2,
+
+  // MTLDeviceLocationUnspecified
+  // The GPU's location is not specified or cannot be determined
+  MTL_DEVICE_LOCATION_UNSPECIFIED = 4294967295,
+
+} metalDeviceLocation_t;
+
+typedef struct hc_metal
+{
+  CFArrayRef devices;
+
+} hc_metal_t;
+
+typedef hc_metal_t MTL_PTR;
+
+int mtl_init (void *hashcat_ctx);
+void mtl_close (void *hashcat_ctx);
+
+int hc_mtlRuntimeGetVersionString (void *hashcat_ctx, char *runtimeVersion_str, size_t *size);
+
+int hc_mtlDeviceGetCount (void *hashcat_ctx, int *count);
+int hc_mtlDeviceGet (void *hashcat_ctx, mtl_device_id *metal_device, int ordinal);
+int hc_mtlDeviceGetName (void *hashcat_ctx, char *name, size_t len, mtl_device_id metal_device);
+int hc_mtlDeviceGetAttribute (void *hashcat_ctx, int *pi, metalDeviceAttribute_t attrib, mtl_device_id metal_device);
+int hc_mtlDeviceTotalMem (void *hashcat_ctx, size_t *bytes, mtl_device_id metal_device);
+int hc_mtlDeviceMaxMemAlloc (void *hashcat_ctx, size_t *bytes, mtl_device_id metal_device);
+int hc_mtlMemGetInfo (void *hashcat_ctx, size_t *mem_free, size_t *mem_total);
+
+int hc_mtlCreateCommandQueue (void *hashcat_ctx, mtl_device_id metal_device, mtl_command_queue *command_queue);
+int hc_mtlCreateBuffer (void *hashcat_ctx, mtl_device_id metal_device, size_t size, void *ptr, mtl_mem *metal_buffer);
+
+int hc_mtlCreateKernel (void *hashcat_ctx, mtl_device_id metal_device, mtl_library metal_library, const char *func_name, mtl_function *metal_function, mtl_pipeline *metal_pipeline);
+
+int hc_mtlGetMaxTotalThreadsPerThreadgroup (void *hashcat_ctx, mtl_pipeline metal_pipeline, unsigned int *maxTotalThreadsPerThreadgroup);
+int hc_mtlGetThreadExecutionWidth (void *hashcat_ctx, mtl_pipeline metal_pipeline, unsigned int *threadExecutionWidth);
+
+// copy buffer
+int hc_mtlMemcpyDtoD (void *hashcat_ctx, mtl_command_queue command_queue, mtl_mem buf_dst, size_t buf_dst_off, mtl_mem buf_src, size_t buf_src_off, size_t buf_size);
+// write
+int hc_mtlMemcpyHtoD (void *hashcat_ctx, mtl_command_queue command_queue, mtl_mem buf_dst, size_t buf_dst_off, const void *buf_src, size_t buf_size);
+// read
+int hc_mtlMemcpyDtoH (void *hashcat_ctx, mtl_command_queue command_queue, void *buf_dst, mtl_mem buf_src, size_t buf_src_off, size_t buf_size);
+
+int hc_mtlReleaseMemObject (void *hashcat_ctx, mtl_mem metal_buffer);
+int hc_mtlReleaseFunction (void *hashcat_ctx, mtl_function metal_function);
+int hc_mtlReleaseLibrary (void *hashcat_ctx, mtl_function metal_library);
+int hc_mtlReleaseCommandQueue (void *hashcat_ctx, mtl_command_queue command_queue);
+int hc_mtlReleaseDevice (void *hashcat_ctx, mtl_device_id metal_device);
+
+int hc_mtlCreateLibraryWithSource (void *hashcat_ctx, mtl_device_id metal_device, const char *kernel_sources, const char *build_options_buf, const char *include_path, mtl_library *metal_library);
+int hc_mtlCreateLibraryWithFile (void *hashcat_ctx, mtl_device_id metal_device, const char *cached_file, mtl_library *metal_library);
+
+int hc_mtlEncodeComputeCommand_pre (void *hashcat_ctx, mtl_pipeline metal_pipeline, mtl_command_queue metal_command_queue, mtl_command_buffer *metal_command_buffer, mtl_command_encoder *metal_command_encoder);
+int hc_mtlSetCommandEncoderArg (void *hashcat_ctx, mtl_command_encoder metal_command_encoder, size_t off, size_t idx, mtl_mem buf, void *host_data, size_t host_data_size);
+
+int hc_mtlEncodeComputeCommand (void *hashcat_ctx, mtl_command_encoder metal_command_encoder, mtl_command_buffer metal_command_buffer, size_t global_work_size, size_t local_work_size, double *ms);
+
+#endif // __APPLE__
+
+#endif // _EXT_METAL_H
diff --git a/include/types.h b/include/types.h
index 94d2f1505..9fe03e76d 100644
--- a/include/types.h
+++ b/include/types.h
@@ -660,6 +660,9 @@ typedef enum user_options_defaults
   NONCE_ERROR_CORRECTIONS = 8,
   BACKEND_IGNORE_CUDA = false,
   BACKEND_IGNORE_HIP = false,
+  #if defined (__APPLE__)
+  BACKEND_IGNORE_METAL = false,
+  #endif
   BACKEND_IGNORE_OPENCL = false,
   BACKEND_INFO = false,
   BACKEND_VECTOR_WIDTH = 0,
@@ -711,116 +714,117 @@ typedef enum user_options_map
   IDX_BACKEND_DEVICES = 'd',
   IDX_BACKEND_IGNORE_CUDA = 0xff01,
   IDX_BACKEND_IGNORE_HIP = 0xff02,
-  IDX_BACKEND_IGNORE_OPENCL = 0xff03,
+  IDX_BACKEND_IGNORE_METAL = 0xff03,
+  IDX_BACKEND_IGNORE_OPENCL = 0xff04,
   IDX_BACKEND_INFO = 'I',
-  IDX_BACKEND_VECTOR_WIDTH = 0xff04,
-  IDX_BENCHMARK_ALL = 0xff05,
+  IDX_BACKEND_VECTOR_WIDTH = 0xff05,
+  IDX_BENCHMARK_ALL = 0xff06,
   IDX_BENCHMARK = 'b',
-  IDX_BITMAP_MAX = 0xff06,
-  IDX_BITMAP_MIN = 0xff07,
+  IDX_BITMAP_MAX = 0xff07,
+  IDX_BITMAP_MIN = 0xff08,
   #ifdef WITH_BRAIN
   IDX_BRAIN_CLIENT = 'z',
-  IDX_BRAIN_CLIENT_FEATURES = 0xff08,
-  IDX_BRAIN_HOST = 0xff09,
-  IDX_BRAIN_PASSWORD = 0xff0a,
-  IDX_BRAIN_PORT = 0xff0b,
-  IDX_BRAIN_SERVER = 0xff0c,
-  IDX_BRAIN_SERVER_TIMER = 0xff0d,
-  IDX_BRAIN_SESSION = 0xff0e,
-  IDX_BRAIN_SESSION_WHITELIST = 0xff0f,
+  IDX_BRAIN_CLIENT_FEATURES = 0xff09,
+  IDX_BRAIN_HOST = 0xff0a,
+  IDX_BRAIN_PASSWORD = 0xff0b,
+  IDX_BRAIN_PORT = 0xff0c,
+  IDX_BRAIN_SERVER = 0xff0d,
+  IDX_BRAIN_SERVER_TIMER = 0xff0e,
+  IDX_BRAIN_SESSION = 0xff0f,
+  IDX_BRAIN_SESSION_WHITELIST = 0xff10,
   #endif
-  IDX_CPU_AFFINITY = 0xff10,
+  IDX_CPU_AFFINITY = 0xff11,
   IDX_CUSTOM_CHARSET_1 = '1',
   IDX_CUSTOM_CHARSET_2 = '2',
   IDX_CUSTOM_CHARSET_3 = '3',
   IDX_CUSTOM_CHARSET_4 = '4',
-  IDX_DEBUG_FILE = 0xff11,
-  IDX_DEBUG_MODE = 0xff12,
-  IDX_DEPRECATED_CHECK_DISABLE = 0xff13,
-  IDX_ENCODING_FROM = 0xff14,
-  IDX_ENCODING_TO = 0xff15,
-  IDX_HASH_INFO = 0xff16,
-  IDX_FORCE = 0xff17,
-  IDX_HWMON_DISABLE = 0xff18,
-  IDX_HWMON_TEMP_ABORT = 0xff19,
+  IDX_DEBUG_FILE = 0xff12,
+  IDX_DEBUG_MODE = 0xff13,
+  IDX_DEPRECATED_CHECK_DISABLE = 0xff14,
+  IDX_ENCODING_FROM = 0xff15,
+  IDX_ENCODING_TO = 0xff16,
+  IDX_HASH_INFO = 0xff17,
+  IDX_FORCE = 0xff18,
+  IDX_HWMON_DISABLE = 0xff19,
+  IDX_HWMON_TEMP_ABORT = 0xff1a,
   IDX_HASH_MODE = 'm',
-  IDX_HCCAPX_MESSAGE_PAIR = 0xff1a,
+  IDX_HCCAPX_MESSAGE_PAIR = 0xff1b,
   IDX_HELP = 'h',
-  IDX_HEX_CHARSET = 0xff1b,
-  IDX_HEX_SALT = 0xff1c,
-  IDX_HEX_WORDLIST = 0xff1d,
-  IDX_HOOK_THREADS = 0xff1e,
-  IDX_IDENTIFY = 0xff1f,
+  IDX_HEX_CHARSET = 0xff1c,
+  IDX_HEX_SALT = 0xff1d,
+  IDX_HEX_WORDLIST = 0xff1e,
+  IDX_HOOK_THREADS = 0xff1f,
+  IDX_IDENTIFY = 0xff20,
   IDX_INCREMENT = 'i',
-  IDX_INCREMENT_MAX = 0xff20,
-  IDX_INCREMENT_MIN = 0xff21,
-  IDX_INDUCTION_DIR = 0xff22,
-  IDX_KEEP_GUESSING = 0xff23,
+  IDX_INCREMENT_MAX = 0xff21,
+  IDX_INCREMENT_MIN = 0xff22,
+  IDX_INDUCTION_DIR = 0xff23,
+  IDX_KEEP_GUESSING = 0xff24,
   IDX_KERNEL_ACCEL = 'n',
   IDX_KERNEL_LOOPS = 'u',
   IDX_KERNEL_THREADS = 'T',
-  IDX_KEYBOARD_LAYOUT_MAPPING = 0xff24,
-  IDX_KEYSPACE = 0xff25,
-  IDX_LEFT = 0xff26,
+  IDX_KEYBOARD_LAYOUT_MAPPING = 0xff25,
+  IDX_KEYSPACE = 0xff26,
+  IDX_LEFT = 0xff27,
   IDX_LIMIT = 'l',
-  IDX_LOGFILE_DISABLE = 0xff27,
-  IDX_LOOPBACK = 0xff28,
-  IDX_MACHINE_READABLE = 0xff29,
-  IDX_MARKOV_CLASSIC = 0xff2a,
-  IDX_MARKOV_DISABLE = 0xff2b,
-  IDX_MARKOV_HCSTAT2 = 0xff2c,
-  IDX_MARKOV_INVERSE = 0xff2d,
+  IDX_LOGFILE_DISABLE = 0xff28,
+  IDX_LOOPBACK = 0xff29,
+  IDX_MACHINE_READABLE = 0xff2a,
+  IDX_MARKOV_CLASSIC = 0xff2b,
+  IDX_MARKOV_DISABLE = 0xff2c,
+  IDX_MARKOV_HCSTAT2 = 0xff2d,
+  IDX_MARKOV_INVERSE = 0xff2e,
   IDX_MARKOV_THRESHOLD = 't',
-  IDX_NONCE_ERROR_CORRECTIONS = 0xff2e,
+  IDX_NONCE_ERROR_CORRECTIONS = 0xff2f,
   IDX_OPENCL_DEVICE_TYPES = 'D',
   IDX_OPTIMIZED_KERNEL_ENABLE = 'O',
   IDX_MULTIPLY_ACCEL_DISABLE = 'M',
-  IDX_OUTFILE_AUTOHEX_DISABLE = 0xff2f,
-  IDX_OUTFILE_CHECK_DIR = 0xff30,
-  IDX_OUTFILE_CHECK_TIMER = 0xff31,
-  IDX_OUTFILE_FORMAT = 0xff32,
+  IDX_OUTFILE_AUTOHEX_DISABLE = 0xff30,
+  IDX_OUTFILE_CHECK_DIR = 0xff31,
+  IDX_OUTFILE_CHECK_TIMER = 0xff32,
+  IDX_OUTFILE_FORMAT = 0xff33,
   IDX_OUTFILE = 'o',
-  IDX_POTFILE_DISABLE = 0xff33,
-  IDX_POTFILE_PATH = 0xff34,
-  IDX_PROGRESS_ONLY = 0xff35,
-  IDX_QUIET = 0xff36,
-  IDX_REMOVE = 0xff37,
-  IDX_REMOVE_TIMER = 0xff38,
-  IDX_RESTORE = 0xff39,
-  IDX_RESTORE_DISABLE = 0xff3a,
-  IDX_RESTORE_FILE_PATH = 0xff3b,
+  IDX_POTFILE_DISABLE = 0xff34,
+  IDX_POTFILE_PATH = 0xff35,
+  IDX_PROGRESS_ONLY = 0xff36,
+  IDX_QUIET = 0xff37,
+  IDX_REMOVE = 0xff38,
+  IDX_REMOVE_TIMER = 0xff39,
+  IDX_RESTORE = 0xff3a,
+  IDX_RESTORE_DISABLE = 0xff3b,
+  IDX_RESTORE_FILE_PATH = 0xff3c,
   IDX_RP_FILE = 'r',
-  IDX_RP_GEN_FUNC_MAX = 0xff3c,
-  IDX_RP_GEN_FUNC_MIN = 0xff3d,
-  IDX_RP_GEN_FUNC_SEL = 0xff3e,
+  IDX_RP_GEN_FUNC_MAX = 0xff3d,
+  IDX_RP_GEN_FUNC_MIN = 0xff3e,
+  IDX_RP_GEN_FUNC_SEL = 0xff3f,
   IDX_RP_GEN = 'g',
-  IDX_RP_GEN_SEED = 0xff3f,
+  IDX_RP_GEN_SEED = 0xff40,
   IDX_RULE_BUF_L = 'j',
   IDX_RULE_BUF_R = 'k',
-  IDX_RUNTIME = 0xff40,
-  IDX_SCRYPT_TMTO = 0xff41,
+  IDX_RUNTIME = 0xff41,
+  IDX_SCRYPT_TMTO = 0xff42,
   IDX_SEGMENT_SIZE = 'c',
-  IDX_SELF_TEST_DISABLE = 0xff42,
+  IDX_SELF_TEST_DISABLE = 0xff43,
   IDX_SEPARATOR = 'p',
-  IDX_SESSION = 0xff43,
-  IDX_SHOW = 0xff44,
+  IDX_SESSION = 0xff44,
+  IDX_SHOW = 0xff45,
   IDX_SKIP = 's',
   IDX_SLOW_CANDIDATES = 'S',
-  IDX_SPEED_ONLY = 0xff45,
-  IDX_SPIN_DAMP = 0xff46,
-  IDX_STATUS = 0xff47,
-  IDX_STATUS_JSON = 0xff48,
-  IDX_STATUS_TIMER = 0xff49,
-  IDX_STDOUT_FLAG = 0xff4a,
-  IDX_STDIN_TIMEOUT_ABORT = 0xff4b,
-  IDX_TRUECRYPT_KEYFILES = 0xff4c,
-  IDX_USERNAME = 0xff4d,
-  IDX_VERACRYPT_KEYFILES = 0xff4e,
-  IDX_VERACRYPT_PIM_START = 0xff4f,
-  IDX_VERACRYPT_PIM_STOP = 0xff50,
+  IDX_SPEED_ONLY = 0xff46,
+  IDX_SPIN_DAMP = 0xff47,
+  IDX_STATUS = 0xff48,
+  IDX_STATUS_JSON = 0xff49,
+  IDX_STATUS_TIMER = 0xff4a,
+  IDX_STDOUT_FLAG = 0xff4b,
+  IDX_STDIN_TIMEOUT_ABORT = 0xff4c,
+  IDX_TRUECRYPT_KEYFILES = 0xff4d,
+  IDX_USERNAME = 0xff4e,
+  IDX_VERACRYPT_KEYFILES = 0xff4f,
+  IDX_VERACRYPT_PIM_START = 0xff50,
+  IDX_VERACRYPT_PIM_STOP = 0xff51,
   IDX_VERSION_LOWER = 'v',
   IDX_VERSION = 'V',
-  IDX_WORDLIST_AUTOHEX_DISABLE = 0xff51,
+  IDX_WORDLIST_AUTOHEX_DISABLE = 0xff52,
   IDX_WORKLOAD_PROFILE = 'w',
 
 } user_options_map_t;
@@ -1100,6 +1104,7 @@ typedef struct hc_fp
 #include "ext_cuda.h"
 #include "ext_hip.h"
 #include "ext_OpenCL.h"
+#include "ext_metal.h"
 
 typedef struct hc_device_param
 {
@@ -1601,6 +1606,129 @@ typedef struct hc_device_param
   hipDeviceptr_t hip_d_st_esalts_buf;
   hipDeviceptr_t hip_d_kernel_param;
 
+  // API: opencl and metal
+
+  bool is_apple_silicon;
+
+  // API: metal
+
+  bool is_metal;
+
+  #if defined (__APPLE__)
+
+  int mtl_major;
+  int mtl_minor;
+
+  int device_physical_location;
+  int device_location_number;
+  int device_registryID;
+  int device_max_transfer_rate;
+  int device_is_headless;
+  int device_is_low_power;
+  int device_is_removable;
+
+  int metal_warp_size;
+
+  mtl_device_id metal_device;
+  mtl_command_queue metal_command_queue;
+
+  mtl_library metal_library;
+  mtl_library metal_library_shared;
+  mtl_library metal_library_mp;
+  mtl_library metal_library_amp;
+
+  mtl_function metal_function1;
+  mtl_function metal_function12;
+  mtl_function metal_function2p;
+  mtl_function metal_function2;
+  mtl_function metal_function2e;
+  mtl_function metal_function23;
+  mtl_function metal_function3;
+  mtl_function metal_function4;
+  mtl_function metal_function_init2;
+  mtl_function metal_function_loop2p;
+  mtl_function metal_function_loop2;
+  mtl_function metal_function_mp;
+  mtl_function metal_function_mp_l;
+  mtl_function metal_function_mp_r;
+  mtl_function metal_function_amp;
+  mtl_function metal_function_tm;
+  mtl_function metal_function_memset;
+  mtl_function metal_function_bzero;
+  mtl_function metal_function_atinit;
+  mtl_function metal_function_utf8toutf16le;
+  mtl_function metal_function_decompress;
+  mtl_function metal_function_aux1;
+  mtl_function metal_function_aux2;
+  mtl_function metal_function_aux3;
+  mtl_function metal_function_aux4;
+
+  mtl_pipeline metal_pipeline1;
+  mtl_pipeline metal_pipeline12;
+  mtl_pipeline metal_pipeline2p;
+  mtl_pipeline metal_pipeline2;
+  mtl_pipeline metal_pipeline2e;
+  mtl_pipeline metal_pipeline23;
+  mtl_pipeline metal_pipeline3;
+  mtl_pipeline metal_pipeline4;
+  mtl_pipeline metal_pipeline_init2;
+  mtl_pipeline metal_pipeline_loop2p;
+  mtl_pipeline metal_pipeline_loop2;
+  mtl_pipeline metal_pipeline_mp;
+  mtl_pipeline metal_pipeline_mp_l;
+  mtl_pipeline metal_pipeline_mp_r;
+  mtl_pipeline metal_pipeline_amp;
+  mtl_pipeline metal_pipeline_tm;
+  mtl_pipeline metal_pipeline_memset;
+  mtl_pipeline metal_pipeline_bzero;
+  mtl_pipeline metal_pipeline_atinit;
+  mtl_pipeline metal_pipeline_utf8toutf16le;
+  mtl_pipeline metal_pipeline_decompress;
+  mtl_pipeline metal_pipeline_aux1;
+  mtl_pipeline metal_pipeline_aux2;
+  mtl_pipeline metal_pipeline_aux3;
+  mtl_pipeline metal_pipeline_aux4;
+
+  mtl_mem metal_d_pws_buf;
+  mtl_mem metal_d_pws_amp_buf;
+  mtl_mem metal_d_pws_comp_buf;
+  mtl_mem metal_d_pws_idx;
+  mtl_mem metal_d_rules;
+  mtl_mem metal_d_rules_c;
+  mtl_mem metal_d_combs;
+  mtl_mem metal_d_combs_c;
+  mtl_mem metal_d_bfs;
+  mtl_mem metal_d_bfs_c;
+  mtl_mem metal_d_tm_c;
+  mtl_mem metal_d_bitmap_s1_a;
+  mtl_mem metal_d_bitmap_s1_b;
+  mtl_mem metal_d_bitmap_s1_c;
+  mtl_mem metal_d_bitmap_s1_d;
+  mtl_mem metal_d_bitmap_s2_a;
+  mtl_mem metal_d_bitmap_s2_b;
+  mtl_mem metal_d_bitmap_s2_c;
+  mtl_mem metal_d_bitmap_s2_d;
+  mtl_mem metal_d_plain_bufs;
+  mtl_mem metal_d_digests_buf;
+  mtl_mem metal_d_digests_shown;
+  mtl_mem metal_d_salt_bufs;
+  mtl_mem metal_d_esalt_bufs;
+  mtl_mem metal_d_tmps;
+  mtl_mem metal_d_hooks;
+  mtl_mem metal_d_result;
+  mtl_mem metal_d_extra0_buf;
+  mtl_mem metal_d_extra1_buf;
+  mtl_mem metal_d_extra2_buf;
+  mtl_mem metal_d_extra3_buf;
+  mtl_mem metal_d_root_css_buf;
+  mtl_mem metal_d_markov_css_buf;
+  mtl_mem metal_d_st_digests_buf;
+  mtl_mem metal_d_st_salts_buf;
+  mtl_mem metal_d_st_esalts_buf;
+  mtl_mem metal_d_kernel_param;
+
+  #endif // __APPLE__
+
   // API: opencl
 
   bool is_opencl;
@@ -1708,6 +1836,7 @@ typedef struct backend_ctx
 
   void *cuda;
   void *hip;
+  void *mtl;
   void *ocl;
 
   void *nvrtc;
@@ -1715,6 +1844,7 @@
   int backend_device_from_cuda[DEVICES_MAX]; // from cuda device index to backend device index
   int backend_device_from_hip[DEVICES_MAX]; // from hip device index to backend device index
+  int backend_device_from_metal[DEVICES_MAX]; // from metal device index to backend device index
   int backend_device_from_opencl[DEVICES_MAX]; // from opencl device index to backend device index
   int backend_device_from_opencl_platform[CL_PLATFORMS_MAX][DEVICES_MAX]; // from opencl device index to backend device index (by platform)
 
@@ -1725,6 +1855,8 @@
   int cuda_devices_cnt;
   int cuda_devices_active;
   int hip_devices_cnt;
   int hip_devices_active;
+  int metal_devices_cnt;
+  int metal_devices_active;
   int opencl_devices_cnt;
   int opencl_devices_active;
@@ -1766,6 +1898,13 @@
   int hip_runtimeVersion;
   int hip_driverVersion;
 
+  // metal
+
+  int rc_metal_init;
+
+  unsigned int metal_runtimeVersion;
+  char *metal_runtimeVersionStr;
+
   // opencl
 
   cl_platform_id *opencl_platforms;
@@ -2169,6 +2308,7 @@ typedef struct user_options
   bool markov_inverse;
   bool backend_ignore_cuda;
   bool backend_ignore_hip;
+  bool backend_ignore_metal;
   bool backend_ignore_opencl;
   bool backend_info;
   bool optimized_kernel_enable;
diff --git a/src/Makefile b/src/Makefile
index 4a5d419da..a26f5598c 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -331,7 +331,11 @@
 CFLAGS_NATIVE += -DMISSING_CLOCK_GETTIME
 endif
 
 LFLAGS_NATIVE := $(LFLAGS)
+LFLAGS_NATIVE += -framework CoreFoundation
+LFLAGS_NATIVE += -framework CoreGraphics
+LFLAGS_NATIVE += -framework Foundation
 LFLAGS_NATIVE += -framework IOKit
+LFLAGS_NATIVE += -framework Metal
 LFLAGS_NATIVE += -lpthread
 LFLAGS_NATIVE += -liconv
@@ -385,6 +389,10 @@ EMU_OBJS_ALL += emu_inc_cipher_aes emu_inc_cipher_camellia emu_inc_ci
 
 OBJS_ALL := affinity autotune backend benchmark bitmap bitops combinator common convert cpt cpu_crc32 debugfile dictstat dispatch dynloader event ext_ADL ext_cuda ext_hip ext_nvapi ext_nvml ext_nvrtc ext_hiprtc ext_OpenCL ext_sysfs_amdgpu ext_sysfs_cpu ext_iokit ext_lzma filehandling folder hashcat hashes hlfmt hwmon induct interface keyboard_layout locking logfile loopback memory monitor mpsp outfile_check outfile pidfile potfile restore rp rp_cpu selftest slow_candidates shared status stdout straight terminal thread timer tuningdb usage user_options wordlist $(EMU_OBJS_ALL)
 
+ifeq ($(UNAME),Darwin)
+OBJS_ALL += ext_metal
+endif
+
 ifeq ($(ENABLE_BRAIN),1)
 OBJS_ALL += brain
 endif
@@ -585,6 +593,9 @@ uninstall:
 obj/%.NATIVE.o: src/%.c
 	$(CC) -c $(CCFLAGS) $(CFLAGS_NATIVE) $< -o $@ -fpic
 
+obj/%.NATIVE.o: src/%.m
+	$(CC) -c $(CCFLAGS) $(CFLAGS_NATIVE) $< -o $@ -fpic
+
 ifeq ($(USE_SYSTEM_LZMA),0)
 obj/%.LZMA.NATIVE.o: $(DEPS_LZMA_PATH)/%.c
 	$(CC) -c $(CCFLAGS) $(CFLAGS_NATIVE) $(CFLAGS_LZMA) $< -o $@ -fpic
diff --git a/src/autotune.c b/src/autotune.c
index bcbdd0eb3..55c829802 100644
--- a/src/autotune.c
+++ b/src/autotune.c
@@ -238,6 +238,13 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
     if (run_hip_kernel_atinit (hashcat_ctx, device_param, device_param->hip_d_pws_buf, kernel_power_max) == -1) return -1;
   }
 
+  #if defined (__APPLE__)
+  if (device_param->is_metal == true)
+  {
+    if (run_metal_kernel_atinit (hashcat_ctx, device_param, device_param->metal_d_pws_buf, kernel_power_max) == -1) return -1;
+  }
+  #endif
+
   if (device_param->is_opencl == true)
   {
     if (run_opencl_kernel_atinit (hashcat_ctx, device_param, device_param->opencl_d_pws_buf, kernel_power_max) == -1) return -1;
@@ -264,6 +271,13 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
       if (hc_hipMemcpyDtoDAsync (hashcat_ctx, device_param->hip_d_rules_c, device_param->hip_d_rules, MIN (kernel_loops_max, KERNEL_RULES) * sizeof (kernel_rule_t), device_param->hip_stream) == -1) return -1;
     }
 
+    #if defined (__APPLE__)
+    if (device_param->is_metal == true)
+    {
+      if (hc_mtlMemcpyDtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_rules_c, 0, device_param->metal_d_rules, 0, MIN (kernel_loops_max, KERNEL_RULES) * sizeof (kernel_rule_t)) == -1) return -1;
+    }
+    #endif
+
     if (device_param->is_opencl == true)
     {
       if (hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_rules, device_param->opencl_d_rules_c, 0, 0, MIN (kernel_loops_max, KERNEL_RULES) * sizeof (kernel_rule_t), 0, NULL, NULL) == -1) return -1;
@@ -477,6 +491,17 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
     if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_tmps, device_param->size_tmps) == -1) return -1;
   }
 
+  #if defined (__APPLE__)
+  if (device_param->is_metal == true)
+  {
+    if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_pws_buf, device_param->size_pws) == -1) return -1;
+    if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_plain_bufs, device_param->size_plains) == -1) return -1;
+    if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_digests_shown, device_param->size_shown) == -1) return -1;
+    if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_result, device_param->size_results) == -1) return -1;
+    if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_tmps, device_param->size_tmps) == -1) return -1;
+  }
+  #endif
+
   if (device_param->is_opencl == true)
   {
     if (run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_pws_buf, device_param->size_pws) == -1) return -1;
diff --git a/src/backend.c b/src/backend.c
index 0aa098130..bfd092931 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -65,6 +65,12 @@ static bool is_same_device (const hc_device_param_t *src, const hc_device_param_
 
   if ((src->is_hip == true) && (dst->is_hip == true)) return false;
 
+  #if defined (__APPLE__)
+  // Metal can't have aliases
+
+  if ((src->is_metal == true) && (dst->is_metal == true)) return false;
+  #endif
+
   // But OpenCL can have aliases
 
   if ((src->is_opencl == true) && (dst->is_opencl == true))
@@ -131,7 +137,13 @@ static int backend_ctx_find_alias_devices (hashcat_ctx_t *hashcat_ctx)
 
       if (alias_device->is_hip == true) continue;
 
-      // this lets native OpenCL runtime survive over generic OpenCL runtime
+      #if defined (__APPLE__)
+      // this lets Metal devices survive over OpenCL
+
+      if (alias_device->is_metal == true) continue;
+      #endif
+
+      // this lets native OpenCL runtime survive over generic OpenCL runtime
 
       if (alias_device->opencl_device_type & CL_DEVICE_TYPE_CPU)
       {
@@ -164,6 +176,9 @@ static bool is_same_device_type (const hc_device_param_t *src, const hc_device_p
 {
   if (src->is_cuda != dst->is_cuda) return false;
   if (src->is_hip != dst->is_hip) return false;
+  #if defined (__APPLE__)
+  if (src->is_metal != dst->is_metal) return false;
+  #endif
   if (src->is_opencl != dst->is_opencl) return false;
 
   if (strcmp (src->device_name, dst->device_name) != 0) return false;
@@ -655,7 +670,7 @@ void generate_source_kernel_filename (const bool slow_candidates, const u32 atta
   }
 }
 
-void generate_cached_kernel_filename (const bool slow_candidates, const u32 attack_exec, const u32 attack_kern, const u32 kern_type, const u32 opti_type, char *cache_dir, const char *device_name_chksum, char *cached_file)
+void generate_cached_kernel_filename (const bool slow_candidates, const u32 attack_exec, const u32 attack_kern, const u32 kern_type, const u32 opti_type, char *cache_dir, const char *device_name_chksum, char *cached_file, bool is_metal)
 {
   if (opti_type & OPTI_TYPE_OPTIMIZED_KERNEL)
   {
@@ -663,23 +678,23 @@ void generate_cached_kernel_filename (const bool slow_candidates, const u32 atta
     {
       if (slow_candidates == true)
       {
-        snprintf (cached_file, 255, "%s/kernels/m%05d_a0-optimized.%s.kernel", cache_dir, (int) kern_type, device_name_chksum);
+        snprintf (cached_file, 255, "%s/kernels/m%05d_a0-optimized.%s.%s", cache_dir, (int) kern_type, device_name_chksum, (is_metal == true) ? "metallib" : "kernel");
       }
       else
       {
         if (attack_kern == ATTACK_KERN_STRAIGHT)
-          snprintf (cached_file, 255, "%s/kernels/m%05d_a0-optimized.%s.kernel", cache_dir, (int) kern_type, device_name_chksum);
+          snprintf (cached_file, 255, "%s/kernels/m%05d_a0-optimized.%s.%s", cache_dir, (int) kern_type, device_name_chksum, (is_metal == true) ? "metallib" : "kernel");
         else if (attack_kern == ATTACK_KERN_COMBI)
-          snprintf (cached_file, 255, "%s/kernels/m%05d_a1-optimized.%s.kernel", cache_dir, (int) kern_type, device_name_chksum);
+          snprintf (cached_file, 255, "%s/kernels/m%05d_a1-optimized.%s.%s", cache_dir, (int) kern_type, device_name_chksum, (is_metal == true) ? "metallib" : "kernel");
        else if (attack_kern == ATTACK_KERN_BF)
-          snprintf (cached_file, 255, "%s/kernels/m%05d_a3-optimized.%s.kernel", cache_dir, (int) kern_type, device_name_chksum);
+          snprintf (cached_file, 255, "%s/kernels/m%05d_a3-optimized.%s.%s", cache_dir, (int) kern_type, device_name_chksum, (is_metal == true) ? "metallib" : "kernel");
        else if (attack_kern == ATTACK_KERN_NONE)
-          snprintf (cached_file, 255, "%s/kernels/m%05d_a0-optimized.%s.kernel", cache_dir, (int) kern_type, device_name_chksum);
      }
     }
     else
     {
-      snprintf (cached_file, 255, "%s/kernels/m%05d-optimized.%s.kernel", cache_dir, (int) kern_type, device_name_chksum);
"metallib" : "kernel"); } } else @@ -688,23 +703,23 @@ void generate_cached_kernel_filename (const bool slow_candidates, const u32 atta { if (slow_candidates == true) { - snprintf (cached_file, 255, "%s/kernels/m%05d_a0-pure.%s.kernel", cache_dir, (int) kern_type, device_name_chksum); + snprintf (cached_file, 255, "%s/kernels/m%05d_a0-pure.%s.%s", cache_dir, (int) kern_type, device_name_chksum, (is_metal == true) ? "metallib" : "kernel"); } else { if (attack_kern == ATTACK_KERN_STRAIGHT) - snprintf (cached_file, 255, "%s/kernels/m%05d_a0-pure.%s.kernel", cache_dir, (int) kern_type, device_name_chksum); + snprintf (cached_file, 255, "%s/kernels/m%05d_a0-pure.%s.%s", cache_dir, (int) kern_type, device_name_chksum, (is_metal == true) ? "metallib" : "kernel"); else if (attack_kern == ATTACK_KERN_COMBI) - snprintf (cached_file, 255, "%s/kernels/m%05d_a1-pure.%s.kernel", cache_dir, (int) kern_type, device_name_chksum); + snprintf (cached_file, 255, "%s/kernels/m%05d_a1-pure.%s.%s", cache_dir, (int) kern_type, device_name_chksum, (is_metal == true) ? "metallib" : "kernel"); else if (attack_kern == ATTACK_KERN_BF) - snprintf (cached_file, 255, "%s/kernels/m%05d_a3-pure.%s.kernel", cache_dir, (int) kern_type, device_name_chksum); + snprintf (cached_file, 255, "%s/kernels/m%05d_a3-pure.%s.%s", cache_dir, (int) kern_type, device_name_chksum, (is_metal == true) ? "metallib" : "kernel"); else if (attack_kern == ATTACK_KERN_NONE) - snprintf (cached_file, 255, "%s/kernels/m%05d_a0-pure.%s.kernel", cache_dir, (int) kern_type, device_name_chksum); + snprintf (cached_file, 255, "%s/kernels/m%05d_a0-pure.%s.%s", cache_dir, (int) kern_type, device_name_chksum, (is_metal == true) ? "metallib" : "kernel"); } } else { - snprintf (cached_file, 255, "%s/kernels/m%05d-pure.%s.kernel", cache_dir, (int) kern_type, device_name_chksum); + snprintf (cached_file, 255, "%s/kernels/m%05d-pure.%s.%s", cache_dir, (int) kern_type, device_name_chksum, (is_metal == true) ? "metallib" : "kernel"); } } } @@ -714,9 +729,9 @@ void generate_source_kernel_shared_filename (char *shared_dir, char *source_file snprintf (source_file, 255, "%s/OpenCL/shared.cl", shared_dir); } -void generate_cached_kernel_shared_filename (char *cache_dir, const char *device_name_chksum_amp_mp, char *cached_file) +void generate_cached_kernel_shared_filename (char *cache_dir, const char *device_name_chksum_amp_mp, char *cached_file, bool is_metal) { - snprintf (cached_file, 255, "%s/kernels/shared.%s.kernel", cache_dir, device_name_chksum_amp_mp); + snprintf (cached_file, 255, "%s/kernels/shared.%s.%s", cache_dir, device_name_chksum_amp_mp, (is_metal == true) ? "metallib" : "kernel"); } void generate_source_kernel_mp_filename (const u32 opti_type, const u64 opts_type, char *shared_dir, char *source_file) @@ -731,15 +746,15 @@ void generate_source_kernel_mp_filename (const u32 opti_type, const u64 opts_typ } } -void generate_cached_kernel_mp_filename (const u32 opti_type, const u64 opts_type, char *cache_dir, const char *device_name_chksum_amp_mp, char *cached_file) +void generate_cached_kernel_mp_filename (const u32 opti_type, const u64 opts_type, char *cache_dir, const char *device_name_chksum_amp_mp, char *cached_file, bool is_metal) { if ((opti_type & OPTI_TYPE_BRUTE_FORCE) && (opts_type & OPTS_TYPE_PT_GENERATE_BE)) { - snprintf (cached_file, 255, "%s/kernels/markov_be.%s.kernel", cache_dir, device_name_chksum_amp_mp); + snprintf (cached_file, 255, "%s/kernels/markov_be.%s.%s", cache_dir, device_name_chksum_amp_mp, (is_metal == true) ? 
"metallib" : "kernel"); } else { - snprintf (cached_file, 255, "%s/kernels/markov_le.%s.kernel", cache_dir, device_name_chksum_amp_mp); + snprintf (cached_file, 255, "%s/kernels/markov_le.%s.%s", cache_dir, device_name_chksum_amp_mp, (is_metal == true) ? "metallib" : "kernel"); } } @@ -748,9 +763,9 @@ void generate_source_kernel_amp_filename (const u32 attack_kern, char *shared_di snprintf (source_file, 255, "%s/OpenCL/amp_a%u.cl", shared_dir, attack_kern); } -void generate_cached_kernel_amp_filename (const u32 attack_kern, char *cache_dir, const char *device_name_chksum_amp_mp, char *cached_file) +void generate_cached_kernel_amp_filename (const u32 attack_kern, char *cache_dir, const char *device_name_chksum_amp_mp, char *cached_file, bool is_metal) { - snprintf (cached_file, 255, "%s/kernels/amp_a%u.%s.kernel", cache_dir, attack_kern, device_name_chksum_amp_mp); + snprintf (cached_file, 255, "%s/kernels/amp_a%u.%s.%s", cache_dir, attack_kern, device_name_chksum_amp_mp, (is_metal == true) ? "metallib" : "kernel"); } int gidd_to_pw_t (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 gidd, pw_t *pw) @@ -779,6 +794,13 @@ int gidd_to_pw_t (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, c if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyDtoH (hashcat_ctx, device_param->metal_command_queue, &pw_idx, device_param->metal_d_pws_idx, gidd * sizeof (pw_idx_t), sizeof (pw_idx_t)) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { /* blocking */ @@ -805,6 +827,13 @@ int gidd_to_pw_t (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, c if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyDtoH (hashcat_ctx, device_param->metal_command_queue, pw->i, device_param->metal_d_pws_comp_buf, off * sizeof (u32), cnt * sizeof (u32)) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { /* blocking */ @@ -868,6 +897,13 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_tm_c, size_tm) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_tm_c, size_tm) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { if (run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_tm_c, size_tm) == -1) return -1; @@ -885,6 +921,13 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, if (hc_hipMemcpyDtoDAsync (hashcat_ctx, device_param->hip_d_bfs_c, device_param->hip_d_tm_c, size_tm, device_param->hip_stream) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyDtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_bfs_c, 0, device_param->metal_d_tm_c, 0, size_tm) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { if (hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_tm_c, device_param->opencl_d_bfs_c, 0, 0, size_tm, 0, NULL, NULL) == -1) return -1; @@ -953,6 +996,13 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, if (hc_hipMemcpyDtoDAsync (hashcat_ctx, 
       if (hc_hipMemcpyDtoDAsync (hashcat_ctx, device_param->hip_d_pws_buf, device_param->hip_d_pws_amp_buf, pws_cnt * sizeof (pw_t), device_param->hip_stream) == -1) return -1;
     }
 
+    #if defined (__APPLE__)
+    if (device_param->is_metal == true)
+    {
+      if (hc_mtlMemcpyDtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_pws_buf, 0, device_param->metal_d_pws_amp_buf, 0, pws_cnt * sizeof (pw_t)) == -1) return -1;
+    }
+    #endif
+
     if (device_param->is_opencl == true)
     {
       if (hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_amp_buf, device_param->opencl_d_pws_buf, 0, 0, pws_cnt * sizeof (pw_t), 0, NULL, NULL) == -1) return -1;
@@ -978,6 +1028,13 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
       if (run_hip_kernel_utf8toutf16le (hashcat_ctx, device_param, device_param->hip_d_pws_buf, pws_cnt) == -1) return -1;
     }
 
+    #if defined (__APPLE__)
+    if (device_param->is_metal == true)
+    {
+      if (run_metal_kernel_utf8toutf16le (hashcat_ctx, device_param, device_param->metal_d_pws_buf, pws_cnt) == -1) return -1;
+    }
+    #endif
+
     if (device_param->is_opencl == true)
     {
       if (run_opencl_kernel_utf8toutf16le (hashcat_ctx, device_param, device_param->opencl_d_pws_buf, pws_cnt) == -1) return -1;
@@ -1004,6 +1061,13 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
         if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1;
       }
 
+      #if defined (__APPLE__)
+      if (device_param->is_metal == true)
+      {
+        if (hc_mtlMemcpyDtoH (hashcat_ctx, device_param->metal_command_queue, device_param->hooks_buf, device_param->metal_d_hooks, 0, pws_cnt * hashconfig->hook_size) == -1) return -1;
+      }
+      #endif
+
       if (device_param->is_opencl == true)
       {
         /* blocking */
@@ -1052,6 +1116,13 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
         if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_hooks, device_param->hooks_buf, pws_cnt * hashconfig->hook_size, device_param->hip_stream) == -1) return -1;
       }
 
+      #if defined (__APPLE__)
+      if (device_param->is_metal == true)
+      {
+        if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_hooks, 0, device_param->hooks_buf, pws_cnt * hashconfig->hook_size) == -1) return -1;
+      }
+      #endif
+
       if (device_param->is_opencl == true)
       {
         if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_FALSE, 0, pws_cnt * hashconfig->hook_size, device_param->hooks_buf, 0, NULL, NULL) == -1) return -1;
@@ -1150,6 +1221,13 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
         if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1;
       }
 
+      #if defined (__APPLE__)
+      if (device_param->is_metal == true)
+      {
+        if (hc_mtlMemcpyDtoH (hashcat_ctx, device_param->metal_command_queue, device_param->hooks_buf, device_param->metal_d_hooks, 0, pws_cnt * hashconfig->hook_size) == -1) return -1;
+      }
+      #endif
+
       if (device_param->is_opencl == true)
       {
        /* blocking */
@@ -1198,6 +1276,13 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
         if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_hooks, device_param->hooks_buf, pws_cnt * hashconfig->hook_size, device_param->hip_stream) == -1) return -1;
       }
 
+      #if defined (__APPLE__)
+      if (device_param->is_metal == true)
+      {
+        if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_hooks, 0, device_param->hooks_buf, pws_cnt * hashconfig->hook_size) == -1) return -1;
+      }
+      #endif
+
       if (device_param->is_opencl == true)
       {
         if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_FALSE, 0, pws_cnt * hashconfig->hook_size, device_param->hooks_buf, 0, NULL, NULL) == -1) return -1;
@@ -1372,6 +1457,13 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
       if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_hooks, pws_cnt * hashconfig->hook_size) == -1) return -1;
     }
 
+    #if defined (__APPLE__)
+    if (device_param->is_metal == true)
+    {
+      if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_hooks, pws_cnt * hashconfig->hook_size) == -1) return -1;
+    }
+    #endif
+
     if (device_param->is_opencl == true)
     {
       if (run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_hooks, pws_cnt * hashconfig->hook_size) == -1) return -1;
@@ -1591,6 +1683,140 @@ int run_hip_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_
   return 0;
 }
 
+#if defined (__APPLE__)
+int run_metal_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, id buf, const u64 num)
+{
+  u64 num_elements = num;
+
+  device_param->kernel_params_atinit_buf64[1] = num_elements;
+
+  const u64 kernel_threads = device_param->kernel_wgs_atinit;
+
+  num_elements = round_up_multiple_32 (num_elements, kernel_threads);
+
+  const size_t global_work_size[3] = { num_elements, 1, 1 };
+  const size_t local_work_size[3] = { kernel_threads, 1, 1 };
+
+  id metal_command_buffer = NULL;
+  id metal_command_encoder = NULL;
+
+  if (hc_mtlEncodeComputeCommand_pre (hashcat_ctx, device_param->metal_pipeline_atinit, device_param->metal_command_queue, &metal_command_buffer, &metal_command_encoder) == -1) return -1;
+
+  if (hc_mtlSetCommandEncoderArg (hashcat_ctx, metal_command_encoder, 0, 0, buf, NULL, 0) == -1) return -1;
+  if (hc_mtlSetCommandEncoderArg (hashcat_ctx, metal_command_encoder, 0, 1, NULL, device_param->kernel_params_atinit[1], sizeof (u64)) == -1) return -1;
+
+  double ms = 0;
+
+  if (hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, global_work_size[0], local_work_size[0], &ms) == -1) return -1;
+
+  return 0;
+}
+
+int run_metal_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, id buf, const u64 num)
+{
+  u64 num_elements = num;
+
+  device_param->kernel_params_utf8toutf16le_buf64[1] = num_elements;
+
+  const u64 kernel_threads = device_param->kernel_wgs_utf8toutf16le;
+
+  num_elements = round_up_multiple_32 (num_elements, kernel_threads);
+
+  const size_t global_work_size[3] = { num_elements, 1, 1 };
+  const size_t local_work_size[3] = { kernel_threads, 1, 1 };
+
+  id metal_command_buffer = NULL;
+  id metal_command_encoder = NULL;
+
+  if (hc_mtlEncodeComputeCommand_pre (hashcat_ctx, device_param->metal_pipeline_utf8toutf16le, device_param->metal_command_queue, &metal_command_buffer, &metal_command_encoder) == -1) return -1;
+
+  if (hc_mtlSetCommandEncoderArg (hashcat_ctx, metal_command_encoder, 0, 0, buf, NULL, 0) == -1) return -1;
+  if (hc_mtlSetCommandEncoderArg (hashcat_ctx, metal_command_encoder, 0, 1, NULL, device_param->kernel_params_utf8toutf16le[1], sizeof (u64)) == -1) return -1;
+
+  double ms = 0;
+
+  if (hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, global_work_size[0], local_work_size[0], &ms) == -1) return -1;
+
+  return 0;
+}
+
+int run_metal_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, id buf, const u64 size)
+{
+  const u64 num16d = size / 16;
+  const u64 num16m = size % 16;
+
+  // with apple GPU clEnqueueWriteBuffer() return CL_INVALID_VALUE, workaround
+
+  if (num16d)
+  {
+    const u64 kernel_threads = device_param->kernel_wgs_bzero;
+
+    u64 num_elements = round_up_multiple_32 (num16d, kernel_threads);
+
+    id metal_command_buffer = NULL;
+    id metal_command_encoder = NULL;
+
+    if (hc_mtlEncodeComputeCommand_pre (hashcat_ctx, device_param->metal_pipeline_bzero, device_param->metal_command_queue, &metal_command_buffer, &metal_command_encoder) == -1) return -1;
+
+    if (hc_mtlSetCommandEncoderArg (hashcat_ctx, metal_command_encoder, 0, 0, buf, NULL, 0) == -1) return -1;
+    if (hc_mtlSetCommandEncoderArg (hashcat_ctx, metal_command_encoder, 0, 1, NULL, (void *) &num16d, sizeof (u64)) == -1) return -1;
+
+    const size_t global_work_size[3] = { num_elements, 1, 1 };
+    const size_t local_work_size[3] = { kernel_threads, 1, 1 };
+
+    double ms = 0;
+
+    if (hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, global_work_size[0], local_work_size[0], &ms) == -1) return -1;
+  }
+
+  if (num16m)
+  {
+    if (device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE && \
+        (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK || device_param->opencl_device_vendor_id == VENDOR_ID_APPLE) && \
+        device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
+    {
+      u8 *bzeros_apple = (u8 *) hccalloc (num16m, sizeof (u8));
+
+      if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, buf, num16d * 16, bzeros_apple, num16m) == -1) return -1;
+
+      hcfree (bzeros_apple);
+    }
+    else
+    {
+      if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, buf, num16d * 16, bzeros, num16m) == -1) return -1;
+    }
+  }
+
+  return 0;
+}
+
+int run_metal_kernel_memset32 (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, mtl_mem buf, const u64 offset, const u32 value, const u64 size)
+{
+  int rc;
+
+  const u64 N = size / 4;
+
+  /* check that the size is multiple of element size */
+  if (size % 4 != 0)
+  {
+    return CL_INVALID_VALUE;
+  }
+
+  u32 *tmp = (u32 *) hcmalloc (size);
+
+  for (u64 i = 0; i < N; i++)
+  {
+    tmp[i] = value;
+  }
+
+  rc = hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, buf, offset, tmp, size);
+
+  hcfree (tmp);
+
+  return rc;
+}
+#endif // __APPLE__
+
 int run_opencl_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 num)
 {
   u64 num_elements = num;
@@ -2037,6 +2263,137 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
     }
   }
 
+  #if defined (__APPLE__)
+  if (device_param->is_metal == true)
+  {
+    mtl_command_encoder metal_command_encoder = NULL;
+    mtl_command_buffer metal_command_buffer = NULL;
+    mtl_pipeline metal_pipeline = NULL;
+
+    switch (kern_run)
+    {
+      case KERN_RUN_1: metal_pipeline = device_param->metal_pipeline1; break;
+      case KERN_RUN_12: metal_pipeline = device_param->metal_pipeline12; break;
+      case KERN_RUN_2P: metal_pipeline = device_param->metal_pipeline2p; break;
+      case KERN_RUN_2: metal_pipeline = device_param->metal_pipeline2; break;
+      case KERN_RUN_2E: metal_pipeline = device_param->metal_pipeline2e; break;
+      case KERN_RUN_23: metal_pipeline = device_param->metal_pipeline23; break;
+      case KERN_RUN_3: metal_pipeline = device_param->metal_pipeline3; break;
+      case KERN_RUN_4: metal_pipeline = device_param->metal_pipeline4; break;
+      case KERN_RUN_INIT2: metal_pipeline = device_param->metal_pipeline_init2; break;
+      case KERN_RUN_LOOP2P: metal_pipeline = device_param->metal_pipeline_loop2p; break;
+      case KERN_RUN_LOOP2: metal_pipeline = device_param->metal_pipeline_loop2; break;
+      case KERN_RUN_AUX1: metal_pipeline = device_param->metal_pipeline_aux1; break;
+      case KERN_RUN_AUX2: metal_pipeline = device_param->metal_pipeline_aux2; break;
+      case KERN_RUN_AUX3: metal_pipeline = device_param->metal_pipeline_aux3; break;
+      case KERN_RUN_AUX4: metal_pipeline = device_param->metal_pipeline_aux4; break;
+    }
+
+    if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_kernel_param, 0, &device_param->kernel_param, device_param->size_kernel_params) == -1) return -1;
+
+    if (hc_mtlEncodeComputeCommand_pre (hashcat_ctx, metal_pipeline, device_param->metal_command_queue, &metal_command_buffer, &metal_command_encoder) == -1) return -1;
+
+    // all buffers must be allocated
+    int tmp_buf_cnt = 0;
+    mtl_mem tmp_buf[25] = { 0 };
+
+    for (u32 i = 0; i <= 24; i++)
+    {
+      // allocate fake buffer if NULL
+      if (device_param->kernel_params[i] == NULL)
+      {
+        if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, sizeof (u8), NULL, &tmp_buf[tmp_buf_cnt]) == -1) return -1;
+        if (hc_mtlSetCommandEncoderArg (hashcat_ctx, metal_command_encoder, 0, i, tmp_buf[tmp_buf_cnt], NULL, 0) == -1) return -1;
+        tmp_buf_cnt++;
+      }
+      else
+      {
+        if (hc_mtlSetCommandEncoderArg (hashcat_ctx, metal_command_encoder, 0, i, device_param->kernel_params[i], NULL, 0) == -1) return -1;
+      }
+    }
+
+    if (kernel_threads == 0) kernel_threads = 1;
+
+    num_elements = round_up_multiple_32 (num_elements, kernel_threads);
+
+    if (kern_run == KERN_RUN_1)
+    {
+      if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_INIT)
+      {
+        num_elements = CEILDIV (num_elements, device_param->vector_width);
+      }
+    }
+    else if (kern_run == KERN_RUN_2)
+    {
+      if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_LOOP)
+      {
+        num_elements = CEILDIV (num_elements, device_param->vector_width);
+      }
+    }
+    else if (kern_run == KERN_RUN_3)
+    {
+      if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_COMP)
+      {
+        num_elements = CEILDIV (num_elements, device_param->vector_width);
+      }
+    }
+    else if (kern_run == KERN_RUN_INIT2)
+    {
+      if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_INIT2)
+      {
+        num_elements = CEILDIV (num_elements, device_param->vector_width);
+      }
+    }
+    else if (kern_run == KERN_RUN_LOOP2)
+    {
+      if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_LOOP2)
+      {
+        num_elements = CEILDIV (num_elements, device_param->vector_width);
+      }
+    }
+
+    num_elements = round_up_multiple_32 (num_elements, kernel_threads);
+
+    const size_t global_work_size[3] = { num_elements, 1, 1 };
+    const size_t local_work_size[3] = { kernel_threads, 1, 1 };
+
+    double ms = 0;
+
+    const int rc_cc = hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, global_work_size[0], local_work_size[0], &ms);
+
+    if (rc_cc != -1)
+    {
+      float exec_ms = (float) ms;
+
+      if (event_update)
+      {
+        u32 exec_pos = device_param->exec_pos;
+
+        device_param->exec_msec[exec_pos] = exec_ms;
+
+        exec_pos++;
+
+        if (exec_pos == EXEC_CACHE)
+        {
+          exec_pos = 0;
+        }
+
+        device_param->exec_pos = exec_pos;
+      }
+    }
+
+    // release tmp_buf
+
+    for (int i = 0; i < tmp_buf_cnt; i++)
+    {
+      hc_mtlReleaseMemObject (hashcat_ctx, tmp_buf[i]);
+      tmp_buf[i] = NULL;
+    }
+
+    if (rc_cc == -1) return -1;
+  }
+  #endif // __APPLE__
+
   if (device_param->is_opencl == true)
   {
     cl_kernel opencl_kernel = NULL;
@@ -2294,32 +2651,108 @@ int run_kernel_mp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
     if (hc_hipLaunchKernel (hashcat_ctx, hip_function, num_elements, 1, 1, kernel_threads, 1, 1, 0, device_param->hip_stream, hip_args, NULL) == -1) return -1;
   }
 
-  if (device_param->is_opencl == true)
+  #if defined (__APPLE__)
+  if (device_param->is_metal == true)
   {
-    cl_kernel opencl_kernel = NULL;
+    id metal_command_encoder = NULL;
+    id metal_command_buffer = NULL;
+    id metal_pipeline = NULL;
 
     switch (kern_run)
     {
-      case KERN_RUN_MP: opencl_kernel = device_param->opencl_kernel_mp; break;
-      case KERN_RUN_MP_R: opencl_kernel = device_param->opencl_kernel_mp_r; break;
-      case KERN_RUN_MP_L: opencl_kernel = device_param->opencl_kernel_mp_l; break;
+      case KERN_RUN_MP: metal_pipeline = device_param->metal_pipeline_mp; break;
+      case KERN_RUN_MP_R: metal_pipeline = device_param->metal_pipeline_mp_r; break;
+      case KERN_RUN_MP_L: metal_pipeline = device_param->metal_pipeline_mp_l; break;
     }
 
-    switch (kern_run)
+    if (hc_mtlEncodeComputeCommand_pre (hashcat_ctx, metal_pipeline, device_param->metal_command_queue, &metal_command_buffer, &metal_command_encoder) == -1) return -1;
+
+    if (kern_run == KERN_RUN_MP)
     {
-      case KERN_RUN_MP: if (hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 3, sizeof (cl_ulong), device_param->kernel_params_mp[3]) == -1) return -1;
-                        if (hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 4, sizeof (cl_uint), device_param->kernel_params_mp[4]) == -1) return -1;
-                        if (hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 5, sizeof (cl_uint), device_param->kernel_params_mp[5]) == -1) return -1;
-                        if (hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 6, sizeof (cl_uint), device_param->kernel_params_mp[6]) == -1) return -1;
-                        if (hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 7, sizeof (cl_uint), device_param->kernel_params_mp[7]) == -1) return -1;
-                        if (hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 8, sizeof (cl_ulong), device_param->kernel_params_mp[8]) == -1) return -1;
-                        break;
-      case KERN_RUN_MP_R: if (hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 3, sizeof (cl_ulong), device_param->kernel_params_mp_r[3]) == -1) return -1;
-                          if (hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 4, sizeof (cl_uint), device_param->kernel_params_mp_r[4]) == -1) return -1;
-                          if (hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 5, sizeof (cl_uint), device_param->kernel_params_mp_r[5]) == -1) return -1;
-                          if (hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 6, sizeof (cl_uint), device_param->kernel_params_mp_r[6]) == -1) return -1;
-                          if (hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 7, sizeof (cl_uint), device_param->kernel_params_mp_r[7]) == -1) return -1;
-                          if (hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 8, sizeof (cl_ulong), device_param->kernel_params_mp_r[8]) == -1) return -1;
+      for (int i = 0; i < 3; i++)
+      {
+        if (hc_mtlSetCommandEncoderArg (hashcat_ctx, metal_command_encoder, 0, i, device_param->kernel_params_mp[i], NULL, 0) == -1) return -1;
+      }
+
+      if (hc_mtlSetCommandEncoderArg (hashcat_ctx, metal_command_encoder, 0, 3, NULL, device_param->kernel_params_mp[3], sizeof (u64)) == -1) return -1;
+
+      for (int i = 4; i < 8; i++)
+      {
+        if (hc_mtlSetCommandEncoderArg (hashcat_ctx, metal_command_encoder, 0, i, NULL, device_param->kernel_params_mp[i], sizeof (u32)) == -1) return -1;
+      }
+
+      if (hc_mtlSetCommandEncoderArg (hashcat_ctx, metal_command_encoder, 0, 8, NULL, device_param->kernel_params_mp[8], sizeof (u64)) == -1) return -1;
+    }
+    else if (kern_run == KERN_RUN_MP_R)
+    {
+      for (int i = 0; i < 3; i++)
+      {
+        if (hc_mtlSetCommandEncoderArg (hashcat_ctx, metal_command_encoder, 0, i, device_param->kernel_params_mp_r[i], NULL, 0) == -1) return -1;
+      }
+
+      if (hc_mtlSetCommandEncoderArg (hashcat_ctx, metal_command_encoder, 0, 3, NULL, device_param->kernel_params_mp_r[3], sizeof (u64)) == -1) return -1;
+
+      for (int i = 4; i < 8; i++)
+      {
+        if (hc_mtlSetCommandEncoderArg (hashcat_ctx, metal_command_encoder, 0, i, NULL, device_param->kernel_params_mp_r[i], sizeof (u32)) == -1) return -1;
+      }
+
+      if (hc_mtlSetCommandEncoderArg (hashcat_ctx, metal_command_encoder, 0, 8, NULL, device_param->kernel_params_mp_r[8], sizeof (u64)) == -1) return -1;
+    }
+    else if (kern_run == KERN_RUN_MP_L)
+    {
+      for (int i = 0; i < 3; i++)
+      {
+        if (hc_mtlSetCommandEncoderArg (hashcat_ctx, metal_command_encoder, 0, i, device_param->kernel_params_mp_l[i], NULL, 0) == -1) return -1;
+      }
+
+      if (hc_mtlSetCommandEncoderArg (hashcat_ctx, metal_command_encoder, 0, 3, NULL, device_param->kernel_params_mp_l[3], sizeof (u64)) == -1) return -1;
+
+      for (int i = 4; i < 9; i++)
+      {
+        if (hc_mtlSetCommandEncoderArg (hashcat_ctx, metal_command_encoder, 0, i, NULL, device_param->kernel_params_mp_l[i], sizeof (u32)) == -1) return -1;
+      }
+
+      if (hc_mtlSetCommandEncoderArg (hashcat_ctx, metal_command_encoder, 0, 9, NULL, device_param->kernel_params_mp_l[9], sizeof (u64)) == -1) return -1;
+    }
+
+    num_elements = round_up_multiple_32 (num_elements, kernel_threads);
+
+    const size_t global_work_size[3] = { num_elements, 1, 1 };
+    const size_t local_work_size[3] = { kernel_threads, 1, 1 };
+
+    double ms = 0;
+
+    if (hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, global_work_size[0], local_work_size[0], &ms) == -1) return -1;
+  }
+  #endif // __APPLE__
+
+  if (device_param->is_opencl == true)
+  {
+    cl_kernel opencl_kernel = NULL;
+
+    switch (kern_run)
+    {
+      case KERN_RUN_MP: opencl_kernel = device_param->opencl_kernel_mp; break;
+      case KERN_RUN_MP_R: opencl_kernel = device_param->opencl_kernel_mp_r; break;
+      case KERN_RUN_MP_L: opencl_kernel = device_param->opencl_kernel_mp_l; break;
+    }
+
+    switch (kern_run)
+    {
+      case KERN_RUN_MP: if (hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 3, sizeof (cl_ulong), device_param->kernel_params_mp[3]) == -1) return -1;
+                        if (hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 4, sizeof (cl_uint), device_param->kernel_params_mp[4]) == -1) return -1;
+                        if (hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 5, sizeof (cl_uint), device_param->kernel_params_mp[5]) == -1) return -1;
+                        if (hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 6, sizeof (cl_uint), device_param->kernel_params_mp[6]) == -1) return -1;
+                        if (hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 7, sizeof (cl_uint), device_param->kernel_params_mp[7]) == -1) return -1;
+                        if (hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 8, sizeof (cl_ulong), device_param->kernel_params_mp[8]) == -1) return -1;
+                        break;
+      case KERN_RUN_MP_R: if (hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 3, sizeof (cl_ulong), device_param->kernel_params_mp_r[3]) == -1) return -1;
+                          if (hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 4, sizeof (cl_uint), device_param->kernel_params_mp_r[4]) == -1) return -1;
+                          if (hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 5, sizeof (cl_uint), device_param->kernel_params_mp_r[5]) == -1) return -1;
+                          if (hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 6, sizeof (cl_uint), device_param->kernel_params_mp_r[6]) == -1) return -1;
+                          if (hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 7, sizeof (cl_uint), device_param->kernel_params_mp_r[7]) == -1) return -1;
+                          if (hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 8, sizeof (cl_ulong), device_param->kernel_params_mp_r[8]) == -1) return -1;
                           break;
       case KERN_RUN_MP_L: if (hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 3, sizeof (cl_ulong), device_param->kernel_params_mp_l[3]) == -1) return -1;
                           if (hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 4, sizeof (cl_uint), device_param->kernel_params_mp_l[4]) == -1) return -1;
@@ -2362,6 +2795,29 @@ int run_kernel_tm (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
     if (hc_hipLaunchKernel (hashcat_ctx, hip_function, num_elements / kernel_threads, 1, 1, kernel_threads, 1, 1, 0, device_param->hip_stream, device_param->kernel_params_tm, NULL) == -1) return -1;
   }
 
+  #if defined (__APPLE__)
+  if (device_param->is_metal == true)
+  {
+    const size_t global_work_size[3] = { num_elements, 1, 1 };
+    const size_t local_work_size[3] = { kernel_threads, 1, 1 };
+
+    id metal_command_encoder = NULL;
+    id metal_command_buffer = NULL;
+    id metal_pipeline = device_param->metal_pipeline_tm;
+
+    if (hc_mtlEncodeComputeCommand_pre (hashcat_ctx, metal_pipeline, device_param->metal_command_queue, &metal_command_buffer, &metal_command_encoder) == -1) return -1;
+
+    for (int i = 0; i < 2; i++)
+    {
+      if (hc_mtlSetCommandEncoderArg (hashcat_ctx, metal_command_encoder, 0, i, device_param->kernel_params_tm[i], NULL, 0) == -1) return -1;
+    }
+
+    double ms = 0;
+
+    if (hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, global_work_size[0], local_work_size[0], &ms) == -1) return -1;
+  }
+  #endif // __APPLE__
+
   if (device_param->is_opencl == true)
   {
     cl_kernel cuda_kernel = device_param->opencl_kernel_tm;
@@ -2401,6 +2857,62 @@ int run_kernel_amp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
     if (hc_hipLaunchKernel (hashcat_ctx, hip_function, num_elements, 1, 1, kernel_threads, 1, 1, 0, device_param->hip_stream, device_param->kernel_params_amp, NULL) == -1) return -1;
   }
 
+  #if defined (__APPLE__)
+  if (device_param->is_metal == true)
+  {
+    num_elements = round_up_multiple_32 (num_elements, kernel_threads);
+
+    const size_t global_work_size[3] = { num_elements, 1, 1 };
+    const size_t local_work_size[3] = { kernel_threads, 1, 1 };
+
+    id metal_command_encoder = NULL;
+    id metal_command_buffer = NULL;
+    id metal_pipeline = device_param->metal_pipeline_amp;
+
+    if (hc_mtlEncodeComputeCommand_pre (hashcat_ctx, metal_pipeline, device_param->metal_command_queue, &metal_command_buffer, &metal_command_encoder) == -1) return -1;
+
+    // all buffers must be allocated
+    int tmp_buf_cnt = 0;
+
+    mtl_mem tmp_buf[5] = { 0 };
+
+    for (int i = 0; i < 5; i++)
+    {
+      // allocate fake buffer if NULL
+      if (device_param->kernel_params_amp[i] == NULL)
+      {
+        if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, sizeof (u8), NULL, &tmp_buf[tmp_buf_cnt]) == -1) return -1;
+
+        if (hc_mtlSetCommandEncoderArg (hashcat_ctx, metal_command_encoder, 0, i, tmp_buf[tmp_buf_cnt], NULL, 0) == -1) return -1;
+
+        tmp_buf_cnt++;
+      }
+      else
+      {
+        if (hc_mtlSetCommandEncoderArg (hashcat_ctx, metal_command_encoder, 0, i, device_param->kernel_params_amp[i], NULL, 0) == -1) return -1;
+      }
+    }
+
+    if (hc_mtlSetCommandEncoderArg (hashcat_ctx, metal_command_encoder, 0, 5, NULL, device_param->kernel_params_amp[5], sizeof (u32)) == -1) return -1;
+    if (hc_mtlSetCommandEncoderArg (hashcat_ctx, metal_command_encoder, 0, 6, NULL, device_param->kernel_params_amp[6], sizeof (u64)) == -1) return -1;
+
+    double ms = 0;
+
+    const int rc_cc = hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder,
metal_command_buffer, global_work_size[0], local_work_size[0], &ms); + + // release tmp_buf + + for (int i = 0; i < tmp_buf_cnt; i++) + { + hc_mtlReleaseMemObject (hashcat_ctx, tmp_buf[i]); + + tmp_buf[i] = NULL; + } + + if (rc_cc == -1) return -1; + } + #endif // __APPLE__ + if (device_param->is_opencl == true) { num_elements = round_up_multiple_64 (num_elements, kernel_threads); @@ -2444,6 +2956,32 @@ int run_kernel_decompress (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device if (hc_hipLaunchKernel (hashcat_ctx, hip_function, num_elements, 1, 1, kernel_threads, 1, 1, 0, device_param->hip_stream, device_param->kernel_params_decompress, NULL) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + num_elements = round_up_multiple_32 (num_elements, kernel_threads); + + const size_t global_work_size[3] = { num_elements, 1, 1 }; + const size_t local_work_size[3] = { kernel_threads, 1, 1 }; + + id metal_command_buffer = NULL; + id metal_command_encoder = NULL; + + if (hc_mtlEncodeComputeCommand_pre (hashcat_ctx, device_param->metal_pipeline_decompress, device_param->metal_command_queue, &metal_command_buffer, &metal_command_encoder) == -1) return -1; + + for (int i = 0; i < 3; i++) + { + if (hc_mtlSetCommandEncoderArg (hashcat_ctx, metal_command_encoder, 0, i, device_param->kernel_params_decompress[i], NULL, 0) == -1) return -1; + } + + if (hc_mtlSetCommandEncoderArg (hashcat_ctx, metal_command_encoder, 0, 3, NULL, device_param->kernel_params_decompress[3], sizeof (u64)) == -1) return -1; + + double ms = 0; + + if (hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, global_work_size[0], local_work_size[0], &ms) == -1) return -1; + } + #endif // __APPLE__ + if (device_param->is_opencl == true) { num_elements = round_up_multiple_64 (num_elements, kernel_threads); @@ -2512,6 +3050,22 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const } } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_pws_idx, 0, device_param->pws_idx, pws_cnt * sizeof (pw_idx_t)) == -1) return -1; + + const pw_idx_t *pw_idx = device_param->pws_idx + pws_cnt; + + const u32 off = pw_idx->off; + + if (off) + { + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_pws_comp_buf, 0, device_param->pws_comp, off * sizeof (u32)) == -1) return -1; + } + } + #endif + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_idx, CL_FALSE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL) == -1) return -1; @@ -2560,6 +3114,22 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const } } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_pws_idx, 0, device_param->pws_idx, pws_cnt * sizeof (pw_idx_t)) == -1) return -1; + + const pw_idx_t *pw_idx = device_param->pws_idx + pws_cnt; + + const u32 off = pw_idx->off; + + if (off) + { + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_pws_comp_buf, 0, device_param->pws_comp, off * sizeof (u32)) == -1) return -1; + } + } + #endif + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, 
device_param->opencl_d_pws_idx, CL_FALSE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL) == -1) return -1; @@ -2642,6 +3212,22 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const } } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_pws_idx, 0, device_param->pws_idx, pws_cnt * sizeof (pw_idx_t)) == -1) return -1; + + const pw_idx_t *pw_idx = device_param->pws_idx + pws_cnt; + + const u32 off = pw_idx->off; + + if (off) + { + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_pws_comp_buf, 0, device_param->pws_comp, off * sizeof (u32)) == -1) return -1; + } + } + #endif + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_idx, CL_FALSE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL) == -1) return -1; @@ -2690,6 +3276,22 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const } } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_pws_idx, 0, device_param->pws_idx, pws_cnt * sizeof (pw_idx_t)) == -1) return -1; + + const pw_idx_t *pw_idx = device_param->pws_idx + pws_cnt; + + const u32 off = pw_idx->off; + + if (off) + { + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_pws_comp_buf, 0, device_param->pws_comp, off * sizeof (u32)) == -1) return -1; + } + } + #endif + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_idx, CL_FALSE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL) == -1) return -1; @@ -2736,6 +3338,22 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const } } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_pws_idx, 0, device_param->pws_idx, pws_cnt * sizeof (pw_idx_t)) == -1) return -1; + + const pw_idx_t *pw_idx = device_param->pws_idx + pws_cnt; + + const u32 off = pw_idx->off; + + if (off) + { + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_pws_comp_buf, 0, device_param->pws_comp, off * sizeof (u32)) == -1) return -1; + } + } + #endif + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_idx, CL_FALSE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL) == -1) return -1; @@ -2967,6 +3585,13 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co if (hc_hipMemcpyDtoDAsync (hashcat_ctx, device_param->hip_d_rules_c, device_param->hip_d_rules + (innerloop_pos * sizeof (kernel_rule_t)), innerloop_left * sizeof (kernel_rule_t), device_param->hip_stream) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyDtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_rules_c, 0, device_param->metal_d_rules, innerloop_pos * sizeof (kernel_rule_t), innerloop_left * sizeof (kernel_rule_t)) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { if (hc_clEnqueueCopyBuffer 
(hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_rules, device_param->opencl_d_rules_c, innerloop_pos * sizeof (kernel_rule_t), 0, innerloop_left * sizeof (kernel_rule_t), 0, NULL, NULL) == -1) return -1; @@ -3092,6 +3717,13 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_combs_c, device_param->combs_buf, innerloop_left * sizeof (pw_t), device_param->hip_stream) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_combs_c, 0, device_param->combs_buf, innerloop_left * sizeof (pw_t)) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_combs_c, CL_FALSE, 0, innerloop_left * sizeof (pw_t), device_param->combs_buf, 0, NULL, NULL) == -1) return -1; @@ -3115,6 +3747,13 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co if (hc_hipMemcpyDtoDAsync (hashcat_ctx, device_param->hip_d_combs_c, device_param->hip_d_combs, innerloop_left * sizeof (pw_t), device_param->hip_stream) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyDtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_combs_c, 0, device_param->metal_d_combs, 0, innerloop_left * sizeof (pw_t)) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { if (hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_combs, device_param->opencl_d_combs_c, 0, 0, innerloop_left * sizeof (pw_t), 0, NULL, NULL) == -1) return -1; @@ -3138,6 +3777,13 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co if (hc_hipMemcpyDtoDAsync (hashcat_ctx, device_param->hip_d_combs_c, device_param->hip_d_combs, innerloop_left * sizeof (pw_t), device_param->hip_stream) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyDtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_combs_c, 0, device_param->metal_d_combs, 0, innerloop_left * sizeof (pw_t)) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { if (hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_combs, device_param->opencl_d_combs_c, 0, 0, innerloop_left * sizeof (pw_t), 0, NULL, NULL) == -1) return -1; @@ -3264,6 +3910,13 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_combs_c, device_param->combs_buf, innerloop_left * sizeof (pw_t), device_param->hip_stream) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_combs_c, 0, device_param->combs_buf, innerloop_left * sizeof (pw_t)) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_combs_c, CL_FALSE, 0, innerloop_left * sizeof (pw_t), device_param->combs_buf, 0, NULL, NULL) == -1) return -1; @@ -3287,6 +3940,13 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co if (hc_hipMemcpyDtoDAsync (hashcat_ctx, 
device_param->hip_d_combs_c, device_param->hip_d_combs, innerloop_left * sizeof (pw_t), device_param->hip_stream) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyDtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_combs_c, 0, device_param->metal_d_combs, 0, innerloop_left * sizeof (pw_t)) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { if (hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_combs, device_param->opencl_d_combs_c, 0, 0, innerloop_left * sizeof (pw_t), 0, NULL, NULL) == -1) return -1; @@ -3312,6 +3972,13 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co if (hc_hipMemcpyDtoDAsync (hashcat_ctx, device_param->hip_d_bfs_c, device_param->hip_d_bfs, innerloop_left * sizeof (bf_t), device_param->hip_stream) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyDtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_bfs_c, 0, device_param->metal_d_bfs, 0, innerloop_left * sizeof (bf_t)) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { if (hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bfs, device_param->opencl_d_bfs_c, 0, 0, innerloop_left * sizeof (bf_t), 0, NULL, NULL) == -1) return -1; @@ -3720,6 +4387,44 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx) } } + /** + * Init Metal runtime + */ + + int rc_metal_init = -1; + + #if defined (__APPLE__) + if (user_options->backend_ignore_metal == false) + { + MTL_PTR *mtl = (MTL_PTR *) hcmalloc (sizeof (MTL_PTR)); + + backend_ctx->mtl = mtl; + + rc_metal_init = mtl_init (hashcat_ctx); + + if (rc_metal_init == 0) + { + size_t version_len = 0; + + if (hc_mtlRuntimeGetVersionString (hashcat_ctx, NULL, &version_len) == -1) return -1; + + if (version_len == 0) return -1; + + backend_ctx->metal_runtimeVersionStr = (char *) hcmalloc (version_len + 1); + + if (hc_mtlRuntimeGetVersionString (hashcat_ctx, backend_ctx->metal_runtimeVersionStr, &version_len) == -1) return -1; + } + else + { + rc_metal_init = -1; + + backend_ctx->rc_metal_init = rc_metal_init; + + mtl_close (hashcat_ctx); + } + } + #endif // __APPLE__ + /** * Load and map OpenCL library calls */ @@ -3743,9 +4448,9 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx) * return if both CUDA and OpenCL initialization failed */ - if ((rc_cuda_init == -1) && (rc_hip_init == -1) && (rc_ocl_init == -1)) + if ((rc_cuda_init == -1) && (rc_hip_init == -1) && (rc_ocl_init == -1) && (rc_metal_init == -1)) { - event_log_error (hashcat_ctx, "ATTENTION! No OpenCL, HIP or CUDA installation found."); + event_log_error (hashcat_ctx, "ATTENTION! 
No OpenCL, Metal, HIP or CUDA installation found."); event_log_warning (hashcat_ctx, "You are probably missing the CUDA, HIP or OpenCL runtime installation."); event_log_warning (hashcat_ctx, NULL); @@ -3973,7 +4678,7 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx) if (IGNORE_DEVICE_NOT_FOUND) { - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + //backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; OCL_PTR *ocl = (OCL_PTR *) backend_ctx->ocl; @@ -4068,9 +4773,13 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx) * Final checks */ - if ((backend_ctx->cuda == NULL) && (backend_ctx->hip == NULL) && (backend_ctx->ocl == NULL)) + if ((backend_ctx->cuda == NULL) && (backend_ctx->hip == NULL) && (backend_ctx->ocl == NULL) && (backend_ctx->mtl == NULL)) { - event_log_error (hashcat_ctx, "ATTENTION! No OpenCL-compatible, HIP-compatible or CUDA-compatible platform found."); + #if defined (__APPLE__) + event_log_error (hashcat_ctx, "ATTENTION! No OpenCL, Metal, HIP or CUDA compatible platform found."); + #else + event_log_error (hashcat_ctx, "ATTENTION! No OpenCL, HIP or CUDA compatible platform found."); + #endif event_log_warning (hashcat_ctx, "You are probably missing the OpenCL, CUDA or HIP runtime installation."); event_log_warning (hashcat_ctx, NULL); @@ -4187,9 +4896,9 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) device_param->is_cuda = true; device_param->is_hip = false; + device_param->is_metal = false; device_param->is_opencl = false; - device_param->use_opencl12 = false; device_param->use_opencl20 = false; device_param->use_opencl21 = false; @@ -4442,16 +5151,22 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) if (device_param->sm_major < 5) { - if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: This hardware has outdated CUDA compute capability (%u.%u).", device_id + 1, device_param->sm_major, device_param->sm_minor); - if (user_options->quiet == false) event_log_warning (hashcat_ctx, " For modern OpenCL performance, upgrade to hardware that supports"); - if (user_options->quiet == false) event_log_warning (hashcat_ctx, " CUDA compute capability version 5.0 (Maxwell) or higher."); + if (user_options->quiet == false) + { + event_log_warning (hashcat_ctx, "* Device #%u: This hardware has outdated CUDA compute capability (%u.%u).", device_id + 1, device_param->sm_major, device_param->sm_minor); + event_log_warning (hashcat_ctx, " For modern OpenCL performance, upgrade to hardware that supports"); + event_log_warning (hashcat_ctx, " CUDA compute capability version 5.0 (Maxwell) or higher."); + } } if (device_param->kernel_exec_timeout != 0) { - if (user_options->quiet == false) event_log_advice (hashcat_ctx, "* Device #%u: WARNING! Kernel exec timeout is not disabled.", device_id + 1); - if (user_options->quiet == false) event_log_advice (hashcat_ctx, " This may cause \"CL_OUT_OF_RESOURCES\" or related errors."); - if (user_options->quiet == false) event_log_advice (hashcat_ctx, " To disable the timeout, see: https://hashcat.net/q/timeoutpatch"); + if (user_options->quiet == false) + { + event_log_advice (hashcat_ctx, "* Device #%u: WARNING! 
Kernel exec timeout is not disabled.", device_id + 1); + event_log_advice (hashcat_ctx, " This may cause \"CL_OUT_OF_RESOURCES\" or related errors."); + event_log_advice (hashcat_ctx, " To disable the timeout, see: https://hashcat.net/q/timeoutpatch"); + } } } @@ -4566,6 +5281,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) device_param->is_cuda = false; device_param->is_hip = true; + device_param->is_metal = false; device_param->is_opencl = false; device_param->use_opencl12 = false; @@ -4826,16 +5542,22 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) if (device_param->sm_major < 5) { - if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: This hardware has outdated CUDA compute capability (%u.%u).", device_id + 1, device_param->sm_major, device_param->sm_minor); - if (user_options->quiet == false) event_log_warning (hashcat_ctx, " For modern OpenCL performance, upgrade to hardware that supports"); - if (user_options->quiet == false) event_log_warning (hashcat_ctx, " CUDA compute capability version 5.0 (Maxwell) or higher."); + if (user_options->quiet == false) + { + event_log_warning (hashcat_ctx, "* Device #%u: This hardware has outdated CUDA compute capability (%u.%u).", device_id + 1, device_param->sm_major, device_param->sm_minor); + event_log_warning (hashcat_ctx, " For modern OpenCL performance, upgrade to hardware that supports"); + event_log_warning (hashcat_ctx, " CUDA compute capability version 5.0 (Maxwell) or higher."); + } } if (device_param->kernel_exec_timeout != 0) { - if (user_options->quiet == false) event_log_advice (hashcat_ctx, "* Device #%u: WARNING! Kernel exec timeout is not disabled.", device_id + 1); - if (user_options->quiet == false) event_log_advice (hashcat_ctx, " This may cause \"CL_OUT_OF_RESOURCES\" or related errors."); - if (user_options->quiet == false) event_log_advice (hashcat_ctx, " To disable the timeout, see: https://hashcat.net/q/timeoutpatch"); + if (user_options->quiet == false) + { + event_log_advice (hashcat_ctx, "* Device #%u: WARNING! 
Kernel exec timeout is not disabled.", device_id + 1); + event_log_advice (hashcat_ctx, " This may cause \"CL_OUT_OF_RESOURCES\" or related errors."); + event_log_advice (hashcat_ctx, " To disable the timeout, see: https://hashcat.net/q/timeoutpatch"); + } } } @@ -4903,4243 +5625,5050 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) backend_ctx->hip_devices_cnt = hip_devices_cnt; backend_ctx->hip_devices_active = hip_devices_active; - // OCL + // Metal - int opencl_devices_cnt = 0; - int opencl_devices_active = 0; + int metal_devices_cnt = 0; + int metal_devices_active = 0; - if (backend_ctx->ocl) + #if defined (__APPLE__) + if (backend_ctx->mtl) { - /** - * OpenCL devices: simply push all devices from all platforms into the same device array - */ - - cl_uint opencl_platforms_cnt = backend_ctx->opencl_platforms_cnt; - cl_device_id **opencl_platforms_devices = backend_ctx->opencl_platforms_devices; - cl_uint *opencl_platforms_devices_cnt = backend_ctx->opencl_platforms_devices_cnt; - cl_uint *opencl_platforms_vendor_id = backend_ctx->opencl_platforms_vendor_id; - char **opencl_platforms_version = backend_ctx->opencl_platforms_version; + // device count - for (u32 opencl_platforms_idx = 0; opencl_platforms_idx < opencl_platforms_cnt; opencl_platforms_idx++) + if (hc_mtlDeviceGetCount (hashcat_ctx, &metal_devices_cnt) == -1) { - cl_device_id *opencl_platform_devices = opencl_platforms_devices[opencl_platforms_idx]; - cl_uint opencl_platform_devices_cnt = opencl_platforms_devices_cnt[opencl_platforms_idx]; - cl_uint opencl_platform_vendor_id = opencl_platforms_vendor_id[opencl_platforms_idx]; - char *opencl_platform_version = opencl_platforms_version[opencl_platforms_idx]; + mtl_close (hashcat_ctx); + } - for (u32 opencl_platform_devices_idx = 0; opencl_platform_devices_idx < opencl_platform_devices_cnt; opencl_platform_devices_idx++, backend_devices_idx++, opencl_devices_cnt++) - { - const u32 device_id = backend_devices_idx; + backend_ctx->metal_devices_cnt = metal_devices_cnt; - hc_device_param_t *device_param = &devices_param[device_id]; + // device specific - device_param->device_id = device_id; + for (int metal_devices_idx = 0; metal_devices_idx < metal_devices_cnt; metal_devices_idx++, backend_devices_idx++) + { + const u32 device_id = backend_devices_idx; - backend_ctx->backend_device_from_opencl[opencl_devices_cnt] = backend_devices_idx; + hc_device_param_t *device_param = &devices_param[backend_devices_idx]; - backend_ctx->backend_device_from_opencl_platform[opencl_platforms_idx][opencl_platform_devices_idx] = backend_devices_idx; + device_param->device_id = device_id; - device_param->opencl_platform_vendor_id = opencl_platform_vendor_id; + backend_ctx->backend_device_from_metal[metal_devices_idx] = backend_devices_idx; - device_param->opencl_device = opencl_platform_devices[opencl_platform_devices_idx]; + mtl_device_id metal_device = NULL; - //device_param->opencl_platform = opencl_platform; + if (hc_mtlDeviceGet (hashcat_ctx, &metal_device, metal_devices_idx) == -1) + { + device_param->skipped = true; + continue; + } - device_param->is_cuda = false; - device_param->is_hip = false; - device_param->is_opencl = true; + device_param->metal_device = metal_device; - // store opencl platform i + device_param->is_cuda = false; + device_param->is_hip = false; + device_param->is_metal = true; + device_param->is_opencl = false; - device_param->opencl_platform_id = opencl_platforms_idx; + device_param->use_opencl12 = false; + device_param->use_opencl20 = false; 
+ device_param->use_opencl21 = false; - // check OpenCL version + device_param->is_apple_silicon = is_apple_silicon(); - device_param->use_opencl12 = false; - device_param->use_opencl20 = false; - device_param->use_opencl21 = false; + // some attributes have to be hardcoded values because they are used for instance in the build options - int opencl_version_min = 0; - int opencl_version_maj = 0; + device_param->device_local_mem_type = CL_LOCAL; + device_param->opencl_device_type = CL_DEVICE_TYPE_GPU; + device_param->opencl_device_vendor_id = VENDOR_ID_APPLE; + device_param->opencl_platform_vendor_id = VENDOR_ID_APPLE; - if (sscanf (opencl_platform_version, "OpenCL %d.%d", &opencl_version_min, &opencl_version_maj) == 2) - { - if ((opencl_version_min == 1) && (opencl_version_maj == 2)) - { - device_param->use_opencl12 = true; - } - else if ((opencl_version_min == 2) && (opencl_version_maj == 0)) - { - device_param->use_opencl20 = true; - } - else if ((opencl_version_min == 2) && (opencl_version_maj == 1)) - { - device_param->use_opencl21 = true; - } - } + // or in the cached kernel checksum - size_t param_value_size = 0; + device_param->opencl_device_version = ""; + device_param->opencl_driver_version = ""; - // opencl_device_type + // or just to make sure they are not NULL - cl_device_type opencl_device_type; + device_param->opencl_device_vendor = strdup ("Apple"); + device_param->opencl_device_c_version = ""; - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_TYPE, sizeof (opencl_device_type), &opencl_device_type, NULL) == -1) - { - device_param->skipped = true; - continue; - } + // sm_minor, sm_major - opencl_device_type &= ~CL_DEVICE_TYPE_DEFAULT; + int mtl_major = 0; + int mtl_minor = 0; - device_param->opencl_device_type = opencl_device_type; + if (hc_mtlDeviceGetAttribute (hashcat_ctx, &mtl_major, MTL_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, metal_device) == -1) + { + device_param->skipped = true; + continue; + } - // device_name + if (hc_mtlDeviceGetAttribute (hashcat_ctx, &mtl_minor, MTL_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, metal_device) == -1) + { + device_param->skipped = true; + continue; + } - // try CL_DEVICE_BOARD_NAME_AMD first, if it fails fall back to CL_DEVICE_NAME - // since AMD ROCm does not identify itself at this stage we simply check for return code from clGetDeviceInfo() + device_param->mtl_major = mtl_major; + device_param->mtl_minor = mtl_minor; - #define CHECK_BOARD_NAME_AMD 1 + // device_name - cl_int rc_board_name_amd = CL_INVALID_VALUE; + char *device_name = (char *) hcmalloc (HCBUFSIZ_TINY); - if (CHECK_BOARD_NAME_AMD) - { - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + if (hc_mtlDeviceGetName (hashcat_ctx, device_name, HCBUFSIZ_TINY, metal_device) == -1) + { + device_param->skipped = true; + hcfree (device_name); + continue; + } - OCL_PTR *ocl = (OCL_PTR *) backend_ctx->ocl; + device_param->device_name = device_name; - rc_board_name_amd = ocl->clGetDeviceInfo (device_param->opencl_device, CL_DEVICE_BOARD_NAME_AMD, 0, NULL, NULL); - } + hc_string_trim_leading (device_name); - if (rc_board_name_amd == CL_SUCCESS) - { - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_BOARD_NAME_AMD, 0, NULL, ¶m_value_size) == -1) - { - device_param->skipped = true; - continue; - } + hc_string_trim_trailing (device_name); - char *device_name = (char *) hcmalloc (param_value_size); + // device_processors - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_BOARD_NAME_AMD, 
param_value_size, device_name, NULL) == -1) - { - device_param->skipped = true; - hcfree (device_name); - continue; - } + int device_processors = 0; - device_param->device_name = device_name; - } - else - { - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_NAME, 0, NULL, ¶m_value_size) == -1) - { - device_param->skipped = true; - continue; - } + if (hc_mtlDeviceGetAttribute (hashcat_ctx, &device_processors, MTL_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, metal_device) == -1) + { + device_param->skipped = true; + continue; + } - char *device_name = (char *) hcmalloc (param_value_size); + device_param->device_processors = device_processors; - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_NAME, param_value_size, device_name, NULL) == -1) - { - device_param->skipped = true; - hcfree (device_name); - continue; - } + // device_host_unified_memory - device_param->device_name = device_name; - } + int device_host_unified_memory = 0; - hc_string_trim_leading (device_param->device_name); + if (hc_mtlDeviceGetAttribute (hashcat_ctx, &device_host_unified_memory, MTL_DEVICE_ATTRIBUTE_UNIFIED_MEMORY, metal_device) == -1) + { + device_param->skipped = true; + continue; + } - hc_string_trim_trailing (device_param->device_name); + device_param->device_host_unified_memory = device_host_unified_memory; - // device_vendor + // device_global_mem, device_available_mem - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_VENDOR, 0, NULL, ¶m_value_size) == -1) - { - device_param->skipped = true; - continue; - } + size_t bytes = 0; - char *opencl_device_vendor = (char *) hcmalloc (param_value_size); + if (hc_mtlDeviceTotalMem (hashcat_ctx, &bytes, metal_device) == -1) + { + device_param->skipped = true; + continue; + } - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_VENDOR, param_value_size, opencl_device_vendor, NULL) == -1) - { - device_param->skipped = true; - hcfree (opencl_device_vendor); - continue; - } + device_param->device_global_mem = (u64) bytes; - device_param->opencl_device_vendor = opencl_device_vendor; + device_param->device_available_mem = 0; - cl_uint opencl_device_vendor_id = 0; + // device_maxmem_alloc - if (strcmp (opencl_device_vendor, CL_VENDOR_AMD1) == 0) - { - opencl_device_vendor_id = VENDOR_ID_AMD; - } - else if (strcmp (opencl_device_vendor, CL_VENDOR_AMD2) == 0) - { - opencl_device_vendor_id = VENDOR_ID_AMD; - } - else if (strcmp (opencl_device_vendor, CL_VENDOR_AMD_USE_INTEL) == 0) - { - opencl_device_vendor_id = VENDOR_ID_AMD_USE_INTEL; - } - else if (strcmp (opencl_device_vendor, CL_VENDOR_APPLE) == 0) - { - opencl_device_vendor_id = VENDOR_ID_APPLE; - } - else if (strcmp (opencl_device_vendor, CL_VENDOR_APPLE_USE_AMD) == 0) - { - opencl_device_vendor_id = VENDOR_ID_AMD; - } - else if (strcmp (opencl_device_vendor, CL_VENDOR_APPLE_USE_NV) == 0) - { - opencl_device_vendor_id = VENDOR_ID_NV; - } - else if (strcmp (opencl_device_vendor, CL_VENDOR_APPLE_USE_INTEL) == 0) - { - opencl_device_vendor_id = VENDOR_ID_INTEL_SDK; - } - else if (strcmp (opencl_device_vendor, CL_VENDOR_APPLE_USE_INTEL2) == 0) - { - opencl_device_vendor_id = VENDOR_ID_INTEL_SDK; - } - else if (strcmp (opencl_device_vendor, CL_VENDOR_INTEL_BEIGNET) == 0) - { - opencl_device_vendor_id = VENDOR_ID_INTEL_BEIGNET; - } - else if (strcmp (opencl_device_vendor, CL_VENDOR_INTEL_SDK) == 0) - { - opencl_device_vendor_id = VENDOR_ID_INTEL_SDK; - } - else if (strcmp (opencl_device_vendor, CL_VENDOR_MESA) == 0) - { - 
opencl_device_vendor_id = VENDOR_ID_MESA; - } - else if (strcmp (opencl_device_vendor, CL_VENDOR_NV) == 0) - { - opencl_device_vendor_id = VENDOR_ID_NV; - } - else if (strcmp (opencl_device_vendor, CL_VENDOR_POCL) == 0) - { - opencl_device_vendor_id = VENDOR_ID_POCL; - } - else - { - opencl_device_vendor_id = VENDOR_ID_GENERIC; - } + size_t device_maxmem_alloc = 0; - device_param->opencl_device_vendor_id = opencl_device_vendor_id; + if (hc_mtlDeviceMaxMemAlloc (hashcat_ctx, &device_maxmem_alloc, metal_device) == -1) + { + device_param->skipped = true; + continue; + } - // device_version + device_param->device_maxmem_alloc = device_maxmem_alloc; - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_VERSION, 0, NULL, ¶m_value_size) == -1) - { - device_param->skipped = true; - continue; - } + if (device_host_unified_memory == 1) device_param->device_maxmem_alloc /= 2; - char *opencl_device_version = (char *) hcmalloc (param_value_size); + // warp size - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_VERSION, param_value_size, opencl_device_version, NULL) == -1) - { - device_param->skipped = true; - hcfree (opencl_device_version); - continue; - } + int metal_warp_size = 0; - device_param->opencl_device_version = opencl_device_version; + if (hc_mtlDeviceGetAttribute (hashcat_ctx, &metal_warp_size, MTL_DEVICE_ATTRIBUTE_WARP_SIZE, metal_device) == -1) + { + device_param->skipped = true; + continue; + } - // opencl_device_c_version + device_param->metal_warp_size = metal_warp_size; - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_OPENCL_C_VERSION, 0, NULL, ¶m_value_size) == -1) - { - device_param->skipped = true; - continue; - } + // device_maxworkgroup_size - char *opencl_device_c_version = (char *) hcmalloc (param_value_size); + int device_maxworkgroup_size = 0; - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_OPENCL_C_VERSION, param_value_size, opencl_device_c_version, NULL) == -1) - { - device_param->skipped = true; - hcfree (opencl_device_c_version); - continue; - } - - device_param->opencl_device_c_version = opencl_device_c_version; + if (hc_mtlDeviceGetAttribute (hashcat_ctx, &device_maxworkgroup_size, MTL_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, metal_device) == -1) + { + device_param->skipped = true; + continue; + } - // max_compute_units + device_param->device_maxworkgroup_size = device_maxworkgroup_size; - cl_uint device_processors = 0; + // max_clock_frequency - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof (device_processors), &device_processors, NULL) == -1) - { - device_param->skipped = true; - continue; - } + int device_maxclock_frequency = 0; - device_param->device_processors = device_processors; + if (hc_mtlDeviceGetAttribute (hashcat_ctx, &device_maxclock_frequency, MTL_DEVICE_ATTRIBUTE_CLOCK_RATE, metal_device) == -1) + { + device_param->skipped = true; + continue; + } - // device_host_unified_memory + device_param->device_maxclock_frequency = device_maxclock_frequency / 1000; - cl_bool device_host_unified_memory = false; + // pcie_bus, pcie_device, pcie_function - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof (device_host_unified_memory), &device_host_unified_memory, NULL) == -1) - { - device_param->skipped = true; - continue; - } + device_param->pcie_domain = 0; + device_param->pcie_bus = 0; + device_param->pcie_device = 0; + device_param->pcie_function = 
0; - device_param->device_host_unified_memory = (device_host_unified_memory == CL_TRUE) ? 1 : 0; + int device_physical_location = 0; - // device_global_mem + if (hc_mtlDeviceGetAttribute (hashcat_ctx, &device_physical_location, MTL_DEVICE_ATTRIBUTE_PHYSICAL_LOCATION, metal_device) == -1) + { + device_param->skipped = true; + continue; + } - cl_ulong device_global_mem = 0; + device_param->device_physical_location = device_physical_location; - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof (device_global_mem), &device_global_mem, NULL) == -1) - { - device_param->skipped = true; - continue; - } + int device_location_number = 0; - device_param->device_global_mem = device_global_mem; + if (hc_mtlDeviceGetAttribute (hashcat_ctx, &device_location_number, MTL_DEVICE_ATTRIBUTE_LOCATION_NUMBER, metal_device) == -1) + { + device_param->skipped = true; + continue; + } - device_param->device_available_mem = 0; + device_param->device_location_number = device_location_number; - // device_maxmem_alloc + int device_max_transfer_rate = 0; - cl_ulong device_maxmem_alloc = 0; + if (hc_mtlDeviceGetAttribute (hashcat_ctx, &device_max_transfer_rate, MTL_DEVICE_ATTRIBUTE_MAX_TRANSFER_RATE, metal_device) == -1) + { + device_param->skipped = true; + continue; + } - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof (device_maxmem_alloc), &device_maxmem_alloc, NULL) == -1) - { - device_param->skipped = true; - continue; - } + device_param->device_max_transfer_rate = device_max_transfer_rate; - device_param->device_maxmem_alloc = device_maxmem_alloc; + int device_registryID = 0; - if (device_param->device_host_unified_memory == 1) - { - // so, we actually have only half the memory because we need the same buffers on host side + if (hc_mtlDeviceGetAttribute (hashcat_ctx, &device_registryID, MTL_DEVICE_ATTRIBUTE_REGISTRY_ID, metal_device) == -1) + { + device_param->skipped = true; + continue; + } - device_param->device_maxmem_alloc /= 2; - } + device_param->device_registryID = device_registryID; - // note we'll limit to 2gb, otherwise this causes all kinds of weird errors because of possible integer overflows in opencl runtimes - // testwise disabling that - //device_param->device_maxmem_alloc = MIN (device_maxmem_alloc, 0x7fffffff); + // kernel_exec_timeout - // max_work_group_size + device_param->kernel_exec_timeout = 0; - size_t device_maxworkgroup_size = 0; + // wgs_multiple - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof (device_maxworkgroup_size), &device_maxworkgroup_size, NULL) == -1) - { - device_param->skipped = true; - continue; - } + device_param->kernel_preferred_wgs_multiple = metal_warp_size; - device_param->device_maxworkgroup_size = device_maxworkgroup_size; + // max_shared_memory_per_block - // max_clock_frequency + int max_shared_memory_per_block = 0; - cl_uint device_maxclock_frequency = 0; + if (hc_mtlDeviceGetAttribute (hashcat_ctx, &max_shared_memory_per_block, MTL_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK, metal_device) == -1) + { + device_param->skipped = true; + continue; + } - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof (device_maxclock_frequency), &device_maxclock_frequency, NULL) == -1) - { - device_param->skipped = true; - continue; - } + if (max_shared_memory_per_block < 32768) + { + event_log_error (hashcat_ctx, "* Device #%u: This device's shared buffer size is too 
small.", device_id + 1); - device_param->device_maxclock_frequency = device_maxclock_frequency; + device_param->skipped = true; + } - // device_endian_little + device_param->device_local_mem_size = max_shared_memory_per_block; - cl_bool device_endian_little = CL_FALSE; + // device_max_constant_buffer_size - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_ENDIAN_LITTLE, sizeof (device_endian_little), &device_endian_little, NULL) == -1) - { - device_param->skipped = true; - continue; - } + int device_max_constant_buffer_size = 0; - if (device_endian_little == CL_FALSE) - { - event_log_error (hashcat_ctx, "* Device #%u: This device is not little-endian.", device_id + 1); + if (hc_mtlDeviceGetAttribute (hashcat_ctx, &device_max_constant_buffer_size, MTL_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY, metal_device) == -1) + { + device_param->skipped = true; + continue; + } - device_param->skipped = true; - } + if (device_max_constant_buffer_size < 65536) + { + event_log_error (hashcat_ctx, "* Device #%u: This device's local mem size is too small.", device_id + 1); - // device_available + device_param->skipped = true; + } - cl_bool device_available = CL_FALSE; + // gpu properties - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_AVAILABLE, sizeof (device_available), &device_available, NULL) == -1) - { - device_param->skipped = true; - continue; - } + int device_is_headless = 0; - if (device_available == CL_FALSE) - { - event_log_error (hashcat_ctx, "* Device #%u: This device is not available.", device_id + 1); + if (hc_mtlDeviceGetAttribute (hashcat_ctx, &device_is_headless, MTL_DEVICE_ATTRIBUTE_HEADLESS, metal_device) == -1) + { + device_param->skipped = true; + continue; + } - device_param->skipped = true; - } + device_param->device_is_headless = device_is_headless; - // device_compiler_available + int device_is_low_power = 0; - cl_bool device_compiler_available = CL_FALSE; + if (hc_mtlDeviceGetAttribute (hashcat_ctx, &device_is_low_power, MTL_DEVICE_ATTRIBUTE_LOW_POWER, metal_device) == -1) + { + device_param->skipped = true; + continue; + } - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_COMPILER_AVAILABLE, sizeof (device_compiler_available), &device_compiler_available, NULL) == -1) - { - device_param->skipped = true; - continue; - } + device_param->device_is_low_power = device_is_low_power; - if (device_compiler_available == CL_FALSE) - { - event_log_error (hashcat_ctx, "* Device #%u: No compiler is available for this device.", device_id + 1); + int device_is_removable = 0; - device_param->skipped = true; - } + if (hc_mtlDeviceGetAttribute (hashcat_ctx, &device_is_removable, MTL_DEVICE_ATTRIBUTE_REMOVABLE, metal_device) == -1) + { + device_param->skipped = true; + continue; + } - // device_execution_capabilities + device_param->device_is_removable = device_is_removable; - cl_device_exec_capabilities device_execution_capabilities; + // skipped - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_EXECUTION_CAPABILITIES, sizeof (device_execution_capabilities), &device_execution_capabilities, NULL) == -1) - { - device_param->skipped = true; - continue; - } + if ((backend_ctx->backend_devices_filter & (1ULL << device_id)) == 0) + { + device_param->skipped = true; + } - if ((device_execution_capabilities & CL_EXEC_KERNEL) == 0) - { - event_log_error (hashcat_ctx, "* Device #%u: This device does not support executing kernels.", device_id + 1); + if ((backend_ctx->opencl_device_types_filter & 
CL_DEVICE_TYPE_GPU) == 0) + { + device_param->skipped = true; + } - device_param->skipped = true; - } + if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_vendor_id == VENDOR_ID_APPLE)) + { + need_iokit = true; + } - // device_extensions + // CPU burning loop damper + // Value is given as number between 0-100 + // By default 8% + // in theory not needed with Metal - size_t device_extensions_size; + device_param->spin_damp = 0; - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_EXTENSIONS, 0, NULL, &device_extensions_size) == -1) + // common driver check + /* + if (device_param->skipped == false) + { + if ((user_options->force == false) && (user_options->backend_info == false)) { - device_param->skipped = true; - continue; } - char *device_extensions = (char *) hcmalloc (device_extensions_size + 1); + // activate device moved below, at end + }*/ - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_EXTENSIONS, device_extensions_size, device_extensions, NULL) == -1) - { - device_param->skipped = true; - hcfree (device_extensions); - continue; - } + // instruction set - if (strstr (device_extensions, "base_atomics") == 0) - { - event_log_error (hashcat_ctx, "* Device #%u: This device does not support base atomics.", device_id + 1); + device_param->has_add = false; + device_param->has_addc = false; + device_param->has_sub = false; + device_param->has_subc = false; + device_param->has_bfe = false; + device_param->has_lop3 = false; + device_param->has_mov64 = false; + device_param->has_prmt = false; - device_param->skipped = true; - } + // check if we need skip device - if (strstr (device_extensions, "byte_addressable_store") == 0) - { - event_log_error (hashcat_ctx, "* Device #%u: This device does not support byte-addressable store.", device_id + 1); + if (device_param->device_processors == 1) device_param->skipped = true; - device_param->skipped = true; - } + /** + * activate device + */ - hcfree (device_extensions); + if (device_param->skipped == false) metal_devices_active++; + } + } + #endif // __APPLE__ - // kernel_preferred_wgs_multiple + backend_ctx->metal_devices_cnt = metal_devices_cnt; + backend_ctx->metal_devices_active = metal_devices_active; - // There is global query for this attribute on OpenCL that is not linked to a specific kernel, so we set it to a fixed value - // Later in the code, we add vendor specific extensions to query it + // OCL - device_param->kernel_preferred_wgs_multiple = 8; + int opencl_devices_cnt = 0; + int opencl_devices_active = 0; - // device_local_mem_type + if (backend_ctx->ocl) + { + /** + * OpenCL devices: simply push all devices from all platforms into the same device array + */ - cl_device_local_mem_type device_local_mem_type; + cl_uint opencl_platforms_cnt = backend_ctx->opencl_platforms_cnt; + cl_device_id **opencl_platforms_devices = backend_ctx->opencl_platforms_devices; + cl_uint *opencl_platforms_devices_cnt = backend_ctx->opencl_platforms_devices_cnt; + cl_uint *opencl_platforms_vendor_id = backend_ctx->opencl_platforms_vendor_id; + char **opencl_platforms_version = backend_ctx->opencl_platforms_version; - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_LOCAL_MEM_TYPE, sizeof (device_local_mem_type), &device_local_mem_type, NULL) == -1) - { - device_param->skipped = true; - continue; - } + for (u32 opencl_platforms_idx = 0; opencl_platforms_idx < opencl_platforms_cnt; opencl_platforms_idx++) + { + cl_device_id 
*opencl_platform_devices = opencl_platforms_devices[opencl_platforms_idx]; + cl_uint opencl_platform_devices_cnt = opencl_platforms_devices_cnt[opencl_platforms_idx]; + cl_uint opencl_platform_vendor_id = opencl_platforms_vendor_id[opencl_platforms_idx]; + char *opencl_platform_version = opencl_platforms_version[opencl_platforms_idx]; - device_param->device_local_mem_type = device_local_mem_type; + for (u32 opencl_platform_devices_idx = 0; opencl_platform_devices_idx < opencl_platform_devices_cnt; opencl_platform_devices_idx++, backend_devices_idx++, opencl_devices_cnt++) + { + const u32 device_id = backend_devices_idx; - // device_max_constant_buffer_size + hc_device_param_t *device_param = &devices_param[device_id]; - cl_ulong device_max_constant_buffer_size; + device_param->device_id = device_id; - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof (device_max_constant_buffer_size), &device_max_constant_buffer_size, NULL) == -1) - { - device_param->skipped = true; - continue; - } + backend_ctx->backend_device_from_opencl[opencl_devices_cnt] = backend_devices_idx; - if (device_local_mem_type == CL_LOCAL) - { - if (device_max_constant_buffer_size < 65536) - { - event_log_error (hashcat_ctx, "* Device #%u: This device's constant buffer size is too small.", device_id + 1); + backend_ctx->backend_device_from_opencl_platform[opencl_platforms_idx][opencl_platform_devices_idx] = backend_devices_idx; - device_param->skipped = true; - } - } + device_param->opencl_platform_vendor_id = opencl_platform_vendor_id; - // device_local_mem_size + device_param->opencl_device = opencl_platform_devices[opencl_platform_devices_idx]; - cl_ulong device_local_mem_size = 0; + //device_param->opencl_platform = opencl_platform; - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof (device_local_mem_size), &device_local_mem_size, NULL) == -1) - { - device_param->skipped = true; - continue; - } + device_param->is_cuda = false; + device_param->is_hip = false; + device_param->is_metal = false; + device_param->is_opencl = true; - if (device_local_mem_type == CL_LOCAL) - { - if (device_local_mem_size < 32768) - { - event_log_error (hashcat_ctx, "* Device #%u: This device's local mem size is too small.", device_id + 1); + // store opencl platform i - device_param->skipped = true; - } - } + device_param->opencl_platform_id = opencl_platforms_idx; - // workaround inc! - // allocating all reported local memory causes jit to fail with: SC failed. No reason given. 
- // if we limit ourself to 32k it seems to work + // check OpenCL version - if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) + device_param->use_opencl12 = false; + device_param->use_opencl20 = false; + device_param->use_opencl21 = false; + + int opencl_version_min = 0; + int opencl_version_maj = 0; + + if (sscanf (opencl_platform_version, "OpenCL %d.%d", &opencl_version_min, &opencl_version_maj) == 2) { - if (device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) + if ((opencl_version_min == 1) && (opencl_version_maj == 2)) { - if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD) - { - device_local_mem_size = MIN (device_local_mem_size, 32768); - } + device_param->use_opencl12 = true; + } + else if ((opencl_version_min == 2) && (opencl_version_maj == 0)) + { + device_param->use_opencl20 = true; + } + else if ((opencl_version_min == 2) && (opencl_version_maj == 1)) + { + device_param->use_opencl21 = true; } } - device_param->device_local_mem_size = device_local_mem_size; - - // handling known bugs on POCL - - // POCL < 1.9 doesn't like quotes in the include path, see: - // https://github.com/hashcat/hashcat/issues/2950 - // https://github.com/pocl/pocl/issues/962 + size_t param_value_size = 0; - // POCL < 1.5 and older LLVM versions are known to fail compiling kernels - // https://github.com/hashcat/hashcat/issues/2344 + // opencl_device_type - // we need to inform the user to update + cl_device_type opencl_device_type; - if (opencl_platform_vendor_id == VENDOR_ID_POCL) + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_TYPE, sizeof (opencl_device_type), &opencl_device_type, NULL) == -1) { - char *pocl_version_ptr = strstr (opencl_platform_version, "pocl "); - char *llvm_version_ptr = strstr (opencl_platform_version, "LLVM "); - - if ((pocl_version_ptr != NULL) && (llvm_version_ptr != NULL)) - { - bool pocl_skip = false; - - int pocl_maj = 0; - int pocl_min = 0; - - int pocl_bug_whitespace_on_path = 0; - int pocl_bug_kernel_compiling_failure = 0; - - const int res1 = sscanf (pocl_version_ptr, "pocl %d.%d", &pocl_maj, &pocl_min); - - if (res1 == 2) - { - const int pocl_version = (pocl_maj * 100) + pocl_min; - - if (pocl_version < 109) - { - if (strchr (folder_config->cpath_real, ' ') != NULL) - { - pocl_skip = true; - pocl_bug_whitespace_on_path = 1; - } + device_param->skipped = true; + continue; + } - if (pocl_version < 105) - { - pocl_skip = true; - pocl_bug_kernel_compiling_failure = 1; - } - } - } + opencl_device_type &= ~CL_DEVICE_TYPE_DEFAULT; - int llvm_maj = 0; - int llvm_min = 0; + device_param->opencl_device_type = opencl_device_type; - const int res2 = sscanf (llvm_version_ptr, "LLVM %d.%d", &llvm_maj, &llvm_min); + // device_name - if (res2 == 2) - { - const int llvm_version = (llvm_maj * 100) + llvm_min; + // try CL_DEVICE_BOARD_NAME_AMD first, if it fails fall back to CL_DEVICE_NAME + // since AMD ROCm does not identify itself at this stage we simply check for return code from clGetDeviceInfo() - if (llvm_version < 900) - { - pocl_skip = true; - } - } + #define CHECK_BOARD_NAME_AMD 1 - if (pocl_skip == true) - { - if (user_options->force == false) - { - event_log_error (hashcat_ctx, "* Device #%u: Outdated POCL OpenCL driver detected!", device_id + 1); + cl_int rc_board_name_amd = CL_INVALID_VALUE; - if (user_options->quiet == false) - { - if (pocl_bug_kernel_compiling_failure == 1) - { - event_log_warning (hashcat_ctx, "This OpenCL driver may fail kernel compilation or produce false negatives."); - } + if 
(CHECK_BOARD_NAME_AMD) + { + //backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - if (pocl_bug_whitespace_on_path == 1) - { - event_log_warning (hashcat_ctx, "Consider moving hashcat to a path with no spaces if you want to use this POCL version."); - } + OCL_PTR *ocl = (OCL_PTR *) backend_ctx->ocl; - event_log_warning (hashcat_ctx, "We recommend using a version of POCL >= 1.9"); - event_log_warning (hashcat_ctx, "You can use --force to override, but do not report related errors."); - event_log_warning (hashcat_ctx, NULL); - } + rc_board_name_amd = ocl->clGetDeviceInfo (device_param->opencl_device, CL_DEVICE_BOARD_NAME_AMD, 0, NULL, NULL); + } - device_param->skipped = true; - } - } + if (rc_board_name_amd == CL_SUCCESS) + { + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_BOARD_NAME_AMD, 0, NULL, ¶m_value_size) == -1) + { + device_param->skipped = true; + continue; } - } - char *opencl_device_version_lower = hcstrdup (opencl_device_version); + char *device_name = (char *) hcmalloc (param_value_size); - lowercase ((u8 *) opencl_device_version_lower, strlen (opencl_device_version_lower)); + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_BOARD_NAME_AMD, param_value_size, device_name, NULL) == -1) + { + device_param->skipped = true; + hcfree (device_name); + continue; + } - if ((strstr (opencl_device_version_lower, "beignet ")) - || (strstr (opencl_device_version_lower, " beignet")) - || (strstr (opencl_device_version_lower, "mesa ")) - || (strstr (opencl_device_version_lower, " mesa"))) + device_param->device_name = device_name; + } + else { - // BEIGNET: https://github.com/hashcat/hashcat/issues/2243 - // MESA: https://github.com/hashcat/hashcat/issues/2269 - - if (user_options->force == false) + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_NAME, 0, NULL, ¶m_value_size) == -1) { - event_log_error (hashcat_ctx, "* Device #%u: Unstable OpenCL driver detected!", device_id + 1); + device_param->skipped = true; + continue; + } - if (user_options->quiet == false) event_log_warning (hashcat_ctx, "This OpenCL driver may fail kernel compilation or produce false negatives."); - if (user_options->quiet == false) event_log_warning (hashcat_ctx, "You can use --force to override, but do not report related errors."); - if (user_options->quiet == false) event_log_warning (hashcat_ctx, NULL); + char *device_name = (char *) hcmalloc (param_value_size); + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_NAME, param_value_size, device_name, NULL) == -1) + { device_param->skipped = true; + hcfree (device_name); + continue; } + + device_param->device_name = device_name; } - hcfree (opencl_device_version_lower); + hc_string_trim_leading (device_param->device_name); - // Since some times we get reports from users about not working hashcat, dropping error messages like: - // CL_INVALID_COMMAND_QUEUE and CL_OUT_OF_RESOURCES - // Turns out that this is caused by Intel OpenCL runtime handling their GPU devices - // Disable such devices unless the user forces to use it - // This is successfully workaround with new threading model and new memory management - // Tested on Windows 10 - // OpenCL.Version.: OpenCL C 2.1 - // Driver.Version.: 23.20.16.4973 + hc_string_trim_trailing (device_param->device_name); - /* - #if !defined (__APPLE__) - if (opencl_device_type & CL_DEVICE_TYPE_GPU) - { - if ((device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK) || (device_param->opencl_device_vendor_id == 
VENDOR_ID_INTEL_BEIGNET)) - { - if (user_options->force == false) - { - if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: Intel's OpenCL runtime (GPU only) is currently broken.", device_id + 1); - if (user_options->quiet == false) event_log_warning (hashcat_ctx, " We are waiting for updated OpenCL drivers from Intel."); - if (user_options->quiet == false) event_log_warning (hashcat_ctx, " You can use --force to override, but do not report related errors."); - if (user_options->quiet == false) event_log_warning (hashcat_ctx, NULL); + // device_vendor - device_param->skipped = true; - } - } + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_VENDOR, 0, NULL, ¶m_value_size) == -1) + { + device_param->skipped = true; + continue; } - #endif // __APPLE__ - */ - // skipped + char *opencl_device_vendor = (char *) hcmalloc (param_value_size); - if ((backend_ctx->backend_devices_filter & (1ULL << device_id)) == 0) + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_VENDOR, param_value_size, opencl_device_vendor, NULL) == -1) { device_param->skipped = true; - } + hcfree (opencl_device_vendor); + continue; + } - if ((backend_ctx->opencl_device_types_filter & (opencl_device_type)) == 0) + device_param->opencl_device_vendor = opencl_device_vendor; + + cl_uint opencl_device_vendor_id = 0; + + if (strcmp (opencl_device_vendor, CL_VENDOR_AMD1) == 0) { - device_param->skipped = true; + opencl_device_vendor_id = VENDOR_ID_AMD; } - - #if defined (__APPLE__) - if (opencl_device_type & CL_DEVICE_TYPE_GPU) + else if (strcmp (opencl_device_vendor, CL_VENDOR_AMD2) == 0) { - //if (user_options->force == false) - if (device_param->skipped == false) - { - if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: Apple's OpenCL drivers (GPU) are known to be unreliable.", device_id + 1); - if (user_options->quiet == false) event_log_warning (hashcat_ctx, " You have been warned."); - //if (user_options->quiet == false) event_log_warning (hashcat_ctx, " There are many reports of false negatives and other issues."); - //if (user_options->quiet == false) event_log_warning (hashcat_ctx, " This is not a hashcat issue. Other projects report issues with these drivers."); - //if (user_options->quiet == false) event_log_warning (hashcat_ctx, " You can use --force to override, but do not report related errors. 
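// ---------------------------------------------------------------------------
// Editorial aside, not part of the patch: the string attributes above (board
// name, device name, vendor) all follow the same two-step clGetDeviceInfo ()
// pattern -- query the size first, then allocate and query the value. A
// bare-bones sketch against the stock OpenCL API; hashcat itself goes through
// its hc_clGetDeviceInfo () wrapper and hcmalloc ()/hcfree ().

#include <stdlib.h>
#include <CL/cl.h>

static char *query_device_string (cl_device_id device, cl_device_info param)
{
  size_t size = 0;

  if (clGetDeviceInfo (device, param, 0, NULL, &size) != CL_SUCCESS) return NULL;

  char *value = (char *) malloc (size);

  if (value == NULL) return NULL;

  if (clGetDeviceInfo (device, param, size, value, NULL) != CL_SUCCESS)
  {
    free (value);

    return NULL;
  }

  return value; // e.g. query_device_string (device, CL_DEVICE_VENDOR)
}
// ---------------------------------------------------------------------------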
You have been warned."); - if (user_options->quiet == false) event_log_warning (hashcat_ctx, NULL); - - //device_param->skipped = true; - } + opencl_device_vendor_id = VENDOR_ID_AMD; + } + else if (strcmp (opencl_device_vendor, CL_VENDOR_AMD_USE_INTEL) == 0) + { + opencl_device_vendor_id = VENDOR_ID_AMD_USE_INTEL; + } + else if (strcmp (opencl_device_vendor, CL_VENDOR_APPLE) == 0) + { + opencl_device_vendor_id = VENDOR_ID_APPLE; + } + else if (strcmp (opencl_device_vendor, CL_VENDOR_APPLE_USE_AMD) == 0) + { + opencl_device_vendor_id = VENDOR_ID_AMD; + } + else if (strcmp (opencl_device_vendor, CL_VENDOR_APPLE_USE_NV) == 0) + { + opencl_device_vendor_id = VENDOR_ID_NV; + } + else if (strcmp (opencl_device_vendor, CL_VENDOR_APPLE_USE_INTEL) == 0) + { + opencl_device_vendor_id = VENDOR_ID_INTEL_SDK; + } + else if (strcmp (opencl_device_vendor, CL_VENDOR_APPLE_USE_INTEL2) == 0) + { + opencl_device_vendor_id = VENDOR_ID_INTEL_SDK; + } + else if (strcmp (opencl_device_vendor, CL_VENDOR_INTEL_BEIGNET) == 0) + { + opencl_device_vendor_id = VENDOR_ID_INTEL_BEIGNET; + } + else if (strcmp (opencl_device_vendor, CL_VENDOR_INTEL_SDK) == 0) + { + opencl_device_vendor_id = VENDOR_ID_INTEL_SDK; + } + else if (strcmp (opencl_device_vendor, CL_VENDOR_MESA) == 0) + { + opencl_device_vendor_id = VENDOR_ID_MESA; + } + else if (strcmp (opencl_device_vendor, CL_VENDOR_NV) == 0) + { + opencl_device_vendor_id = VENDOR_ID_NV; + } + else if (strcmp (opencl_device_vendor, CL_VENDOR_POCL) == 0) + { + opencl_device_vendor_id = VENDOR_ID_POCL; + } + else + { + opencl_device_vendor_id = VENDOR_ID_GENERIC; } - #endif // __APPLE__ - // driver_version + device_param->opencl_device_vendor_id = opencl_device_vendor_id; - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DRIVER_VERSION, 0, NULL, ¶m_value_size) == -1) + // device_version + + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_VERSION, 0, NULL, ¶m_value_size) == -1) { device_param->skipped = true; continue; } - char *opencl_driver_version = (char *) hcmalloc (param_value_size); + char *opencl_device_version = (char *) hcmalloc (param_value_size); - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DRIVER_VERSION, param_value_size, opencl_driver_version, NULL) == -1) + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_VERSION, param_value_size, opencl_device_version, NULL) == -1) { device_param->skipped = true; - hcfree (opencl_driver_version); + hcfree (opencl_device_version); continue; } - device_param->opencl_driver_version = opencl_driver_version; + device_param->opencl_device_version = opencl_device_version; - // vendor specific + // opencl_device_c_version - if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU) + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_OPENCL_C_VERSION, 0, NULL, ¶m_value_size) == -1) { - #if defined (__APPLE__) - if (device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) - { - if (device_param->skipped == false) - { - need_iokit = true; - } - } - #endif - - #if defined (__linux__) - need_sysfs_cpu = true; - #endif + device_param->skipped = true; + continue; } - if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) + char *opencl_device_c_version = (char *) hcmalloc (param_value_size); + + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_OPENCL_C_VERSION, param_value_size, opencl_device_c_version, NULL) == -1) { - if ((device_param->opencl_platform_vendor_id == VENDOR_ID_AMD) && 
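// ---------------------------------------------------------------------------
// Editorial aside, not part of the patch: the strcmp () chain above maps the
// CL_DEVICE_VENDOR string onto hashcat's internal VENDOR_ID_* constants. The
// same mapping can be written as a lookup table; the vendor strings and the
// EX_VENDOR_ID_* constants below are illustrative placeholders, not an
// authoritative list.

#include <string.h>

typedef struct
{
  const char *vendor_name;
  unsigned    vendor_id;

} vendor_map_t;

#define EX_VENDOR_ID_AMD     1
#define EX_VENDOR_ID_NV      2
#define EX_VENDOR_ID_GENERIC 0xffff

static unsigned map_vendor_string (const char *vendor)
{
  static const vendor_map_t table[] =
  {
    { "Advanced Micro Devices, Inc.", EX_VENDOR_ID_AMD },
    { "NVIDIA Corporation",           EX_VENDOR_ID_NV  },
  };

  for (size_t i = 0; i < sizeof (table) / sizeof (table[0]); i++)
  {
    if (strcmp (vendor, table[i].vendor_name) == 0) return table[i].vendor_id;
  }

  return EX_VENDOR_ID_GENERIC; // unknown vendors fall back to a generic id
}
// ---------------------------------------------------------------------------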
(device_param->opencl_device_vendor_id == VENDOR_ID_AMD)) - { - need_adl = true; + device_param->skipped = true; + hcfree (opencl_device_c_version); + continue; + } - #if defined (__linux__) - need_sysfs_amdgpu = true; - #endif - } + device_param->opencl_device_c_version = opencl_device_c_version; - if ((device_param->opencl_platform_vendor_id == VENDOR_ID_NV) && (device_param->opencl_device_vendor_id == VENDOR_ID_NV)) - { - need_nvml = true; + // max_compute_units - #if defined (_WIN) || defined (__CYGWIN__) - need_nvapi = true; - #endif - } - } + cl_uint device_processors = 0; - if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU) + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof (device_processors), &device_processors, NULL) == -1) { - // they like this - - device_param->kernel_preferred_wgs_multiple = 1; + device_param->skipped = true; + continue; } - if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) - { - if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)) - { - // from https://www.khronos.org/registry/OpenCL/extensions/amd/cl_amd_device_attribute_query.txt - #define CL_DEVICE_WAVEFRONT_WIDTH_AMD 0x4043 + device_param->device_processors = device_processors; - // crazy, but apple does not support this query! - // the best alternative is "Preferred work group size multiple (kernel)", but requires to specify a kernel. - // so we will set kernel_preferred_wgs_multiple intentionally to 0 because otherwise it it set to 8 by default. - // we then assign the value kernel_preferred_wgs_multiple a small kernel like bzero after test if this was set to 0. + // device_host_unified_memory - device_param->kernel_preferred_wgs_multiple = 0; - } + cl_bool device_host_unified_memory = false; - if ((device_param->opencl_platform_vendor_id == VENDOR_ID_AMD) && (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)) - { - cl_uint device_wavefront_width_amd; + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof (device_host_unified_memory), &device_host_unified_memory, NULL) == -1) + { + device_param->skipped = true; + continue; + } - // from https://www.khronos.org/registry/OpenCL/extensions/amd/cl_amd_device_attribute_query.txt - #define CL_DEVICE_WAVEFRONT_WIDTH_AMD 0x4043 + device_param->device_host_unified_memory = (device_host_unified_memory == CL_TRUE) ? 
1 : 0; - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_WAVEFRONT_WIDTH_AMD, sizeof (device_wavefront_width_amd), &device_wavefront_width_amd, NULL) == -1) - { - device_param->skipped = true; - continue; - } + // device_global_mem - device_param->kernel_preferred_wgs_multiple = device_wavefront_width_amd; + cl_ulong device_global_mem = 0; - cl_device_topology_amd amdtopo; + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof (device_global_mem), &device_global_mem, NULL) == -1) + { + device_param->skipped = true; + continue; + } - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_TOPOLOGY_AMD, sizeof (amdtopo), &amdtopo, NULL) == -1) - { - device_param->skipped = true; - continue; - } + device_param->device_global_mem = device_global_mem; - device_param->pcie_domain = 0; // no attribute to query - device_param->pcie_bus = amdtopo.pcie.bus; - device_param->pcie_device = amdtopo.pcie.device; - device_param->pcie_function = amdtopo.pcie.function; - } + device_param->device_available_mem = 0; - if ((device_param->opencl_platform_vendor_id == VENDOR_ID_NV) && (device_param->opencl_device_vendor_id == VENDOR_ID_NV)) - { - cl_uint device_warp_size_nv; + // device_maxmem_alloc - // from deps/OpenCL-Headers/CL/cl_ext.h - #define CL_DEVICE_WARP_SIZE_NV 0x4003 + cl_ulong device_maxmem_alloc = 0; - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_WARP_SIZE_NV, sizeof (device_warp_size_nv), &device_warp_size_nv, NULL) == -1) - { - device_param->skipped = true; - continue; - } + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof (device_maxmem_alloc), &device_maxmem_alloc, NULL) == -1) + { + device_param->skipped = true; + continue; + } - device_param->kernel_preferred_wgs_multiple = device_warp_size_nv; + device_param->device_maxmem_alloc = device_maxmem_alloc; - cl_uint pci_bus_id_nv; // is cl_uint the right type for them?? 
- cl_uint pci_slot_id_nv; + if (device_param->device_host_unified_memory == 1) + { + // so, we actually have only half the memory because we need the same buffers on host side - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_PCI_BUS_ID_NV, sizeof (pci_bus_id_nv), &pci_bus_id_nv, NULL) == -1) - { - device_param->skipped = true; - continue; - } + device_param->device_maxmem_alloc /= 2; + } - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_PCI_SLOT_ID_NV, sizeof (pci_slot_id_nv), &pci_slot_id_nv, NULL) == -1) - { - device_param->skipped = true; - continue; - } + // note we'll limit to 2gb, otherwise this causes all kinds of weird errors because of possible integer overflows in opencl runtimes + // testwise disabling that + //device_param->device_maxmem_alloc = MIN (device_maxmem_alloc, 0x7fffffff); - device_param->pcie_domain = 0; // no attribute to query - device_param->pcie_bus = (u8) (pci_bus_id_nv); - device_param->pcie_device = (u8) (pci_slot_id_nv >> 3); - device_param->pcie_function = (u8) (pci_slot_id_nv & 7); + // max_work_group_size - int sm_minor = 0; - int sm_major = 0; + size_t device_maxworkgroup_size = 0; - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV, sizeof (sm_minor), &sm_minor, NULL) == -1) - { - device_param->skipped = true; - continue; - } + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof (device_maxworkgroup_size), &device_maxworkgroup_size, NULL) == -1) + { + device_param->skipped = true; + continue; + } - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, sizeof (sm_major), &sm_major, NULL) == -1) - { - device_param->skipped = true; - continue; - } + device_param->device_maxworkgroup_size = device_maxworkgroup_size; - device_param->sm_minor = sm_minor; - device_param->sm_major = sm_major; + // max_clock_frequency - cl_uint kernel_exec_timeout = 0; - - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV, sizeof (kernel_exec_timeout), &kernel_exec_timeout, NULL) == -1) - { - device_param->skipped = true; - continue; - } - - device_param->kernel_exec_timeout = kernel_exec_timeout; + cl_uint device_maxclock_frequency = 0; - // CPU burning loop damper - // Value is given as number between 0-100 - // By default 8% + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof (device_maxclock_frequency), &device_maxclock_frequency, NULL) == -1) + { + device_param->skipped = true; + continue; + } - device_param->spin_damp = (double) user_options->spin_damp / 100; + device_param->device_maxclock_frequency = device_maxclock_frequency; - if (user_options->stdout_flag == false) - { - // recommend CUDA + // device_endian_little - if ((backend_ctx->cuda == NULL) || (backend_ctx->nvrtc == NULL)) - { - if (user_options->backend_ignore_cuda == false) - { - if (backend_ctx->rc_cuda_init == -1) - { - event_log_warning (hashcat_ctx, "Failed to initialize NVIDIA CUDA library."); - event_log_warning (hashcat_ctx, NULL); - } - else - { - event_log_warning (hashcat_ctx, "Successfully initialized NVIDIA CUDA library."); - event_log_warning (hashcat_ctx, NULL); - } + cl_bool device_endian_little = CL_FALSE; - if (backend_ctx->rc_nvrtc_init == -1) - { - event_log_warning (hashcat_ctx, "Failed to initialize NVIDIA RTC library."); - event_log_warning (hashcat_ctx, NULL); - } - else - { 
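// ---------------------------------------------------------------------------
// Editorial aside, not part of the patch: on devices that report
// CL_DEVICE_HOST_UNIFIED_MEMORY, host-side copies of the buffers come out of
// the same physical pool, so the usable per-allocation limit is treated as
// half of what the runtime reports. A minimal sketch of that adjustment,
// using hypothetical names:

#include <stdbool.h>

typedef unsigned long long mem_size_t;

static mem_size_t effective_maxmem_alloc (mem_size_t reported_maxmem_alloc, bool host_unified_memory)
{
  // mirror buffers live on the host side as well, so only half is really usable
  return (host_unified_memory == true) ? reported_maxmem_alloc / 2 : reported_maxmem_alloc;
}
// ---------------------------------------------------------------------------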
- event_log_warning (hashcat_ctx, "Successfully initialized NVIDIA RTC library."); - event_log_warning (hashcat_ctx, NULL); - } + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_ENDIAN_LITTLE, sizeof (device_endian_little), &device_endian_little, NULL) == -1) + { + device_param->skipped = true; + continue; + } - event_log_warning (hashcat_ctx, "* Device #%u: CUDA SDK Toolkit not installed or incorrectly installed.", device_id + 1); - event_log_warning (hashcat_ctx, " CUDA SDK Toolkit required for proper device support and utilization."); - event_log_warning (hashcat_ctx, " Falling back to OpenCL runtime."); + if (device_endian_little == CL_FALSE) + { + event_log_error (hashcat_ctx, "* Device #%u: This device is not little-endian.", device_id + 1); - event_log_warning (hashcat_ctx, NULL); - } - } - } - } + device_param->skipped = true; } - // instruction set + // device_available - // fixed values works only for nvidia devices - // dynamical values for amd see time intensive section below + cl_bool device_available = CL_FALSE; - if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && (device_param->opencl_platform_vendor_id == VENDOR_ID_NV)) + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_AVAILABLE, sizeof (device_available), &device_available, NULL) == -1) { - const int sm = (device_param->sm_major * 10) + device_param->sm_minor; + device_param->skipped = true; + continue; + } - device_param->has_add = (sm >= 12) ? true : false; - device_param->has_addc = (sm >= 12) ? true : false; - device_param->has_sub = (sm >= 12) ? true : false; - device_param->has_subc = (sm >= 12) ? true : false; - device_param->has_bfe = (sm >= 20) ? true : false; - device_param->has_lop3 = (sm >= 50) ? true : false; - device_param->has_mov64 = (sm >= 10) ? true : false; - device_param->has_prmt = (sm >= 20) ? 
true : false; + if (device_available == CL_FALSE) + { + event_log_error (hashcat_ctx, "* Device #%u: This device is not available.", device_id + 1); + + device_param->skipped = true; } - // common driver check + // device_compiler_available - if (device_param->skipped == false) + cl_bool device_compiler_available = CL_FALSE; + + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_COMPILER_AVAILABLE, sizeof (device_compiler_available), &device_compiler_available, NULL) == -1) { - if ((user_options->force == false) && (user_options->backend_info == false)) - { - if (opencl_device_type & CL_DEVICE_TYPE_CPU) - { - if (device_param->opencl_platform_vendor_id == VENDOR_ID_INTEL_SDK) - { - bool intel_warn = false; + device_param->skipped = true; + continue; + } - // Intel OpenCL runtime 18 + if (device_compiler_available == CL_FALSE) + { + event_log_error (hashcat_ctx, "* Device #%u: No compiler is available for this device.", device_id + 1); - int opencl_driver1 = 0; - int opencl_driver2 = 0; - int opencl_driver3 = 0; - int opencl_driver4 = 0; + device_param->skipped = true; + } - const int res18 = sscanf (device_param->opencl_driver_version, "%d.%d.%d.%d", &opencl_driver1, &opencl_driver2, &opencl_driver3, &opencl_driver4); + // device_execution_capabilities - if (res18 == 4) - { - // so far all versions 18 are ok - } - else - { - // Intel OpenCL runtime 16 + cl_device_exec_capabilities device_execution_capabilities; - float opencl_version = 0; - int opencl_build = 0; + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_EXECUTION_CAPABILITIES, sizeof (device_execution_capabilities), &device_execution_capabilities, NULL) == -1) + { + device_param->skipped = true; + continue; + } - const int res16 = sscanf (device_param->opencl_device_version, "OpenCL %f (Build %d)", &opencl_version, &opencl_build); + if ((device_execution_capabilities & CL_EXEC_KERNEL) == 0) + { + event_log_error (hashcat_ctx, "* Device #%u: This device does not support executing kernels.", device_id + 1); - if (res16 == 2) - { - if (opencl_build < 25) intel_warn = true; - } - } + device_param->skipped = true; + } - if (intel_warn == true) - { - event_log_error (hashcat_ctx, "* Device #%u: Outdated or broken Intel OpenCL runtime '%s' detected!", device_id + 1, device_param->opencl_driver_version); + // device_extensions - event_log_warning (hashcat_ctx, "You are STRONGLY encouraged to use the officially supported runtime."); - event_log_warning (hashcat_ctx, "See hashcat.net for the officially supported Intel OpenCL runtime."); - event_log_warning (hashcat_ctx, "See also: https://hashcat.net/faq/wrongdriver"); - event_log_warning (hashcat_ctx, "You can use --force to override this, but do not report related errors."); - event_log_warning (hashcat_ctx, NULL); + size_t device_extensions_size; - device_param->skipped = true; - continue; - } - } - } - else if (opencl_device_type & CL_DEVICE_TYPE_GPU) - { - if (device_param->opencl_platform_vendor_id == VENDOR_ID_AMD) - { - bool amd_warn = true; + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_EXTENSIONS, 0, NULL, &device_extensions_size) == -1) + { + device_param->skipped = true; + continue; + } - #if defined (__linux__) - // AMDGPU-PRO Driver 16.40 and higher - if (strtoul (device_param->opencl_driver_version, NULL, 10) >= 2117) amd_warn = false; - // AMDGPU-PRO Driver 16.50 is known to be broken - if (strtoul (device_param->opencl_driver_version, NULL, 10) == 2236) amd_warn = true; - // AMDGPU-PRO Driver 
16.60 is known to be broken - if (strtoul (device_param->opencl_driver_version, NULL, 10) == 2264) amd_warn = true; - // AMDGPU-PRO Driver 17.10 is known to be broken - if (strtoul (device_param->opencl_driver_version, NULL, 10) == 2348) amd_warn = true; - // AMDGPU-PRO Driver 17.20 (2416) is fine, doesn't need check will match >= 2117 - #elif defined (_WIN) - // AMD Radeon Software 14.9 and higher, should be updated to 15.12 - if (strtoul (device_param->opencl_driver_version, NULL, 10) >= 1573) amd_warn = false; - #else - // we have no information about other os - if (amd_warn == true) amd_warn = false; - #endif + char *device_extensions = (char *) hcmalloc (device_extensions_size + 1); - if (amd_warn == true) - { - event_log_error (hashcat_ctx, "* Device #%u: Outdated or broken AMD driver '%s' detected!", device_id + 1, device_param->opencl_driver_version); + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_EXTENSIONS, device_extensions_size, device_extensions, NULL) == -1) + { + device_param->skipped = true; + hcfree (device_extensions); + continue; + } - event_log_warning (hashcat_ctx, "You are STRONGLY encouraged to use the officially supported driver."); - event_log_warning (hashcat_ctx, "See hashcat.net for officially supported AMD drivers."); - event_log_warning (hashcat_ctx, "See also: https://hashcat.net/faq/wrongdriver"); - event_log_warning (hashcat_ctx, "You can use --force to override this, but do not report related errors."); - event_log_warning (hashcat_ctx, NULL); + if (strstr (device_extensions, "base_atomics") == 0) + { + event_log_error (hashcat_ctx, "* Device #%u: This device does not support base atomics.", device_id + 1); - device_param->skipped = true; - continue; - } - } + device_param->skipped = true; + } - if (device_param->opencl_platform_vendor_id == VENDOR_ID_NV) - { - int nv_warn = true; + if (strstr (device_extensions, "byte_addressable_store") == 0) + { + event_log_error (hashcat_ctx, "* Device #%u: This device does not support byte-addressable store.", device_id + 1); - int version_maj = 0; - int version_min = 0; + device_param->skipped = true; + } - const int r = sscanf (device_param->opencl_driver_version, "%d.%d", &version_maj, &version_min); + hcfree (device_extensions); - if (r == 2) - { - // nvidia 441.x looks ok + // kernel_preferred_wgs_multiple - if (version_maj == 440) - { - if (version_min >= 64) - { - nv_warn = false; - } - } - else - { - // unknown version scheme, probably new driver version + // There is global query for this attribute on OpenCL that is not linked to a specific kernel, so we set it to a fixed value + // Later in the code, we add vendor specific extensions to query it - nv_warn = false; - } - } - else - { - // unknown version scheme, probably new driver version + device_param->kernel_preferred_wgs_multiple = 8; - nv_warn = false; - } + // device_local_mem_type - if (nv_warn == true) - { - event_log_warning (hashcat_ctx, "* Device #%u: Outdated or broken NVIDIA driver '%s' detected!", device_id + 1, device_param->opencl_driver_version); - event_log_warning (hashcat_ctx, NULL); + cl_device_local_mem_type device_local_mem_type; - event_log_warning (hashcat_ctx, "You are STRONGLY encouraged to use the officially supported driver."); - event_log_warning (hashcat_ctx, "See hashcat's homepage for officially supported NVIDIA drivers."); - event_log_warning (hashcat_ctx, "See also: https://hashcat.net/faq/wrongdriver"); - event_log_warning (hashcat_ctx, "You can use --force to override this, but do not report 
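// ---------------------------------------------------------------------------
// Editorial aside, not part of the patch: CL_DEVICE_EXTENSIONS is returned as
// one space-separated string, so the required-feature tests above are plain
// substring checks. A compact sketch of the same idea; the two substrings
// mirror the base_atomics / byte_addressable_store checks in the code above.

#include <stdbool.h>
#include <string.h>

static bool device_has_required_extensions (const char *device_extensions)
{
  if (strstr (device_extensions, "base_atomics")           == NULL) return false;
  if (strstr (device_extensions, "byte_addressable_store") == NULL) return false;

  return true;
}
// ---------------------------------------------------------------------------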
related errors."); - event_log_warning (hashcat_ctx, NULL); - - device_param->skipped = true; - continue; - } + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_LOCAL_MEM_TYPE, sizeof (device_local_mem_type), &device_local_mem_type, NULL) == -1) + { + device_param->skipped = true; + continue; + } - if (device_param->sm_major < 5) - { - if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: This hardware has outdated CUDA compute capability (%u.%u).", device_id + 1, device_param->sm_major, device_param->sm_minor); - if (user_options->quiet == false) event_log_warning (hashcat_ctx, " For modern OpenCL performance, upgrade to hardware that supports"); - if (user_options->quiet == false) event_log_warning (hashcat_ctx, " CUDA compute capability version 5.0 (Maxwell) or higher."); - } + device_param->device_local_mem_type = device_local_mem_type; - if (device_param->kernel_exec_timeout != 0) - { - if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: WARNING! Kernel exec timeout is not disabled.", device_id + 1); - if (user_options->quiet == false) event_log_warning (hashcat_ctx, " This may cause \"CL_OUT_OF_RESOURCES\" or related errors."); - if (user_options->quiet == false) event_log_warning (hashcat_ctx, " To disable the timeout, see: https://hashcat.net/q/timeoutpatch"); - } - } - } - } + // device_max_constant_buffer_size - /** - * activate device - */ + cl_ulong device_max_constant_buffer_size; - opencl_devices_active++; + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof (device_max_constant_buffer_size), &device_max_constant_buffer_size, NULL) == -1) + { + device_param->skipped = true; + continue; } - } - } - } - - backend_ctx->opencl_devices_cnt = opencl_devices_cnt; - backend_ctx->opencl_devices_active = opencl_devices_active; - // all devices combined go into backend_* variables + if (device_local_mem_type == CL_LOCAL) + { + if (device_max_constant_buffer_size < 65536) + { + event_log_error (hashcat_ctx, "* Device #%u: This device's constant buffer size is too small.", device_id + 1); - backend_ctx->backend_devices_cnt = cuda_devices_cnt + hip_devices_cnt + opencl_devices_cnt; - backend_ctx->backend_devices_active = cuda_devices_active + hip_devices_active + opencl_devices_active; + device_param->skipped = true; + } + } - // find duplicate devices + // device_local_mem_size - //if ((cuda_devices_cnt > 0) && (hip_devices_cnt > 0) && (opencl_devices_cnt > 0)) - //{ - // using force here enables both devices, which is the worst possible outcome - // many users force by default, so this is not a good idea + cl_ulong device_local_mem_size = 0; - //if (user_options->force == false) - //{ - backend_ctx_find_alias_devices (hashcat_ctx); - //{ - //} + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof (device_local_mem_size), &device_local_mem_size, NULL) == -1) + { + device_param->skipped = true; + continue; + } - if (backend_ctx->backend_devices_active == 0) - { - event_log_error (hashcat_ctx, "No devices found/left."); + if (device_local_mem_type == CL_LOCAL) + { + if (device_local_mem_size < 32768) + { + event_log_error (hashcat_ctx, "* Device #%u: This device's local mem size is too small.", device_id + 1); - return -1; - } + device_param->skipped = true; + } + } - // now we can calculate the number of parallel running hook threads based on - // the number cpu cores and the number of active compute 
devices - // unless overwritten by the user + // workaround inc! + // allocating all reported local memory causes jit to fail with: SC failed. No reason given. + // if we limit ourself to 32k it seems to work - if (user_options->hook_threads == HOOK_THREADS) - { - const u32 processor_count = hc_get_processor_count (); + if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) + { + if (device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) + { + if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD) + { + device_local_mem_size = MIN (device_local_mem_size, 32768); + } + } + } - const u32 processor_count_cu = CEILDIV (processor_count, backend_ctx->backend_devices_active); // should never reach 0 + device_param->device_local_mem_size = device_local_mem_size; - user_options->hook_threads = processor_count_cu; - } + // handling known bugs on POCL - // additional check to see if the user has chosen a device that is not within the range of available devices (i.e. larger than devices_cnt) + // POCL < 1.9 doesn't like quotes in the include path, see: + // https://github.com/hashcat/hashcat/issues/2950 + // https://github.com/pocl/pocl/issues/962 - if (backend_ctx->backend_devices_filter != (u64) -1) - { - const u64 backend_devices_cnt_mask = ~(((u64) -1 >> backend_ctx->backend_devices_cnt) << backend_ctx->backend_devices_cnt); + // POCL < 1.5 and older LLVM versions are known to fail compiling kernels + // https://github.com/hashcat/hashcat/issues/2344 - if (backend_ctx->backend_devices_filter > backend_devices_cnt_mask) - { - event_log_error (hashcat_ctx, "An invalid device was specified using the --backend-devices parameter."); - event_log_error (hashcat_ctx, "The specified device was higher than the number of available devices (%u).", backend_ctx->backend_devices_cnt); + // we need to inform the user to update - return -1; - } - } + if (opencl_platform_vendor_id == VENDOR_ID_POCL) + { + char *pocl_version_ptr = strstr (opencl_platform_version, "pocl "); + char *llvm_version_ptr = strstr (opencl_platform_version, "LLVM "); - // time or resource intensive operations which we do not run if the corresponding device was skipped by the user + if ((pocl_version_ptr != NULL) && (llvm_version_ptr != NULL)) + { + bool pocl_skip = false; - if (backend_ctx->cuda) - { - // instruction test for cuda devices was replaced with fixed values (see above) + int pocl_maj = 0; + int pocl_min = 0; - /* - CUcontext cuda_context; + int pocl_bug_whitespace_on_path = 0; + int pocl_bug_kernel_compiling_failure = 0; - if (hc_cuCtxCreate (hashcat_ctx, &cuda_context, CU_CTX_SCHED_BLOCKING_SYNC, device_param->cuda_device) == -1) return -1; + const int res1 = sscanf (pocl_version_ptr, "pocl %d.%d", &pocl_maj, &pocl_min); - if (hc_cuCtxSetCurrent (hashcat_ctx, cuda_context) == -1) return -1; + if (res1 == 2) + { + const int pocl_version = (pocl_maj * 100) + pocl_min; - #define RUN_INSTRUCTION_CHECKS() \ - device_param->has_add = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"add.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \ - device_param->has_addc = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"addc.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \ - device_param->has_sub = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"sub.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \ - device_param->has_subc = cuda_test_instruction (hashcat_ctx, 
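// ---------------------------------------------------------------------------
// Editorial aside, not part of the patch: the default hook-thread count above
// is just the host CPU count divided, rounding up, across the active backend
// devices. A tiny sketch of that ceiling division; the names and the
// CEILDIV_EX macro are hypothetical stand-ins for hashcat's own helpers.

typedef unsigned int uint32_hc_t;

#define CEILDIV_EX(a,b) (((a) + (b) - 1) / (b))

static uint32_hc_t default_hook_threads (uint32_hc_t processor_count, uint32_hc_t backend_devices_active)
{
  // backend_devices_active is non-zero here, otherwise device init bailed out earlier
  return CEILDIV_EX (processor_count, backend_devices_active);
}
// ---------------------------------------------------------------------------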
sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"subc.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \ - device_param->has_bfe = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"bfe.u32 %0, 0, 0, 0;\" : \"=r\"(r)); }"); \ - device_param->has_lop3 = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"lop3.b32 %0, 0, 0, 0, 0;\" : \"=r\"(r)); }"); \ - device_param->has_mov64 = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned long long r; unsigned int a; unsigned int b; asm volatile (\"mov.b64 %0, {%1, %2};\" : \"=l\"(r) : \"r\"(a), \"r\"(b)); }"); \ - device_param->has_prmt = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"prmt.b32 %0, 0, 0, 0;\" : \"=r\"(r)); }"); \ + if (pocl_version < 109) + { + if (strchr (folder_config->cpath_real, ' ') != NULL) + { + pocl_skip = true; + pocl_bug_whitespace_on_path = 1; + } - if (backend_devices_idx > 0) - { - hc_device_param_t *device_param_prev = &devices_param[backend_devices_idx - 1]; + if (pocl_version < 105) + { + pocl_skip = true; + pocl_bug_kernel_compiling_failure = 1; + } + } + } - if (is_same_device_type (device_param, device_param_prev) == true) - { - device_param->has_add = device_param_prev->has_add; - device_param->has_addc = device_param_prev->has_addc; - device_param->has_sub = device_param_prev->has_sub; - device_param->has_subc = device_param_prev->has_subc; - device_param->has_bfe = device_param_prev->has_bfe; - device_param->has_lop3 = device_param_prev->has_lop3; - device_param->has_mov64 = device_param_prev->has_mov64; - device_param->has_prmt = device_param_prev->has_prmt; - } - else - { - RUN_INSTRUCTION_CHECKS(); - } - } - else - { - RUN_INSTRUCTION_CHECKS(); - } + int llvm_maj = 0; + int llvm_min = 0; - #undef RUN_INSTRUCTION_CHECKS + const int res2 = sscanf (llvm_version_ptr, "LLVM %d.%d", &llvm_maj, &llvm_min); - if (hc_cuCtxDestroy (hashcat_ctx, cuda_context) == -1) return -1; + if (res2 == 2) + { + const int llvm_version = (llvm_maj * 100) + llvm_min; - */ - } + if (llvm_version < 900) + { + pocl_skip = true; + } + } - if (backend_ctx->hip) - { - // TODO HIP? - // Maybe all devices supported by hip have these instructions guaranteed? 
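// ---------------------------------------------------------------------------
// Editorial aside, not part of the patch: the POCL workaround above encodes
// "major.minor" as major * 100 + minor so the gates read as POCL < 1.05,
// POCL < 1.09 and LLVM < 9.00. A self-contained sketch of that parsing, with
// hypothetical names:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static bool pocl_platform_needs_skip (const char *platform_version, bool path_has_space)
{
  const char *pocl_ptr = strstr (platform_version, "pocl ");
  const char *llvm_ptr = strstr (platform_version, "LLVM ");

  if ((pocl_ptr == NULL) || (llvm_ptr == NULL)) return false;

  int pocl_maj = 0; int pocl_min = 0;
  int llvm_maj = 0; int llvm_min = 0;

  if (sscanf (pocl_ptr, "pocl %d.%d", &pocl_maj, &pocl_min) == 2)
  {
    const int pocl_version = (pocl_maj * 100) + pocl_min;

    if ((pocl_version < 109) && (path_has_space == true)) return true; // include-path quoting bug
    if  (pocl_version < 105)                              return true; // kernel compilation bug
  }

  if (sscanf (llvm_ptr, "LLVM %d.%d", &llvm_maj, &llvm_min) == 2)
  {
    if (((llvm_maj * 100) + llvm_min) < 900) return true; // LLVM back end too old
  }

  return false;
}
// ---------------------------------------------------------------------------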
+ if (pocl_skip == true) + { + if (user_options->force == false) + { + event_log_error (hashcat_ctx, "* Device #%u: Outdated POCL OpenCL driver detected!", device_id + 1); - for (int backend_devices_cnt = 0; backend_devices_cnt < backend_ctx->backend_devices_cnt; backend_devices_cnt++) - { - hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_cnt]; + if (user_options->quiet == false) + { + if (pocl_bug_kernel_compiling_failure == 1) + { + event_log_warning (hashcat_ctx, "This OpenCL driver may fail kernel compilation or produce false negatives."); + } - if (device_param->is_hip == false) continue; + if (pocl_bug_whitespace_on_path == 1) + { + event_log_warning (hashcat_ctx, "Consider moving hashcat to a path with no spaces if you want to use this POCL version."); + } - device_param->has_vadd = true; - device_param->has_vaddc = true; - device_param->has_vadd_co = true; - device_param->has_vaddc_co = true; - device_param->has_vsub = true; - device_param->has_vsubb = true; - device_param->has_vsub_co = true; - device_param->has_vsubb_co = true; - device_param->has_vadd3 = true; - device_param->has_vbfe = true; - device_param->has_vperm = true; - } - } + event_log_warning (hashcat_ctx, "We recommend using a version of POCL >= 1.9"); + event_log_warning (hashcat_ctx, "You can use --force to override, but do not report related errors."); + event_log_warning (hashcat_ctx, NULL); + } - if (backend_ctx->ocl) - { - for (int backend_devices_cnt = 0; backend_devices_cnt < backend_ctx->backend_devices_cnt; backend_devices_cnt++) - { - hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_cnt]; + device_param->skipped = true; + } + } + } + } - if (device_param->is_opencl == false) continue; + char *opencl_device_version_lower = hcstrdup (opencl_device_version); - if (user_options->backend_info == false) - { - // do not ignore in case -I because user expects a value also for skipped devices + lowercase ((u8 *) opencl_device_version_lower, strlen (opencl_device_version_lower)); - if (device_param->skipped == true) continue; - } - - /** - * create context for each device - */ - - cl_context context; + if ((strstr (opencl_device_version_lower, "beignet ")) + || (strstr (opencl_device_version_lower, " beignet")) + || (strstr (opencl_device_version_lower, "mesa ")) + || (strstr (opencl_device_version_lower, " mesa"))) + { + // BEIGNET: https://github.com/hashcat/hashcat/issues/2243 + // MESA: https://github.com/hashcat/hashcat/issues/2269 - /* - cl_context_properties properties[3]; + if (user_options->force == false) + { + event_log_error (hashcat_ctx, "* Device #%u: Unstable OpenCL driver detected!", device_id + 1); - properties[0] = CL_CONTEXT_PLATFORM; - properties[1] = (cl_context_properties) device_param->opencl_platform; - properties[2] = 0; + if (user_options->quiet == false) + { + event_log_warning (hashcat_ctx, "This OpenCL driver may fail kernel compilation or produce false negatives."); + event_log_warning (hashcat_ctx, "You can use --force to override, but do not report related errors."); + event_log_warning (hashcat_ctx, NULL); + } - CL_rc = hc_clCreateContext (hashcat_ctx, properties, 1, &device_param->opencl_device, NULL, NULL, &context); - */ + device_param->skipped = true; + } + } - if (hc_clCreateContext (hashcat_ctx, NULL, 1, &device_param->opencl_device, NULL, NULL, &context) == -1) - { - device_param->skipped = true; - continue; - } + hcfree (opencl_device_version_lower); - /** - * create command-queue - */ + // Since some times we get 
reports from users about not working hashcat, dropping error messages like: + // CL_INVALID_COMMAND_QUEUE and CL_OUT_OF_RESOURCES + // Turns out that this is caused by Intel OpenCL runtime handling their GPU devices + // Disable such devices unless the user forces to use it + // This is successfully workaround with new threading model and new memory management + // Tested on Windows 10 + // OpenCL.Version.: OpenCL C 2.1 + // Driver.Version.: 23.20.16.4973 - cl_command_queue command_queue; + /* + #if !defined (__APPLE__) + if (opencl_device_type & CL_DEVICE_TYPE_GPU) + { + if ((device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK) || (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_BEIGNET)) + { + if (user_options->force == false) + { + if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: Intel's OpenCL runtime (GPU only) is currently broken.", device_id + 1); + if (user_options->quiet == false) event_log_warning (hashcat_ctx, " We are waiting for updated OpenCL drivers from Intel."); + if (user_options->quiet == false) event_log_warning (hashcat_ctx, " You can use --force to override, but do not report related errors."); + if (user_options->quiet == false) event_log_warning (hashcat_ctx, NULL); - if (hc_clCreateCommandQueue (hashcat_ctx, context, device_param->opencl_device, 0, &command_queue) == -1) - { - device_param->skipped = true; - continue; - } + device_param->skipped = true; + } + } + } + #endif // __APPLE__ + */ - // instruction set + // skipped - if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && (device_param->opencl_platform_vendor_id == VENDOR_ID_AMD)) - { - #define RUN_INSTRUCTION_CHECKS() \ - device_param->has_vadd = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADD_U32 %0, vcc, 0, 0;\" : \"=v\"(r1)); }"); \ - device_param->has_vaddc = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADDC_U32 %0, vcc, 0, 0, vcc;\" : \"=v\"(r1)); }"); \ - device_param->has_vadd_co = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADD_CO_U32 %0, vcc, 0, 0;\" : \"=v\"(r1)); }"); \ - device_param->has_vaddc_co = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADDC_CO_U32 %0, vcc, 0, 0, vcc;\" : \"=v\"(r1)); }"); \ - device_param->has_vsub = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_SUB_U32 %0, vcc, 0, 0;\" : \"=v\"(r1)); }"); \ - device_param->has_vsubb = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_SUBB_U32 %0, vcc, 0, 0, vcc;\" : \"=v\"(r1)); }"); \ - device_param->has_vsub_co = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_SUB_CO_U32 %0, vcc, 0, 0;\" : \"=v\"(r1)); }"); \ - device_param->has_vsubb_co = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_SUBB_CO_U32 %0, vcc, 0, 0, vcc;\" : \"=v\"(r1)); }"); \ - device_param->has_vadd3 = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { 
uint r1; __asm__ __volatile__ (\"V_ADD3_U32 %0, 0, 0, 0;\" : \"=v\"(r1)); }"); \ - device_param->has_vbfe = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_BFE_U32 %0, 0, 0, 0;\" : \"=v\"(r1)); }"); \ - device_param->has_vperm = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_PERM_B32 %0, 0, 0, 0;\" : \"=v\"(r1)); }"); \ + if ((backend_ctx->backend_devices_filter & (1ULL << device_id)) == 0) + { + device_param->skipped = true; + } - if (backend_devices_idx > 0) + if ((backend_ctx->opencl_device_types_filter & (opencl_device_type)) == 0) { - hc_device_param_t *device_param_prev = &devices_param[backend_devices_idx - 1]; + device_param->skipped = true; + } - if (is_same_device_type (device_param, device_param_prev) == true) - { - device_param->has_vadd = device_param_prev->has_vadd; - device_param->has_vaddc = device_param_prev->has_vaddc; - device_param->has_vadd_co = device_param_prev->has_vadd_co; - device_param->has_vaddc_co = device_param_prev->has_vaddc_co; - device_param->has_vsub = device_param_prev->has_vsub; - device_param->has_vsubb = device_param_prev->has_vsubb; - device_param->has_vsub_co = device_param_prev->has_vsub_co; - device_param->has_vsubb_co = device_param_prev->has_vsubb_co; - device_param->has_vadd3 = device_param_prev->has_vadd3; - device_param->has_vbfe = device_param_prev->has_vbfe; - device_param->has_vperm = device_param_prev->has_vperm; - } - else + #if defined (__APPLE__) + if (opencl_device_type & CL_DEVICE_TYPE_GPU) + { + //if (user_options->force == false) + if (device_param->skipped == false) { - RUN_INSTRUCTION_CHECKS(); + if (user_options->quiet == false) + { + event_log_warning (hashcat_ctx, "* Device #%u: Apple's OpenCL drivers (GPU) are known to be unreliable.", device_id + 1); + event_log_warning (hashcat_ctx, " You have been warned."); + //event_log_warning (hashcat_ctx, " There are many reports of false negatives and other issues."); + //event_log_warning (hashcat_ctx, " This is not a hashcat issue. Other projects report issues with these drivers."); + //event_log_warning (hashcat_ctx, " You can use --force to override, but do not report related errors. 
You have been warned."); + event_log_warning (hashcat_ctx, NULL); + } + + //device_param->skipped = true; } } - else + #endif // __APPLE__ + + // driver_version + + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DRIVER_VERSION, 0, NULL, ¶m_value_size) == -1) { - RUN_INSTRUCTION_CHECKS(); + device_param->skipped = true; + continue; } - #undef RUN_INSTRUCTION_CHECKS - } + char *opencl_driver_version = (char *) hcmalloc (param_value_size); - if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && (device_param->opencl_platform_vendor_id == VENDOR_ID_NV)) - { - // replaced with fixed values see non time intensive section above + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DRIVER_VERSION, param_value_size, opencl_driver_version, NULL) == -1) + { + device_param->skipped = true; + hcfree (opencl_driver_version); + continue; + } - /* - #define RUN_INSTRUCTION_CHECKS() \ - device_param->has_add = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"add.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \ - device_param->has_addc = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"addc.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \ - device_param->has_sub = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"sub.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \ - device_param->has_subc = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"subc.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \ - device_param->has_bfe = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"bfe.u32 %0, 0, 0, 0;\" : \"=r\"(r)); }"); \ - device_param->has_lop3 = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"lop3.b32 %0, 0, 0, 0, 0;\" : \"=r\"(r)); }"); \ - device_param->has_mov64 = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { ulong r; uint a; uint b; asm volatile (\"mov.b64 %0, {%1, %2};\" : \"=l\"(r) : \"r\"(a), \"r\"(b)); }"); \ - device_param->has_prmt = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"prmt.b32 %0, 0, 0, 0;\" : \"=r\"(r)); }"); \ + device_param->opencl_driver_version = opencl_driver_version; - if (backend_devices_idx > 0) + // vendor specific + + if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU) { - hc_device_param_t *device_param_prev = &devices_param[backend_devices_idx - 1]; + #if defined (__APPLE__) + if (device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) + { + if (device_param->skipped == false) + { + need_iokit = true; + } + } + #endif - if (is_same_device_type (device_param, device_param_prev) == true) + #if defined (__linux__) + need_sysfs_cpu = true; + #endif + } + + if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) + { + if ((device_param->opencl_platform_vendor_id == VENDOR_ID_AMD) && (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)) { - device_param->has_add = device_param_prev->has_add; - device_param->has_addc = device_param_prev->has_addc; - device_param->has_sub = device_param_prev->has_sub; - device_param->has_subc = device_param_prev->has_subc; - device_param->has_bfe = 
device_param_prev->has_bfe; - device_param->has_lop3 = device_param_prev->has_lop3; - device_param->has_mov64 = device_param_prev->has_mov64; - device_param->has_prmt = device_param_prev->has_prmt; + need_adl = true; + + #if defined (__linux__) + need_sysfs_amdgpu = true; + #endif } - else + + if ((device_param->opencl_platform_vendor_id == VENDOR_ID_NV) && (device_param->opencl_device_vendor_id == VENDOR_ID_NV)) { - RUN_INSTRUCTION_CHECKS(); + need_nvml = true; + + #if defined (_WIN) || defined (__CYGWIN__) + need_nvapi = true; + #endif } } - else + + if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU) { - RUN_INSTRUCTION_CHECKS(); + // they like this + + device_param->kernel_preferred_wgs_multiple = 1; } - #undef RUN_INSTRUCTION_CHECKS - */ - } + if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) + { + if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)) + { + // from https://www.khronos.org/registry/OpenCL/extensions/amd/cl_amd_device_attribute_query.txt + #define CL_DEVICE_WAVEFRONT_WIDTH_AMD 0x4043 - // available device memory - // This test causes an GPU memory usage spike. - // In case there are multiple hashcat instances starting at the same time this will cause GPU out of memory errors which otherwise would not exist. - // We will simply not run it if that device was skipped by the user. + // crazy, but apple does not support this query! + // the best alternative is "Preferred work group size multiple (kernel)", but requires to specify a kernel. + // so we will set kernel_preferred_wgs_multiple intentionally to 0 because otherwise it it set to 8 by default. + // we then assign the value kernel_preferred_wgs_multiple a small kernel like bzero after test if this was set to 0. - #define MAX_ALLOC_CHECKS_CNT 8192 - #define MAX_ALLOC_CHECKS_SIZE (64 * 1024 * 1024) + device_param->kernel_preferred_wgs_multiple = 0; + } - device_param->device_available_mem = device_param->device_global_mem - MAX_ALLOC_CHECKS_SIZE; + if ((device_param->opencl_platform_vendor_id == VENDOR_ID_AMD) && (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)) + { + cl_uint device_wavefront_width_amd; - if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) - { - // OK, so the problem here is the following: - // There's just CL_DEVICE_GLOBAL_MEM_SIZE to ask OpenCL about the total memory on the device, - // but there's no way to ask for available memory on the device. - // In combination, most OpenCL runtimes implementation of clCreateBuffer() - // are doing so called lazy memory allocation on the device. 
- // Now, if the user has X11 (or a game or anything that takes a lot of GPU memory) - // running on the host we end up with an error type of this: - // clEnqueueNDRangeKernel(): CL_MEM_OBJECT_ALLOCATION_FAILURE - // The clEnqueueNDRangeKernel() is because of the lazy allocation - // The best way to workaround this problem is if we would be able to ask for available memory, - // The idea here is to try to evaluate available memory by allocating it till it errors + // from https://www.khronos.org/registry/OpenCL/extensions/amd/cl_amd_device_attribute_query.txt + #define CL_DEVICE_WAVEFRONT_WIDTH_AMD 0x4043 - cl_mem *tmp_device = (cl_mem *) hccalloc (MAX_ALLOC_CHECKS_CNT, sizeof (cl_mem)); + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_WAVEFRONT_WIDTH_AMD, sizeof (device_wavefront_width_amd), &device_wavefront_width_amd, NULL) == -1) + { + device_param->skipped = true; + continue; + } - u64 c; + device_param->kernel_preferred_wgs_multiple = device_wavefront_width_amd; - for (c = 0; c < MAX_ALLOC_CHECKS_CNT; c++) - { - if (((c + 1 + 1) * MAX_ALLOC_CHECKS_SIZE) >= device_param->device_global_mem) break; - - cl_int CL_err; + cl_device_topology_amd amdtopo; - OCL_PTR *ocl = (OCL_PTR *) backend_ctx->ocl; + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_TOPOLOGY_AMD, sizeof (amdtopo), &amdtopo, NULL) == -1) + { + device_param->skipped = true; + continue; + } - tmp_device[c] = ocl->clCreateBuffer (context, CL_MEM_READ_WRITE, MAX_ALLOC_CHECKS_SIZE, NULL, &CL_err); + device_param->pcie_domain = 0; // no attribute to query + device_param->pcie_bus = amdtopo.pcie.bus; + device_param->pcie_device = amdtopo.pcie.device; + device_param->pcie_function = amdtopo.pcie.function; + } - if (CL_err != CL_SUCCESS) + if ((device_param->opencl_platform_vendor_id == VENDOR_ID_NV) && (device_param->opencl_device_vendor_id == VENDOR_ID_NV)) { - c--; - - break; - } + cl_uint device_warp_size_nv; - // transfer only a few byte should be enough to force the runtime to actually allocate the memory + // from deps/OpenCL-Headers/CL/cl_ext.h + #define CL_DEVICE_WARP_SIZE_NV 0x4003 - u8 tmp_host[8]; + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_WARP_SIZE_NV, sizeof (device_warp_size_nv), &device_warp_size_nv, NULL) == -1) + { + device_param->skipped = true; + continue; + } - if (ocl->clEnqueueReadBuffer (command_queue, tmp_device[c], CL_TRUE, 0, sizeof (tmp_host), tmp_host, 0, NULL, NULL) != CL_SUCCESS) break; + device_param->kernel_preferred_wgs_multiple = device_warp_size_nv; - if (ocl->clEnqueueWriteBuffer (command_queue, tmp_device[c], CL_TRUE, 0, sizeof (tmp_host), tmp_host, 0, NULL, NULL) != CL_SUCCESS) break; + cl_uint pci_bus_id_nv; // is cl_uint the right type for them?? 
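// ---------------------------------------------------------------------------
// Editorial aside, not part of the patch: OpenCL only exposes the total memory
// (CL_DEVICE_GLOBAL_MEM_SIZE), not the free memory, and clCreateBuffer () is
// typically lazy, so the probe above keeps allocating and touching 64 MiB
// chunks until an allocation or transfer fails. A condensed sketch of that
// idea against the stock OpenCL API; hashcat routes these calls through its
// ocl function table and hc_* wrappers.

#include <CL/cl.h>

#define PROBE_CHUNK_SIZE (64ULL * 1024 * 1024)
#define PROBE_CHUNK_CNT  8192

static cl_ulong probe_available_mem (cl_context context, cl_command_queue queue, cl_ulong global_mem)
{
  static cl_mem chunks[PROBE_CHUNK_CNT];

  cl_ulong c;

  for (c = 0; c < PROBE_CHUNK_CNT; c++)
  {
    if (((c + 2) * PROBE_CHUNK_SIZE) >= global_mem) break; // keep a safety margin

    cl_int err = CL_SUCCESS;

    chunks[c] = clCreateBuffer (context, CL_MEM_READ_WRITE, PROBE_CHUNK_SIZE, NULL, &err);

    if (err != CL_SUCCESS) break;

    // touch a few bytes so the runtime actually backs the buffer with device memory

    unsigned char tmp[8] = { 0 };

    if (clEnqueueWriteBuffer (queue, chunks[c], CL_TRUE, 0, sizeof (tmp), tmp, 0, NULL, NULL) != CL_SUCCESS)
    {
      clReleaseMemObject (chunks[c]);

      break;
    }
  }

  const cl_ulong available = (c > 0) ? c * PROBE_CHUNK_SIZE : PROBE_CHUNK_SIZE;

  while (c > 0) clReleaseMemObject (chunks[--c]); // release the probe allocations again

  return available;
}
// ---------------------------------------------------------------------------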
+ cl_uint pci_slot_id_nv; - if (ocl->clEnqueueReadBuffer (command_queue, tmp_device[c], CL_TRUE, MAX_ALLOC_CHECKS_SIZE - sizeof (tmp_host), sizeof (tmp_host), tmp_host, 0, NULL, NULL) != CL_SUCCESS) break; + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_PCI_BUS_ID_NV, sizeof (pci_bus_id_nv), &pci_bus_id_nv, NULL) == -1) + { + device_param->skipped = true; + continue; + } - if (ocl->clEnqueueWriteBuffer (command_queue, tmp_device[c], CL_TRUE, MAX_ALLOC_CHECKS_SIZE - sizeof (tmp_host), sizeof (tmp_host), tmp_host, 0, NULL, NULL) != CL_SUCCESS) break; - } + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_PCI_SLOT_ID_NV, sizeof (pci_slot_id_nv), &pci_slot_id_nv, NULL) == -1) + { + device_param->skipped = true; + continue; + } - device_param->device_available_mem = MAX_ALLOC_CHECKS_SIZE; + device_param->pcie_domain = 0; // no attribute to query + device_param->pcie_bus = (u8) (pci_bus_id_nv); + device_param->pcie_device = (u8) (pci_slot_id_nv >> 3); + device_param->pcie_function = (u8) (pci_slot_id_nv & 7); - if (c > 0) - { - device_param->device_available_mem *= c; - } + int sm_minor = 0; + int sm_major = 0; - // clean up + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV, sizeof (sm_minor), &sm_minor, NULL) == -1) + { + device_param->skipped = true; + continue; + } - for (c = 0; c < MAX_ALLOC_CHECKS_CNT; c++) - { - if (((c + 1 + 1) * MAX_ALLOC_CHECKS_SIZE) >= device_param->device_global_mem) break; + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, sizeof (sm_major), &sm_major, NULL) == -1) + { + device_param->skipped = true; + continue; + } - if (tmp_device[c] != NULL) - { - if (hc_clReleaseMemObject (hashcat_ctx, tmp_device[c]) == -1) return -1; - } - } + device_param->sm_minor = sm_minor; + device_param->sm_major = sm_major; - hcfree (tmp_device); - } + cl_uint kernel_exec_timeout = 0; - hc_clReleaseCommandQueue (hashcat_ctx, command_queue); + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV, sizeof (kernel_exec_timeout), &kernel_exec_timeout, NULL) == -1) + { + device_param->skipped = true; + continue; + } - hc_clReleaseContext (hashcat_ctx, context); + device_param->kernel_exec_timeout = kernel_exec_timeout; - if (device_param->device_host_unified_memory == 1) - { - // so, we actually have only half the memory because we need the same buffers on host side + // CPU burning loop damper + // Value is given as number between 0-100 + // By default 8% - device_param->device_available_mem /= 2; - } - } - } + device_param->spin_damp = (double) user_options->spin_damp / 100; - backend_ctx->target_msec = TARGET_MSEC_PROFILE[user_options->workload_profile - 1]; + if (user_options->stdout_flag == false) + { + // recommend CUDA - backend_ctx->need_adl = need_adl; - backend_ctx->need_nvml = need_nvml; - backend_ctx->need_nvapi = need_nvapi; - backend_ctx->need_sysfs_amdgpu = need_sysfs_amdgpu; - backend_ctx->need_sysfs_cpu = need_sysfs_cpu; - backend_ctx->need_iokit = need_iokit; + if ((backend_ctx->cuda == NULL) || (backend_ctx->nvrtc == NULL)) + { + if (user_options->backend_ignore_cuda == false) + { + if (backend_ctx->rc_cuda_init == -1) + { + event_log_warning (hashcat_ctx, "Failed to initialize NVIDIA CUDA library."); + event_log_warning (hashcat_ctx, NULL); + } + else + { + event_log_warning (hashcat_ctx, "Successfully initialized NVIDIA CUDA library."); + event_log_warning 
(hashcat_ctx, NULL); + } - backend_ctx->comptime = comptime; + if (backend_ctx->rc_nvrtc_init == -1) + { + event_log_warning (hashcat_ctx, "Failed to initialize NVIDIA RTC library."); + event_log_warning (hashcat_ctx, NULL); + } + else + { + event_log_warning (hashcat_ctx, "Successfully initialized NVIDIA RTC library."); + event_log_warning (hashcat_ctx, NULL); + } - return 0; -} + event_log_warning (hashcat_ctx, "* Device #%u: CUDA SDK Toolkit not installed or incorrectly installed.", device_id + 1); + event_log_warning (hashcat_ctx, " CUDA SDK Toolkit required for proper device support and utilization."); + event_log_warning (hashcat_ctx, " Falling back to OpenCL runtime."); -void backend_ctx_devices_destroy (hashcat_ctx_t *hashcat_ctx) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + event_log_warning (hashcat_ctx, NULL); + } + } + } + } + } - if (backend_ctx->enabled == false) return; + // instruction set - for (u32 opencl_platforms_idx = 0; opencl_platforms_idx < backend_ctx->opencl_platforms_cnt; opencl_platforms_idx++) - { - hcfree (backend_ctx->opencl_platforms_devices[opencl_platforms_idx]); - hcfree (backend_ctx->opencl_platforms_name[opencl_platforms_idx]); - hcfree (backend_ctx->opencl_platforms_vendor[opencl_platforms_idx]); - hcfree (backend_ctx->opencl_platforms_version[opencl_platforms_idx]); - } + // fixed values works only for nvidia devices + // dynamical values for amd see time intensive section below - for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++) - { - hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx]; + if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && (device_param->opencl_platform_vendor_id == VENDOR_ID_NV)) + { + const int sm = (device_param->sm_major * 10) + device_param->sm_minor; - hcfree (device_param->device_name); + device_param->has_add = (sm >= 12) ? true : false; + device_param->has_addc = (sm >= 12) ? true : false; + device_param->has_sub = (sm >= 12) ? true : false; + device_param->has_subc = (sm >= 12) ? true : false; + device_param->has_bfe = (sm >= 20) ? true : false; + device_param->has_lop3 = (sm >= 50) ? true : false; + device_param->has_mov64 = (sm >= 10) ? true : false; + device_param->has_prmt = (sm >= 20) ? 
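// ---------------------------------------------------------------------------
// Editorial aside, not part of the patch: for NVIDIA OpenCL devices the
// available PTX instructions are derived from the compute capability rather
// than probed with test kernels. A minimal sketch of that derivation, using
// illustrative names; the thresholds mirror the assignments above.

#include <stdbool.h>

typedef struct
{
  bool has_bfe;   // bit field extract, sm_20 (Fermi) and newer
  bool has_lop3;  // 3-input logic op,  sm_50 (Maxwell) and newer
  bool has_prmt;  // byte permute,      sm_20 (Fermi) and newer

} nv_isa_caps_t;

static nv_isa_caps_t nv_isa_caps_from_cc (const int sm_major, const int sm_minor)
{
  const int sm = (sm_major * 10) + sm_minor;

  nv_isa_caps_t caps;

  caps.has_bfe  = (sm >= 20);
  caps.has_lop3 = (sm >= 50);
  caps.has_prmt = (sm >= 20);

  return caps;
}
// ---------------------------------------------------------------------------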
true : false; + } - if (device_param->is_opencl == true) - { - hcfree (device_param->opencl_driver_version); - hcfree (device_param->opencl_device_version); - hcfree (device_param->opencl_device_c_version); - hcfree (device_param->opencl_device_vendor); - } - } + // common driver check - backend_ctx->backend_devices_cnt = 0; - backend_ctx->backend_devices_active = 0; - backend_ctx->cuda_devices_cnt = 0; - backend_ctx->cuda_devices_active = 0; - backend_ctx->hip_devices_cnt = 0; - backend_ctx->hip_devices_active = 0; - backend_ctx->opencl_devices_cnt = 0; - backend_ctx->opencl_devices_active = 0; + if (device_param->skipped == false) + { + if ((user_options->force == false) && (user_options->backend_info == false)) + { + if (opencl_device_type & CL_DEVICE_TYPE_CPU) + { + if (device_param->opencl_platform_vendor_id == VENDOR_ID_INTEL_SDK) + { + bool intel_warn = false; - backend_ctx->need_adl = false; - backend_ctx->need_nvml = false; - backend_ctx->need_nvapi = false; - backend_ctx->need_sysfs_amdgpu = false; - backend_ctx->need_sysfs_cpu = false; - backend_ctx->need_iokit = false; -} + // Intel OpenCL runtime 18 -void backend_ctx_devices_sync_tuning (hashcat_ctx_t *hashcat_ctx) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - hashconfig_t *hashconfig = hashcat_ctx->hashconfig; + int opencl_driver1 = 0; + int opencl_driver2 = 0; + int opencl_driver3 = 0; + int opencl_driver4 = 0; - if (backend_ctx->enabled == false) return; + const int res18 = sscanf (device_param->opencl_driver_version, "%d.%d.%d.%d", &opencl_driver1, &opencl_driver2, &opencl_driver3, &opencl_driver4); - for (int backend_devices_cnt_src = 0; backend_devices_cnt_src < backend_ctx->backend_devices_cnt; backend_devices_cnt_src++) - { - hc_device_param_t *device_param_src = &backend_ctx->devices_param[backend_devices_cnt_src]; + if (res18 == 4) + { + // so far all versions 18 are ok + } + else + { + // Intel OpenCL runtime 16 - if (device_param_src->skipped == true) continue; - if (device_param_src->skipped_warning == true) continue; + float opencl_version = 0; + int opencl_build = 0; - for (int backend_devices_cnt_dst = backend_devices_cnt_src + 1; backend_devices_cnt_dst < backend_ctx->backend_devices_cnt; backend_devices_cnt_dst++) - { - hc_device_param_t *device_param_dst = &backend_ctx->devices_param[backend_devices_cnt_dst]; + const int res16 = sscanf (device_param->opencl_device_version, "OpenCL %f (Build %d)", &opencl_version, &opencl_build); - if (device_param_dst->skipped == true) continue; - if (device_param_dst->skipped_warning == true) continue; + if (res16 == 2) + { + if (opencl_build < 25) intel_warn = true; + } + } - if (is_same_device_type (device_param_src, device_param_dst) == false) continue; + if (intel_warn == true) + { + event_log_error (hashcat_ctx, "* Device #%u: Outdated or broken Intel OpenCL runtime '%s' detected!", device_id + 1, device_param->opencl_driver_version); - device_param_dst->kernel_accel = device_param_src->kernel_accel; - device_param_dst->kernel_loops = device_param_src->kernel_loops; - device_param_dst->kernel_threads = device_param_src->kernel_threads; - - const u32 hardware_power = ((hashconfig->opts_type & OPTS_TYPE_MP_MULTI_DISABLE) ? 
1 : device_param_dst->device_processors) * device_param_dst->kernel_threads; - - device_param_dst->hardware_power = hardware_power; + event_log_warning (hashcat_ctx, "You are STRONGLY encouraged to use the officially supported runtime."); + event_log_warning (hashcat_ctx, "See hashcat.net for the officially supported Intel OpenCL runtime."); + event_log_warning (hashcat_ctx, "See also: https://hashcat.net/faq/wrongdriver"); + event_log_warning (hashcat_ctx, "You can use --force to override this, but do not report related errors."); + event_log_warning (hashcat_ctx, NULL); - const u32 kernel_power = device_param_dst->hardware_power * device_param_dst->kernel_accel; + device_param->skipped = true; + continue; + } + } + } + else if (opencl_device_type & CL_DEVICE_TYPE_GPU) + { + if (device_param->opencl_platform_vendor_id == VENDOR_ID_AMD) + { + bool amd_warn = true; - device_param_dst->kernel_power = kernel_power; - } - } -} + #if defined (__linux__) + // AMDGPU-PRO Driver 16.40 and higher + if (strtoul (device_param->opencl_driver_version, NULL, 10) >= 2117) amd_warn = false; + // AMDGPU-PRO Driver 16.50 is known to be broken + if (strtoul (device_param->opencl_driver_version, NULL, 10) == 2236) amd_warn = true; + // AMDGPU-PRO Driver 16.60 is known to be broken + if (strtoul (device_param->opencl_driver_version, NULL, 10) == 2264) amd_warn = true; + // AMDGPU-PRO Driver 17.10 is known to be broken + if (strtoul (device_param->opencl_driver_version, NULL, 10) == 2348) amd_warn = true; + // AMDGPU-PRO Driver 17.20 (2416) is fine, doesn't need check will match >= 2117 + #elif defined (_WIN) + // AMD Radeon Software 14.9 and higher, should be updated to 15.12 + if (strtoul (device_param->opencl_driver_version, NULL, 10) >= 1573) amd_warn = false; + #else + // we have no information about other os + if (amd_warn == true) amd_warn = false; + #endif -void backend_ctx_devices_update_power (hashcat_ctx_t *hashcat_ctx) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - status_ctx_t *status_ctx = hashcat_ctx->status_ctx; - user_options_extra_t *user_options_extra = hashcat_ctx->user_options_extra; - user_options_t *user_options = hashcat_ctx->user_options; + if (amd_warn == true) + { + event_log_error (hashcat_ctx, "* Device #%u: Outdated or broken AMD driver '%s' detected!", device_id + 1, device_param->opencl_driver_version); - if (backend_ctx->enabled == false) return; + event_log_warning (hashcat_ctx, "You are STRONGLY encouraged to use the officially supported driver."); + event_log_warning (hashcat_ctx, "See hashcat.net for officially supported AMD drivers."); + event_log_warning (hashcat_ctx, "See also: https://hashcat.net/faq/wrongdriver"); + event_log_warning (hashcat_ctx, "You can use --force to override this, but do not report related errors."); + event_log_warning (hashcat_ctx, NULL); - u32 kernel_power_all = 0; + device_param->skipped = true; + continue; + } + } - for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++) - { - hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx]; + if (device_param->opencl_platform_vendor_id == VENDOR_ID_NV) + { + int nv_warn = true; - if (device_param->skipped == true) continue; - if (device_param->skipped_warning == true) continue; + int version_maj = 0; + int version_min = 0; - kernel_power_all += device_param->kernel_power; - } + const int r = sscanf (device_param->opencl_driver_version, "%d.%d", &version_maj, &version_min); - backend_ctx->kernel_power_all 
= kernel_power_all; + if (r == 2) + { + // nvidia 441.x looks ok - /* - * Inform user about possible slow speeds - */ + if (version_maj == 440) + { + if (version_min >= 64) + { + nv_warn = false; + } + } + else + { + // unknown version scheme, probably new driver version - if ((user_options_extra->wordlist_mode == WL_MODE_FILE) || (user_options_extra->wordlist_mode == WL_MODE_MASK)) - { - if (status_ctx->words_base < kernel_power_all) - { - if (user_options->quiet == false) - { - clear_prompt (hashcat_ctx); + nv_warn = false; + } + } + else + { + // unknown version scheme, probably new driver version - event_log_advice (hashcat_ctx, "The wordlist or mask that you are using is too small."); - event_log_advice (hashcat_ctx, "This means that hashcat cannot use the full parallel power of your device(s)."); - event_log_advice (hashcat_ctx, "Unless you supply more work, your cracking speed will drop."); - event_log_advice (hashcat_ctx, "For tips on supplying more work, see: https://hashcat.net/faq/morework"); - event_log_advice (hashcat_ctx, NULL); - } - } - } -} + nv_warn = false; + } -void backend_ctx_devices_kernel_loops (hashcat_ctx_t *hashcat_ctx) -{ - combinator_ctx_t *combinator_ctx = hashcat_ctx->combinator_ctx; - hashconfig_t *hashconfig = hashcat_ctx->hashconfig; - hashes_t *hashes = hashcat_ctx->hashes; - mask_ctx_t *mask_ctx = hashcat_ctx->mask_ctx; - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - straight_ctx_t *straight_ctx = hashcat_ctx->straight_ctx; - user_options_t *user_options = hashcat_ctx->user_options; - user_options_extra_t *user_options_extra = hashcat_ctx->user_options_extra; + if (nv_warn == true) + { + event_log_warning (hashcat_ctx, "* Device #%u: Outdated or broken NVIDIA driver '%s' detected!", device_id + 1, device_param->opencl_driver_version); + event_log_warning (hashcat_ctx, NULL); - if (backend_ctx->enabled == false) return; + event_log_warning (hashcat_ctx, "You are STRONGLY encouraged to use the officially supported driver."); + event_log_warning (hashcat_ctx, "See hashcat's homepage for officially supported NVIDIA drivers."); + event_log_warning (hashcat_ctx, "See also: https://hashcat.net/faq/wrongdriver"); + event_log_warning (hashcat_ctx, "You can use --force to override this, but do not report related errors."); + event_log_warning (hashcat_ctx, NULL); - for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++) - { - hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx]; + device_param->skipped = true; + continue; + } - if (device_param->skipped == true) continue; - if (device_param->skipped_warning == true) continue; + if (device_param->sm_major < 5) + { + if (user_options->quiet == false) + { + event_log_warning (hashcat_ctx, "* Device #%u: This hardware has outdated CUDA compute capability (%u.%u).", device_id + 1, device_param->sm_major, device_param->sm_minor); + event_log_warning (hashcat_ctx, " For modern OpenCL performance, upgrade to hardware that supports"); + event_log_warning (hashcat_ctx, " CUDA compute capability version 5.0 (Maxwell) or higher."); + } + } - device_param->kernel_loops_min = device_param->kernel_loops_min_sav; - device_param->kernel_loops_max = device_param->kernel_loops_max_sav; + if (device_param->kernel_exec_timeout != 0) + { + if (user_options->quiet == false) + { + event_log_warning (hashcat_ctx, "* Device #%u: WARNING! 
Kernel exec timeout is not disabled.", device_id + 1); + event_log_warning (hashcat_ctx, " This may cause \"CL_OUT_OF_RESOURCES\" or related errors."); + event_log_warning (hashcat_ctx, " To disable the timeout, see: https://hashcat.net/q/timeoutpatch"); + } + } + } + } + } - if (device_param->kernel_loops_min < device_param->kernel_loops_max) - { - u32 innerloop_cnt = 0; + /** + * activate device + */ - if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) - { - if (user_options->slow_candidates == true) - { - innerloop_cnt = 1; - } - else - { - if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT) innerloop_cnt = MIN (KERNEL_RULES, (u32) straight_ctx->kernel_rules_cnt); - else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI) innerloop_cnt = MIN (KERNEL_COMBS, (u32) combinator_ctx->combs_cnt); - else if (user_options_extra->attack_kern == ATTACK_KERN_BF) innerloop_cnt = MIN (KERNEL_BFS, (u32) mask_ctx->bfs_cnt); + opencl_devices_active++; } } - else - { - innerloop_cnt = hashes->salts_buf[0].salt_iter; - } - - if ((innerloop_cnt >= device_param->kernel_loops_min) && - (innerloop_cnt <= device_param->kernel_loops_max)) - { - device_param->kernel_loops_max = innerloop_cnt; - } } } -} -static int get_cuda_kernel_wgs (hashcat_ctx_t *hashcat_ctx, CUfunction function, u32 *result) -{ - int max_threads_per_block; + backend_ctx->opencl_devices_cnt = opencl_devices_cnt; + backend_ctx->opencl_devices_active = opencl_devices_active; - if (hc_cuFuncGetAttribute (hashcat_ctx, &max_threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, function) == -1) return -1; + // all devices combined go into backend_* variables - *result = (u32) max_threads_per_block; + backend_ctx->backend_devices_cnt = cuda_devices_cnt + hip_devices_cnt + metal_devices_cnt + opencl_devices_cnt; + backend_ctx->backend_devices_active = cuda_devices_active + hip_devices_active + metal_devices_active + opencl_devices_active; - return 0; -} + // find duplicate devices -static int get_cuda_kernel_local_mem_size (hashcat_ctx_t *hashcat_ctx, CUfunction function, u64 *result) -{ - int shared_size_bytes; + //if ((cuda_devices_cnt > 0) && (hip_devices_cnt > 0) && (opencl_devices_cnt > 0)) + //{ + // using force here enables both devices, which is the worst possible outcome + // many users force by default, so this is not a good idea - if (hc_cuFuncGetAttribute (hashcat_ctx, &shared_size_bytes, CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, function) == -1) return -1; + //if (user_options->force == false) + //{ + backend_ctx_find_alias_devices (hashcat_ctx); + //{ + //} - *result = (u64) shared_size_bytes; + if (backend_ctx->backend_devices_active == 0) + { + event_log_error (hashcat_ctx, "No devices found/left."); - return 0; -} + return -1; + } -static int get_hip_kernel_wgs (hashcat_ctx_t *hashcat_ctx, hipFunction_t function, u32 *result) -{ - int max_threads_per_block; + // now we can calculate the number of parallel running hook threads based on + // the number cpu cores and the number of active compute devices + // unless overwritten by the user - if (hc_hipFuncGetAttribute (hashcat_ctx, &max_threads_per_block, HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, function) == -1) return -1; + if (user_options->hook_threads == HOOK_THREADS) + { + const u32 processor_count = hc_get_processor_count (); - *result = (u32) max_threads_per_block; + const u32 processor_count_cu = CEILDIV (processor_count, backend_ctx->backend_devices_active); // should never reach 0 - return 0; -} + user_options->hook_threads = processor_count_cu; + } 
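For reference, a minimal standalone sketch (not part of this patch) of the integer math used in the hunk above: the hook-thread count is the CPU core count divided, rounding up, across the active backend devices, and the --backend-devices filter is validated against a mask with one bit per existing device. CEILDIV mirrors hashcat's ceiling-division macro; the core/device counts and filter values below are made-up inputs.

/*
 * Standalone sketch, not hashcat code. CEILDIV mirrors hashcat's macro of the
 * same name; the mask expression matches the hunk above. All input values are
 * made up for illustration.
 */

#include <stdio.h>
#include <stdint.h>

#define CEILDIV(a,b) (((a) + (b) - 1) / (b))

int main (void)
{
  // hook threads: spread the CPU cores evenly over the active compute devices

  const uint32_t processor_count        = 16;
  const uint32_t backend_devices_active = 3;

  const uint32_t hook_threads = CEILDIV (processor_count, backend_devices_active); // 6

  // --backend-devices check: build a mask with the lowest backend_devices_cnt
  // bits set; any filter bit above that range refers to a non-existent device

  const uint32_t backend_devices_cnt = 3;

  const uint64_t backend_devices_cnt_mask = ~(((uint64_t) -1 >> backend_devices_cnt) << backend_devices_cnt); // 0b111

  const uint64_t filter_ok  = 0x5; // devices #1 and #3
  const uint64_t filter_bad = 0x9; // device #4 does not exist

  printf ("hook_threads = %u\n", hook_threads);
  printf ("filter 0x%llx valid: %d\n", (unsigned long long) filter_ok,  filter_ok  <= backend_devices_cnt_mask);
  printf ("filter 0x%llx valid: %d\n", (unsigned long long) filter_bad, filter_bad <= backend_devices_cnt_mask);

  return 0;
}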
-static int get_hip_kernel_local_mem_size (hashcat_ctx_t *hashcat_ctx, hipFunction_t function, u64 *result) -{ - int shared_size_bytes; + // additional check to see if the user has chosen a device that is not within the range of available devices (i.e. larger than devices_cnt) - if (hc_hipFuncGetAttribute (hashcat_ctx, &shared_size_bytes, HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, function) == -1) return -1; + if (backend_ctx->backend_devices_filter != (u64) -1) + { + const u64 backend_devices_cnt_mask = ~(((u64) -1 >> backend_ctx->backend_devices_cnt) << backend_ctx->backend_devices_cnt); - *result = (u64) shared_size_bytes; + if (backend_ctx->backend_devices_filter > backend_devices_cnt_mask) + { + event_log_error (hashcat_ctx, "An invalid device was specified using the --backend-devices parameter."); + event_log_error (hashcat_ctx, "The specified device was higher than the number of available devices (%u).", backend_ctx->backend_devices_cnt); - return 0; -} + return -1; + } + } -static int get_opencl_kernel_wgs (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_kernel kernel, u32 *result) -{ - size_t work_group_size = 0; - - if (hc_clGetKernelWorkGroupInfo (hashcat_ctx, kernel, device_param->opencl_device, CL_KERNEL_WORK_GROUP_SIZE, sizeof (work_group_size), &work_group_size, NULL) == -1) return -1; - - u32 kernel_threads = (u32) work_group_size; - - size_t compile_work_group_size[3] = { 0, 0, 0 }; - - if (hc_clGetKernelWorkGroupInfo (hashcat_ctx, kernel, device_param->opencl_device, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, sizeof (compile_work_group_size), &compile_work_group_size, NULL) == -1) return -1; - - const size_t cwgs_total = compile_work_group_size[0] * compile_work_group_size[1] * compile_work_group_size[2]; + // time or resource intensive operations which we do not run if the corresponding device was skipped by the user - if (cwgs_total > 0) + if (backend_ctx->cuda) { - kernel_threads = MIN (kernel_threads, (u32) cwgs_total); - } - - *result = kernel_threads; - - return 0; -} - -static int get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_kernel kernel, u32 *result) -{ - size_t preferred_work_group_size_multiple = 0; - - if (hc_clGetKernelWorkGroupInfo (hashcat_ctx, kernel, device_param->opencl_device, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof (preferred_work_group_size_multiple), &preferred_work_group_size_multiple, NULL) == -1) return -1; - - *result = (u32) preferred_work_group_size_multiple; - - return 0; -} - -static int get_opencl_kernel_local_mem_size (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_kernel kernel, u64 *result) -{ - cl_ulong local_mem_size = 0; + // instruction test for cuda devices was replaced with fixed values (see above) - if (hc_clGetKernelWorkGroupInfo (hashcat_ctx, kernel, device_param->opencl_device, CL_KERNEL_LOCAL_MEM_SIZE, sizeof (local_mem_size), &local_mem_size, NULL) == -1) return -1; + /* + CUcontext cuda_context; - *result = local_mem_size; + if (hc_cuCtxCreate (hashcat_ctx, &cuda_context, CU_CTX_SCHED_BLOCKING_SYNC, device_param->cuda_device) == -1) return -1; - return 0; -} + if (hc_cuCtxSetCurrent (hashcat_ctx, cuda_context) == -1) return -1; -static int get_opencl_kernel_dynamic_local_mem_size (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_kernel kernel, u64 *result) -{ - cl_ulong dynamic_local_mem_size = 0; + #define RUN_INSTRUCTION_CHECKS() \ + device_param->has_add = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, 
"__global__ void test () { unsigned int r; asm volatile (\"add.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \ + device_param->has_addc = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"addc.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \ + device_param->has_sub = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"sub.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \ + device_param->has_subc = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"subc.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \ + device_param->has_bfe = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"bfe.u32 %0, 0, 0, 0;\" : \"=r\"(r)); }"); \ + device_param->has_lop3 = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"lop3.b32 %0, 0, 0, 0, 0;\" : \"=r\"(r)); }"); \ + device_param->has_mov64 = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned long long r; unsigned int a; unsigned int b; asm volatile (\"mov.b64 %0, {%1, %2};\" : \"=l\"(r) : \"r\"(a), \"r\"(b)); }"); \ + device_param->has_prmt = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"prmt.b32 %0, 0, 0, 0;\" : \"=r\"(r)); }"); \ - if (hc_clGetKernelWorkGroupInfo (hashcat_ctx, kernel, device_param->opencl_device, CL_KERNEL_LOCAL_MEM_SIZE, sizeof (dynamic_local_mem_size), &dynamic_local_mem_size, NULL) == -1) return -1; + if (backend_devices_idx > 0) + { + hc_device_param_t *device_param_prev = &devices_param[backend_devices_idx - 1]; - // unknown how to query this information in OpenCL - // we therefore reset to zero - // the above call to hc_clGetKernelWorkGroupInfo() is just to avoid compiler warnings + if (is_same_device_type (device_param, device_param_prev) == true) + { + device_param->has_add = device_param_prev->has_add; + device_param->has_addc = device_param_prev->has_addc; + device_param->has_sub = device_param_prev->has_sub; + device_param->has_subc = device_param_prev->has_subc; + device_param->has_bfe = device_param_prev->has_bfe; + device_param->has_lop3 = device_param_prev->has_lop3; + device_param->has_mov64 = device_param_prev->has_mov64; + device_param->has_prmt = device_param_prev->has_prmt; + } + else + { + RUN_INSTRUCTION_CHECKS(); + } + } + else + { + RUN_INSTRUCTION_CHECKS(); + } - dynamic_local_mem_size = 0; + #undef RUN_INSTRUCTION_CHECKS - *result = dynamic_local_mem_size; + if (hc_cuCtxDestroy (hashcat_ctx, cuda_context) == -1) return -1; - return 0; -} + */ + } -static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const char *kernel_name, char *source_file, char *cached_file, const char *build_options_buf, const bool cache_disable, cl_program *opencl_program, CUmodule *cuda_module, hipModule_t *hip_module) -{ - const hashconfig_t *hashconfig = hashcat_ctx->hashconfig; - const folder_config_t *folder_config = hashcat_ctx->folder_config; - const user_options_t *user_options = hashcat_ctx->user_options; + if (backend_ctx->hip) + { + // TODO HIP? + // Maybe all devices supported by hip have these instructions guaranteed? 
- bool cached = true; + for (int backend_devices_cnt = 0; backend_devices_cnt < backend_ctx->backend_devices_cnt; backend_devices_cnt++) + { + hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_cnt]; - if (cache_disable == true) - { - cached = false; - } + if (device_param->is_hip == false) continue; - if (hc_path_read (cached_file) == false) - { - cached = false; + device_param->has_vadd = true; + device_param->has_vaddc = true; + device_param->has_vadd_co = true; + device_param->has_vaddc_co = true; + device_param->has_vsub = true; + device_param->has_vsubb = true; + device_param->has_vsub_co = true; + device_param->has_vsubb_co = true; + device_param->has_vadd3 = true; + device_param->has_vbfe = true; + device_param->has_vperm = true; + } } - if (hc_path_is_empty (cached_file) == true) + #if defined (__APPLE__) + if (backend_ctx->mtl) { - cached = false; - } + for (int backend_devices_cnt = 0; backend_devices_cnt < backend_ctx->backend_devices_cnt; backend_devices_cnt++) + { + hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_cnt]; - /** - * kernel compile or load - */ + if (device_param->is_metal == false) continue; - size_t kernel_lengths_buf = 0; + if (user_options->backend_info == false) + { + // do not ignore in case -I because user expects a value also for skipped devices - size_t *kernel_lengths = &kernel_lengths_buf; + if (device_param->skipped == true) continue; + } - char *kernel_sources_buf = NULL; + /** + * create command-queue + */ - char **kernel_sources = &kernel_sources_buf; + mtl_command_queue command_queue; - if (cached == false) - { - #if defined (DEBUG) - if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s not found in cache. Please be patient...", device_param->device_id + 1, filename_from_filepath (cached_file)); - #endif + if (hc_mtlCreateCommandQueue (hashcat_ctx, device_param->metal_device, &command_queue) == -1) + { + device_param->skipped = true; + continue; + } - if (read_kernel_binary (hashcat_ctx, source_file, kernel_lengths, kernel_sources) == false) return false; + // available device memory + // This test causes an GPU memory usage spike. + // In case there are multiple hashcat instances starting at the same time this will cause GPU out of memory errors which otherwise would not exist. + // We will simply not run it if that device was skipped by the user. - if (device_param->is_cuda == true) - { - nvrtcProgram program; + #define MAX_ALLOC_CHECKS_CNT 8192 + #define MAX_ALLOC_CHECKS_SIZE (64 * 1024 * 1024) - if (hc_nvrtcCreateProgram (hashcat_ctx, &program, kernel_sources[0], kernel_name, 0, NULL, NULL) == -1) return false; + device_param->device_available_mem = device_param->device_global_mem - MAX_ALLOC_CHECKS_SIZE; - char **nvrtc_options = (char **) hccalloc (5 + strlen (build_options_buf) + 1, sizeof (char *)); // ... 
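The instruction-set handling in the hunks above replaces per-device PTX test compilations with fixed capability tables: NVIDIA devices get their flags derived from the compute capability, and HIP devices are simply assumed to support the full vector-ALU set. A minimal standalone sketch of the NVIDIA table follows; the thresholds are copied from the hunk above, while the struct and function names are illustrative only.

/*
 * Standalone sketch, not hashcat code. Thresholds match the fixed values the
 * patch assigns for NVIDIA devices (sm = sm_major * 10 + sm_minor).
 */

#include <stdbool.h>
#include <stdio.h>

typedef struct
{
  bool has_add, has_addc, has_sub, has_subc;
  bool has_bfe, has_lop3, has_mov64, has_prmt;

} nv_isa_caps_t;

static nv_isa_caps_t nv_isa_caps_from_sm (const int sm_major, const int sm_minor)
{
  const int sm = (sm_major * 10) + sm_minor;

  nv_isa_caps_t caps;

  caps.has_add   = (sm >= 12);
  caps.has_addc  = (sm >= 12);
  caps.has_sub   = (sm >= 12);
  caps.has_subc  = (sm >= 12);
  caps.has_bfe   = (sm >= 20);
  caps.has_lop3  = (sm >= 50);
  caps.has_mov64 = (sm >= 10);
  caps.has_prmt  = (sm >= 20);

  return caps;
}

int main (void)
{
  const nv_isa_caps_t maxwell = nv_isa_caps_from_sm (5, 0);

  printf ("sm_50: lop3=%d prmt=%d\n", maxwell.has_lop3, maxwell.has_prmt);

  return 0;
}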
+ if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) + { + // following the same logic as for OpenCL, explained later - nvrtc_options[0] = "--restrict"; - nvrtc_options[1] = "--device-as-default-execution-space"; - nvrtc_options[2] = "--gpu-architecture"; + mtl_mem *tmp_device = (mtl_mem *) hccalloc (MAX_ALLOC_CHECKS_CNT, sizeof (mtl_mem)); - hc_asprintf (&nvrtc_options[3], "compute_%d%d", device_param->sm_major, device_param->sm_minor); + u64 c; - // untested on windows, but it should work - #if defined (_WIN) || defined (__CYGWIN__) || defined (__MSYS__) - hc_asprintf (&nvrtc_options[4], "-D INCLUDE_PATH=%s", "OpenCL"); - #else - hc_asprintf (&nvrtc_options[4], "-D INCLUDE_PATH=%s", folder_config->cpath_real); - #endif + for (c = 0; c < MAX_ALLOC_CHECKS_CNT; c++) + { + if (((c + 1 + 1) * MAX_ALLOC_CHECKS_SIZE) >= device_param->device_global_mem) break; - char *nvrtc_options_string = hcstrdup (build_options_buf); + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, MAX_ALLOC_CHECKS_SIZE, NULL, &tmp_device[c]) == -1) + { + c--; - const int num_options = 5 + nvrtc_make_options_array_from_string (nvrtc_options_string, nvrtc_options + 5); + break; + } - const int rc_nvrtcCompileProgram = hc_nvrtcCompileProgram (hashcat_ctx, program, num_options, (const char * const *) nvrtc_options); + // transfer only a few byte should be enough to force the runtime to actually allocate the memory - hcfree (nvrtc_options_string); - hcfree (nvrtc_options); + u8 tmp_host[8] = { 1, 2, 3, 4, 5, 6, 7, 8 }; - size_t build_log_size = 0; + if (hc_mtlMemcpyHtoD (hashcat_ctx, command_queue, tmp_device[c], 0, tmp_host, sizeof (tmp_host)) == -1) break; + if (hc_mtlMemcpyDtoH (hashcat_ctx, command_queue, tmp_host, tmp_device[c], 0, sizeof (tmp_host)) == -1) break; - hc_nvrtcGetProgramLogSize (hashcat_ctx, program, &build_log_size); + if (hc_mtlMemcpyHtoD (hashcat_ctx, command_queue, tmp_device[c], MAX_ALLOC_CHECKS_SIZE - sizeof (tmp_host), tmp_host, sizeof (tmp_host)) == -1) break; + if (hc_mtlMemcpyDtoH (hashcat_ctx, command_queue, tmp_host, tmp_device[c], MAX_ALLOC_CHECKS_SIZE - sizeof (tmp_host), sizeof (tmp_host)) == -1) break; + } - #if defined (DEBUG) - if ((build_log_size > 1) || (rc_nvrtcCompileProgram == -1)) - #else - if (rc_nvrtcCompileProgram == -1) - #endif - { - char *build_log = (char *) hcmalloc (build_log_size + 1); + device_param->device_available_mem = MAX_ALLOC_CHECKS_SIZE; - if (hc_nvrtcGetProgramLog (hashcat_ctx, program, build_log) == -1) + if (c > 0) { - hcfree (build_log); - - return false; + device_param->device_available_mem *= c; } - build_log[build_log_size] = 0; + // clean up - puts (build_log); + for (c = 0; c < MAX_ALLOC_CHECKS_CNT; c++) + { + if (((c + 1 + 1) * MAX_ALLOC_CHECKS_SIZE) >= device_param->device_global_mem) break; - hcfree (build_log); + if (tmp_device[c] != NULL) + { + if (hc_mtlReleaseMemObject (hashcat_ctx, tmp_device[c]) == -1) return -1; + } + } + + hcfree (tmp_device); } - if (rc_nvrtcCompileProgram == -1) + hc_mtlReleaseCommandQueue (hashcat_ctx, command_queue); + + if (device_param->device_host_unified_memory == 1) { - event_log_error (hashcat_ctx, "* Device #%u: Kernel %s build failed.", device_param->device_id + 1, source_file); + // so, we actually have only half the memory because we need the same buffers on host side - return false; + device_param->device_available_mem /= 2; } + } + } + #endif // __APPLE__ - size_t binary_size = 0; - - if (hc_nvrtcGetPTXSize (hashcat_ctx, program, &binary_size) == -1) return false; + if (backend_ctx->ocl) + { 
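Both the Metal hunk above and the OpenCL hunk below estimate usable device memory the same way: the runtimes only report total memory and allocate lazily, so hashcat allocates 64 MiB chunks until an allocation fails and treats the sum as the available amount. Below is a minimal standalone sketch of that pattern; vram_alloc()/vram_free() are hypothetical stand-ins for the real hc_mtlCreateBuffer()/clCreateBuffer() calls so the sketch runs on its own, and the memory sizes are made up.

/*
 * Standalone sketch, not hashcat code. vram_alloc()/vram_free() are
 * hypothetical stand-ins that enforce a simulated budget; in the patch the
 * equivalent calls are hc_mtlCreateBuffer()/clCreateBuffer() plus small
 * host<->device copies to force the lazy allocation to actually happen.
 */

#include <stdio.h>
#include <stdint.h>

#define MAX_ALLOC_CHECKS_CNT  8192
#define MAX_ALLOC_CHECKS_SIZE (64ULL * 1024 * 1024)

static uint64_t simulated_free_mem = 3ULL * 1024 * 1024 * 1024; // pretend 3 GiB are still free

static int vram_alloc (const uint64_t size) // returns -1 on failure, like the hc_* wrappers
{
  if (size > simulated_free_mem) return -1;

  simulated_free_mem -= size;

  return 0;
}

static void vram_free (const uint64_t size)
{
  simulated_free_mem += size;
}

int main (void)
{
  const uint64_t device_global_mem = 8ULL * 1024 * 1024 * 1024; // reported total, 8 GiB

  uint64_t c;

  for (c = 0; c < MAX_ALLOC_CHECKS_CNT; c++)
  {
    // never try to grab the very last chunk of the reported total

    if (((c + 1 + 1) * MAX_ALLOC_CHECKS_SIZE) >= device_global_mem) break;

    if (vram_alloc (MAX_ALLOC_CHECKS_SIZE) == -1) break;
  }

  const uint64_t device_available_mem = MAX_ALLOC_CHECKS_SIZE * ((c > 0) ? c : 1);

  // release the probe allocations again

  for (uint64_t i = 0; i < c; i++) vram_free (MAX_ALLOC_CHECKS_SIZE);

  printf ("estimated available memory: %llu MiB\n", (unsigned long long) (device_available_mem / (1024 * 1024)));

  return 0;
}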
+ for (int backend_devices_cnt = 0; backend_devices_cnt < backend_ctx->backend_devices_cnt; backend_devices_cnt++) + { + hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_cnt]; - char *binary = (char *) hcmalloc (binary_size); + if (device_param->is_opencl == false) continue; - if (hc_nvrtcGetPTX (hashcat_ctx, program, binary) == -1) return false; + if (user_options->backend_info == false) + { + // do not ignore in case -I because user expects a value also for skipped devices - if (hc_nvrtcDestroyProgram (hashcat_ctx, &program) == -1) return false; + if (device_param->skipped == true) continue; + } - #define LOG_SIZE 8192 + /** + * create context for each device + */ - char *mod_info_log = (char *) hcmalloc (LOG_SIZE + 1); - char *mod_error_log = (char *) hcmalloc (LOG_SIZE + 1); + cl_context context; - int mod_cnt = 6; - - CUjit_option mod_opts[7]; - void *mod_vals[7]; - - mod_opts[0] = CU_JIT_TARGET_FROM_CUCONTEXT; - mod_vals[0] = (void *) 0; + /* + cl_context_properties properties[3]; - mod_opts[1] = CU_JIT_LOG_VERBOSE; - mod_vals[1] = (void *) 1; + properties[0] = CL_CONTEXT_PLATFORM; + properties[1] = (cl_context_properties) device_param->opencl_platform; + properties[2] = 0; - mod_opts[2] = CU_JIT_INFO_LOG_BUFFER; - mod_vals[2] = (void *) mod_info_log; + CL_rc = hc_clCreateContext (hashcat_ctx, properties, 1, &device_param->opencl_device, NULL, NULL, &context); + */ - mod_opts[3] = CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES; - mod_vals[3] = (void *) LOG_SIZE; + if (hc_clCreateContext (hashcat_ctx, NULL, 1, &device_param->opencl_device, NULL, NULL, &context) == -1) + { + device_param->skipped = true; + continue; + } - mod_opts[4] = CU_JIT_ERROR_LOG_BUFFER; - mod_vals[4] = (void *) mod_error_log; + /** + * create command-queue + */ - mod_opts[5] = CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES; - mod_vals[5] = (void *) LOG_SIZE; + cl_command_queue command_queue; - if (hashconfig->opti_type & OPTI_TYPE_REGISTER_LIMIT) + if (hc_clCreateCommandQueue (hashcat_ctx, context, device_param->opencl_device, 0, &command_queue) == -1) { - mod_opts[6] = CU_JIT_MAX_REGISTERS; - mod_vals[6] = (void *) 128; - - mod_cnt++; + device_param->skipped = true; + continue; } - #if defined (WITH_CUBIN) + // instruction set - char *jit_info_log = (char *) hcmalloc (LOG_SIZE + 1); - char *jit_error_log = (char *) hcmalloc (LOG_SIZE + 1); + if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && (device_param->opencl_platform_vendor_id == VENDOR_ID_AMD)) + { + #define RUN_INSTRUCTION_CHECKS() \ + device_param->has_vadd = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADD_U32 %0, vcc, 0, 0;\" : \"=v\"(r1)); }"); \ + device_param->has_vaddc = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADDC_U32 %0, vcc, 0, 0, vcc;\" : \"=v\"(r1)); }"); \ + device_param->has_vadd_co = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADD_CO_U32 %0, vcc, 0, 0;\" : \"=v\"(r1)); }"); \ + device_param->has_vaddc_co = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADDC_CO_U32 %0, vcc, 0, 0, vcc;\" : \"=v\"(r1)); }"); \ + device_param->has_vsub = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ 
(\"V_SUB_U32 %0, vcc, 0, 0;\" : \"=v\"(r1)); }"); \ + device_param->has_vsubb = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_SUBB_U32 %0, vcc, 0, 0, vcc;\" : \"=v\"(r1)); }"); \ + device_param->has_vsub_co = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_SUB_CO_U32 %0, vcc, 0, 0;\" : \"=v\"(r1)); }"); \ + device_param->has_vsubb_co = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_SUBB_CO_U32 %0, vcc, 0, 0, vcc;\" : \"=v\"(r1)); }"); \ + device_param->has_vadd3 = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADD3_U32 %0, 0, 0, 0;\" : \"=v\"(r1)); }"); \ + device_param->has_vbfe = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_BFE_U32 %0, 0, 0, 0;\" : \"=v\"(r1)); }"); \ + device_param->has_vperm = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_PERM_B32 %0, 0, 0, 0;\" : \"=v\"(r1)); }"); \ - int jit_cnt = 6; + if (backend_devices_idx > 0) + { + hc_device_param_t *device_param_prev = &devices_param[backend_devices_idx - 1]; - CUjit_option jit_opts[7]; - void *jit_vals[7]; + if (is_same_device_type (device_param, device_param_prev) == true) + { + device_param->has_vadd = device_param_prev->has_vadd; + device_param->has_vaddc = device_param_prev->has_vaddc; + device_param->has_vadd_co = device_param_prev->has_vadd_co; + device_param->has_vaddc_co = device_param_prev->has_vaddc_co; + device_param->has_vsub = device_param_prev->has_vsub; + device_param->has_vsubb = device_param_prev->has_vsubb; + device_param->has_vsub_co = device_param_prev->has_vsub_co; + device_param->has_vsubb_co = device_param_prev->has_vsubb_co; + device_param->has_vadd3 = device_param_prev->has_vadd3; + device_param->has_vbfe = device_param_prev->has_vbfe; + device_param->has_vperm = device_param_prev->has_vperm; + } + else + { + RUN_INSTRUCTION_CHECKS(); + } + } + else + { + RUN_INSTRUCTION_CHECKS(); + } - jit_opts[0] = CU_JIT_TARGET_FROM_CUCONTEXT; - jit_vals[0] = (void *) 0; + #undef RUN_INSTRUCTION_CHECKS + } - jit_opts[1] = CU_JIT_LOG_VERBOSE; - jit_vals[1] = (void *) 1; + if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && (device_param->opencl_platform_vendor_id == VENDOR_ID_NV)) + { + // replaced with fixed values see non time intensive section above - jit_opts[2] = CU_JIT_INFO_LOG_BUFFER; - jit_vals[2] = (void *) jit_info_log; + /* + #define RUN_INSTRUCTION_CHECKS() \ + device_param->has_add = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"add.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \ + device_param->has_addc = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"addc.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \ + device_param->has_sub = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"sub.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \ + device_param->has_subc = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; 
asm volatile (\"subc.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \ + device_param->has_bfe = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"bfe.u32 %0, 0, 0, 0;\" : \"=r\"(r)); }"); \ + device_param->has_lop3 = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"lop3.b32 %0, 0, 0, 0, 0;\" : \"=r\"(r)); }"); \ + device_param->has_mov64 = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { ulong r; uint a; uint b; asm volatile (\"mov.b64 %0, {%1, %2};\" : \"=l\"(r) : \"r\"(a), \"r\"(b)); }"); \ + device_param->has_prmt = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"prmt.b32 %0, 0, 0, 0;\" : \"=r\"(r)); }"); \ - jit_opts[3] = CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES; - jit_vals[3] = (void *) LOG_SIZE; + if (backend_devices_idx > 0) + { + hc_device_param_t *device_param_prev = &devices_param[backend_devices_idx - 1]; - jit_opts[4] = CU_JIT_ERROR_LOG_BUFFER; - jit_vals[4] = (void *) jit_error_log; + if (is_same_device_type (device_param, device_param_prev) == true) + { + device_param->has_add = device_param_prev->has_add; + device_param->has_addc = device_param_prev->has_addc; + device_param->has_sub = device_param_prev->has_sub; + device_param->has_subc = device_param_prev->has_subc; + device_param->has_bfe = device_param_prev->has_bfe; + device_param->has_lop3 = device_param_prev->has_lop3; + device_param->has_mov64 = device_param_prev->has_mov64; + device_param->has_prmt = device_param_prev->has_prmt; + } + else + { + RUN_INSTRUCTION_CHECKS(); + } + } + else + { + RUN_INSTRUCTION_CHECKS(); + } - jit_opts[5] = CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES; - jit_vals[5] = (void *) LOG_SIZE; + #undef RUN_INSTRUCTION_CHECKS + */ + } - if (hashconfig->opti_type & OPTI_TYPE_REGISTER_LIMIT) - { - jit_opts[6] = CU_JIT_MAX_REGISTERS; - jit_vals[6] = (void *) 128; + // available device memory + // This test causes an GPU memory usage spike. + // In case there are multiple hashcat instances starting at the same time this will cause GPU out of memory errors which otherwise would not exist. + // We will simply not run it if that device was skipped by the user. - jit_cnt++; - } + #define MAX_ALLOC_CHECKS_CNT 8192 + #define MAX_ALLOC_CHECKS_SIZE (64 * 1024 * 1024) - CUlinkState state; + device_param->device_available_mem = device_param->device_global_mem - MAX_ALLOC_CHECKS_SIZE; - if (hc_cuLinkCreate (hashcat_ctx, jit_cnt, jit_opts, jit_vals, &state) == -1) + if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) { - event_log_error (hashcat_ctx, "* Device #%u: Kernel %s link failed. Error Log:", device_param->device_id + 1, source_file); - event_log_error (hashcat_ctx, "%s", jit_error_log); - event_log_error (hashcat_ctx, NULL); + // OK, so the problem here is the following: + // There's just CL_DEVICE_GLOBAL_MEM_SIZE to ask OpenCL about the total memory on the device, + // but there's no way to ask for available memory on the device. + // In combination, most OpenCL runtimes implementation of clCreateBuffer() + // are doing so called lazy memory allocation on the device. 
+ // Now, if the user has X11 (or a game or anything that takes a lot of GPU memory) + // running on the host we end up with an error type of this: + // clEnqueueNDRangeKernel(): CL_MEM_OBJECT_ALLOCATION_FAILURE + // The clEnqueueNDRangeKernel() is because of the lazy allocation + // The best way to workaround this problem is if we would be able to ask for available memory, + // The idea here is to try to evaluate available memory by allocating it till it errors - return false; - } + cl_mem *tmp_device = (cl_mem *) hccalloc (MAX_ALLOC_CHECKS_CNT, sizeof (cl_mem)); - if (hc_cuLinkAddData (hashcat_ctx, state, CU_JIT_INPUT_PTX, binary, binary_size, kernel_name, 0, NULL, NULL) == -1) - { - event_log_error (hashcat_ctx, "* Device #%u: Kernel %s link failed. Error Log:", device_param->device_id + 1, source_file); - event_log_error (hashcat_ctx, "%s", jit_error_log); - event_log_error (hashcat_ctx, NULL); + u64 c; - return false; - } + for (c = 0; c < MAX_ALLOC_CHECKS_CNT; c++) + { + if (((c + 1 + 1) * MAX_ALLOC_CHECKS_SIZE) >= device_param->device_global_mem) break; - void *cubin = NULL; + cl_int CL_err; - size_t cubin_size = 0; + OCL_PTR *ocl = (OCL_PTR *) backend_ctx->ocl; - if (hc_cuLinkComplete (hashcat_ctx, state, &cubin, &cubin_size) == -1) - { - event_log_error (hashcat_ctx, "* Device #%u: Kernel %s link failed. Error Log:", device_param->device_id + 1, source_file); - event_log_error (hashcat_ctx, "%s", jit_error_log); - event_log_error (hashcat_ctx, NULL); + tmp_device[c] = ocl->clCreateBuffer (context, CL_MEM_READ_WRITE, MAX_ALLOC_CHECKS_SIZE, NULL, &CL_err); - return false; - } + if (CL_err != CL_SUCCESS) + { + c--; - #if defined (DEBUG) - event_log_info (hashcat_ctx, "* Device #%u: Kernel %s link successful. Info Log:", device_param->device_id + 1, source_file); - event_log_info (hashcat_ctx, "%s", jit_info_log); - event_log_info (hashcat_ctx, NULL); - #endif + break; + } - if (hc_cuModuleLoadDataEx (hashcat_ctx, cuda_module, cubin, mod_cnt, mod_opts, mod_vals) == -1) - { - event_log_error (hashcat_ctx, "* Device #%u: Kernel %s load failed. Error Log:", device_param->device_id + 1, source_file); - event_log_error (hashcat_ctx, "%s", mod_error_log); - event_log_error (hashcat_ctx, NULL); + // transfer only a few byte should be enough to force the runtime to actually allocate the memory - return false; - } + u8 tmp_host[8]; - #if defined (DEBUG) - event_log_info (hashcat_ctx, "* Device #%u: Kernel %s load successful. 
Info Log:", device_param->device_id + 1, source_file); - event_log_info (hashcat_ctx, "%s", mod_info_log); - event_log_info (hashcat_ctx, NULL); - #endif + if (ocl->clEnqueueReadBuffer (command_queue, tmp_device[c], CL_TRUE, 0, sizeof (tmp_host), tmp_host, 0, NULL, NULL) != CL_SUCCESS) break; + if (ocl->clEnqueueWriteBuffer (command_queue, tmp_device[c], CL_TRUE, 0, sizeof (tmp_host), tmp_host, 0, NULL, NULL) != CL_SUCCESS) break; - if (cache_disable == false) - { - if (write_kernel_binary (hashcat_ctx, cached_file, cubin, cubin_size) == false) return false; - } + if (ocl->clEnqueueReadBuffer (command_queue, tmp_device[c], CL_TRUE, MAX_ALLOC_CHECKS_SIZE - sizeof (tmp_host), sizeof (tmp_host), tmp_host, 0, NULL, NULL) != CL_SUCCESS) break; + if (ocl->clEnqueueWriteBuffer (command_queue, tmp_device[c], CL_TRUE, MAX_ALLOC_CHECKS_SIZE - sizeof (tmp_host), sizeof (tmp_host), tmp_host, 0, NULL, NULL) != CL_SUCCESS) break; + } - if (hc_cuLinkDestroy (hashcat_ctx, state) == -1) return false; + device_param->device_available_mem = MAX_ALLOC_CHECKS_SIZE; - hcfree (jit_info_log); - hcfree (jit_error_log); + if (c > 0) + { + device_param->device_available_mem *= c; + } - #else + // clean up - if (hc_cuModuleLoadDataEx (hashcat_ctx, cuda_module, binary, mod_cnt, mod_opts, mod_vals) == -1) - { - event_log_error (hashcat_ctx, "* Device #%u: Kernel %s load failed. Error Log:", device_param->device_id + 1, source_file); - event_log_error (hashcat_ctx, "%s", mod_error_log); - event_log_error (hashcat_ctx, NULL); + for (c = 0; c < MAX_ALLOC_CHECKS_CNT; c++) + { + if (((c + 1 + 1) * MAX_ALLOC_CHECKS_SIZE) >= device_param->device_global_mem) break; - return false; + if (tmp_device[c] != NULL) + { + if (hc_clReleaseMemObject (hashcat_ctx, tmp_device[c]) == -1) return -1; + } + } + + hcfree (tmp_device); } - #if defined (DEBUG) - event_log_info (hashcat_ctx, "* Device #%u: Kernel %s load successful. Info Log:", device_param->device_id + 1, source_file); - event_log_info (hashcat_ctx, "%s", mod_info_log); - event_log_info (hashcat_ctx, NULL); - #endif + hc_clReleaseCommandQueue (hashcat_ctx, command_queue); - if (cache_disable == false) + hc_clReleaseContext (hashcat_ctx, context); + + if (device_param->device_host_unified_memory == 1) { - if (write_kernel_binary (hashcat_ctx, cached_file, binary, binary_size) == false) return false; + // so, we actually have only half the memory because we need the same buffers on host side + + device_param->device_available_mem /= 2; } + } + } - #endif + backend_ctx->target_msec = TARGET_MSEC_PROFILE[user_options->workload_profile - 1]; - hcfree (mod_info_log); - hcfree (mod_error_log); + backend_ctx->need_adl = need_adl; + backend_ctx->need_nvml = need_nvml; + backend_ctx->need_nvapi = need_nvapi; + backend_ctx->need_sysfs_amdgpu = need_sysfs_amdgpu; + backend_ctx->need_sysfs_cpu = need_sysfs_cpu; + backend_ctx->need_iokit = need_iokit; - hcfree (binary); - } + backend_ctx->comptime = comptime; - if (device_param->is_hip == true) - { - hiprtcProgram program; + return 0; +} - if (hc_hiprtcCreateProgram (hashcat_ctx, &program, kernel_sources[0], kernel_name, 0, NULL, NULL) == -1) return false; +void backend_ctx_devices_destroy (hashcat_ctx_t *hashcat_ctx) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - char **hiprtc_options = (char **) hccalloc (6 + strlen (build_options_buf) + 1, sizeof (char *)); // ... 
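The backend_ctx_devices_sync_tuning() hunk here copies the auto-tuned kernel_accel/kernel_loops/kernel_threads values between identical devices and then recomputes the two power figures derived from them: hardware_power is processors times threads (just threads when OPTS_TYPE_MP_MULTI_DISABLE is set), and kernel_power multiplies that by kernel_accel; the calculation itself appears a little further down. A minimal standalone sketch of that arithmetic, with example numbers only:

/*
 * Standalone sketch, not hashcat code. The formula matches the sync-tuning
 * hunk; device_processors, kernel_threads and kernel_accel are example values.
 */

#include <stdbool.h>
#include <stdio.h>
#include <stdint.h>

static uint32_t hardware_power (const bool mp_multi_disable, const uint32_t device_processors, const uint32_t kernel_threads)
{
  return (mp_multi_disable ? 1 : device_processors) * kernel_threads;
}

int main (void)
{
  const uint32_t device_processors = 68;  // e.g. number of compute units
  const uint32_t kernel_threads    = 256;
  const uint32_t kernel_accel      = 32;

  const uint32_t hw_power     = hardware_power (false, device_processors, kernel_threads);
  const uint32_t kernel_power = hw_power * kernel_accel;

  printf ("hardware_power = %u, kernel_power = %u\n", hw_power, kernel_power);

  return 0;
}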
+ if (backend_ctx->enabled == false) return; - //hiprtc_options[0] = "--restrict"; - //hiprtc_options[1] = "--device-as-default-execution-space"; - //hiprtc_options[2] = "--gpu-architecture"; + for (u32 opencl_platforms_idx = 0; opencl_platforms_idx < backend_ctx->opencl_platforms_cnt; opencl_platforms_idx++) + { + hcfree (backend_ctx->opencl_platforms_devices[opencl_platforms_idx]); + hcfree (backend_ctx->opencl_platforms_name[opencl_platforms_idx]); + hcfree (backend_ctx->opencl_platforms_vendor[opencl_platforms_idx]); + hcfree (backend_ctx->opencl_platforms_version[opencl_platforms_idx]); + } - hc_asprintf (&hiprtc_options[0], "--gpu-max-threads-per-block=%d", (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : device_param->kernel_threads_max); + for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++) + { + hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx]; - /* 4.3 linux - hiprtc_options[1] = "-I"; - hiprtc_options[2] = "/opt/rocm/hip/bin/include"; - hiprtc_options[3] = "-I"; - hiprtc_options[4] = "/opt/rocm/include"; - hiprtc_options[5] = "-I"; - */ + hcfree (device_param->device_name); - hiprtc_options[1] = "-nocudainc"; - hiprtc_options[2] = "-nocudalib"; - hiprtc_options[3] = ""; - hiprtc_options[4] = ""; + if (device_param->is_opencl == true) + { + hcfree (device_param->opencl_driver_version); + hcfree (device_param->opencl_device_version); + hcfree (device_param->opencl_device_c_version); + hcfree (device_param->opencl_device_vendor); + } + } - // untested but it should work - #if defined (_WIN) || defined (__CYGWIN__) || defined (__MSYS__) - hc_asprintf (&hiprtc_options[5], "-D INCLUDE_PATH=%s", "OpenCL"); - #else - hc_asprintf (&hiprtc_options[5], "-D INCLUDE_PATH=%s", folder_config->cpath_real); - #endif + backend_ctx->backend_devices_cnt = 0; + backend_ctx->backend_devices_active = 0; + backend_ctx->cuda_devices_cnt = 0; + backend_ctx->cuda_devices_active = 0; + backend_ctx->hip_devices_cnt = 0; + backend_ctx->hip_devices_active = 0; + backend_ctx->metal_devices_cnt = 0; + backend_ctx->metal_devices_active = 0; + backend_ctx->opencl_devices_cnt = 0; + backend_ctx->opencl_devices_active = 0; - char *hiprtc_options_string = hcstrdup (build_options_buf); + backend_ctx->need_adl = false; + backend_ctx->need_nvml = false; + backend_ctx->need_nvapi = false; + backend_ctx->need_sysfs_amdgpu = false; + backend_ctx->need_sysfs_cpu = false; + backend_ctx->need_iokit = false; +} - const int num_options = 6 + hiprtc_make_options_array_from_string (hiprtc_options_string, hiprtc_options + 6); +void backend_ctx_devices_sync_tuning (hashcat_ctx_t *hashcat_ctx) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + hashconfig_t *hashconfig = hashcat_ctx->hashconfig; - const int rc_hiprtcCompileProgram = hc_hiprtcCompileProgram (hashcat_ctx, program, num_options, (const char * const *) hiprtc_options); + if (backend_ctx->enabled == false) return; - hcfree (hiprtc_options_string); - hcfree (hiprtc_options); + for (int backend_devices_cnt_src = 0; backend_devices_cnt_src < backend_ctx->backend_devices_cnt; backend_devices_cnt_src++) + { + hc_device_param_t *device_param_src = &backend_ctx->devices_param[backend_devices_cnt_src]; - size_t build_log_size = 0; + if (device_param_src->skipped == true) continue; + if (device_param_src->skipped_warning == true) continue; - hc_hiprtcGetProgramLogSize (hashcat_ctx, program, &build_log_size); + for (int backend_devices_cnt_dst = 
backend_devices_cnt_src + 1; backend_devices_cnt_dst < backend_ctx->backend_devices_cnt; backend_devices_cnt_dst++) + { + hc_device_param_t *device_param_dst = &backend_ctx->devices_param[backend_devices_cnt_dst]; - #if defined (DEBUG) - if ((build_log_size > 1) || (rc_hiprtcCompileProgram == -1)) - #else - if (rc_hiprtcCompileProgram == -1) - #endif - { - char *build_log = (char *) hcmalloc (build_log_size + 1); + if (device_param_dst->skipped == true) continue; + if (device_param_dst->skipped_warning == true) continue; - if (hc_hiprtcGetProgramLog (hashcat_ctx, program, build_log) == -1) - { - hcfree (build_log); + if (is_same_device_type (device_param_src, device_param_dst) == false) continue; - return false; - } + device_param_dst->kernel_accel = device_param_src->kernel_accel; + device_param_dst->kernel_loops = device_param_src->kernel_loops; + device_param_dst->kernel_threads = device_param_src->kernel_threads; - build_log[build_log_size] = 0; + const u32 hardware_power = ((hashconfig->opts_type & OPTS_TYPE_MP_MULTI_DISABLE) ? 1 : device_param_dst->device_processors) * device_param_dst->kernel_threads; - puts (build_log); + device_param_dst->hardware_power = hardware_power; - hcfree (build_log); - } + const u32 kernel_power = device_param_dst->hardware_power * device_param_dst->kernel_accel; - if (rc_hiprtcCompileProgram == -1) - { - event_log_error (hashcat_ctx, "* Device #%u: Kernel %s build failed.", device_param->device_id + 1, source_file); + device_param_dst->kernel_power = kernel_power; + } + } +} - return false; - } +void backend_ctx_devices_update_power (hashcat_ctx_t *hashcat_ctx) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + status_ctx_t *status_ctx = hashcat_ctx->status_ctx; + user_options_extra_t *user_options_extra = hashcat_ctx->user_options_extra; + user_options_t *user_options = hashcat_ctx->user_options; - size_t binary_size = 0; + if (backend_ctx->enabled == false) return; - if (hc_hiprtcGetCodeSize (hashcat_ctx, program, &binary_size) == -1) return false; + u32 kernel_power_all = 0; - char *binary = (char *) hcmalloc (binary_size); + for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++) + { + hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx]; - if (hc_hiprtcGetCode (hashcat_ctx, program, binary) == -1) return false; + if (device_param->skipped == true) continue; + if (device_param->skipped_warning == true) continue; - if (hc_hiprtcDestroyProgram (hashcat_ctx, &program) == -1) return false; + kernel_power_all += device_param->kernel_power; + } - #define LOG_SIZE 8192 + backend_ctx->kernel_power_all = kernel_power_all; - char *mod_info_log = (char *) hcmalloc (LOG_SIZE + 1); - char *mod_error_log = (char *) hcmalloc (LOG_SIZE + 1); + /* + * Inform user about possible slow speeds + */ - int mod_cnt = 6; + if ((user_options_extra->wordlist_mode == WL_MODE_FILE) || (user_options_extra->wordlist_mode == WL_MODE_MASK)) + { + if (status_ctx->words_base < kernel_power_all) + { + if (user_options->quiet == false) + { + clear_prompt (hashcat_ctx); - hipJitOption mod_opts[6]; - void *mod_vals[6]; + event_log_advice (hashcat_ctx, "The wordlist or mask that you are using is too small."); + event_log_advice (hashcat_ctx, "This means that hashcat cannot use the full parallel power of your device(s)."); + event_log_advice (hashcat_ctx, "Unless you supply more work, your cracking speed will drop."); + event_log_advice (hashcat_ctx, "For tips on supplying more work, see: 
https://hashcat.net/faq/morework"); + event_log_advice (hashcat_ctx, NULL); + } + } + } +} - mod_opts[0] = hipJitOptionTargetFromContext; - mod_vals[0] = (void *) 0; +void backend_ctx_devices_kernel_loops (hashcat_ctx_t *hashcat_ctx) +{ + combinator_ctx_t *combinator_ctx = hashcat_ctx->combinator_ctx; + hashconfig_t *hashconfig = hashcat_ctx->hashconfig; + hashes_t *hashes = hashcat_ctx->hashes; + mask_ctx_t *mask_ctx = hashcat_ctx->mask_ctx; + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + straight_ctx_t *straight_ctx = hashcat_ctx->straight_ctx; + user_options_t *user_options = hashcat_ctx->user_options; + user_options_extra_t *user_options_extra = hashcat_ctx->user_options_extra; - mod_opts[1] = hipJitOptionLogVerbose; - mod_vals[1] = (void *) 1; + if (backend_ctx->enabled == false) return; - mod_opts[2] = hipJitOptionInfoLogBuffer; - mod_vals[2] = (void *) mod_info_log; + for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++) + { + hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx]; - mod_opts[3] = hipJitOptionInfoLogBufferSizeBytes; - mod_vals[3] = (void *) LOG_SIZE; + if (device_param->skipped == true) continue; + if (device_param->skipped_warning == true) continue; - mod_opts[4] = hipJitOptionErrorLogBuffer; - mod_vals[4] = (void *) mod_error_log; + device_param->kernel_loops_min = device_param->kernel_loops_min_sav; + device_param->kernel_loops_max = device_param->kernel_loops_max_sav; - mod_opts[5] = hipJitOptionErrorLogBufferSizeBytes; - mod_vals[5] = (void *) LOG_SIZE; + if (device_param->kernel_loops_min < device_param->kernel_loops_max) + { + u32 innerloop_cnt = 0; - if (hc_hipModuleLoadDataEx (hashcat_ctx, hip_module, binary, mod_cnt, mod_opts, mod_vals) == -1) + if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) { - event_log_error (hashcat_ctx, "* Device #%u: Kernel %s load failed. Error Log:", device_param->device_id + 1, source_file); - event_log_error (hashcat_ctx, "%s", mod_error_log); - event_log_error (hashcat_ctx, NULL); - - return false; + if (user_options->slow_candidates == true) + { + innerloop_cnt = 1; + } + else + { + if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT) innerloop_cnt = MIN (KERNEL_RULES, (u32) straight_ctx->kernel_rules_cnt); + else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI) innerloop_cnt = MIN (KERNEL_COMBS, (u32) combinator_ctx->combs_cnt); + else if (user_options_extra->attack_kern == ATTACK_KERN_BF) innerloop_cnt = MIN (KERNEL_BFS, (u32) mask_ctx->bfs_cnt); + } } - - #if defined (DEBUG) - event_log_info (hashcat_ctx, "* Device #%u: Kernel %s load successful. 
Info Log:", device_param->device_id + 1, source_file); - event_log_info (hashcat_ctx, "%s", mod_info_log); - event_log_info (hashcat_ctx, NULL); - #endif - - if (cache_disable == false) + else { - if (write_kernel_binary (hashcat_ctx, cached_file, binary, binary_size) == false) return false; + innerloop_cnt = hashes->salts_buf[0].salt_iter; } - hcfree (mod_info_log); - hcfree (mod_error_log); - - hcfree (binary); + if ((innerloop_cnt >= device_param->kernel_loops_min) && + (innerloop_cnt <= device_param->kernel_loops_max)) + { + device_param->kernel_loops_max = innerloop_cnt; + } } + } +} - if (device_param->is_opencl == true) - { - size_t build_log_size = 0; - - int CL_rc; +static int get_cuda_kernel_wgs (hashcat_ctx_t *hashcat_ctx, CUfunction function, u32 *result) +{ + int max_threads_per_block; - cl_program p1 = NULL; + if (hc_cuFuncGetAttribute (hashcat_ctx, &max_threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, function) == -1) return -1; - // workaround opencl issue with Apple Silicon + *result = (u32) max_threads_per_block; - if (strncmp (device_param->device_name, "Apple M", 7) == 0) - { - if (hc_clCreateProgramWithSource (hashcat_ctx, device_param->opencl_context, 1, (const char **) kernel_sources, NULL, opencl_program) == -1) return false; + return 0; +} - CL_rc = hc_clBuildProgram (hashcat_ctx, *opencl_program, 1, &device_param->opencl_device, build_options_buf, NULL, NULL); +static int get_cuda_kernel_local_mem_size (hashcat_ctx_t *hashcat_ctx, CUfunction function, u64 *result) +{ + int shared_size_bytes; - hc_clGetProgramBuildInfo (hashcat_ctx, *opencl_program, device_param->opencl_device, CL_PROGRAM_BUILD_LOG, 0, NULL, &build_log_size); - } - else - { - if (hc_clCreateProgramWithSource (hashcat_ctx, device_param->opencl_context, 1, (const char **) kernel_sources, NULL, &p1) == -1) return false; + if (hc_cuFuncGetAttribute (hashcat_ctx, &shared_size_bytes, CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, function) == -1) return -1; - CL_rc = hc_clCompileProgram (hashcat_ctx, p1, 1, &device_param->opencl_device, build_options_buf, 0, NULL, NULL, NULL, NULL); + *result = (u64) shared_size_bytes; - hc_clGetProgramBuildInfo (hashcat_ctx, p1, device_param->opencl_device, CL_PROGRAM_BUILD_LOG, 0, NULL, &build_log_size); - } + return 0; +} - #if defined (DEBUG) - if ((build_log_size > 1) || (CL_rc == -1)) - #else - if (CL_rc == -1) - #endif - { - char *build_log = (char *) hcmalloc (build_log_size + 1); +static int get_hip_kernel_wgs (hashcat_ctx_t *hashcat_ctx, hipFunction_t function, u32 *result) +{ + int max_threads_per_block; - int rc_clGetProgramBuildInfo; + if (hc_hipFuncGetAttribute (hashcat_ctx, &max_threads_per_block, HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, function) == -1) return -1; - if (strncmp (device_param->device_name, "Apple M", 7) == 0) - { - rc_clGetProgramBuildInfo = hc_clGetProgramBuildInfo (hashcat_ctx, *opencl_program, device_param->opencl_device, CL_PROGRAM_BUILD_LOG, build_log_size, build_log, NULL); - } - else - { - rc_clGetProgramBuildInfo = hc_clGetProgramBuildInfo (hashcat_ctx, p1, device_param->opencl_device, CL_PROGRAM_BUILD_LOG, build_log_size, build_log, NULL); - } + *result = (u32) max_threads_per_block; - if (rc_clGetProgramBuildInfo == -1) - { - hcfree (build_log); + return 0; +} - return false; - } +static int get_hip_kernel_local_mem_size (hashcat_ctx_t *hashcat_ctx, hipFunction_t function, u64 *result) +{ + int shared_size_bytes; - build_log[build_log_size] = 0; + if (hc_hipFuncGetAttribute (hashcat_ctx, &shared_size_bytes, 
HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, function) == -1) return -1; - puts (build_log); + *result = (u64) shared_size_bytes; - hcfree (build_log); - } + return 0; +} - if (CL_rc == -1) return false; +static int get_opencl_kernel_wgs (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_kernel kernel, u32 *result) +{ + size_t work_group_size = 0; - // workaround opencl issue with Apple Silicon + if (hc_clGetKernelWorkGroupInfo (hashcat_ctx, kernel, device_param->opencl_device, CL_KERNEL_WORK_GROUP_SIZE, sizeof (work_group_size), &work_group_size, NULL) == -1) return -1; - if (strncmp (device_param->device_name, "Apple M", 7) != 0) - { - cl_program t2[1]; + u32 kernel_threads = (u32) work_group_size; - t2[0] = p1; + size_t compile_work_group_size[3] = { 0, 0, 0 }; - cl_program fin; + if (hc_clGetKernelWorkGroupInfo (hashcat_ctx, kernel, device_param->opencl_device, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, sizeof (compile_work_group_size), &compile_work_group_size, NULL) == -1) return -1; - if (hc_clLinkProgram (hashcat_ctx, device_param->opencl_context, 1, &device_param->opencl_device, NULL, 1, t2, NULL, NULL, &fin) == -1) return false; + const size_t cwgs_total = compile_work_group_size[0] * compile_work_group_size[1] * compile_work_group_size[2]; - // it seems errors caused by clLinkProgram() do not go into CL_PROGRAM_BUILD - // I couldn't find any information on the web explaining how else to retrieve the error messages from the linker + if (cwgs_total > 0) + { + kernel_threads = MIN (kernel_threads, (u32) cwgs_total); + } - *opencl_program = fin; + *result = kernel_threads; - hc_clReleaseProgram (hashcat_ctx, p1); - } + return 0; +} - if (cache_disable == false) - { - size_t binary_size; +static int get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_kernel kernel, u32 *result) +{ + size_t preferred_work_group_size_multiple = 0; - if (hc_clGetProgramInfo (hashcat_ctx, *opencl_program, CL_PROGRAM_BINARY_SIZES, sizeof (size_t), &binary_size, NULL) == -1) return false; + if (hc_clGetKernelWorkGroupInfo (hashcat_ctx, kernel, device_param->opencl_device, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof (preferred_work_group_size_multiple), &preferred_work_group_size_multiple, NULL) == -1) return -1; - char *binary = (char *) hcmalloc (binary_size); + *result = (u32) preferred_work_group_size_multiple; - if (hc_clGetProgramInfo (hashcat_ctx, *opencl_program, CL_PROGRAM_BINARIES, sizeof (char *), &binary, NULL) == -1) return false; + return 0; +} - if (write_kernel_binary (hashcat_ctx, cached_file, binary, binary_size) == false) return false; +static int get_opencl_kernel_local_mem_size (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_kernel kernel, u64 *result) +{ + cl_ulong local_mem_size = 0; - hcfree (binary); - } - } - } - else - { - if (read_kernel_binary (hashcat_ctx, cached_file, kernel_lengths, kernel_sources) == false) return false; + if (hc_clGetKernelWorkGroupInfo (hashcat_ctx, kernel, device_param->opencl_device, CL_KERNEL_LOCAL_MEM_SIZE, sizeof (local_mem_size), &local_mem_size, NULL) == -1) return -1; - if (device_param->is_cuda == true) - { - #define LOG_SIZE 8192 + *result = local_mem_size; - char *mod_info_log = (char *) hcmalloc (LOG_SIZE + 1); - char *mod_error_log = (char *) hcmalloc (LOG_SIZE + 1); + return 0; +} - int mod_cnt = 6; +static int get_opencl_kernel_dynamic_local_mem_size (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_kernel kernel, u64 *result) +{ + cl_ulong 
dynamic_local_mem_size = 0;
-  CUjit_option mod_opts[7];
-  void *mod_vals[7];
+  if (hc_clGetKernelWorkGroupInfo (hashcat_ctx, kernel, device_param->opencl_device, CL_KERNEL_LOCAL_MEM_SIZE, sizeof (dynamic_local_mem_size), &dynamic_local_mem_size, NULL) == -1) return -1;
-  mod_opts[0] = CU_JIT_TARGET_FROM_CUCONTEXT;
-  mod_vals[0] = (void *) 0;
+  // unknown how to query this information in OpenCL
+  // we therefore reset to zero
+  // the above call to hc_clGetKernelWorkGroupInfo() is just to avoid compiler warnings
-  mod_opts[1] = CU_JIT_LOG_VERBOSE;
-  mod_vals[1] = (void *) 1;
+  dynamic_local_mem_size = 0;
-  mod_opts[2] = CU_JIT_INFO_LOG_BUFFER;
-  mod_vals[2] = (void *) mod_info_log;
+  *result = dynamic_local_mem_size;
-  mod_opts[3] = CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES;
-  mod_vals[3] = (void *) LOG_SIZE;
+  return 0;
+}
-  mod_opts[4] = CU_JIT_ERROR_LOG_BUFFER;
-  mod_vals[4] = (void *) mod_error_log;
+#if defined (__APPLE__)
+static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const char *kernel_name, char *source_file, char *cached_file, const char *build_options_buf, const bool cache_disable, cl_program *opencl_program, CUmodule *cuda_module, hipModule_t *hip_module, mtl_library *metal_library)
+#else
+static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const char *kernel_name, char *source_file, char *cached_file, const char *build_options_buf, const bool cache_disable, cl_program *opencl_program, CUmodule *cuda_module, hipModule_t *hip_module, MAYBE_UNUSED void *metal_library)
+#endif
+{
+  const hashconfig_t *hashconfig = hashcat_ctx->hashconfig;
+  const user_options_t *user_options = hashcat_ctx->user_options;
-  mod_opts[5] = CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES;
-  mod_vals[5] = (void *) LOG_SIZE;
+  #if !defined (_WIN) && !defined (__CYGWIN__) && !defined (__MSYS__)
+  const folder_config_t *folder_config = hashcat_ctx->folder_config;
+  #endif
-  if (hashconfig->opti_type & OPTI_TYPE_REGISTER_LIMIT)
-  {
-    mod_opts[6] = CU_JIT_MAX_REGISTERS;
-    mod_vals[6] = (void *) 128;
+  bool cached = true;
-    mod_cnt++;
-  }
+  if (cache_disable == true)
+  {
+    cached = false;
+  }
-  if (hc_cuModuleLoadDataEx (hashcat_ctx, cuda_module, kernel_sources[0], mod_cnt, mod_opts, mod_vals) == -1)
-  {
-    event_log_error (hashcat_ctx, "* Device #%u: Kernel %s load failed. Error Log:", device_param->device_id + 1, source_file);
-    event_log_error (hashcat_ctx, "%s", mod_error_log);
-    event_log_error (hashcat_ctx, NULL);
+  if (hc_path_read (cached_file) == false)
+  {
+    cached = false;
+  }
-    return false;
-  }
+  if (hc_path_is_empty (cached_file) == true)
+  {
+    cached = false;
+  }
-  #if defined (DEBUG)
-  event_log_info (hashcat_ctx, "* Device #%u: Kernel %s load successful. 
Info Log:", device_param->device_id + 1, source_file); - event_log_info (hashcat_ctx, "%s", mod_info_log); - event_log_info (hashcat_ctx, NULL); - #endif + /** + * kernel compile or load + */ - hcfree (mod_info_log); - hcfree (mod_error_log); - } + size_t kernel_lengths_buf = 0; - if (device_param->is_hip == true) - { - #define LOG_SIZE 8192 + size_t *kernel_lengths = &kernel_lengths_buf; - char *mod_info_log = (char *) hcmalloc (LOG_SIZE + 1); - char *mod_error_log = (char *) hcmalloc (LOG_SIZE + 1); + char *kernel_sources_buf = NULL; - int mod_cnt = 6; + char **kernel_sources = &kernel_sources_buf; - hipJitOption mod_opts[6]; - void *mod_vals[6]; + if (cached == false) + { + #if defined (DEBUG) + if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s not found in cache. Please be patient...", device_param->device_id + 1, filename_from_filepath (cached_file)); + #endif - mod_opts[0] = hipJitOptionTargetFromContext; - mod_vals[0] = (void *) 0; + if (read_kernel_binary (hashcat_ctx, source_file, kernel_lengths, kernel_sources) == false) return false; - mod_opts[1] = hipJitOptionLogVerbose; - mod_vals[1] = (void *) 1; + if (device_param->is_cuda == true) + { + nvrtcProgram program; - mod_opts[2] = hipJitOptionInfoLogBuffer; - mod_vals[2] = (void *) mod_info_log; + if (hc_nvrtcCreateProgram (hashcat_ctx, &program, kernel_sources[0], kernel_name, 0, NULL, NULL) == -1) return false; - mod_opts[3] = hipJitOptionInfoLogBufferSizeBytes; - mod_vals[3] = (void *) LOG_SIZE; + char **nvrtc_options = (char **) hccalloc (5 + strlen (build_options_buf) + 1, sizeof (char *)); // ... - mod_opts[4] = hipJitOptionErrorLogBuffer; - mod_vals[4] = (void *) mod_error_log; + nvrtc_options[0] = "--restrict"; + nvrtc_options[1] = "--device-as-default-execution-space"; + nvrtc_options[2] = "--gpu-architecture"; - mod_opts[5] = hipJitOptionErrorLogBufferSizeBytes; - mod_vals[5] = (void *) LOG_SIZE; + hc_asprintf (&nvrtc_options[3], "compute_%d%d", device_param->sm_major, device_param->sm_minor); - if (hc_hipModuleLoadDataEx (hashcat_ctx, hip_module, kernel_sources[0], mod_cnt, mod_opts, mod_vals) == -1) - { - event_log_error (hashcat_ctx, "* Device #%u: Kernel %s load failed. Error Log:", device_param->device_id + 1, source_file); - event_log_error (hashcat_ctx, "%s", mod_error_log); - event_log_error (hashcat_ctx, NULL); - - return false; - } - - #if defined (DEBUG) - event_log_info (hashcat_ctx, "* Device #%u: Kernel %s load successful. 
Info Log:", device_param->device_id + 1, source_file); - event_log_info (hashcat_ctx, "%s", mod_info_log); - event_log_info (hashcat_ctx, NULL); + // untested on windows, but it should work + #if defined (_WIN) || defined (__CYGWIN__) || defined (__MSYS__) + hc_asprintf (&nvrtc_options[4], "-D INCLUDE_PATH=%s", "OpenCL"); + #else + hc_asprintf (&nvrtc_options[4], "-D INCLUDE_PATH=%s", folder_config->cpath_real); #endif - hcfree (mod_info_log); - hcfree (mod_error_log); - } + char *nvrtc_options_string = hcstrdup (build_options_buf); - if (device_param->is_opencl == true) - { - if (hc_clCreateProgramWithBinary (hashcat_ctx, device_param->opencl_context, 1, &device_param->opencl_device, kernel_lengths, (const unsigned char **) kernel_sources, NULL, opencl_program) == -1) return false; + const int num_options = 5 + nvrtc_make_options_array_from_string (nvrtc_options_string, nvrtc_options + 5); - if (hc_clBuildProgram (hashcat_ctx, *opencl_program, 1, &device_param->opencl_device, build_options_buf, NULL, NULL) == -1) return false; - } - } + const int rc_nvrtcCompileProgram = hc_nvrtcCompileProgram (hashcat_ctx, program, num_options, (const char * const *) nvrtc_options); - hcfree (kernel_sources[0]); + hcfree (nvrtc_options_string); + hcfree (nvrtc_options); - return true; -} + size_t build_log_size = 0; -int backend_session_begin (hashcat_ctx_t *hashcat_ctx) -{ - const bitmap_ctx_t *bitmap_ctx = hashcat_ctx->bitmap_ctx; - const folder_config_t *folder_config = hashcat_ctx->folder_config; - const hashconfig_t *hashconfig = hashcat_ctx->hashconfig; - const hashes_t *hashes = hashcat_ctx->hashes; - const module_ctx_t *module_ctx = hashcat_ctx->module_ctx; - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - const straight_ctx_t *straight_ctx = hashcat_ctx->straight_ctx; - const user_options_extra_t *user_options_extra = hashcat_ctx->user_options_extra; - const user_options_t *user_options = hashcat_ctx->user_options; + hc_nvrtcGetProgramLogSize (hashcat_ctx, program, &build_log_size); - if (backend_ctx->enabled == false) return 0; + #if defined (DEBUG) + if ((build_log_size > 1) || (rc_nvrtcCompileProgram == -1)) + #else + if (rc_nvrtcCompileProgram == -1) + #endif + { + char *build_log = (char *) hcmalloc (build_log_size + 1); - u64 size_total_host_all = 0; + if (hc_nvrtcGetProgramLog (hashcat_ctx, program, build_log) == -1) + { + hcfree (build_log); - u32 hardware_power_all = 0; + return false; + } - int backend_memory_hit_warnings = 0; - int backend_runtime_skip_warnings = 0; - int backend_kernel_build_warnings = 0; - int backend_kernel_create_warnings = 0; - int backend_kernel_accel_warnings = 0; - int backend_extra_size_warning = 0; + build_log[build_log_size] = 0; - backend_ctx->memory_hit_warning = false; - backend_ctx->runtime_skip_warning = false; - backend_ctx->kernel_build_warning = false; - backend_ctx->kernel_create_warning = false; - backend_ctx->kernel_accel_warnings = false; - backend_ctx->extra_size_warning = false; - backend_ctx->mixed_warnings = false; + puts (build_log); - for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++) - { - /** - * host buffer - */ + hcfree (build_log); + } - hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx]; + if (rc_nvrtcCompileProgram == -1) + { + event_log_error (hashcat_ctx, "* Device #%u: Kernel %s build failed.", device_param->device_id + 1, source_file); - if (device_param->skipped == true) continue; + return false; + } - EVENT_DATA 
(EVENT_BACKEND_DEVICE_INIT_PRE, &backend_devices_idx, sizeof (int)); + size_t binary_size = 0; - const int device_id = device_param->device_id; + if (hc_nvrtcGetPTXSize (hashcat_ctx, program, &binary_size) == -1) return false; - /** - * module depending checks - */ + char *binary = (char *) hcmalloc (binary_size); - device_param->skipped_warning = false; + if (hc_nvrtcGetPTX (hashcat_ctx, program, binary) == -1) return false; - if (module_ctx->module_unstable_warning != MODULE_DEFAULT) - { - const bool unstable_warning = module_ctx->module_unstable_warning (hashconfig, user_options, user_options_extra, device_param); + if (hc_nvrtcDestroyProgram (hashcat_ctx, &program) == -1) return false; - if ((unstable_warning == true) && (user_options->force == false)) - { - event_log_warning (hashcat_ctx, "* Device #%u: Skipping (hash-mode %u)", device_id + 1, hashconfig->hash_mode); - event_log_warning (hashcat_ctx, " This is due to a known CUDA/HIP/OpenCL runtime/driver issue (not a hashcat issue)"); - event_log_warning (hashcat_ctx, " You can use --force to override, but do not report related errors."); + #define LOG_SIZE 8192 - backend_runtime_skip_warnings++; + char *mod_info_log = (char *) hcmalloc (LOG_SIZE + 1); + char *mod_error_log = (char *) hcmalloc (LOG_SIZE + 1); - device_param->skipped_warning = true; - continue; - } - } + int mod_cnt = 6; - /** - * tuning db - */ + CUjit_option mod_opts[7]; + void *mod_vals[7]; - if (module_ctx->module_extra_tuningdb_block != MODULE_DEFAULT) - { - const char *extra_tuningdb_block = module_ctx->module_extra_tuningdb_block (hashconfig, user_options, user_options_extra); + mod_opts[0] = CU_JIT_TARGET_FROM_CUCONTEXT; + mod_vals[0] = (void *) 0; - char *lines_buf = hcstrdup (extra_tuningdb_block); + mod_opts[1] = CU_JIT_LOG_VERBOSE; + mod_vals[1] = (void *) 1; - char *saveptr = NULL; + mod_opts[2] = CU_JIT_INFO_LOG_BUFFER; + mod_vals[2] = (void *) mod_info_log; - char *next = strtok_r (lines_buf, "\n", &saveptr); + mod_opts[3] = CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES; + mod_vals[3] = (void *) LOG_SIZE; - int line_num = 0; + mod_opts[4] = CU_JIT_ERROR_LOG_BUFFER; + mod_vals[4] = (void *) mod_error_log; - do + mod_opts[5] = CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES; + mod_vals[5] = (void *) LOG_SIZE; + + if (hashconfig->opti_type & OPTI_TYPE_REGISTER_LIMIT) { - line_num++; + mod_opts[6] = CU_JIT_MAX_REGISTERS; + mod_vals[6] = (void *) 128; - const size_t line_len = strlen (next); + mod_cnt++; + } - if (line_len == 0) continue; + #if defined (WITH_CUBIN) - if (next[0] == '#') continue; + char *jit_info_log = (char *) hcmalloc (LOG_SIZE + 1); + char *jit_error_log = (char *) hcmalloc (LOG_SIZE + 1); - tuning_db_process_line (hashcat_ctx, next, line_num); + int jit_cnt = 6; - } while ((next = strtok_r ((char *) NULL, "\n", &saveptr)) != NULL); + CUjit_option jit_opts[7]; + void *jit_vals[7]; - hcfree (lines_buf); + jit_opts[0] = CU_JIT_TARGET_FROM_CUCONTEXT; + jit_vals[0] = (void *) 0; - // todo: print loaded 'cnt' message + jit_opts[1] = CU_JIT_LOG_VERBOSE; + jit_vals[1] = (void *) 1; - // sort the database + jit_opts[2] = CU_JIT_INFO_LOG_BUFFER; + jit_vals[2] = (void *) jit_info_log; - tuning_db_t *tuning_db = hashcat_ctx->tuning_db; + jit_opts[3] = CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES; + jit_vals[3] = (void *) LOG_SIZE; - qsort (tuning_db->alias_buf, tuning_db->alias_cnt, sizeof (tuning_db_alias_t), sort_by_tuning_db_alias); - qsort (tuning_db->entry_buf, tuning_db->entry_cnt, sizeof (tuning_db_entry_t), sort_by_tuning_db_entry); - } + jit_opts[4] = CU_JIT_ERROR_LOG_BUFFER; 
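[Editor's note] The WITH_CUBIN branch being set up here takes the PTX emitted by NVRTC, JIT-links it into a device-specific cubin, loads that cubin as the CUDA module, and writes it to the kernel cache. As a rough orientation only, the underlying CUDA driver-API sequence looks like the sketch below; it uses the raw cu* entry points rather than hashcat's hc_cu* wrappers, drops the info/error log buffers, and is a minimal illustration of the flow, not the patch's exact implementation.

// Sketch: JIT-link NVRTC-generated PTX into a cubin and load it as a module.
// Raw CUDA driver API; hashcat routes these calls through its hc_cu* wrappers.
#include <cuda.h>

static int ptx_to_module (CUmodule *module, const char *ptx, size_t ptx_size)
{
  CUlinkState state;

  if (cuLinkCreate (0, NULL, NULL, &state) != CUDA_SUCCESS) return -1;

  // Hand the PTX to the linker; it is compiled for the current context's device.
  if (cuLinkAddData (state, CU_JIT_INPUT_PTX, (void *) ptx, ptx_size,
                     "kernel.ptx", 0, NULL, NULL) != CUDA_SUCCESS) return -1;

  void  *cubin      = NULL;
  size_t cubin_size = 0;

  // The cubin buffer is owned by the link state, so use it before cuLinkDestroy().
  if (cuLinkComplete (state, &cubin, &cubin_size) != CUDA_SUCCESS) return -1;

  if (cuModuleLoadData (module, cubin) != CUDA_SUCCESS) return -1;

  // This is also the point where the cubin would be written to the kernel cache.
  return (cuLinkDestroy (state) == CUDA_SUCCESS) ? 0 : -1;
}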
+ jit_vals[4] = (void *) jit_error_log; - // vector_width + jit_opts[5] = CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES; + jit_vals[5] = (void *) LOG_SIZE; - int vector_width = 0; + if (hashconfig->opti_type & OPTI_TYPE_REGISTER_LIMIT) + { + jit_opts[6] = CU_JIT_MAX_REGISTERS; + jit_vals[6] = (void *) 128; - if (user_options->backend_vector_width_chgd == false) - { - // tuning db + jit_cnt++; + } - tuning_db_entry_t *tuningdb_entry; + CUlinkState state; - if (user_options->slow_candidates == true) - { - tuningdb_entry = tuning_db_search (hashcat_ctx, device_param->device_name, device_param->opencl_device_type, 0, hashconfig->hash_mode); - } - else + if (hc_cuLinkCreate (hashcat_ctx, jit_cnt, jit_opts, jit_vals, &state) == -1) { - tuningdb_entry = tuning_db_search (hashcat_ctx, device_param->device_name, device_param->opencl_device_type, user_options->attack_mode, hashconfig->hash_mode); + event_log_error (hashcat_ctx, "* Device #%u: Kernel %s link failed. Error Log:", device_param->device_id + 1, source_file); + event_log_error (hashcat_ctx, "%s", jit_error_log); + event_log_error (hashcat_ctx, NULL); + + return false; } - if (tuningdb_entry == NULL || tuningdb_entry->vector_width == -1) + if (hc_cuLinkAddData (hashcat_ctx, state, CU_JIT_INPUT_PTX, binary, binary_size, kernel_name, 0, NULL, NULL) == -1) { - if (hashconfig->opti_type & OPTI_TYPE_USES_BITS_64) - { - if (device_param->is_cuda == true) - { - // cuda does not support this query + event_log_error (hashcat_ctx, "* Device #%u: Kernel %s link failed. Error Log:", device_param->device_id + 1, source_file); + event_log_error (hashcat_ctx, "%s", jit_error_log); + event_log_error (hashcat_ctx, NULL); - vector_width = 1; - } + return false; + } - if (device_param->is_hip == true) - { - // hip does not support this query + void *cubin = NULL; - vector_width = 1; - } + size_t cubin_size = 0; - if (device_param->is_opencl == true) - { - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, sizeof (vector_width), &vector_width, NULL) == -1) - { - device_param->skipped = true; - continue; - } - } - } - else - { - if (device_param->is_cuda == true) - { - // cuda does not support this query + if (hc_cuLinkComplete (hashcat_ctx, state, &cubin, &cubin_size) == -1) + { + event_log_error (hashcat_ctx, "* Device #%u: Kernel %s link failed. Error Log:", device_param->device_id + 1, source_file); + event_log_error (hashcat_ctx, "%s", jit_error_log); + event_log_error (hashcat_ctx, NULL); - vector_width = 1; - } + return false; + } - if (device_param->is_hip == true) - { - // hip does not support this query + #if defined (DEBUG) + event_log_info (hashcat_ctx, "* Device #%u: Kernel %s link successful. Info Log:", device_param->device_id + 1, source_file); + event_log_info (hashcat_ctx, "%s", jit_info_log); + event_log_info (hashcat_ctx, NULL); + #endif - vector_width = 1; - } + if (hc_cuModuleLoadDataEx (hashcat_ctx, cuda_module, cubin, mod_cnt, mod_opts, mod_vals) == -1) + { + event_log_error (hashcat_ctx, "* Device #%u: Kernel %s load failed. 
Error Log:", device_param->device_id + 1, source_file); + event_log_error (hashcat_ctx, "%s", mod_error_log); + event_log_error (hashcat_ctx, NULL); - if (device_param->is_opencl == true) - { - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, sizeof (vector_width), &vector_width, NULL) == -1) - { - device_param->skipped = true; - continue; - } - } - } + return false; } - else + + #if defined (DEBUG) + event_log_info (hashcat_ctx, "* Device #%u: Kernel %s load successful. Info Log:", device_param->device_id + 1, source_file); + event_log_info (hashcat_ctx, "%s", mod_info_log); + event_log_info (hashcat_ctx, NULL); + #endif + + if (cache_disable == false) { - vector_width = (cl_uint) tuningdb_entry->vector_width; + if (write_kernel_binary (hashcat_ctx, cached_file, cubin, cubin_size) == false) return false; } - } - else - { - vector_width = user_options->backend_vector_width; - } - // We can't have SIMD in kernels where we have an unknown final password length - // It also turns out that pure kernels (that have a higher register pressure) - // actually run faster on scalar GPU (like 1080) without SIMD + if (hc_cuLinkDestroy (hashcat_ctx, state) == -1) return false; - if ((hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL) == 0) - { - if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) + hcfree (jit_info_log); + hcfree (jit_error_log); + + #else + + if (hc_cuModuleLoadDataEx (hashcat_ctx, cuda_module, binary, mod_cnt, mod_opts, mod_vals) == -1) { - vector_width = 1; + event_log_error (hashcat_ctx, "* Device #%u: Kernel %s load failed. Error Log:", device_param->device_id + 1, source_file); + event_log_error (hashcat_ctx, "%s", mod_error_log); + event_log_error (hashcat_ctx, NULL); + + return false; + } + + #if defined (DEBUG) + event_log_info (hashcat_ctx, "* Device #%u: Kernel %s load successful. Info Log:", device_param->device_id + 1, source_file); + event_log_info (hashcat_ctx, "%s", mod_info_log); + event_log_info (hashcat_ctx, NULL); + #endif + + if (cache_disable == false) + { + if (write_kernel_binary (hashcat_ctx, cached_file, binary, binary_size) == false) return false; } + + #endif + + hcfree (mod_info_log); + hcfree (mod_error_log); + + hcfree (binary); } - if (user_options->attack_mode == ATTACK_MODE_ASSOCIATION) + if (device_param->is_hip == true) { - // not working in this mode because the GID does not align with password candidate count - // and if it cracks, it will crack the same hash twice, running into segfaults + hiprtcProgram program; - vector_width = 1; - } + if (hc_hiprtcCreateProgram (hashcat_ctx, &program, kernel_sources[0], kernel_name, 0, NULL, NULL) == -1) return false; - if (vector_width > 16) vector_width = 16; + char **hiprtc_options = (char **) hccalloc (6 + strlen (build_options_buf) + 1, sizeof (char *)); // ... - device_param->vector_width = vector_width; + //hiprtc_options[0] = "--restrict"; + //hiprtc_options[1] = "--device-as-default-execution-space"; + //hiprtc_options[2] = "--gpu-architecture"; - /** - * kernel accel and loops tuning db adjustment - */ + hc_asprintf (&hiprtc_options[0], "--gpu-max-threads-per-block=%d", (user_options->kernel_threads_chgd == true) ? 
user_options->kernel_threads : device_param->kernel_threads_max); - device_param->kernel_accel_min = hashconfig->kernel_accel_min; - device_param->kernel_accel_max = hashconfig->kernel_accel_max; - device_param->kernel_loops_min = hashconfig->kernel_loops_min; - device_param->kernel_loops_max = hashconfig->kernel_loops_max; - device_param->kernel_threads_min = hashconfig->kernel_threads_min; - device_param->kernel_threads_max = hashconfig->kernel_threads_max; + /* 4.3 linux + hiprtc_options[1] = "-I"; + hiprtc_options[2] = "/opt/rocm/hip/bin/include"; + hiprtc_options[3] = "-I"; + hiprtc_options[4] = "/opt/rocm/include"; + hiprtc_options[5] = "-I"; + */ - tuning_db_entry_t *tuningdb_entry = NULL; + hiprtc_options[1] = "-nocudainc"; + hiprtc_options[2] = "-nocudalib"; + hiprtc_options[3] = ""; + hiprtc_options[4] = ""; - if (user_options->slow_candidates == true) - { - tuningdb_entry = tuning_db_search (hashcat_ctx, device_param->device_name, device_param->opencl_device_type, 0, hashconfig->hash_mode); - } - else - { - tuningdb_entry = tuning_db_search (hashcat_ctx, device_param->device_name, device_param->opencl_device_type, user_options->attack_mode, hashconfig->hash_mode); - } + // untested but it should work + #if defined (_WIN) || defined (__CYGWIN__) || defined (__MSYS__) + hc_asprintf (&hiprtc_options[5], "-D INCLUDE_PATH=%s", "OpenCL"); + #else + hc_asprintf (&hiprtc_options[5], "-D INCLUDE_PATH=%s", folder_config->cpath_real); + #endif - // user commandline option override tuning db - // but both have to stay inside the boundaries of the module + char *hiprtc_options_string = hcstrdup (build_options_buf); - if (user_options->kernel_accel_chgd == true) - { - const u32 _kernel_accel = user_options->kernel_accel; + const int num_options = 6 + hiprtc_make_options_array_from_string (hiprtc_options_string, hiprtc_options + 6); - if ((_kernel_accel >= device_param->kernel_accel_min) && (_kernel_accel <= device_param->kernel_accel_max)) - { - device_param->kernel_accel_min = _kernel_accel; - device_param->kernel_accel_max = _kernel_accel; - } - } - else - { - if (tuningdb_entry != NULL) - { - const u32 _kernel_accel = tuningdb_entry->kernel_accel; + const int rc_hiprtcCompileProgram = hc_hiprtcCompileProgram (hashcat_ctx, program, num_options, (const char * const *) hiprtc_options); - if (_kernel_accel == (u32) -1) // native, makes sense if OPTS_TYPE_MP_MULTI_DISABLE is used - { - if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) - { - if (module_ctx->module_extra_tuningdb_block != MODULE_DEFAULT) - { - event_log_warning (hashcat_ctx, "ATTENTION! 
This hash-mode requires manual tuning to achieve full performance."); - event_log_warning (hashcat_ctx, "The loss of performance can be greater than 100%% without manual tuning."); - event_log_warning (hashcat_ctx, NULL); - event_log_warning (hashcat_ctx, "This warning message disappears after a definition for the installed"); - event_log_warning (hashcat_ctx, "compute-device in this computer has been added to either list:"); - event_log_warning (hashcat_ctx, "- src/modules/module_%05d.c", hashconfig->hash_mode); - event_log_warning (hashcat_ctx, "- hashcat.hctune"); - event_log_warning (hashcat_ctx, NULL); - event_log_warning (hashcat_ctx, "For instructions on tuning, see src/modules/module_%05d.c", hashconfig->hash_mode); - event_log_warning (hashcat_ctx, "Also, consider sending a PR to Hashcat Master so that other users can benefit from your work."); - event_log_warning (hashcat_ctx, NULL); - } - } + hcfree (hiprtc_options_string); + hcfree (hiprtc_options); - device_param->kernel_accel_min = device_param->device_processors; - device_param->kernel_accel_max = device_param->device_processors; - } - else - { - if (_kernel_accel) - { - if ((_kernel_accel >= device_param->kernel_accel_min) && (_kernel_accel <= device_param->kernel_accel_max)) - { - device_param->kernel_accel_min = _kernel_accel; - device_param->kernel_accel_max = _kernel_accel; - } - } - } - } - } + size_t build_log_size = 0; - if (user_options->kernel_loops_chgd == true) - { - const u32 _kernel_loops = user_options->kernel_loops; + hc_hiprtcGetProgramLogSize (hashcat_ctx, program, &build_log_size); - if ((_kernel_loops >= device_param->kernel_loops_min) && (_kernel_loops <= device_param->kernel_loops_max)) - { - device_param->kernel_loops_min = _kernel_loops; - device_param->kernel_loops_max = _kernel_loops; - } - } - else - { - if (tuningdb_entry != NULL) + #if defined (DEBUG) + if ((build_log_size > 1) || (rc_hiprtcCompileProgram == -1)) + #else + if (rc_hiprtcCompileProgram == -1) + #endif { - u32 _kernel_loops = tuningdb_entry->kernel_loops; + char *build_log = (char *) hcmalloc (build_log_size + 1); - if (_kernel_loops) + if (hc_hiprtcGetProgramLog (hashcat_ctx, program, build_log) == -1) { - if (user_options->workload_profile == 1) - { - _kernel_loops = (_kernel_loops > 8) ? _kernel_loops / 8 : 1; - } - else if (user_options->workload_profile == 2) - { - _kernel_loops = (_kernel_loops > 4) ? 
_kernel_loops / 4 : 1; - } + hcfree (build_log); - if ((_kernel_loops >= device_param->kernel_loops_min) && (_kernel_loops <= device_param->kernel_loops_max)) - { - device_param->kernel_loops_min = _kernel_loops; - device_param->kernel_loops_max = _kernel_loops; - } + return false; } - } - } - // there's no thread column in tuning db, stick to commandline if defined + build_log[build_log_size] = 0; - if (user_options->kernel_threads_chgd == true) - { - const u32 _kernel_threads = user_options->kernel_threads; + puts (build_log); - if ((_kernel_threads >= device_param->kernel_threads_min) && (_kernel_threads <= device_param->kernel_threads_max)) - { - device_param->kernel_threads_min = _kernel_threads; - device_param->kernel_threads_max = _kernel_threads; + hcfree (build_log); } - } - - if (user_options->slow_candidates == true) - { - } - else - { - // we have some absolute limits for fast hashes (because of limit constant memory), make sure not to overstep - if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + if (rc_hiprtcCompileProgram == -1) { - if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT) - { - device_param->kernel_loops_min = MIN (device_param->kernel_loops_min, KERNEL_RULES); - device_param->kernel_loops_max = MIN (device_param->kernel_loops_max, KERNEL_RULES); - } - else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI) - { - device_param->kernel_loops_min = MIN (device_param->kernel_loops_min, KERNEL_COMBS); - device_param->kernel_loops_max = MIN (device_param->kernel_loops_max, KERNEL_COMBS); - } - else if (user_options_extra->attack_kern == ATTACK_KERN_BF) - { - device_param->kernel_loops_min = MIN (device_param->kernel_loops_min, KERNEL_BFS); - device_param->kernel_loops_max = MIN (device_param->kernel_loops_max, KERNEL_BFS); - } + event_log_error (hashcat_ctx, "* Device #%u: Kernel %s build failed.", device_param->device_id + 1, source_file); + + return false; } - } - device_param->kernel_loops_min_sav = device_param->kernel_loops_min; - device_param->kernel_loops_max_sav = device_param->kernel_loops_max; + size_t binary_size = 0; - /** - * device properties - */ + if (hc_hiprtcGetCodeSize (hashcat_ctx, program, &binary_size) == -1) return false; - const u32 device_processors = device_param->device_processors; + char *binary = (char *) hcmalloc (binary_size); - /** - * device threads - */ + if (hc_hiprtcGetCode (hashcat_ctx, program, binary) == -1) return false; - if (hashconfig->opts_type & OPTS_TYPE_MAXIMUM_THREADS) - { - // default for all, because the else branch is doing the same (nothing), but is actually used as a way to - // disable the default native thread configuration for HIP - // this can have negative performance if not tested on multiple different gpu architectures - } - else if (hashconfig->opts_type & OPTS_TYPE_NATIVE_THREADS) - { - u32 native_threads = 0; + if (hc_hiprtcDestroyProgram (hashcat_ctx, &program) == -1) return false; - if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU) - { - native_threads = 1; - } - else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) + #define LOG_SIZE 8192 + + char *mod_info_log = (char *) hcmalloc (LOG_SIZE + 1); + char *mod_error_log = (char *) hcmalloc (LOG_SIZE + 1); + + int mod_cnt = 6; + + hipJitOption mod_opts[6]; + void *mod_vals[6]; + + mod_opts[0] = hipJitOptionTargetFromContext; + mod_vals[0] = (void *) 0; + + mod_opts[1] = hipJitOptionLogVerbose; + mod_vals[1] = (void *) 1; + + mod_opts[2] = hipJitOptionInfoLogBuffer; + mod_vals[2] = (void *) mod_info_log; + + 
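[Editor's note] On the HIP side the cache-miss path mirrors the CUDA one: hipRTC compiles the kernel source to a code object, the code object is written to the on-disk kernel cache, and hipModuleLoadDataEx() (whose JIT options are being prepared here) brings it into the context. A minimal sketch of that hipRTC round trip, written against the plain hiprtc*/hip* API instead of the hc_hip* wrappers and with compile-log handling and error cleanup left out:

// Sketch: runtime-compile HIP kernel source and load the resulting code object.
#include <hip/hip_runtime.h>
#include <hip/hiprtc.h>
#include <stdlib.h>

static int hip_source_to_module (hipModule_t *module, const char *src, const char *name)
{
  hiprtcProgram prog;

  if (hiprtcCreateProgram (&prog, src, name, 0, NULL, NULL) != HIPRTC_SUCCESS) return -1;

  // Same options the patch passes to keep CUDA headers/libs out of the compile.
  const char *opts[] = { "-nocudainc", "-nocudalib" };

  if (hiprtcCompileProgram (prog, 2, opts) != HIPRTC_SUCCESS) return -1;

  size_t code_size = 0;

  if (hiprtcGetCodeSize (prog, &code_size) != HIPRTC_SUCCESS) return -1;

  char *code = (char *) malloc (code_size);

  if (hiprtcGetCode (prog, code) != HIPRTC_SUCCESS) return -1;

  hiprtcDestroyProgram (&prog);

  // The code object is what hashcat caches on disk between runs.
  if (hipModuleLoadData (module, code) != hipSuccess) return -1;

  free (code);

  return 0;
}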
mod_opts[3] = hipJitOptionInfoLogBufferSizeBytes; + mod_vals[3] = (void *) LOG_SIZE; + + mod_opts[4] = hipJitOptionErrorLogBuffer; + mod_vals[4] = (void *) mod_error_log; + + mod_opts[5] = hipJitOptionErrorLogBufferSizeBytes; + mod_vals[5] = (void *) LOG_SIZE; + + if (hc_hipModuleLoadDataEx (hashcat_ctx, hip_module, binary, mod_cnt, mod_opts, mod_vals) == -1) { - native_threads = device_param->kernel_preferred_wgs_multiple; + event_log_error (hashcat_ctx, "* Device #%u: Kernel %s load failed. Error Log:", device_param->device_id + 1, source_file); + event_log_error (hashcat_ctx, "%s", mod_error_log); + event_log_error (hashcat_ctx, NULL); + + return false; } - else + + #if defined (DEBUG) + event_log_info (hashcat_ctx, "* Device #%u: Kernel %s load successful. Info Log:", device_param->device_id + 1, source_file); + event_log_info (hashcat_ctx, "%s", mod_info_log); + event_log_info (hashcat_ctx, NULL); + #endif + + if (cache_disable == false) { - // abort? + if (write_kernel_binary (hashcat_ctx, cached_file, binary, binary_size) == false) return false; } - if ((native_threads >= device_param->kernel_threads_min) && (native_threads <= device_param->kernel_threads_max)) + hcfree (mod_info_log); + hcfree (mod_error_log); + + hcfree (binary); + } + + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + mtl_library metal_lib = NULL; + + if (hc_mtlCreateLibraryWithSource (hashcat_ctx, device_param->metal_device, kernel_sources[0], build_options_buf, folder_config->cpath_real, &metal_lib) == -1) return false; + + *metal_library = metal_lib; + + #if defined (DEBUG) + event_log_info (hashcat_ctx, "* Device #%u: Kernel %s load successful.", device_param->device_id + 1, source_file); + event_log_info (hashcat_ctx, NULL); + #endif + } + #endif // __APPLE__ + + if (device_param->is_opencl == true) + { + size_t build_log_size = 0; + + int CL_rc; + + cl_program p1 = NULL; + + // workaround opencl issue with Apple Silicon + + if (strncmp (device_param->device_name, "Apple M", 7) == 0) { - device_param->kernel_threads_min = native_threads; - device_param->kernel_threads_max = native_threads; + if (hc_clCreateProgramWithSource (hashcat_ctx, device_param->opencl_context, 1, (const char **) kernel_sources, NULL, opencl_program) == -1) return false; + + CL_rc = hc_clBuildProgram (hashcat_ctx, *opencl_program, 1, &device_param->opencl_device, build_options_buf, NULL, NULL); + + hc_clGetProgramBuildInfo (hashcat_ctx, *opencl_program, device_param->opencl_device, CL_PROGRAM_BUILD_LOG, 0, NULL, &build_log_size); } else { - // abort? 
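[Editor's note] Just above, the new Metal branch compiles the kernel with hc_mtlCreateLibraryWithSource() when no cached library exists; the OpenCL branch that follows keeps the existing Apple Silicon workaround, building the program in a single clBuildProgram() step on "Apple M*" devices instead of the separate clCompileProgram()/clLinkProgram() route used elsewhere. A condensed sketch of the two OpenCL paths, written against the plain OpenCL API rather than the hc_cl* wrappers and with build-log handling omitted, under the assumption of a single target device:

// Sketch: one-step build on Apple Silicon, compile + link everywhere else.
#define CL_TARGET_OPENCL_VERSION 120
#include <string.h>
#ifdef __APPLE__
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif

static cl_program build_for_device (cl_context ctx, cl_device_id dev,
                                    const char *dev_name, const char *src,
                                    const char *options)
{
  cl_int err;

  cl_program prog = clCreateProgramWithSource (ctx, 1, &src, NULL, &err);

  if (err != CL_SUCCESS) return NULL;

  if (strncmp (dev_name, "Apple M", 7) == 0)
  {
    // Apple Silicon workaround: build directly, skip the separate link step.
    if (clBuildProgram (prog, 1, &dev, options, NULL, NULL) != CL_SUCCESS) return NULL;

    return prog;
  }

  // Other devices: compile, then link the single object into the final program.
  if (clCompileProgram (prog, 1, &dev, options, 0, NULL, NULL, NULL, NULL) != CL_SUCCESS) return NULL;

  cl_program linked = clLinkProgram (ctx, 1, &dev, NULL, 1, &prog, NULL, NULL, &err);

  clReleaseProgram (prog);

  return (err == CL_SUCCESS) ? linked : NULL;
}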
+ if (hc_clCreateProgramWithSource (hashcat_ctx, device_param->opencl_context, 1, (const char **) kernel_sources, NULL, &p1) == -1) return false; + + CL_rc = hc_clCompileProgram (hashcat_ctx, p1, 1, &device_param->opencl_device, build_options_buf, 0, NULL, NULL, NULL, NULL); + + hc_clGetProgramBuildInfo (hashcat_ctx, p1, device_param->opencl_device, CL_PROGRAM_BUILD_LOG, 0, NULL, &build_log_size); } - } - else - { - if (device_param->is_hip == true) + + #if defined (DEBUG) + if ((build_log_size > 1) || (CL_rc == -1)) + #else + if (CL_rc == -1) + #endif { - const u32 native_threads = device_param->kernel_preferred_wgs_multiple; + char *build_log = (char *) hcmalloc (build_log_size + 1); - if ((native_threads >= device_param->kernel_threads_min) && (native_threads <= device_param->kernel_threads_max)) + int rc_clGetProgramBuildInfo; + + if (strncmp (device_param->device_name, "Apple M", 7) == 0) { - device_param->kernel_threads_min = native_threads; - device_param->kernel_threads_max = native_threads; + rc_clGetProgramBuildInfo = hc_clGetProgramBuildInfo (hashcat_ctx, *opencl_program, device_param->opencl_device, CL_PROGRAM_BUILD_LOG, build_log_size, build_log, NULL); } else { - // abort? + rc_clGetProgramBuildInfo = hc_clGetProgramBuildInfo (hashcat_ctx, p1, device_param->opencl_device, CL_PROGRAM_BUILD_LOG, build_log_size, build_log, NULL); } - } - } - // this seems to work always + if (rc_clGetProgramBuildInfo == -1) + { + hcfree (build_log); - if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU) - { - u32 native_threads = 1; + return false; + } - if ((native_threads >= device_param->kernel_threads_min) && (native_threads <= device_param->kernel_threads_max)) - { - device_param->kernel_threads_min = native_threads; - device_param->kernel_threads_max = native_threads; - } - } + build_log[build_log_size] = 0; - /** - * create context for each device - */ + puts (build_log); - if (device_param->is_cuda == true) - { - if (hc_cuCtxCreate (hashcat_ctx, &device_param->cuda_context, CU_CTX_SCHED_BLOCKING_SYNC, device_param->cuda_device) == -1) - { - device_param->skipped = true; - continue; + hcfree (build_log); } - if (hc_cuCtxPushCurrent (hashcat_ctx, device_param->cuda_context) == -1) - { - device_param->skipped = true; - continue; - } - } + if (CL_rc == -1) return false; - if (device_param->is_hip == true) - { - if (hc_hipCtxCreate (hashcat_ctx, &device_param->hip_context, hipDeviceScheduleBlockingSync, device_param->hip_device) == -1) - { - device_param->skipped = true; - continue; - } + // workaround opencl issue with Apple Silicon - if (hc_hipCtxPushCurrent (hashcat_ctx, device_param->hip_context) == -1) + if (strncmp (device_param->device_name, "Apple M", 7) != 0) { - device_param->skipped = true; - continue; - } - } + cl_program t2[1]; - if (device_param->is_opencl == true) - { - /* - cl_context_properties properties[3]; + t2[0] = p1; - properties[0] = CL_CONTEXT_PLATFORM; - properties[1] = (cl_context_properties) device_param->opencl_platform; - properties[2] = 0; + cl_program fin; - CL_rc = hc_clCreateContext (hashcat_ctx, properties, 1, &device_param->opencl_device, NULL, NULL, &device_param->opencl_context); - */ + if (hc_clLinkProgram (hashcat_ctx, device_param->opencl_context, 1, &device_param->opencl_device, NULL, 1, t2, NULL, NULL, &fin) == -1) return false; - if (hc_clCreateContext (hashcat_ctx, NULL, 1, &device_param->opencl_device, NULL, NULL, &device_param->opencl_context) == -1) - { - device_param->skipped = true; - continue; - } + // it seems errors caused by 
clLinkProgram() do not go into CL_PROGRAM_BUILD + // I couldn't find any information on the web explaining how else to retrieve the error messages from the linker - /** - * create command-queue - */ + *opencl_program = fin; - // not supported with NV - // device_param->opencl_command_queue = hc_clCreateCommandQueueWithProperties (hashcat_ctx, device_param->opencl_device, NULL); + hc_clReleaseProgram (hashcat_ctx, p1); + } - if (hc_clCreateCommandQueue (hashcat_ctx, device_param->opencl_context, device_param->opencl_device, CL_QUEUE_PROFILING_ENABLE, &device_param->opencl_command_queue) == -1) + if (cache_disable == false) { - device_param->skipped = true; - continue; - } - } + size_t binary_size; - /** - * create stream for CUDA devices - */ + if (hc_clGetProgramInfo (hashcat_ctx, *opencl_program, CL_PROGRAM_BINARY_SIZES, sizeof (size_t), &binary_size, NULL) == -1) return false; - if (device_param->is_cuda == true) - { - if (hc_cuStreamCreate (hashcat_ctx, &device_param->cuda_stream, CU_STREAM_DEFAULT) == -1) - { - device_param->skipped = true; - continue; - } - } + char *binary = (char *) hcmalloc (binary_size); - /** - * create stream for HIP devices - */ + if (hc_clGetProgramInfo (hashcat_ctx, *opencl_program, CL_PROGRAM_BINARIES, sizeof (char *), &binary, NULL) == -1) return false; - if (device_param->is_hip == true) - { - if (hc_hipStreamCreate (hashcat_ctx, &device_param->hip_stream, hipStreamDefault) == -1) - { - device_param->skipped = true; - continue; + if (write_kernel_binary (hashcat_ctx, cached_file, binary, binary_size) == false) return false; + + hcfree (binary); } } - - /** - * create events for CUDA devices - */ + } + else + { + if (read_kernel_binary (hashcat_ctx, cached_file, kernel_lengths, kernel_sources) == false) return false; if (device_param->is_cuda == true) { - if (hc_cuEventCreate (hashcat_ctx, &device_param->cuda_event1, CU_EVENT_BLOCKING_SYNC) == -1) - { - device_param->skipped = true; - continue; - } + #define LOG_SIZE 8192 - if (hc_cuEventCreate (hashcat_ctx, &device_param->cuda_event2, CU_EVENT_BLOCKING_SYNC) == -1) + char *mod_info_log = (char *) hcmalloc (LOG_SIZE + 1); + char *mod_error_log = (char *) hcmalloc (LOG_SIZE + 1); + + int mod_cnt = 6; + + CUjit_option mod_opts[7]; + void *mod_vals[7]; + + mod_opts[0] = CU_JIT_TARGET_FROM_CUCONTEXT; + mod_vals[0] = (void *) 0; + + mod_opts[1] = CU_JIT_LOG_VERBOSE; + mod_vals[1] = (void *) 1; + + mod_opts[2] = CU_JIT_INFO_LOG_BUFFER; + mod_vals[2] = (void *) mod_info_log; + + mod_opts[3] = CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES; + mod_vals[3] = (void *) LOG_SIZE; + + mod_opts[4] = CU_JIT_ERROR_LOG_BUFFER; + mod_vals[4] = (void *) mod_error_log; + + mod_opts[5] = CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES; + mod_vals[5] = (void *) LOG_SIZE; + + if (hashconfig->opti_type & OPTI_TYPE_REGISTER_LIMIT) { - device_param->skipped = true; - continue; + mod_opts[6] = CU_JIT_MAX_REGISTERS; + mod_vals[6] = (void *) 128; + + mod_cnt++; } - if (hc_cuEventCreate (hashcat_ctx, &device_param->cuda_event3, CU_EVENT_DISABLE_TIMING) == -1) + if (hc_cuModuleLoadDataEx (hashcat_ctx, cuda_module, kernel_sources[0], mod_cnt, mod_opts, mod_vals) == -1) { - device_param->skipped = true; - continue; + event_log_error (hashcat_ctx, "* Device #%u: Kernel %s load failed. 
Error Log:", device_param->device_id + 1, source_file); + event_log_error (hashcat_ctx, "%s", mod_error_log); + event_log_error (hashcat_ctx, NULL); + + return false; } - } - /** - * create events for HIP devices - */ + #if defined (DEBUG) + event_log_info (hashcat_ctx, "* Device #%u: Kernel %s load successful. Info Log:", device_param->device_id + 1, source_file); + event_log_info (hashcat_ctx, "%s", mod_info_log); + event_log_info (hashcat_ctx, NULL); + #endif + + hcfree (mod_info_log); + hcfree (mod_error_log); + } if (device_param->is_hip == true) { - if (hc_hipEventCreate (hashcat_ctx, &device_param->hip_event1, hipEventBlockingSync) == -1) - { - device_param->skipped = true; - continue; - } + #define LOG_SIZE 8192 - if (hc_hipEventCreate (hashcat_ctx, &device_param->hip_event2, hipEventBlockingSync) == -1) - { - device_param->skipped = true; - continue; - } + char *mod_info_log = (char *) hcmalloc (LOG_SIZE + 1); + char *mod_error_log = (char *) hcmalloc (LOG_SIZE + 1); - if (hc_hipEventCreate (hashcat_ctx, &device_param->hip_event3, hipEventDisableTiming) == -1) - { - device_param->skipped = true; - continue; - } - } + int mod_cnt = 6; - /** - * create input buffers on device : calculate size of fixed memory buffers - */ + hipJitOption mod_opts[6]; + void *mod_vals[6]; - u64 size_root_css = SP_PW_MAX * sizeof (cs_t); - u64 size_markov_css = SP_PW_MAX * CHARSIZ * sizeof (cs_t); + mod_opts[0] = hipJitOptionTargetFromContext; + mod_vals[0] = (void *) 0; - device_param->size_root_css = size_root_css; - device_param->size_markov_css = size_markov_css; + mod_opts[1] = hipJitOptionLogVerbose; + mod_vals[1] = (void *) 1; - u64 size_results = sizeof (u32); + mod_opts[2] = hipJitOptionInfoLogBuffer; + mod_vals[2] = (void *) mod_info_log; - device_param->size_results = size_results; + mod_opts[3] = hipJitOptionInfoLogBufferSizeBytes; + mod_vals[3] = (void *) LOG_SIZE; - u64 size_rules = (u64) straight_ctx->kernel_rules_cnt * sizeof (kernel_rule_t); - u64 size_rules_c = (u64) KERNEL_RULES * sizeof (kernel_rule_t); + mod_opts[4] = hipJitOptionErrorLogBuffer; + mod_vals[4] = (void *) mod_error_log; - device_param->size_rules = size_rules; - device_param->size_rules_c = size_rules_c; + mod_opts[5] = hipJitOptionErrorLogBufferSizeBytes; + mod_vals[5] = (void *) LOG_SIZE; - u64 size_plains = (u64) hashes->digests_cnt * sizeof (plain_t); - u64 size_salts = (u64) hashes->salts_cnt * sizeof (salt_t); - u64 size_esalts = (u64) hashes->digests_cnt * hashconfig->esalt_size; - u64 size_shown = (u64) hashes->digests_cnt * sizeof (u32); - u64 size_digests = (u64) hashes->digests_cnt * (u64) hashconfig->dgst_size; + if (hc_hipModuleLoadDataEx (hashcat_ctx, hip_module, kernel_sources[0], mod_cnt, mod_opts, mod_vals) == -1) + { + event_log_error (hashcat_ctx, "* Device #%u: Kernel %s load failed. Error Log:", device_param->device_id + 1, source_file); + event_log_error (hashcat_ctx, "%s", mod_error_log); + event_log_error (hashcat_ctx, NULL); - device_param->size_plains = size_plains; - device_param->size_digests = size_digests; - device_param->size_shown = size_shown; - device_param->size_salts = size_salts; - device_param->size_esalts = size_esalts; + return false; + } - u64 size_combs = KERNEL_COMBS * sizeof (pw_t); - u64 size_bfs = KERNEL_BFS * sizeof (bf_t); - u64 size_tm = 32 * sizeof (bs_word_t); - u64 size_kernel_params = 1 * sizeof (kernel_param_t); + #if defined (DEBUG) + event_log_info (hashcat_ctx, "* Device #%u: Kernel %s load successful. 
Info Log:", device_param->device_id + 1, source_file); + event_log_info (hashcat_ctx, "%s", mod_info_log); + event_log_info (hashcat_ctx, NULL); + #endif - device_param->size_bfs = size_bfs; - device_param->size_combs = size_combs; - device_param->size_tm = size_tm; - device_param->size_kernel_params = size_kernel_params; + hcfree (mod_info_log); + hcfree (mod_error_log); + } - u64 size_st_digests = 1 * hashconfig->dgst_size; - u64 size_st_salts = 1 * sizeof (salt_t); - u64 size_st_esalts = 1 * hashconfig->esalt_size; + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + mtl_library metal_lib = NULL; - device_param->size_st_digests = size_st_digests; - device_param->size_st_salts = size_st_salts; - device_param->size_st_esalts = size_st_esalts; + if (hc_mtlCreateLibraryWithFile (hashcat_ctx, device_param->metal_device, cached_file, &metal_lib) == -1) return false; - // extra buffer + *metal_library = metal_lib; - u64 size_extra_buffer = 4; + #if defined (DEBUG) + event_log_info (hashcat_ctx, "* Device #%u: Kernel %s load successful.", device_param->device_id + 1, source_file); + event_log_info (hashcat_ctx, NULL); + #endif + } + #endif - if (module_ctx->module_extra_buffer_size != MODULE_DEFAULT) + if (device_param->is_opencl == true) { - const u64 extra_buffer_size = module_ctx->module_extra_buffer_size (hashconfig, user_options, user_options_extra, hashes, device_param); + if (hc_clCreateProgramWithBinary (hashcat_ctx, device_param->opencl_context, 1, &device_param->opencl_device, kernel_lengths, (const unsigned char **) kernel_sources, NULL, opencl_program) == -1) return false; - if (extra_buffer_size == (u64) -1) - { - event_log_error (hashcat_ctx, "Invalid extra buffer size."); + if (hc_clBuildProgram (hashcat_ctx, *opencl_program, 1, &device_param->opencl_device, build_options_buf, NULL, NULL) == -1) return false; + } + } - backend_extra_size_warning++; + hcfree (kernel_sources[0]); - device_param->skipped_warning = true; - continue; - } + return true; +} - device_param->extra_buffer_size = extra_buffer_size; +int backend_session_begin (hashcat_ctx_t *hashcat_ctx) +{ + const bitmap_ctx_t *bitmap_ctx = hashcat_ctx->bitmap_ctx; + const folder_config_t *folder_config = hashcat_ctx->folder_config; + const hashconfig_t *hashconfig = hashcat_ctx->hashconfig; + const hashes_t *hashes = hashcat_ctx->hashes; + const module_ctx_t *module_ctx = hashcat_ctx->module_ctx; + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + const straight_ctx_t *straight_ctx = hashcat_ctx->straight_ctx; + const user_options_extra_t *user_options_extra = hashcat_ctx->user_options_extra; + const user_options_t *user_options = hashcat_ctx->user_options; - // for the size we actually allocate we need to cheat a bit in order to make it more easy for plugin developer. - // - // we will divide this size by 4 to workaround opencl limitation. - // this collides with a theoretical scenario (like -n1 -T1) where there's only one workitem, - // because inside the kernel the target buffer is selected by workitem_id / 4. - // but the maximum size of the buffer would be only 1/4 of what is needed -> overflow. - // - // to workaround this we make sure that there's always a full buffer in each of the 4 allocated buffers available. + if (backend_ctx->enabled == false) return 0; - const u64 kernel_power_max = ((hashconfig->opts_type & OPTS_TYPE_MP_MULTI_DISABLE) ? 
1 : device_param->device_processors) * device_param->kernel_threads_max * device_param->kernel_accel_max; + u64 size_total_host_all = 0; - const u64 extra_buffer_size_one = extra_buffer_size / kernel_power_max; + u32 hardware_power_all = 0; - size_extra_buffer = extra_buffer_size + (extra_buffer_size_one * 4); - } + int backend_memory_hit_warnings = 0; + int backend_runtime_skip_warnings = 0; + int backend_kernel_build_warnings = 0; + int backend_kernel_create_warnings = 0; + int backend_kernel_accel_warnings = 0; + int backend_extra_size_warning = 0; - // kern type + backend_ctx->memory_hit_warning = false; + backend_ctx->runtime_skip_warning = false; + backend_ctx->kernel_build_warning = false; + backend_ctx->kernel_create_warning = false; + backend_ctx->kernel_accel_warnings = false; + backend_ctx->extra_size_warning = false; + backend_ctx->mixed_warnings = false; - u32 kern_type = hashconfig->kern_type; + for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++) + { + /** + * host buffer + */ - if (module_ctx->module_kern_type_dynamic != MODULE_DEFAULT) - { - if (user_options->benchmark == true) - { - } - else - { - void *digests_buf = hashes->digests_buf; - salt_t *salts_buf = hashes->salts_buf; - void *esalts_buf = hashes->esalts_buf; - void *hook_salts_buf = hashes->hook_salts_buf; - hashinfo_t **hash_info = hashes->hash_info; + hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx]; - hashinfo_t *hash_info_ptr = NULL; + if (device_param->skipped == true) continue; - if (hash_info) hash_info_ptr = hash_info[0]; + EVENT_DATA (EVENT_BACKEND_DEVICE_INIT_PRE, &backend_devices_idx, sizeof (int)); - kern_type = (u32) module_ctx->module_kern_type_dynamic (hashconfig, digests_buf, salts_buf, esalts_buf, hook_salts_buf, hash_info_ptr); - } - } + const int device_id = device_param->device_id; - // built options + /** + * module depending checks + */ - const size_t build_options_sz = 4096; + device_param->skipped_warning = false; - char *build_options_buf = (char *) hcmalloc (build_options_sz); + if (module_ctx->module_unstable_warning != MODULE_DEFAULT) + { + const bool unstable_warning = module_ctx->module_unstable_warning (hashconfig, user_options, user_options_extra, device_param); - int build_options_len = 0; + if ((unstable_warning == true) && (user_options->force == false)) + { + char runtime_name[7]; - if ((device_param->is_cuda == true) || (device_param->is_hip == true)) - { - // using a path with a space will break nvrtc_make_options_array_from_string() - // we add it to options array in a clean way later + memset (runtime_name, 0, sizeof (runtime_name)); - build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D KERNEL_STATIC "); - } - else - { - // tested on windows, linux, apple intel, apple silicon - // when is builded with cygwin and msys, cpath_real doesn't work - #if defined (_WIN) || defined (__CYGWIN__) || defined (__MSYS__) - build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D KERNEL_STATIC -D INCLUDE_PATH=\"%s\" ", "OpenCL"); - #else - build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D KERNEL_STATIC -D INCLUDE_PATH=\"%s\" ", folder_config->cpath_real); - #endif + if (device_param->is_cuda == true) memcpy (runtime_name, "CUDA", 4); + if (device_param->is_hip == true) memcpy (runtime_name, "HIP", 3); + #if defined 
(__APPLE__) + if (device_param->is_metal == true) memcpy (runtime_name, "Metal", 5); + #endif + if (device_param->is_opencl == true) memcpy (runtime_name, "OpenCL", 6); - #if defined (__APPLE__) - if (is_apple_silicon() == true) - { - build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D IS_APPLE_SILICON "); - } - #endif - } + event_log_warning (hashcat_ctx, "* Device #%u: Skipping (hash-mode %u)", device_id + 1, hashconfig->hash_mode); + event_log_warning (hashcat_ctx, " This is due to a known %s runtime and/or device driver issue (not a hashcat issue)", runtime_name); + event_log_warning (hashcat_ctx, " You can use --force to override, but do not report related errors."); + event_log_warning (hashcat_ctx, NULL); - /* currently disabled, hangs NEO drivers since 20.09. - was required for NEO driver 20.08 to workaround the same issue! - we go with the latest version + backend_runtime_skip_warnings++; - if (device_param->is_opencl == true) - { - if (device_param->use_opencl12 == true) - { - build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-cl-std=CL1.2 "); - } - else if (device_param->use_opencl20 == true) - { - build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-cl-std=CL2.0 "); - } - else if (device_param->use_opencl21 == true) - { - build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-cl-std=CL2.1 "); + device_param->skipped_warning = true; + continue; } } - */ - // we don't have sm_* on vendors not NV but it doesn't matter + /** + * tuning db + */ - #if defined (DEBUG) - build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D LOCAL_MEM_TYPE=%d -D VENDOR_ID=%u -D CUDA_ARCH=%u -D HAS_ADD=%u -D HAS_ADDC=%u -D HAS_SUB=%u -D HAS_SUBC=%u -D HAS_VADD=%u -D HAS_VADDC=%u -D HAS_VADD_CO=%u -D HAS_VADDC_CO=%u -D HAS_VSUB=%u -D HAS_VSUBB=%u -D HAS_VSUB_CO=%u -D HAS_VSUBB_CO=%u -D HAS_VPERM=%u -D HAS_VADD3=%u -D HAS_VBFE=%u -D HAS_BFE=%u -D HAS_LOP3=%u -D HAS_MOV64=%u -D HAS_PRMT=%u -D VECT_SIZE=%d -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D ATTACK_EXEC=%u -D ATTACK_KERN=%u -D ATTACK_MODE=%u ", device_param->device_local_mem_type, device_param->opencl_platform_vendor_id, (device_param->sm_major * 100) + (device_param->sm_minor * 10), device_param->has_add, device_param->has_addc, device_param->has_sub, device_param->has_subc, device_param->has_vadd, device_param->has_vaddc, device_param->has_vadd_co, device_param->has_vaddc_co, device_param->has_vsub, device_param->has_vsubb, device_param->has_vsub_co, device_param->has_vsubb_co, device_param->has_vperm, device_param->has_vadd3, device_param->has_vbfe, device_param->has_bfe, device_param->has_lop3, device_param->has_mov64, device_param->has_prmt, device_param->vector_width, (u32) device_param->opencl_device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type, hashconfig->attack_exec, user_options_extra->attack_kern, user_options->attack_mode); - #else - build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D LOCAL_MEM_TYPE=%d -D VENDOR_ID=%u -D CUDA_ARCH=%u -D HAS_ADD=%u -D HAS_ADDC=%u -D HAS_SUB=%u -D HAS_SUBC=%u -D HAS_VADD=%u -D HAS_VADDC=%u -D HAS_VADD_CO=%u -D 
HAS_VADDC_CO=%u -D HAS_VSUB=%u -D HAS_VSUBB=%u -D HAS_VSUB_CO=%u -D HAS_VSUBB_CO=%u -D HAS_VPERM=%u -D HAS_VADD3=%u -D HAS_VBFE=%u -D HAS_BFE=%u -D HAS_LOP3=%u -D HAS_MOV64=%u -D HAS_PRMT=%u -D VECT_SIZE=%d -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D ATTACK_EXEC=%u -D ATTACK_KERN=%u -D ATTACK_MODE=%u -w ", device_param->device_local_mem_type, device_param->opencl_platform_vendor_id, (device_param->sm_major * 100) + (device_param->sm_minor * 10), device_param->has_add, device_param->has_addc, device_param->has_sub, device_param->has_subc, device_param->has_vadd, device_param->has_vaddc, device_param->has_vadd_co, device_param->has_vaddc_co, device_param->has_vsub, device_param->has_vsubb, device_param->has_vsub_co, device_param->has_vsubb_co, device_param->has_vperm, device_param->has_vadd3, device_param->has_vbfe, device_param->has_bfe, device_param->has_lop3, device_param->has_mov64, device_param->has_prmt, device_param->vector_width, (u32) device_param->opencl_device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type, hashconfig->attack_exec, user_options_extra->attack_kern, user_options->attack_mode); - #endif + if (module_ctx->module_extra_tuningdb_block != MODULE_DEFAULT) + { + const char *extra_tuningdb_block = module_ctx->module_extra_tuningdb_block (hashconfig, user_options, user_options_extra); - build_options_buf[build_options_len] = 0; + char *lines_buf = hcstrdup (extra_tuningdb_block); - /* - if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU) - { - if (device_param->opencl_platform_vendor_id == VENDOR_ID_INTEL_SDK) - { - strncat (build_options_buf, " -cl-opt-disable", 16); - } - } - */ + char *saveptr = NULL; - #if defined (DEBUG) - if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: build_options '%s'", device_id + 1, build_options_buf); - #endif + char *next = strtok_r (lines_buf, "\n", &saveptr); - /** - * device_name_chksum_amp_mp - */ + int line_num = 0; - char device_name_chksum_amp_mp[HCBUFSIZ_TINY] = { 0 }; + do + { + line_num++; - const size_t dnclen_amp_mp = snprintf (device_name_chksum_amp_mp, HCBUFSIZ_TINY, "%d-%d-%d-%d-%u-%s-%s-%s-%u", - backend_ctx->comptime, - backend_ctx->cuda_driver_version, - backend_ctx->hip_runtimeVersion, - device_param->is_opencl, - device_param->opencl_platform_vendor_id, - device_param->device_name, - device_param->opencl_device_version, - device_param->opencl_driver_version, - (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : device_param->kernel_threads_max); + const size_t line_len = strlen (next); - md5_ctx_t md5_ctx; + if (line_len == 0) continue; - md5_init (&md5_ctx); - md5_update (&md5_ctx, (u32 *) device_name_chksum_amp_mp, dnclen_amp_mp); - md5_final (&md5_ctx); + if (next[0] == '#') continue; - snprintf (device_name_chksum_amp_mp, HCBUFSIZ_TINY, "%08x", md5_ctx.h[0]); + tuning_db_process_line (hashcat_ctx, next, line_num); - /** - * kernel cache - */ + } while ((next = strtok_r ((char *) NULL, "\n", &saveptr)) != NULL); - bool cache_disable = false; + hcfree (lines_buf); - // Seems to be completely broken on Apple + (Intel?) 
CPU - // To reproduce set cache_disable to false and run benchmark -b + // todo: print loaded 'cnt' message - if (device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) - { - if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU) - { - cache_disable = true; - } - } + // sort the database - if (module_ctx->module_jit_cache_disable != MODULE_DEFAULT) - { - cache_disable = module_ctx->module_jit_cache_disable (hashconfig, user_options, user_options_extra, hashes, device_param); + tuning_db_t *tuning_db = hashcat_ctx->tuning_db; + + qsort (tuning_db->alias_buf, tuning_db->alias_cnt, sizeof (tuning_db_alias_t), sort_by_tuning_db_alias); + qsort (tuning_db->entry_buf, tuning_db->entry_cnt, sizeof (tuning_db_entry_t), sort_by_tuning_db_entry); } - #if defined (DEBUG) - // https://github.com/hashcat/hashcat/issues/2750 - cache_disable = true; - #endif + // vector_width - /** - * shared kernel with no hashconfig dependencies - */ + int vector_width = 0; + if (user_options->backend_vector_width_chgd == false) { - /** - * kernel shared source filename - */ - - char source_file[256] = { 0 }; + // tuning db - generate_source_kernel_shared_filename (folder_config->shared_dir, source_file); + tuning_db_entry_t *tuningdb_entry; - if (hc_path_read (source_file) == false) + if (user_options->slow_candidates == true) { - event_log_error (hashcat_ctx, "%s: %s", source_file, strerror (errno)); - - return -1; + tuningdb_entry = tuning_db_search (hashcat_ctx, device_param->device_name, device_param->opencl_device_type, 0, hashconfig->hash_mode); } - - /** - * kernel shared cached filename - */ - - char cached_file[256] = { 0 }; - - generate_cached_kernel_shared_filename (folder_config->cache_dir, device_name_chksum_amp_mp, cached_file); - - const bool rc_load_kernel = load_kernel (hashcat_ctx, device_param, "shared_kernel", source_file, cached_file, build_options_buf, cache_disable, &device_param->opencl_program_shared, &device_param->cuda_module_shared, &device_param->hip_module_shared); - - if (rc_load_kernel == false) + else { - event_log_error (hashcat_ctx, "* Device #%u: Kernel %s build failed.", device_param->device_id + 1, source_file); - - return -1; + tuningdb_entry = tuning_db_search (hashcat_ctx, device_param->device_name, device_param->opencl_device_type, user_options->attack_mode, hashconfig->hash_mode); } - if (device_param->is_cuda == true) + if (tuningdb_entry == NULL || tuningdb_entry->vector_width == -1) { - // GPU memset - - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_memset, device_param->cuda_module_shared, "gpu_memset") == -1) + if (hashconfig->opti_type & OPTI_TYPE_USES_BITS_64) { - event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_memset"); - - backend_kernel_create_warnings++; - - device_param->skipped_warning = true; - continue; - } - - if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_memset, &device_param->kernel_wgs_memset) == -1) return -1; - - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_memset, &device_param->kernel_local_mem_size_memset) == -1) return -1; + if (device_param->is_cuda == true) + { + // cuda does not support this query - device_param->kernel_dynamic_local_mem_size_memset = device_param->device_local_mem_size - device_param->kernel_local_mem_size_memset; + vector_width = 1; + } - device_param->kernel_preferred_wgs_multiple_memset = device_param->cuda_warp_size; + if (device_param->is_hip == true) + { + // hip does not support 
this query - // GPU bzero + vector_width = 1; + } - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_bzero, device_param->cuda_module_shared, "gpu_bzero") == -1) - { - event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_bzero"); + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + // Metal does not support this query - backend_kernel_create_warnings++; + vector_width = 1; + } + #endif - device_param->skipped_warning = true; - continue; + if (device_param->is_opencl == true) + { + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, sizeof (vector_width), &vector_width, NULL) == -1) + { + device_param->skipped = true; + continue; + } + } } + else + { + if (device_param->is_cuda == true) + { + // cuda does not support this query - if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_bzero, &device_param->kernel_wgs_bzero) == -1) return -1; - - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_bzero, &device_param->kernel_local_mem_size_bzero) == -1) return -1; - - device_param->kernel_dynamic_local_mem_size_bzero = device_param->device_local_mem_size - device_param->kernel_local_mem_size_bzero; + vector_width = 1; + } - device_param->kernel_preferred_wgs_multiple_bzero = device_param->cuda_warp_size; + if (device_param->is_hip == true) + { + // hip does not support this query - // GPU autotune init + vector_width = 1; + } - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_atinit, device_param->cuda_module_shared, "gpu_atinit") == -1) - { - event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_atinit"); + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + // Metal does not support this query - backend_kernel_create_warnings++; + vector_width = 1; + } + #endif - device_param->skipped_warning = true; - continue; + if (device_param->is_opencl == true) + { + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, sizeof (vector_width), &vector_width, NULL) == -1) + { + device_param->skipped = true; + continue; + } + } } + } + else + { + vector_width = (cl_uint) tuningdb_entry->vector_width; + } + } + else + { + vector_width = user_options->backend_vector_width; + } - if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_atinit, &device_param->kernel_wgs_atinit) == -1) return -1; - - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_atinit, &device_param->kernel_local_mem_size_atinit) == -1) return -1; + // We can't have SIMD in kernels where we have an unknown final password length + // It also turns out that pure kernels (that have a higher register pressure) + // actually run faster on scalar GPU (like 1080) without SIMD - device_param->kernel_dynamic_local_mem_size_atinit = device_param->device_local_mem_size - device_param->kernel_local_mem_size_atinit; + if ((hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL) == 0) + { + if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) + { + vector_width = 1; + } + } - device_param->kernel_preferred_wgs_multiple_atinit = device_param->cuda_warp_size; + if (user_options->attack_mode == ATTACK_MODE_ASSOCIATION) + { + // not working in this mode because the GID does not align with password candidate count + // and if it cracks, it will crack the same hash twice, running into segfaults - // CL_rc = 
hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_atinit, 0, sizeof (cl_mem), device_param->kernel_params_atinit[0]); if (CL_rc == -1) return -1; - // CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_atinit, 1, sizeof (cl_ulong), device_param->kernel_params_atinit[1]); if (CL_rc == -1) return -1; + vector_width = 1; + } - // GPU decompress + if (vector_width > 16) vector_width = 16; - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_decompress, device_param->cuda_module_shared, "gpu_decompress") == -1) - { - event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_decompress"); + device_param->vector_width = vector_width; - backend_kernel_create_warnings++; + /** + * kernel accel and loops tuning db adjustment + */ - device_param->skipped_warning = true; - continue; - } + device_param->kernel_accel_min = hashconfig->kernel_accel_min; + device_param->kernel_accel_max = hashconfig->kernel_accel_max; + device_param->kernel_loops_min = hashconfig->kernel_loops_min; + device_param->kernel_loops_max = hashconfig->kernel_loops_max; + device_param->kernel_threads_min = hashconfig->kernel_threads_min; + device_param->kernel_threads_max = hashconfig->kernel_threads_max; - if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_decompress, &device_param->kernel_wgs_decompress) == -1) return -1; + tuning_db_entry_t *tuningdb_entry = NULL; - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_decompress, &device_param->kernel_local_mem_size_decompress) == -1) return -1; + if (user_options->slow_candidates == true) + { + tuningdb_entry = tuning_db_search (hashcat_ctx, device_param->device_name, device_param->opencl_device_type, 0, hashconfig->hash_mode); + } + else + { + tuningdb_entry = tuning_db_search (hashcat_ctx, device_param->device_name, device_param->opencl_device_type, user_options->attack_mode, hashconfig->hash_mode); + } - device_param->kernel_dynamic_local_mem_size_decompress = device_param->device_local_mem_size - device_param->kernel_local_mem_size_decompress; + // user commandline option override tuning db + // but both have to stay inside the boundaries of the module - device_param->kernel_preferred_wgs_multiple_decompress = device_param->cuda_warp_size; + if (user_options->kernel_accel_chgd == true) + { + const u32 _kernel_accel = user_options->kernel_accel; - // GPU utf8 to utf16le conversion + if ((_kernel_accel >= device_param->kernel_accel_min) && (_kernel_accel <= device_param->kernel_accel_max)) + { + device_param->kernel_accel_min = _kernel_accel; + device_param->kernel_accel_max = _kernel_accel; + } + } + else + { + if (tuningdb_entry != NULL) + { + const u32 _kernel_accel = tuningdb_entry->kernel_accel; - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_utf8toutf16le, device_param->cuda_module_shared, "gpu_utf8_to_utf16") == -1) + if (_kernel_accel == (u32) -1) // native, makes sense if OPTS_TYPE_MP_MULTI_DISABLE is used { - event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_utf8_to_utf16"); - - backend_kernel_create_warnings++; + if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) + { + if (module_ctx->module_extra_tuningdb_block != MODULE_DEFAULT) + { + event_log_warning (hashcat_ctx, "ATTENTION! 
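Editor's note on the vector-width logic added above: an explicit --backend-vector-width wins; otherwise the tuning database is consulted; otherwise the native width is queried from OpenCL (CUDA, HIP and Metal have no such query and fall back to 1). The result is then forced to 1 for pure kernels on GPUs, forced to 1 in association mode, and capped at 16. A compressed sketch of that decision order, with stand-in inputs instead of the real device queries and without the 32/64-bit distinction:

#include <stdio.h>

#define VECT_MAX 16

/* simplified inputs; in hashcat these come from user_options, the tuning db
   and clGetDeviceInfo (CL_DEVICE_NATIVE_VECTOR_WIDTH_*) */
typedef struct
{
  int user_width;        // 0 = not set on the command line
  int tuningdb_width;    // -1 = no entry / "native"
  int native_width;      // what the runtime reports (1 for CUDA/HIP/Metal)
  int is_gpu;
  int optimized_kernel;  // OPTI_TYPE_OPTIMIZED_KERNEL set?
  int association_mode;  // attack mode 9?
} width_in_t;

static int select_vector_width (const width_in_t *in)
{
  int w;

  if      (in->user_width)            w = in->user_width;
  else if (in->tuningdb_width != -1)  w = in->tuningdb_width;
  else                                w = in->native_width;

  // pure kernels have higher register pressure and run better scalar on GPUs
  if (in->is_gpu && !in->optimized_kernel) w = 1;

  // association attack: the GID must map 1:1 to password candidates
  if (in->association_mode) w = 1;

  if (w > VECT_MAX) w = VECT_MAX;

  return w;
}

int main (void)
{
  width_in_t in = { 0, -1, 4, 1, 0, 0 };

  printf ("vector width = %d\n", select_vector_width (&in)); // -> 1 (pure kernel on GPU)

  return 0;
}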
This hash-mode requires manual tuning to achieve full performance."); + event_log_warning (hashcat_ctx, "The loss of performance can be greater than 100%% without manual tuning."); + event_log_warning (hashcat_ctx, NULL); + event_log_warning (hashcat_ctx, "This warning message disappears after a definition for the installed"); + event_log_warning (hashcat_ctx, "compute-device in this computer has been added to either list:"); + event_log_warning (hashcat_ctx, "- src/modules/module_%05d.c", hashconfig->hash_mode); + event_log_warning (hashcat_ctx, "- hashcat.hctune"); + event_log_warning (hashcat_ctx, NULL); + event_log_warning (hashcat_ctx, "For instructions on tuning, see src/modules/module_%05d.c", hashconfig->hash_mode); + event_log_warning (hashcat_ctx, "Also, consider sending a PR to Hashcat Master so that other users can benefit from your work."); + event_log_warning (hashcat_ctx, NULL); + } + } - device_param->skipped_warning = true; - continue; + device_param->kernel_accel_min = device_param->device_processors; + device_param->kernel_accel_max = device_param->device_processors; } + else + { + if (_kernel_accel) + { + if ((_kernel_accel >= device_param->kernel_accel_min) && (_kernel_accel <= device_param->kernel_accel_max)) + { + device_param->kernel_accel_min = _kernel_accel; + device_param->kernel_accel_max = _kernel_accel; + } + } + } + } + } - if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_utf8toutf16le, &device_param->kernel_wgs_utf8toutf16le) == -1) return -1; - - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_utf8toutf16le, &device_param->kernel_local_mem_size_utf8toutf16le) == -1) return -1; - - device_param->kernel_dynamic_local_mem_size_utf8toutf16le = device_param->device_local_mem_size - device_param->kernel_local_mem_size_utf8toutf16le; + if (user_options->kernel_loops_chgd == true) + { + const u32 _kernel_loops = user_options->kernel_loops; - device_param->kernel_preferred_wgs_multiple_utf8toutf16le = device_param->cuda_warp_size; + if ((_kernel_loops >= device_param->kernel_loops_min) && (_kernel_loops <= device_param->kernel_loops_max)) + { + device_param->kernel_loops_min = _kernel_loops; + device_param->kernel_loops_max = _kernel_loops; } - - if (device_param->is_hip == true) + } + else + { + if (tuningdb_entry != NULL) { - // GPU memset + u32 _kernel_loops = tuningdb_entry->kernel_loops; - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_memset, device_param->hip_module_shared, "gpu_memset") == -1) + if (_kernel_loops) { - event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_memset"); - - backend_kernel_create_warnings++; + if (user_options->workload_profile == 1) + { + _kernel_loops = (_kernel_loops > 8) ? _kernel_loops / 8 : 1; + } + else if (user_options->workload_profile == 2) + { + _kernel_loops = (_kernel_loops > 4) ? 
_kernel_loops / 4 : 1; + } - device_param->skipped_warning = true; - continue; + if ((_kernel_loops >= device_param->kernel_loops_min) && (_kernel_loops <= device_param->kernel_loops_max)) + { + device_param->kernel_loops_min = _kernel_loops; + device_param->kernel_loops_max = _kernel_loops; + } } + } + } - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_memset, &device_param->kernel_wgs_memset) == -1) return -1; - - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_memset, &device_param->kernel_local_mem_size_memset) == -1) return -1; + // there's no thread column in tuning db, stick to commandline if defined - device_param->kernel_dynamic_local_mem_size_memset = device_param->device_local_mem_size - device_param->kernel_local_mem_size_memset; + if (user_options->kernel_threads_chgd == true) + { + const u32 _kernel_threads = user_options->kernel_threads; - device_param->kernel_preferred_wgs_multiple_memset = device_param->hip_warp_size; + if ((_kernel_threads >= device_param->kernel_threads_min) && (_kernel_threads <= device_param->kernel_threads_max)) + { + device_param->kernel_threads_min = _kernel_threads; + device_param->kernel_threads_max = _kernel_threads; + } + } - // GPU bzero + if (user_options->slow_candidates == true) + { + } + else + { + // we have some absolute limits for fast hashes (because of limit constant memory), make sure not to overstep - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_bzero, device_param->hip_module_shared, "gpu_bzero") == -1) + if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + { + if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT) { - event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_bzero"); - - backend_kernel_create_warnings++; - - device_param->skipped_warning = true; - continue; + device_param->kernel_loops_min = MIN (device_param->kernel_loops_min, KERNEL_RULES); + device_param->kernel_loops_max = MIN (device_param->kernel_loops_max, KERNEL_RULES); } - - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_bzero, &device_param->kernel_wgs_bzero) == -1) return -1; - - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_bzero, &device_param->kernel_local_mem_size_bzero) == -1) return -1; - - device_param->kernel_dynamic_local_mem_size_bzero = device_param->device_local_mem_size - device_param->kernel_local_mem_size_bzero; - - device_param->kernel_preferred_wgs_multiple_bzero = device_param->hip_warp_size; - - // GPU autotune init - - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_atinit, device_param->hip_module_shared, "gpu_atinit") == -1) + else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI) { - event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_atinit"); - - backend_kernel_create_warnings++; - - device_param->skipped_warning = true; - continue; + device_param->kernel_loops_min = MIN (device_param->kernel_loops_min, KERNEL_COMBS); + device_param->kernel_loops_max = MIN (device_param->kernel_loops_max, KERNEL_COMBS); } - - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_atinit, &device_param->kernel_wgs_atinit) == -1) return -1; - - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_atinit, &device_param->kernel_local_mem_size_atinit) == -1) return -1; - - device_param->kernel_dynamic_local_mem_size_atinit = device_param->device_local_mem_size - 
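Editor's note: the tuning-db handling above follows one rule throughout. A command-line or database value is only honoured if it falls inside the [min, max] window the module declared, and a kernel_loops value taken from the database is additionally divided by 8 or 4 (never below 1) for workload profiles 1 and 2 before that check. A small hypothetical helper pair showing the same clamping idea (the names are mine, not hashcat's):

#include <stdio.h>

typedef unsigned int u32;

/* pin the window to a requested value only when it lies inside the module's bounds */
static void pin_if_in_bounds (u32 requested, u32 *min, u32 *max)
{
  if ((requested >= *min) && (requested <= *max))
  {
    *min = requested;
    *max = requested;
  }
}

/* scale a tuning-db kernel_loops value down for reduced workload profiles */
static u32 scale_loops_for_profile (u32 loops, int workload_profile)
{
  if (workload_profile == 1) return (loops > 8) ? loops / 8 : 1;
  if (workload_profile == 2) return (loops > 4) ? loops / 4 : 1;

  return loops;
}

int main (void)
{
  u32 loops_min = 1, loops_max = 1024;

  u32 db_loops = scale_loops_for_profile (1000, 2);    // -> 250

  pin_if_in_bounds (db_loops, &loops_min, &loops_max); // window becomes [250, 250]

  printf ("kernel_loops window: [%u, %u]\n", loops_min, loops_max);

  return 0;
}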
device_param->kernel_local_mem_size_atinit; - - device_param->kernel_preferred_wgs_multiple_atinit = device_param->hip_warp_size; - - // CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_atinit, 0, sizeof (cl_mem), device_param->kernel_params_atinit[0]); if (CL_rc == -1) return -1; - // CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_atinit, 1, sizeof (cl_ulong), device_param->kernel_params_atinit[1]); if (CL_rc == -1) return -1; - - // GPU decompress - - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_decompress, device_param->hip_module_shared, "gpu_decompress") == -1) + else if (user_options_extra->attack_kern == ATTACK_KERN_BF) { - event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_decompress"); - - backend_kernel_create_warnings++; - - device_param->skipped_warning = true; - continue; + device_param->kernel_loops_min = MIN (device_param->kernel_loops_min, KERNEL_BFS); + device_param->kernel_loops_max = MIN (device_param->kernel_loops_max, KERNEL_BFS); } + } + } - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_decompress, &device_param->kernel_wgs_decompress) == -1) return -1; - - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_decompress, &device_param->kernel_local_mem_size_decompress) == -1) return -1; - - device_param->kernel_dynamic_local_mem_size_decompress = device_param->device_local_mem_size - device_param->kernel_local_mem_size_decompress; - - device_param->kernel_preferred_wgs_multiple_decompress = device_param->hip_warp_size; + device_param->kernel_loops_min_sav = device_param->kernel_loops_min; + device_param->kernel_loops_max_sav = device_param->kernel_loops_max; - // GPU utf8 to utf16le conversion + /** + * device properties + */ - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_utf8toutf16le, device_param->hip_module_shared, "gpu_utf8_to_utf16") == -1) - { - event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_utf8_to_utf16"); + const u32 device_processors = device_param->device_processors; - backend_kernel_create_warnings++; + /** + * device threads + */ - device_param->skipped_warning = true; - continue; - } - - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_utf8toutf16le, &device_param->kernel_wgs_utf8toutf16le) == -1) return -1; - - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_utf8toutf16le, &device_param->kernel_local_mem_size_utf8toutf16le) == -1) return -1; - - device_param->kernel_dynamic_local_mem_size_utf8toutf16le = device_param->device_local_mem_size - device_param->kernel_local_mem_size_utf8toutf16le; + if (hashconfig->opts_type & OPTS_TYPE_MAXIMUM_THREADS) + { + // default for all, because the else branch is doing the same (nothing), but is actually used as a way to + // disable the default native thread configuration for HIP + // this can have negative performance if not tested on multiple different gpu architectures + } + else if (hashconfig->opts_type & OPTS_TYPE_NATIVE_THREADS) + { + u32 native_threads = 0; - device_param->kernel_preferred_wgs_multiple_utf8toutf16le = device_param->hip_warp_size; + if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU) + { + native_threads = 1; + } + else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) + { + native_threads = device_param->kernel_preferred_wgs_multiple; + } + else + { + // abort? 
} - if (device_param->is_opencl == true) + if ((native_threads >= device_param->kernel_threads_min) && (native_threads <= device_param->kernel_threads_max)) { - // GPU memset + device_param->kernel_threads_min = native_threads; + device_param->kernel_threads_max = native_threads; + } + else + { + // abort? + } + } + else + { + if (device_param->is_hip == true) + { + const u32 native_threads = device_param->kernel_preferred_wgs_multiple; - if (hc_clCreateKernel (hashcat_ctx, device_param->opencl_program_shared, "gpu_memset", &device_param->opencl_kernel_memset) == -1) + if ((native_threads >= device_param->kernel_threads_min) && (native_threads <= device_param->kernel_threads_max)) { - event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_memset"); - - backend_kernel_create_warnings++; - - device_param->skipped_warning = true; - continue; + device_param->kernel_threads_min = native_threads; + device_param->kernel_threads_max = native_threads; } - - if (get_opencl_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_memset, &device_param->kernel_wgs_memset) == -1) return -1; - - if (get_opencl_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_memset, &device_param->kernel_local_mem_size_memset) == -1) return -1; - - if (get_opencl_kernel_dynamic_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_memset, &device_param->kernel_dynamic_local_mem_size_memset) == -1) return -1; - - if (get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_memset, &device_param->kernel_preferred_wgs_multiple_memset) == -1) return -1; - - // GPU bzero - - if (hc_clCreateKernel (hashcat_ctx, device_param->opencl_program_shared, "gpu_bzero", &device_param->opencl_kernel_bzero) == -1) + else { - event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_bzero"); - - backend_kernel_create_warnings++; - - device_param->skipped_warning = true; - continue; + // abort? 
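Editor's note: for hash modes flagged OPTS_TYPE_NATIVE_THREADS, the code above pins the thread count to 1 on CPUs and to the device's preferred work-group-size multiple (the warp/wavefront width) on GPUs, and only if that value lies inside the module's declared thread window; HIP devices get the same preferred-multiple treatment in the default branch. A condensed, self-contained sketch of that selection (the struct is a stand-in, not hc_device_param_t):

#include <stdio.h>

typedef unsigned int u32;

typedef struct
{
  int is_cpu;
  int is_gpu;
  u32 preferred_wgs_multiple; // warp size (NV), wavefront size (AMD), ...
  u32 threads_min;
  u32 threads_max;
} dev_t;

/* pick a "native" thread count and pin the window to it when it fits */
static void apply_native_threads (dev_t *d)
{
  u32 native = 0;

  if      (d->is_cpu) native = 1;
  else if (d->is_gpu) native = d->preferred_wgs_multiple;
  else return; // unknown device type: leave the window untouched

  if ((native >= d->threads_min) && (native <= d->threads_max))
  {
    d->threads_min = native;
    d->threads_max = native;
  }
}

int main (void)
{
  dev_t gpu = { 0, 1, 32, 1, 1024 };

  apply_native_threads (&gpu);

  printf ("threads: [%u, %u]\n", gpu.threads_min, gpu.threads_max); // [32, 32]

  return 0;
}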
} + } + } - if (get_opencl_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_bzero, &device_param->kernel_wgs_bzero) == -1) return -1; - - if (get_opencl_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_bzero, &device_param->kernel_local_mem_size_bzero) == -1) return -1; - - if (get_opencl_kernel_dynamic_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_bzero, &device_param->kernel_dynamic_local_mem_size_bzero) == -1) return -1; - - if (get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_bzero, &device_param->kernel_preferred_wgs_multiple_bzero) == -1) return -1; - - // apple hack, but perhaps also an alternative for other vendors - - if (device_param->kernel_preferred_wgs_multiple == 0) device_param->kernel_preferred_wgs_multiple = device_param->kernel_preferred_wgs_multiple_bzero; - - // GPU autotune init - - if (hc_clCreateKernel (hashcat_ctx, device_param->opencl_program_shared, "gpu_atinit", &device_param->opencl_kernel_atinit) == -1) - { - event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_atinit"); + // this seems to work always - backend_kernel_create_warnings++; + if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU) + { + u32 native_threads = 1; - device_param->skipped_warning = true; - continue; - } + if ((native_threads >= device_param->kernel_threads_min) && (native_threads <= device_param->kernel_threads_max)) + { + device_param->kernel_threads_min = native_threads; + device_param->kernel_threads_max = native_threads; + } + } - if (get_opencl_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_atinit, &device_param->kernel_wgs_atinit) == -1) return -1; + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + // set some limits with Metal - if (get_opencl_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_atinit, &device_param->kernel_local_mem_size_atinit) == -1) return -1; + device_param->kernel_threads_max = 128; + device_param->kernel_loops_max = 1024; // autotune go over ... 
+ } + #endif - if (get_opencl_kernel_dynamic_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_atinit, &device_param->kernel_dynamic_local_mem_size_atinit) == -1) return -1; + /** + * create context for each device + */ - if (get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_atinit, &device_param->kernel_preferred_wgs_multiple_atinit) == -1) return -1; + if (device_param->is_cuda == true) + { + if (hc_cuCtxCreate (hashcat_ctx, &device_param->cuda_context, CU_CTX_SCHED_BLOCKING_SYNC, device_param->cuda_device) == -1) + { + device_param->skipped = true; + continue; + } - // GPU decompress + if (hc_cuCtxPushCurrent (hashcat_ctx, device_param->cuda_context) == -1) + { + device_param->skipped = true; + continue; + } + } - if (hc_clCreateKernel (hashcat_ctx, device_param->opencl_program_shared, "gpu_decompress", &device_param->opencl_kernel_decompress) == -1) - { - event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_decompress"); + if (device_param->is_hip == true) + { + if (hc_hipCtxCreate (hashcat_ctx, &device_param->hip_context, hipDeviceScheduleBlockingSync, device_param->hip_device) == -1) + { + device_param->skipped = true; + continue; + } - backend_kernel_create_warnings++; + if (hc_hipCtxPushCurrent (hashcat_ctx, device_param->hip_context) == -1) + { + device_param->skipped = true; + continue; + } + } - device_param->skipped_warning = true; - continue; - } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + /** + * create command-queue + */ - if (get_opencl_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_decompress, &device_param->kernel_wgs_decompress) == -1) return -1; + if (hc_mtlCreateCommandQueue (hashcat_ctx, device_param->metal_device, &device_param->metal_command_queue) == -1) + { + device_param->skipped = true; + continue; + } + } + #endif - if (get_opencl_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_decompress, &device_param->kernel_local_mem_size_decompress) == -1) return -1; + if (device_param->is_opencl == true) + { + /* + cl_context_properties properties[3]; - if (get_opencl_kernel_dynamic_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_decompress, &device_param->kernel_dynamic_local_mem_size_decompress) == -1) return -1; + properties[0] = CL_CONTEXT_PLATFORM; + properties[1] = (cl_context_properties) device_param->opencl_platform; + properties[2] = 0; - if (get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_decompress, &device_param->kernel_preferred_wgs_multiple_decompress) == -1) return -1; + CL_rc = hc_clCreateContext (hashcat_ctx, properties, 1, &device_param->opencl_device, NULL, NULL, &device_param->opencl_context); + */ - // GPU utf8 to utf16le conversion + if (hc_clCreateContext (hashcat_ctx, NULL, 1, &device_param->opencl_device, NULL, NULL, &device_param->opencl_context) == -1) + { + device_param->skipped = true; + continue; + } - if (hc_clCreateKernel (hashcat_ctx, device_param->opencl_program_shared, "gpu_utf8_to_utf16", &device_param->opencl_kernel_utf8toutf16le) == -1) - { - event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_utf8_to_utf16"); + /** + * create command-queue + */ - backend_kernel_create_warnings++; + // not supported with NV + // device_param->opencl_command_queue = hc_clCreateCommandQueueWithProperties (hashcat_ctx, 
device_param->opencl_device, NULL); - device_param->skipped_warning = true; - continue; - } + if (hc_clCreateCommandQueue (hashcat_ctx, device_param->opencl_context, device_param->opencl_device, CL_QUEUE_PROFILING_ENABLE, &device_param->opencl_command_queue) == -1) + { + device_param->skipped = true; + continue; + } + } - if (get_opencl_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_utf8toutf16le, &device_param->kernel_wgs_utf8toutf16le) == -1) return -1; + /** + * create stream for CUDA devices + */ - if (get_opencl_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_utf8toutf16le, &device_param->kernel_local_mem_size_utf8toutf16le) == -1) return -1; + if (device_param->is_cuda == true) + { + if (hc_cuStreamCreate (hashcat_ctx, &device_param->cuda_stream, CU_STREAM_DEFAULT) == -1) + { + device_param->skipped = true; + continue; + } + } - if (get_opencl_kernel_dynamic_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_utf8toutf16le, &device_param->kernel_dynamic_local_mem_size_utf8toutf16le) == -1) return -1; + /** + * create stream for HIP devices + */ - if (get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_utf8toutf16le, &device_param->kernel_preferred_wgs_multiple_utf8toutf16le) == -1) return -1; + if (device_param->is_hip == true) + { + if (hc_hipStreamCreate (hashcat_ctx, &device_param->hip_stream, hipStreamDefault) == -1) + { + device_param->skipped = true; + continue; } } /** - * main kernel + * create events for CUDA devices */ + if (device_param->is_cuda == true) { - char *build_options_module_buf = (char *) hcmalloc (build_options_sz); - - int build_options_module_len = 0; + if (hc_cuEventCreate (hashcat_ctx, &device_param->cuda_event1, CU_EVENT_BLOCKING_SYNC) == -1) + { + device_param->skipped = true; + continue; + } - build_options_module_len += snprintf (build_options_module_buf + build_options_module_len, build_options_sz - build_options_module_len, "%s ", build_options_buf); + if (hc_cuEventCreate (hashcat_ctx, &device_param->cuda_event2, CU_EVENT_BLOCKING_SYNC) == -1) + { + device_param->skipped = true; + continue; + } - if (module_ctx->module_jit_build_options != MODULE_DEFAULT) + if (hc_cuEventCreate (hashcat_ctx, &device_param->cuda_event3, CU_EVENT_DISABLE_TIMING) == -1) { - char *jit_build_options = module_ctx->module_jit_build_options (hashconfig, user_options, user_options_extra, hashes, device_param); + device_param->skipped = true; + continue; + } + } - if (jit_build_options != NULL) - { - build_options_module_len += snprintf (build_options_module_buf + build_options_module_len, build_options_sz - build_options_module_len, "%s", jit_build_options); + /** + * create events for HIP devices + */ - // this is a bit ugly - // would be better to have the module return the value as value - - u32 fixed_local_size = 0; - - if (sscanf (jit_build_options, "-D FIXED_LOCAL_SIZE=%u", &fixed_local_size) == 1) - { - device_param->kernel_threads_min = fixed_local_size; - device_param->kernel_threads_max = fixed_local_size; - } - else - { - // kernels specific minimum needs to be set so that self-test wont fail - - if (sscanf (jit_build_options, "-D FIXED_LOCAL_SIZE_COMP=%u", &fixed_local_size) == 1) - { - device_param->kernel_threads_min = fixed_local_size; - // device_param->kernel_threads_max = fixed_local_size; - } - } - } + if (device_param->is_hip == true) + { + if (hc_hipEventCreate (hashcat_ctx, &device_param->hip_event1, hipEventBlockingSync) == -1) + { + 
device_param->skipped = true; + continue; } - build_options_module_buf[build_options_module_len] = 0; - - #if defined (DEBUG) - if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: build_options_module '%s'", device_id + 1, build_options_module_buf); - #endif - - /** - * device_name_chksum - */ + if (hc_hipEventCreate (hashcat_ctx, &device_param->hip_event2, hipEventBlockingSync) == -1) + { + device_param->skipped = true; + continue; + } - char device_name_chksum[HCBUFSIZ_TINY] = { 0 }; + if (hc_hipEventCreate (hashcat_ctx, &device_param->hip_event3, hipEventDisableTiming) == -1) + { + device_param->skipped = true; + continue; + } + } - // The kernel source can depend on some JiT compiler macros which themself depend on the attack_modes. - // ATM this is relevant only for ATTACK_MODE_ASSOCIATION which slightly modifies ATTACK_MODE_STRAIGHT kernels. + /** + * create input buffers on device : calculate size of fixed memory buffers + */ - const u32 extra_value = (user_options->attack_mode == ATTACK_MODE_ASSOCIATION) ? ATTACK_MODE_ASSOCIATION : ATTACK_MODE_NONE; + u64 size_root_css = SP_PW_MAX * sizeof (cs_t); + u64 size_markov_css = SP_PW_MAX * CHARSIZ * sizeof (cs_t); - const size_t dnclen = snprintf (device_name_chksum, HCBUFSIZ_TINY, "%d-%d-%d-%d-%u-%s-%s-%s-%d-%u-%u-%u-%s", - backend_ctx->comptime, - backend_ctx->cuda_driver_version, - backend_ctx->hip_runtimeVersion, - device_param->is_opencl, - device_param->opencl_platform_vendor_id, - device_param->device_name, - device_param->opencl_device_version, - device_param->opencl_driver_version, - device_param->vector_width, - hashconfig->kern_type, - extra_value, - (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : device_param->kernel_threads_max, - build_options_module_buf); + device_param->size_root_css = size_root_css; + device_param->size_markov_css = size_markov_css; - md5_ctx_t md5_ctx; + u64 size_results = sizeof (u32); - md5_init (&md5_ctx); - md5_update (&md5_ctx, (u32 *) device_name_chksum, dnclen); - md5_final (&md5_ctx); + device_param->size_results = size_results; - snprintf (device_name_chksum, HCBUFSIZ_TINY, "%08x", md5_ctx.h[0]); + u64 size_rules = (u64) straight_ctx->kernel_rules_cnt * sizeof (kernel_rule_t); + u64 size_rules_c = (u64) KERNEL_RULES * sizeof (kernel_rule_t); - /** - * kernel source filename - */ + device_param->size_rules = size_rules; + device_param->size_rules_c = size_rules_c; - char source_file[256] = { 0 }; + u64 size_plains = (u64) hashes->digests_cnt * sizeof (plain_t); + u64 size_salts = (u64) hashes->salts_cnt * sizeof (salt_t); + u64 size_esalts = (u64) hashes->digests_cnt * hashconfig->esalt_size; + u64 size_shown = (u64) hashes->digests_cnt * sizeof (u32); + u64 size_digests = (u64) hashes->digests_cnt * (u64) hashconfig->dgst_size; - generate_source_kernel_filename (user_options->slow_candidates, hashconfig->attack_exec, user_options_extra->attack_kern, kern_type, hashconfig->opti_type, folder_config->shared_dir, source_file); + device_param->size_plains = size_plains; + device_param->size_digests = size_digests; + device_param->size_shown = size_shown; + device_param->size_salts = size_salts; + device_param->size_esalts = size_esalts; - if (hc_path_read (source_file) == false) - { - event_log_error (hashcat_ctx, "%s: %s", source_file, strerror (errno)); + u64 size_combs = KERNEL_COMBS * sizeof (pw_t); + u64 size_bfs = KERNEL_BFS * sizeof (bf_t); + u64 size_tm = 32 * sizeof (bs_word_t); + u64 size_kernel_params = 1 * sizeof (kernel_param_t); 
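Editor's note: the buffer sizing above is plain arithmetic over counts supplied by the hash configuration and the loaded hashlist: per-digest structures scale with digests_cnt, per-salt structures with salts_cnt, rule buffers with the number of loaded rules, and the fixed scratch buffers (combs, bfs, tm, kernel params) with compile-time kernel limits. A self-contained sketch of the same calculations with made-up counts; the element sizes below are placeholders, not hashcat's real plain_t/salt_t/kernel_rule_t sizes:

#include <stdio.h>
#include <stdint.h>

typedef uint64_t u64;

/* placeholder element sizes standing in for plain_t, salt_t, kernel_rule_t ... */
#define SZ_PLAIN          sizeof (u64)
#define SZ_SALT           64
#define SZ_RULE           128
#define KERNEL_RULES_MAX  256   // on-device rule window (KERNEL_RULES in hashcat)

int main (void)
{
  u64 digests_cnt      = 100000;
  u64 salts_cnt        = 1;
  u64 kernel_rules_cnt = 64000;
  u64 dgst_size        = 16;    // e.g. a 128-bit digest
  u64 esalt_size       = 0;     // only non-zero for modes carrying extra salt data

  u64 size_plains  = digests_cnt * SZ_PLAIN;
  u64 size_digests = digests_cnt * dgst_size;
  u64 size_shown   = digests_cnt * sizeof (uint32_t);
  u64 size_salts   = salts_cnt   * SZ_SALT;
  u64 size_esalts  = digests_cnt * esalt_size;

  u64 size_rules   = kernel_rules_cnt * SZ_RULE;  // full host-side rule set
  u64 size_rules_c = KERNEL_RULES_MAX * SZ_RULE;  // constant on-device window

  printf ("plains: %llu, digests: %llu, shown: %llu, salts: %llu, esalts: %llu\n",
          (unsigned long long) size_plains,  (unsigned long long) size_digests,
          (unsigned long long) size_shown,   (unsigned long long) size_salts,
          (unsigned long long) size_esalts);

  printf ("rules: %llu bytes (device window: %llu)\n",
          (unsigned long long) size_rules, (unsigned long long) size_rules_c);

  return 0;
}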
- return -1; - } + device_param->size_bfs = size_bfs; + device_param->size_combs = size_combs; + device_param->size_tm = size_tm; + device_param->size_kernel_params = size_kernel_params; - /** - * kernel cached filename - */ + u64 size_st_digests = 1 * hashconfig->dgst_size; + u64 size_st_salts = 1 * sizeof (salt_t); + u64 size_st_esalts = 1 * hashconfig->esalt_size; - char cached_file[256] = { 0 }; + device_param->size_st_digests = size_st_digests; + device_param->size_st_salts = size_st_salts; + device_param->size_st_esalts = size_st_esalts; - generate_cached_kernel_filename (user_options->slow_candidates, hashconfig->attack_exec, user_options_extra->attack_kern, kern_type, hashconfig->opti_type, folder_config->cache_dir, device_name_chksum, cached_file); + // extra buffer - /** - * load kernel - */ + u64 size_extra_buffer = 4; - const bool rc_load_kernel = load_kernel (hashcat_ctx, device_param, "main_kernel", source_file, cached_file, build_options_module_buf, cache_disable, &device_param->opencl_program, &device_param->cuda_module, &device_param->hip_module); + if (module_ctx->module_extra_buffer_size != MODULE_DEFAULT) + { + const u64 extra_buffer_size = module_ctx->module_extra_buffer_size (hashconfig, user_options, user_options_extra, hashes, device_param); - if (rc_load_kernel == false) + if (extra_buffer_size == (u64) -1) { - event_log_error (hashcat_ctx, "* Device #%u: Kernel %s build failed.", device_param->device_id + 1, source_file); + event_log_error (hashcat_ctx, "Invalid extra buffer size."); - backend_kernel_build_warnings++; + backend_extra_size_warning++; device_param->skipped_warning = true; continue; } - hcfree (build_options_module_buf); - } + device_param->extra_buffer_size = extra_buffer_size; - /** - * word generator kernel - */ + // for the size we actually allocate we need to cheat a bit in order to make it more easy for plugin developer. + // + // we will divide this size by 4 to workaround opencl limitation. + // this collides with a theoretical scenario (like -n1 -T1) where there's only one workitem, + // because inside the kernel the target buffer is selected by workitem_id / 4. + // but the maximum size of the buffer would be only 1/4 of what is needed -> overflow. + // + // to workaround this we make sure that there's always a full buffer in each of the 4 allocated buffers available. - if (user_options->slow_candidates == true) - { + const u64 kernel_power_max = ((hashconfig->opts_type & OPTS_TYPE_MP_MULTI_DISABLE) ? 
1 : device_param->device_processors) * device_param->kernel_threads_max * device_param->kernel_accel_max; + + const u64 extra_buffer_size_one = extra_buffer_size / kernel_power_max; + + size_extra_buffer = extra_buffer_size + (extra_buffer_size_one * 4); } - else + + // kern type + + u32 kern_type = hashconfig->kern_type; + + if (module_ctx->module_kern_type_dynamic != MODULE_DEFAULT) { - if ((user_options->attack_mode != ATTACK_MODE_STRAIGHT) && (user_options->attack_mode != ATTACK_MODE_ASSOCIATION)) + if (user_options->benchmark == true) { - /** - * kernel mp source filename - */ + } + else + { + void *digests_buf = hashes->digests_buf; + salt_t *salts_buf = hashes->salts_buf; + void *esalts_buf = hashes->esalts_buf; + void *hook_salts_buf = hashes->hook_salts_buf; + hashinfo_t **hash_info = hashes->hash_info; - char source_file[256] = { 0 }; + hashinfo_t *hash_info_ptr = NULL; - generate_source_kernel_mp_filename (hashconfig->opti_type, hashconfig->opts_type, folder_config->shared_dir, source_file); + if (hash_info) hash_info_ptr = hash_info[0]; - if (hc_path_read (source_file) == false) - { - event_log_error (hashcat_ctx, "%s: %s", source_file, strerror (errno)); + kern_type = (u32) module_ctx->module_kern_type_dynamic (hashconfig, digests_buf, salts_buf, esalts_buf, hook_salts_buf, hash_info_ptr); + } + } - return -1; - } + // built options - /** - * kernel mp cached filename - */ + const size_t build_options_sz = 4096; - char cached_file[256] = { 0 }; + char *build_options_buf = (char *) hcmalloc (build_options_sz); - generate_cached_kernel_mp_filename (hashconfig->opti_type, hashconfig->opts_type, folder_config->cache_dir, device_name_chksum_amp_mp, cached_file); + int build_options_len = 0; - const bool rc_load_kernel = load_kernel (hashcat_ctx, device_param, "mp_kernel", source_file, cached_file, build_options_buf, cache_disable, &device_param->opencl_program_mp, &device_param->cuda_module_mp, &device_param->hip_module_mp); + if ((device_param->is_cuda == true) || (device_param->is_hip == true)) + { + // using a path with a space will break nvrtc_make_options_array_from_string() + // we add it to options array in a clean way later - if (rc_load_kernel == false) - { - event_log_error (hashcat_ctx, "* Device #%u: Kernel %s build failed.", device_param->device_id + 1, source_file); + build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D KERNEL_STATIC "); + } + else + { + // when is builded with cygwin and msys, cpath_real doesn't work + #if defined (_WIN) || defined (__CYGWIN__) || defined (__MSYS__) + build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D KERNEL_STATIC -D INCLUDE_PATH=%s ", "OpenCL"); + #else + build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D KERNEL_STATIC -D INCLUDE_PATH=\"%s\" ", folder_config->cpath_real); + #endif - return -1; - } + #if defined (__APPLE__) + if (is_apple_silicon() == true) + { + build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D IS_APPLE_SILICON "); } + #endif } - /** - * amplifier kernel - */ + /* currently disabled, hangs NEO drivers since 20.09. + was required for NEO driver 20.08 to workaround the same issue! 
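Editor's note restating the comment block above: module_extra_buffer_size() returns the total scratch space a slow plugin needs, but the allocation is later split across four separate buffers and a work-item selects its slice by workitem_id / 4, so with very small launch sizes one quarter of the total could be smaller than what a single work-item needs. Padding the total by four times the per-work-item share guarantees each of the four buffers is large enough on its own. Under those assumptions the arithmetic is just:

#include <stdio.h>
#include <stdint.h>

typedef uint64_t u64;

/* pad the module-requested extra buffer so that each of the 4 sub-buffers
   can hold at least one full per-work-item slice */
static u64 pad_extra_buffer (u64 extra_buffer_size, u64 kernel_power_max)
{
  const u64 extra_buffer_size_one = extra_buffer_size / kernel_power_max;

  return extra_buffer_size + (extra_buffer_size_one * 4);
}

int main (void)
{
  // kernel_power_max = processors * threads_max * accel_max
  // (or 1 * threads_max * accel_max when OPTS_TYPE_MP_MULTI_DISABLE is set)
  const u64 kernel_power_max  = 64 * 256 * 4;                 // made-up launch size
  const u64 extra_buffer_size = kernel_power_max * 1024;      // e.g. 1 KiB per work-item

  printf ("padded size: %llu bytes\n",
          (unsigned long long) pad_extra_buffer (extra_buffer_size, kernel_power_max));

  return 0;
}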
+ we go with the latest version - if (user_options->slow_candidates == true) - { - } - else + if (device_param->is_opencl == true) { - if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + if (device_param->use_opencl12 == true) { - + build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-cl-std=CL1.2 "); } - else + else if (device_param->use_opencl20 == true) { - /** - * kernel amp source filename - */ + build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-cl-std=CL2.0 "); + } + else if (device_param->use_opencl21 == true) + { + build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-cl-std=CL2.1 "); + } + } + */ - char source_file[256] = { 0 }; + // we don't have sm_* on vendors not NV but it doesn't matter - generate_source_kernel_amp_filename (user_options_extra->attack_kern, folder_config->shared_dir, source_file); + #if defined (DEBUG) + build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D LOCAL_MEM_TYPE=%d -D VENDOR_ID=%u -D CUDA_ARCH=%u -D HAS_ADD=%u -D HAS_ADDC=%u -D HAS_SUB=%u -D HAS_SUBC=%u -D HAS_VADD=%u -D HAS_VADDC=%u -D HAS_VADD_CO=%u -D HAS_VADDC_CO=%u -D HAS_VSUB=%u -D HAS_VSUBB=%u -D HAS_VSUB_CO=%u -D HAS_VSUBB_CO=%u -D HAS_VPERM=%u -D HAS_VADD3=%u -D HAS_VBFE=%u -D HAS_BFE=%u -D HAS_LOP3=%u -D HAS_MOV64=%u -D HAS_PRMT=%u -D VECT_SIZE=%d -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D ATTACK_EXEC=%u -D ATTACK_KERN=%u -D ATTACK_MODE=%u ", device_param->device_local_mem_type, device_param->opencl_platform_vendor_id, (device_param->sm_major * 100) + (device_param->sm_minor * 10), device_param->has_add, device_param->has_addc, device_param->has_sub, device_param->has_subc, device_param->has_vadd, device_param->has_vaddc, device_param->has_vadd_co, device_param->has_vaddc_co, device_param->has_vsub, device_param->has_vsubb, device_param->has_vsub_co, device_param->has_vsubb_co, device_param->has_vperm, device_param->has_vadd3, device_param->has_vbfe, device_param->has_bfe, device_param->has_lop3, device_param->has_mov64, device_param->has_prmt, device_param->vector_width, (u32) device_param->opencl_device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type, hashconfig->attack_exec, user_options_extra->attack_kern, user_options->attack_mode); + #else + build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D LOCAL_MEM_TYPE=%d -D VENDOR_ID=%u -D CUDA_ARCH=%u -D HAS_ADD=%u -D HAS_ADDC=%u -D HAS_SUB=%u -D HAS_SUBC=%u -D HAS_VADD=%u -D HAS_VADDC=%u -D HAS_VADD_CO=%u -D HAS_VADDC_CO=%u -D HAS_VSUB=%u -D HAS_VSUBB=%u -D HAS_VSUB_CO=%u -D HAS_VSUBB_CO=%u -D HAS_VPERM=%u -D HAS_VADD3=%u -D HAS_VBFE=%u -D HAS_BFE=%u -D HAS_LOP3=%u -D HAS_MOV64=%u -D HAS_PRMT=%u -D VECT_SIZE=%d -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D ATTACK_EXEC=%u -D ATTACK_KERN=%u -D ATTACK_MODE=%u -w ", device_param->device_local_mem_type, device_param->opencl_platform_vendor_id, (device_param->sm_major * 100) + (device_param->sm_minor * 10), device_param->has_add, device_param->has_addc, device_param->has_sub, device_param->has_subc, device_param->has_vadd, device_param->has_vaddc, device_param->has_vadd_co, 
device_param->has_vaddc_co, device_param->has_vsub, device_param->has_vsubb, device_param->has_vsub_co, device_param->has_vsubb_co, device_param->has_vperm, device_param->has_vadd3, device_param->has_vbfe, device_param->has_bfe, device_param->has_lop3, device_param->has_mov64, device_param->has_prmt, device_param->vector_width, (u32) device_param->opencl_device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type, hashconfig->attack_exec, user_options_extra->attack_kern, user_options->attack_mode); + #endif - if (hc_path_read (source_file) == false) - { - event_log_error (hashcat_ctx, "%s: %s", source_file, strerror (errno)); + build_options_buf[build_options_len] = 0; - return -1; - } + /* + if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU) + { + if (device_param->opencl_platform_vendor_id == VENDOR_ID_INTEL_SDK) + { + strncat (build_options_buf, " -cl-opt-disable", 16); + } + } + */ - /** - * kernel amp cached filename - */ + #if defined (DEBUG) + if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: build_options '%s'", device_id + 1, build_options_buf); + #endif - char cached_file[256] = { 0 }; + /** + * device_name_chksum_amp_mp + */ - generate_cached_kernel_amp_filename (user_options_extra->attack_kern, folder_config->cache_dir, device_name_chksum_amp_mp, cached_file); + char device_name_chksum_amp_mp[HCBUFSIZ_TINY] = { 0 }; - const bool rc_load_kernel = load_kernel (hashcat_ctx, device_param, "amp_kernel", source_file, cached_file, build_options_buf, cache_disable, &device_param->opencl_program_amp, &device_param->cuda_module_amp, &device_param->hip_module_amp); + const size_t dnclen_amp_mp = snprintf (device_name_chksum_amp_mp, HCBUFSIZ_TINY, "%d-%d-%d-%u-%d-%u-%s-%s-%s-%u", + backend_ctx->comptime, + backend_ctx->cuda_driver_version, + backend_ctx->hip_runtimeVersion, + backend_ctx->metal_runtimeVersion, + device_param->is_opencl, + device_param->opencl_platform_vendor_id, + device_param->device_name, + device_param->opencl_device_version, + device_param->opencl_driver_version, + (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : device_param->kernel_threads_max); - if (rc_load_kernel == false) - { - event_log_error (hashcat_ctx, "* Device #%u: Kernel %s build failed.", device_param->device_id + 1, source_file); + md5_ctx_t md5_ctx; - return -1; - } + md5_init (&md5_ctx); + md5_update (&md5_ctx, (u32 *) device_name_chksum_amp_mp, dnclen_amp_mp); + md5_final (&md5_ctx); - hcfree (build_options_buf); - } - } + snprintf (device_name_chksum_amp_mp, HCBUFSIZ_TINY, "%08x", md5_ctx.h[0]); /** - * no more need for the compiler. cuda doesn't offer this function. - * from opencl specs: - * Calls to clBuildProgram, clCompileProgram or clLinkProgram after clUnloadPlatformCompiler will reload the compiler, if necessary, to build the appropriate program executable. + * kernel cache */ - // Disabled after user reporting weird errors like CL_OUT_OF_HOST_MEMORY after calling - /* - if (device_param->is_opencl == true) - { - cl_platform_id platform_id = backend_ctx->opencl_platforms[device_param->opencl_platform_id]; - if (hc_clUnloadPlatformCompiler (hashcat_ctx, platform_id) == -1) return -1; - } - */ + bool cache_disable = false; - // some algorithm collide too fast, make that impossible + // Seems to be completely broken on Apple + (Intel?) 
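Editor's note: device_name_chksum_amp_mp above is only a cache key. The relevant build inputs (compile time, CUDA/HIP/Metal runtime versions, whether the device is OpenCL, the platform vendor id, device name, device and driver versions, and the effective thread count) are concatenated into one string, hashed with MD5, and the first 32 bits are kept as a hex token that goes into the cached kernel filename, so any change in those inputs forces a JIT rebuild. A rough sketch of the idea; the values are made up, and a 32-bit FNV-1a stands in for MD5 since hashcat's md5_ctx_t is not reproduced here:

#include <stdio.h>
#include <stdint.h>
#include <string.h>

/* stand-in 32-bit FNV-1a; hashcat itself keeps the first word of an MD5 digest */
static uint32_t fnv1a32 (const char *s, size_t len)
{
  uint32_t h = 0x811c9dc5u;

  for (size_t i = 0; i < len; i++)
  {
    h ^= (uint8_t) s[i];
    h *= 0x01000193u;
  }

  return h;
}

int main (void)
{
  char key[512];

  // same shape as the checksum string in the hunk above (all values are made up)
  const int len = snprintf (key, sizeof (key), "%d-%d-%d-%u-%d-%u-%s-%s-%s-%u",
    1700000000,          // comptime
    12020,               // cuda_driver_version
    50422803,            // hip_runtimeVersion
    300u,                // metal_runtimeVersion
    1,                   // is_opencl
    0x1027u,             // opencl_platform_vendor_id
    "Apple M1 Pro",      // device_name
    "OpenCL 1.2",        // opencl_device_version
    "1.2 (Jan 1 2024)",  // opencl_driver_version
    256u);               // effective kernel threads

  printf ("%08x\n", fnv1a32 (key, (size_t) len));

  return 0;
}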
CPU + // To reproduce set cache_disable to false and run benchmark -b - if (user_options->benchmark == true) + if (device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) { - ((u32 *) hashes->digests_buf)[0] = -1U; - ((u32 *) hashes->digests_buf)[1] = -1U; - ((u32 *) hashes->digests_buf)[2] = -1U; - ((u32 *) hashes->digests_buf)[3] = -1U; + if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU) + { + cache_disable = true; + } + } + + if (module_ctx->module_jit_cache_disable != MODULE_DEFAULT) + { + cache_disable = module_ctx->module_jit_cache_disable (hashconfig, user_options, user_options_extra, hashes, device_param); } + #if defined (DEBUG) + // https://github.com/hashcat/hashcat/issues/2750 + cache_disable = true; + #endif + /** - * global buffers + * shared kernel with no hashconfig dependencies */ - const u64 size_total_fixed - = bitmap_ctx->bitmap_size - + bitmap_ctx->bitmap_size - + bitmap_ctx->bitmap_size - + bitmap_ctx->bitmap_size - + bitmap_ctx->bitmap_size - + bitmap_ctx->bitmap_size - + bitmap_ctx->bitmap_size - + bitmap_ctx->bitmap_size - + size_plains - + size_digests - + size_shown - + size_salts - + size_results - + size_extra_buffer - + size_st_digests - + size_st_salts - + size_st_esalts - + size_esalts - + size_markov_css - + size_root_css - + size_rules - + size_rules_c - + size_tm - + size_kernel_params; - - if (size_total_fixed > device_param->device_available_mem) { - event_log_error (hashcat_ctx, "* Device #%u: Not enough allocatable device memory for this hashlist/ruleset.", device_id + 1); + /** + * kernel shared source filename + */ - backend_memory_hit_warnings++; + char source_file[256] = { 0 }; - device_param->skipped_warning = true; - continue; - } + generate_source_kernel_shared_filename (folder_config->shared_dir, source_file); - if (device_param->is_cuda == true) - { - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_bitmap_s1_a, bitmap_ctx->bitmap_size) == -1) return -1; - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_bitmap_s1_b, bitmap_ctx->bitmap_size) == -1) return -1; - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_bitmap_s1_c, bitmap_ctx->bitmap_size) == -1) return -1; - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_bitmap_s1_d, bitmap_ctx->bitmap_size) == -1) return -1; - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_bitmap_s2_a, bitmap_ctx->bitmap_size) == -1) return -1; - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_bitmap_s2_b, bitmap_ctx->bitmap_size) == -1) return -1; - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_bitmap_s2_c, bitmap_ctx->bitmap_size) == -1) return -1; - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_bitmap_s2_d, bitmap_ctx->bitmap_size) == -1) return -1; - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_plain_bufs, size_plains) == -1) return -1; - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_digests_buf, size_digests) == -1) return -1; - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_digests_shown, size_shown) == -1) return -1; - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_salt_bufs, size_salts) == -1) return -1; - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_result, size_results) == -1) return -1; - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_extra0_buf, size_extra_buffer / 4) == -1) return -1; - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_extra1_buf, size_extra_buffer / 4) == -1) return -1; - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_extra2_buf, size_extra_buffer / 4) == -1) return 
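Editor's note: before any allocation, the code above adds up every fixed-size buffer (eight bitmap segments plus the digest, salt, result, scratch and self-test buffers) and rejects the device when the total exceeds the allocatable memory measured earlier, which is what produces the "Not enough allocatable device memory" warning. The check itself is just a guarded sum, along these lines (buffer sizes below are made up):

#include <stdio.h>
#include <stdint.h>

typedef uint64_t u64;

/* returns 0 when the fixed buffers fit into the device budget, -1 otherwise */
static int check_memory_budget (const u64 *sizes, int n, u64 available)
{
  u64 total = 0;

  for (int i = 0; i < n; i++) total += sizes[i];

  if (total > available)
  {
    fprintf (stderr, "not enough allocatable device memory: need %llu, have %llu\n",
             (unsigned long long) total, (unsigned long long) available);

    return -1;
  }

  return 0;
}

int main (void)
{
  // the bitmap segment is allocated eight times; the remaining entries are made-up sizes
  const u64 bitmap = 1u << 26;

  const u64 sizes[] = { 8 * bitmap, 16u << 20, 4u << 20, 1u << 20, 4096 };

  return check_memory_budget (sizes, (int) (sizeof (sizes) / sizeof (sizes[0])),
                              2048ull << 20) ? 1 : 0;
}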
-1; - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_extra3_buf, size_extra_buffer / 4) == -1) return -1; - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_st_digests_buf, size_st_digests) == -1) return -1; - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_st_salts_buf, size_st_salts) == -1) return -1; - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_kernel_param, size_kernel_params) == -1) return -1; + if (hc_path_read (source_file) == false) + { + event_log_error (hashcat_ctx, "%s: %s", source_file, strerror (errno)); - if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_bitmap_s1_a, bitmap_ctx->bitmap_s1_a, bitmap_ctx->bitmap_size, device_param->cuda_stream) == -1) return -1; - if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_bitmap_s1_b, bitmap_ctx->bitmap_s1_b, bitmap_ctx->bitmap_size, device_param->cuda_stream) == -1) return -1; - if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_bitmap_s1_c, bitmap_ctx->bitmap_s1_c, bitmap_ctx->bitmap_size, device_param->cuda_stream) == -1) return -1; - if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_bitmap_s1_d, bitmap_ctx->bitmap_s1_d, bitmap_ctx->bitmap_size, device_param->cuda_stream) == -1) return -1; - if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_bitmap_s2_a, bitmap_ctx->bitmap_s2_a, bitmap_ctx->bitmap_size, device_param->cuda_stream) == -1) return -1; - if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_bitmap_s2_b, bitmap_ctx->bitmap_s2_b, bitmap_ctx->bitmap_size, device_param->cuda_stream) == -1) return -1; - if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_bitmap_s2_c, bitmap_ctx->bitmap_s2_c, bitmap_ctx->bitmap_size, device_param->cuda_stream) == -1) return -1; - if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_bitmap_s2_d, bitmap_ctx->bitmap_s2_d, bitmap_ctx->bitmap_size, device_param->cuda_stream) == -1) return -1; - if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_digests_buf, hashes->digests_buf, size_digests, device_param->cuda_stream) == -1) return -1; - if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_salt_bufs, hashes->salts_buf, size_salts, device_param->cuda_stream) == -1) return -1; + return -1; + } /** - * special buffers + * kernel shared cached filename */ - if (user_options->slow_candidates == true) + char cached_file[256] = { 0 }; + + generate_cached_kernel_shared_filename (folder_config->cache_dir, device_name_chksum_amp_mp, cached_file, device_param->is_metal); + + #if defined (__APPLE__) + const bool rc_load_kernel = load_kernel (hashcat_ctx, device_param, "shared_kernel", source_file, cached_file, build_options_buf, cache_disable, &device_param->opencl_program_shared, &device_param->cuda_module_shared, &device_param->hip_module_shared, &device_param->metal_library_shared); + #else + const bool rc_load_kernel = load_kernel (hashcat_ctx, device_param, "shared_kernel", source_file, cached_file, build_options_buf, cache_disable, &device_param->opencl_program_shared, &device_param->cuda_module_shared, &device_param->hip_module_shared, NULL); + #endif + + if (rc_load_kernel == false) { - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_rules_c, size_rules_c) == -1) return -1; + event_log_error (hashcat_ctx, "* Device #%u: Kernel %s build failed.", device_param->device_id + 1, source_file); + + return -1; } - else + + if (device_param->is_cuda == true) { - if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT) - { - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_rules, 
size_rules) == -1) return -1; + // GPU memset - if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) - { - size_t dummy = 0; + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_memset, device_param->cuda_module_shared, "gpu_memset") == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_memset"); - if (hc_cuModuleGetGlobal (hashcat_ctx, &device_param->cuda_d_rules_c, &dummy, device_param->cuda_module, "generic_constant") == -1) return -1; - } - else - { - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_rules_c, size_rules_c) == -1) return -1; - } + backend_kernel_create_warnings++; - if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_rules, straight_ctx->kernel_rules_buf, size_rules, device_param->cuda_stream) == -1) return -1; - } - else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI) - { - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_combs, size_combs) == -1) return -1; - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_combs_c, size_combs) == -1) return -1; - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_root_css_buf, size_root_css) == -1) return -1; - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_markov_css_buf, size_markov_css) == -1) return -1; + device_param->skipped_warning = true; + continue; } - else if (user_options_extra->attack_kern == ATTACK_KERN_BF) - { - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_bfs, size_bfs) == -1) return -1; - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_root_css_buf, size_root_css) == -1) return -1; - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_markov_css_buf, size_markov_css) == -1) return -1; - - if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) - { - size_t dummy = 0; - if (hc_cuModuleGetGlobal (hashcat_ctx, &device_param->cuda_d_bfs_c, &dummy, device_param->cuda_module, "generic_constant") == -1) return -1; + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_memset, &device_param->kernel_wgs_memset) == -1) return -1; - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_tm_c, size_tm) == -1) return -1; - } - else - { - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_bfs_c, size_bfs) == -1) return -1; - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_tm_c, size_tm) == -1) return -1; - } - } - } + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_memset, &device_param->kernel_local_mem_size_memset) == -1) return -1; - if (size_esalts) - { - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_esalt_bufs, size_esalts) == -1) return -1; + device_param->kernel_dynamic_local_mem_size_memset = device_param->device_local_mem_size - device_param->kernel_local_mem_size_memset; - if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_esalt_bufs, hashes->esalts_buf, size_esalts, device_param->cuda_stream) == -1) return -1; - } + device_param->kernel_preferred_wgs_multiple_memset = device_param->cuda_warp_size; - if (hashconfig->st_hash != NULL) - { - if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_st_digests_buf, hashes->st_digests_buf, size_st_digests, device_param->cuda_stream) == -1) return -1; - if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_st_salts_buf, hashes->st_salts_buf, size_st_salts, device_param->cuda_stream) == -1) return -1; + // GPU bzero - if (size_esalts) + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_bzero, device_param->cuda_module_shared, 
"gpu_bzero") == -1) { - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_st_esalts_buf, size_st_esalts) == -1) return -1; + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_bzero"); - if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_st_esalts_buf, hashes->st_esalts_buf, size_st_esalts, device_param->cuda_stream) == -1) return -1; + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; } - } - } - if (device_param->is_hip == true) - { - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bitmap_s1_a, bitmap_ctx->bitmap_size) == -1) return -1; - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bitmap_s1_b, bitmap_ctx->bitmap_size) == -1) return -1; - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bitmap_s1_c, bitmap_ctx->bitmap_size) == -1) return -1; - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bitmap_s1_d, bitmap_ctx->bitmap_size) == -1) return -1; - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bitmap_s2_a, bitmap_ctx->bitmap_size) == -1) return -1; - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bitmap_s2_b, bitmap_ctx->bitmap_size) == -1) return -1; - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bitmap_s2_c, bitmap_ctx->bitmap_size) == -1) return -1; - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bitmap_s2_d, bitmap_ctx->bitmap_size) == -1) return -1; - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_plain_bufs, size_plains) == -1) return -1; - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_digests_buf, size_digests) == -1) return -1; - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_digests_shown, size_shown) == -1) return -1; - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_salt_bufs, size_salts) == -1) return -1; - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_result, size_results) == -1) return -1; - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_extra0_buf, size_extra_buffer / 4) == -1) return -1; - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_extra1_buf, size_extra_buffer / 4) == -1) return -1; - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_extra2_buf, size_extra_buffer / 4) == -1) return -1; - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_extra3_buf, size_extra_buffer / 4) == -1) return -1; - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_st_digests_buf, size_st_digests) == -1) return -1; - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_st_salts_buf, size_st_salts) == -1) return -1; - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_kernel_param, size_kernel_params) == -1) return -1; - - if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_bitmap_s1_a, bitmap_ctx->bitmap_s1_a, bitmap_ctx->bitmap_size, device_param->hip_stream) == -1) return -1; - if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_bitmap_s1_b, bitmap_ctx->bitmap_s1_b, bitmap_ctx->bitmap_size, device_param->hip_stream) == -1) return -1; - if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_bitmap_s1_c, bitmap_ctx->bitmap_s1_c, bitmap_ctx->bitmap_size, device_param->hip_stream) == -1) return -1; - if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_bitmap_s1_d, bitmap_ctx->bitmap_s1_d, bitmap_ctx->bitmap_size, device_param->hip_stream) == -1) return -1; - if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_bitmap_s2_a, bitmap_ctx->bitmap_s2_a, bitmap_ctx->bitmap_size, device_param->hip_stream) == -1) return -1; - if 
(hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_bitmap_s2_b, bitmap_ctx->bitmap_s2_b, bitmap_ctx->bitmap_size, device_param->hip_stream) == -1) return -1; - if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_bitmap_s2_c, bitmap_ctx->bitmap_s2_c, bitmap_ctx->bitmap_size, device_param->hip_stream) == -1) return -1; - if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_bitmap_s2_d, bitmap_ctx->bitmap_s2_d, bitmap_ctx->bitmap_size, device_param->hip_stream) == -1) return -1; - if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_digests_buf, hashes->digests_buf, size_digests, device_param->hip_stream) == -1) return -1; - if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_salt_bufs, hashes->salts_buf, size_salts, device_param->hip_stream) == -1) return -1; + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_bzero, &device_param->kernel_wgs_bzero) == -1) return -1; - /** - * special buffers - */ + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_bzero, &device_param->kernel_local_mem_size_bzero) == -1) return -1; - if (user_options->slow_candidates == true) - { - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_rules_c, size_rules_c) == -1) return -1; - } - else - { - if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT) - { - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_rules, size_rules) == -1) return -1; + device_param->kernel_dynamic_local_mem_size_bzero = device_param->device_local_mem_size - device_param->kernel_local_mem_size_bzero; - if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) - { - size_t dummy = 0; + device_param->kernel_preferred_wgs_multiple_bzero = device_param->cuda_warp_size; - if (hc_hipModuleGetGlobal (hashcat_ctx, &device_param->hip_d_rules_c, &dummy, device_param->hip_module, "generic_constant") == -1) return -1; - } - else - { - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_rules_c, size_rules_c) == -1) return -1; - } + // GPU autotune init - if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_rules, straight_ctx->kernel_rules_buf, size_rules, device_param->hip_stream) == -1) return -1; - } - else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI) + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_atinit, device_param->cuda_module_shared, "gpu_atinit") == -1) { - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_combs, size_combs) == -1) return -1; - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_combs_c, size_combs) == -1) return -1; - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_root_css_buf, size_root_css) == -1) return -1; - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_markov_css_buf, size_markov_css) == -1) return -1; + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_atinit"); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; } - else if (user_options_extra->attack_kern == ATTACK_KERN_BF) - { - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bfs, size_bfs) == -1) return -1; - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_root_css_buf, size_root_css) == -1) return -1; - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_markov_css_buf, size_markov_css) == -1) return -1; - if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) - { - size_t dummy = 0; + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_atinit, &device_param->kernel_wgs_atinit) == 
-1) return -1; - if (hc_hipModuleGetGlobal (hashcat_ctx, &device_param->hip_d_bfs_c, &dummy, device_param->hip_module, "generic_constant") == -1) return -1; + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_atinit, &device_param->kernel_local_mem_size_atinit) == -1) return -1; - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_tm_c, size_tm) == -1) return -1; - } - else - { - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bfs_c, size_bfs) == -1) return -1; - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_tm_c, size_tm) == -1) return -1; - } + device_param->kernel_dynamic_local_mem_size_atinit = device_param->device_local_mem_size - device_param->kernel_local_mem_size_atinit; + + device_param->kernel_preferred_wgs_multiple_atinit = device_param->cuda_warp_size; + + // CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_atinit, 0, sizeof (cl_mem), device_param->kernel_params_atinit[0]); if (CL_rc == -1) return -1; + // CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_atinit, 1, sizeof (cl_ulong), device_param->kernel_params_atinit[1]); if (CL_rc == -1) return -1; + + // GPU decompress + + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_decompress, device_param->cuda_module_shared, "gpu_decompress") == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_decompress"); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; } - } - if (size_esalts) - { - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_esalt_bufs, size_esalts) == -1) return -1; + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_decompress, &device_param->kernel_wgs_decompress) == -1) return -1; - if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_esalt_bufs, hashes->esalts_buf, size_esalts, device_param->hip_stream) == -1) return -1; - } + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_decompress, &device_param->kernel_local_mem_size_decompress) == -1) return -1; - if (hashconfig->st_hash != NULL) - { - if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_st_digests_buf, hashes->st_digests_buf, size_st_digests, device_param->hip_stream) == -1) return -1; - if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_st_salts_buf, hashes->st_salts_buf, size_st_salts, device_param->hip_stream) == -1) return -1; + device_param->kernel_dynamic_local_mem_size_decompress = device_param->device_local_mem_size - device_param->kernel_local_mem_size_decompress; - if (size_esalts) + device_param->kernel_preferred_wgs_multiple_decompress = device_param->cuda_warp_size; + + // GPU utf8 to utf16le conversion + + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_utf8toutf16le, device_param->cuda_module_shared, "gpu_utf8_to_utf16") == -1) { - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_st_esalts_buf, size_st_esalts) == -1) return -1; + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_utf8_to_utf16"); - if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_st_esalts_buf, hashes->st_esalts_buf, size_st_esalts, device_param->hip_stream) == -1) return -1; + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; } - } - } - if (device_param->is_opencl == true) - { - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, 
bitmap_ctx->bitmap_size, NULL, &device_param->opencl_d_bitmap_s1_a) == -1) return -1; - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, bitmap_ctx->bitmap_size, NULL, &device_param->opencl_d_bitmap_s1_b) == -1) return -1; - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, bitmap_ctx->bitmap_size, NULL, &device_param->opencl_d_bitmap_s1_c) == -1) return -1; - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, bitmap_ctx->bitmap_size, NULL, &device_param->opencl_d_bitmap_s1_d) == -1) return -1; - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, bitmap_ctx->bitmap_size, NULL, &device_param->opencl_d_bitmap_s2_a) == -1) return -1; - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, bitmap_ctx->bitmap_size, NULL, &device_param->opencl_d_bitmap_s2_b) == -1) return -1; - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, bitmap_ctx->bitmap_size, NULL, &device_param->opencl_d_bitmap_s2_c) == -1) return -1; - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, bitmap_ctx->bitmap_size, NULL, &device_param->opencl_d_bitmap_s2_d) == -1) return -1; - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE, size_plains, NULL, &device_param->opencl_d_plain_bufs) == -1) return -1; - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_digests, NULL, &device_param->opencl_d_digests_buf) == -1) return -1; - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE, size_shown, NULL, &device_param->opencl_d_digests_shown) == -1) return -1; - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_salts, NULL, &device_param->opencl_d_salt_bufs) == -1) return -1; - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE, size_results, NULL, &device_param->opencl_d_result) == -1) return -1; - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE, size_extra_buffer / 4, NULL, &device_param->opencl_d_extra0_buf) == -1) return -1; - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE, size_extra_buffer / 4, NULL, &device_param->opencl_d_extra1_buf) == -1) return -1; - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE, size_extra_buffer / 4, NULL, &device_param->opencl_d_extra2_buf) == -1) return -1; - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE, size_extra_buffer / 4, NULL, &device_param->opencl_d_extra3_buf) == -1) return -1; - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_st_digests, NULL, &device_param->opencl_d_st_digests_buf) == -1) return -1; - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_st_salts, NULL, &device_param->opencl_d_st_salts_buf) == -1) return -1; - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_kernel_params, NULL, &device_param->opencl_d_kernel_param) == -1) return -1; + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_utf8toutf16le, &device_param->kernel_wgs_utf8toutf16le) == -1) return -1; - if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s1_a, CL_FALSE, 0, 
bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s1_a, 0, NULL, NULL) == -1) return -1; - if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s1_b, CL_FALSE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s1_b, 0, NULL, NULL) == -1) return -1; - if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s1_c, CL_FALSE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s1_c, 0, NULL, NULL) == -1) return -1; - if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s1_d, CL_FALSE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s1_d, 0, NULL, NULL) == -1) return -1; - if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s2_a, CL_FALSE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s2_a, 0, NULL, NULL) == -1) return -1; - if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s2_b, CL_FALSE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s2_b, 0, NULL, NULL) == -1) return -1; - if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s2_c, CL_FALSE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s2_c, 0, NULL, NULL) == -1) return -1; - if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s2_d, CL_FALSE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s2_d, 0, NULL, NULL) == -1) return -1; - if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_digests_buf, CL_FALSE, 0, size_digests, hashes->digests_buf, 0, NULL, NULL) == -1) return -1; - if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_salt_bufs, CL_FALSE, 0, size_salts, hashes->salts_buf, 0, NULL, NULL) == -1) return -1; + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_utf8toutf16le, &device_param->kernel_local_mem_size_utf8toutf16le) == -1) return -1; - /** - * special buffers - */ + device_param->kernel_dynamic_local_mem_size_utf8toutf16le = device_param->device_local_mem_size - device_param->kernel_local_mem_size_utf8toutf16le; - if (user_options->slow_candidates == true) - { - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_rules_c, NULL, &device_param->opencl_d_rules_c) == -1) return -1; + device_param->kernel_preferred_wgs_multiple_utf8toutf16le = device_param->cuda_warp_size; } - else + + if (device_param->is_hip == true) { - if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT) - { - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_rules, NULL, &device_param->opencl_d_rules) == -1) return -1; - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_rules_c, NULL, &device_param->opencl_d_rules_c) == -1) return -1; + // GPU memset - if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_rules, CL_FALSE, 0, size_rules, straight_ctx->kernel_rules_buf, 0, NULL, NULL) == -1) return -1; - } - else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI) + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_memset, device_param->hip_module_shared, "gpu_memset") == -1) { - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_combs, NULL, 
&device_param->opencl_d_combs) == -1) return -1; - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_combs, NULL, &device_param->opencl_d_combs_c) == -1) return -1; - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_root_css, NULL, &device_param->opencl_d_root_css_buf) == -1) return -1; - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_markov_css, NULL, &device_param->opencl_d_markov_css_buf) == -1) return -1; + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_memset"); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; } - else if (user_options_extra->attack_kern == ATTACK_KERN_BF) + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_memset, &device_param->kernel_wgs_memset) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_memset, &device_param->kernel_local_mem_size_memset) == -1) return -1; + + device_param->kernel_dynamic_local_mem_size_memset = device_param->device_local_mem_size - device_param->kernel_local_mem_size_memset; + + device_param->kernel_preferred_wgs_multiple_memset = device_param->hip_warp_size; + + // GPU bzero + + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_bzero, device_param->hip_module_shared, "gpu_bzero") == -1) { - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_bfs, NULL, &device_param->opencl_d_bfs) == -1) return -1; - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_bfs, NULL, &device_param->opencl_d_bfs_c) == -1) return -1; - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_tm, NULL, &device_param->opencl_d_tm_c) == -1) return -1; - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_root_css, NULL, &device_param->opencl_d_root_css_buf) == -1) return -1; - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_markov_css, NULL, &device_param->opencl_d_markov_css_buf) == -1) return -1; + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_bzero"); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; } - } - if (size_esalts) - { - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_esalts, NULL, &device_param->opencl_d_esalt_bufs) == -1) return -1; + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_bzero, &device_param->kernel_wgs_bzero) == -1) return -1; - if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_esalt_bufs, CL_FALSE, 0, size_esalts, hashes->esalts_buf, 0, NULL, NULL) == -1) return -1; - } + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_bzero, &device_param->kernel_local_mem_size_bzero) == -1) return -1; - if (hashconfig->st_hash != NULL) - { - if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_st_digests_buf, CL_FALSE, 0, size_st_digests, hashes->st_digests_buf, 0, NULL, NULL) == -1) return -1; - if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_st_salts_buf, CL_FALSE, 0, size_st_salts, hashes->st_salts_buf, 0, NULL, NULL) == -1) return 
-1; + device_param->kernel_dynamic_local_mem_size_bzero = device_param->device_local_mem_size - device_param->kernel_local_mem_size_bzero; - if (size_esalts) + device_param->kernel_preferred_wgs_multiple_bzero = device_param->hip_warp_size; + + // GPU autotune init + + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_atinit, device_param->hip_module_shared, "gpu_atinit") == -1) { - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_st_esalts, NULL, &device_param->opencl_d_st_esalts_buf) == -1) return -1; + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_atinit"); - if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_st_esalts_buf, CL_FALSE, 0, size_st_esalts, hashes->st_esalts_buf, 0, NULL, NULL) == -1) return -1; + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; } - } - if (hc_clFlush (hashcat_ctx, device_param->opencl_command_queue) == -1) return -1; - } + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_atinit, &device_param->kernel_wgs_atinit) == -1) return -1; - /** - * kernel args - */ + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_atinit, &device_param->kernel_local_mem_size_atinit) == -1) return -1; - device_param->kernel_param.bitmap_mask = bitmap_ctx->bitmap_mask; - device_param->kernel_param.bitmap_shift1 = bitmap_ctx->bitmap_shift1; - device_param->kernel_param.bitmap_shift2 = bitmap_ctx->bitmap_shift2; - device_param->kernel_param.salt_pos_host = 0; - device_param->kernel_param.loop_pos = 0; - device_param->kernel_param.loop_cnt = 0; - device_param->kernel_param.il_cnt = 0; - device_param->kernel_param.digests_cnt = 0; - device_param->kernel_param.digests_offset_host = 0; + device_param->kernel_dynamic_local_mem_size_atinit = device_param->device_local_mem_size - device_param->kernel_local_mem_size_atinit; + + device_param->kernel_preferred_wgs_multiple_atinit = device_param->hip_warp_size; + + // CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_atinit, 0, sizeof (cl_mem), device_param->kernel_params_atinit[0]); if (CL_rc == -1) return -1; + // CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_atinit, 1, sizeof (cl_ulong), device_param->kernel_params_atinit[1]); if (CL_rc == -1) return -1; + + // GPU decompress + + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_decompress, device_param->hip_module_shared, "gpu_decompress") == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_decompress"); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_decompress, &device_param->kernel_wgs_decompress) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_decompress, &device_param->kernel_local_mem_size_decompress) == -1) return -1; + + device_param->kernel_dynamic_local_mem_size_decompress = device_param->device_local_mem_size - device_param->kernel_local_mem_size_decompress; + + device_param->kernel_preferred_wgs_multiple_decompress = device_param->hip_warp_size; + + // GPU utf8 to utf16le conversion + + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_utf8toutf16le, device_param->hip_module_shared, "gpu_utf8_to_utf16") == -1) + { + 
event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_utf8_to_utf16"); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_utf8toutf16le, &device_param->kernel_wgs_utf8toutf16le) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_utf8toutf16le, &device_param->kernel_local_mem_size_utf8toutf16le) == -1) return -1; + + device_param->kernel_dynamic_local_mem_size_utf8toutf16le = device_param->device_local_mem_size - device_param->kernel_local_mem_size_utf8toutf16le; + + device_param->kernel_preferred_wgs_multiple_utf8toutf16le = device_param->hip_warp_size; + } + + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + // GPU memset + + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library_shared, "gpu_memset", &device_param->metal_function_memset, &device_param->metal_pipeline_memset) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_memset"); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline_memset, &device_param->kernel_wgs_memset) == -1) return -1; + + device_param->kernel_local_mem_size_memset = 0; + device_param->kernel_dynamic_local_mem_size_memset = 0; + device_param->kernel_preferred_wgs_multiple_memset = device_param->metal_warp_size; + + // GPU bzero + + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library_shared, "gpu_bzero", &device_param->metal_function_bzero, &device_param->metal_pipeline_bzero) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_bzero"); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline_bzero, &device_param->kernel_wgs_bzero) == -1) return -1; + + device_param->kernel_local_mem_size_bzero = 0; + device_param->kernel_dynamic_local_mem_size_bzero = 0; + device_param->kernel_preferred_wgs_multiple_bzero = device_param->metal_warp_size; + + // GPU autotune init + + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library_shared, "gpu_atinit", &device_param->metal_function_atinit, &device_param->metal_pipeline_atinit) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_atinit"); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline_atinit, &device_param->kernel_wgs_atinit) == -1) return -1; + + device_param->kernel_local_mem_size_atinit = 0; + device_param->kernel_dynamic_local_mem_size_atinit = 0; + device_param->kernel_preferred_wgs_multiple_atinit = device_param->metal_warp_size; + + // GPU decompress + + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library_shared, "gpu_decompress", &device_param->metal_function_decompress, &device_param->metal_pipeline_decompress) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_decompress"); + + 
backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline_decompress, &device_param->kernel_wgs_decompress) == -1) return -1; + + device_param->kernel_local_mem_size_decompress = 0; + device_param->kernel_dynamic_local_mem_size_decompress = 0; + device_param->kernel_preferred_wgs_multiple_decompress = device_param->metal_warp_size; + + // GPU utf8 to utf16le conversion + + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library_shared, "gpu_utf8_to_utf16", &device_param->metal_function_utf8toutf16le, &device_param->metal_pipeline_utf8toutf16le) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_utf8_to_utf16"); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline_utf8toutf16le, &device_param->kernel_wgs_utf8toutf16le) == -1) return -1; + + device_param->kernel_local_mem_size_utf8toutf16le = 0; + device_param->kernel_dynamic_local_mem_size_utf8toutf16le = 0; + device_param->kernel_preferred_wgs_multiple_utf8toutf16le = device_param->metal_warp_size; + } + #endif + + if (device_param->is_opencl == true) + { + // GPU memset + + if (hc_clCreateKernel (hashcat_ctx, device_param->opencl_program_shared, "gpu_memset", &device_param->opencl_kernel_memset) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_memset"); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (get_opencl_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_memset, &device_param->kernel_wgs_memset) == -1) return -1; + + if (get_opencl_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_memset, &device_param->kernel_local_mem_size_memset) == -1) return -1; + + if (get_opencl_kernel_dynamic_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_memset, &device_param->kernel_dynamic_local_mem_size_memset) == -1) return -1; + + if (get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_memset, &device_param->kernel_preferred_wgs_multiple_memset) == -1) return -1; + + // GPU bzero + + if (hc_clCreateKernel (hashcat_ctx, device_param->opencl_program_shared, "gpu_bzero", &device_param->opencl_kernel_bzero) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_bzero"); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (get_opencl_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_bzero, &device_param->kernel_wgs_bzero) == -1) return -1; + + if (get_opencl_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_bzero, &device_param->kernel_local_mem_size_bzero) == -1) return -1; + + if (get_opencl_kernel_dynamic_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_bzero, &device_param->kernel_dynamic_local_mem_size_bzero) == -1) return -1; + + if (get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_bzero, &device_param->kernel_preferred_wgs_multiple_bzero) == -1) return -1; + + // apple hack, but perhaps also an alternative for other vendors + + if 
(device_param->kernel_preferred_wgs_multiple == 0) device_param->kernel_preferred_wgs_multiple = device_param->kernel_preferred_wgs_multiple_bzero; + + // GPU autotune init + + if (hc_clCreateKernel (hashcat_ctx, device_param->opencl_program_shared, "gpu_atinit", &device_param->opencl_kernel_atinit) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_atinit"); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (get_opencl_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_atinit, &device_param->kernel_wgs_atinit) == -1) return -1; + + if (get_opencl_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_atinit, &device_param->kernel_local_mem_size_atinit) == -1) return -1; + + if (get_opencl_kernel_dynamic_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_atinit, &device_param->kernel_dynamic_local_mem_size_atinit) == -1) return -1; + + if (get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_atinit, &device_param->kernel_preferred_wgs_multiple_atinit) == -1) return -1; + + // GPU decompress + + if (hc_clCreateKernel (hashcat_ctx, device_param->opencl_program_shared, "gpu_decompress", &device_param->opencl_kernel_decompress) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_decompress"); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (get_opencl_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_decompress, &device_param->kernel_wgs_decompress) == -1) return -1; + + if (get_opencl_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_decompress, &device_param->kernel_local_mem_size_decompress) == -1) return -1; + + if (get_opencl_kernel_dynamic_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_decompress, &device_param->kernel_dynamic_local_mem_size_decompress) == -1) return -1; + + if (get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_decompress, &device_param->kernel_preferred_wgs_multiple_decompress) == -1) return -1; + + // GPU utf8 to utf16le conversion + + if (hc_clCreateKernel (hashcat_ctx, device_param->opencl_program_shared, "gpu_utf8_to_utf16", &device_param->opencl_kernel_utf8toutf16le) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_utf8_to_utf16"); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (get_opencl_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_utf8toutf16le, &device_param->kernel_wgs_utf8toutf16le) == -1) return -1; + + if (get_opencl_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_utf8toutf16le, &device_param->kernel_local_mem_size_utf8toutf16le) == -1) return -1; + + if (get_opencl_kernel_dynamic_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_utf8toutf16le, &device_param->kernel_dynamic_local_mem_size_utf8toutf16le) == -1) return -1; + + if (get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_utf8toutf16le, &device_param->kernel_preferred_wgs_multiple_utf8toutf16le) == -1) return -1; + } + } + + /** + * main kernel + */ + + { + char *build_options_module_buf = (char *) 
hcmalloc (build_options_sz); + + int build_options_module_len = 0; + + build_options_module_len += snprintf (build_options_module_buf + build_options_module_len, build_options_sz - build_options_module_len, "%s ", build_options_buf); + + if (module_ctx->module_jit_build_options != MODULE_DEFAULT) + { + char *jit_build_options = module_ctx->module_jit_build_options (hashconfig, user_options, user_options_extra, hashes, device_param); + + if (jit_build_options != NULL) + { + build_options_module_len += snprintf (build_options_module_buf + build_options_module_len, build_options_sz - build_options_module_len, "%s", jit_build_options); + + // this is a bit ugly + // it would be better to have the module return the value directly instead of embedding it in a string + + u32 fixed_local_size = 0; + + if (sscanf (jit_build_options, "-D FIXED_LOCAL_SIZE=%u", &fixed_local_size) == 1) + { + device_param->kernel_threads_min = fixed_local_size; + device_param->kernel_threads_max = fixed_local_size; + } + else + { + // the kernel-specific minimum needs to be set so that the self-test won't fail + + if (sscanf (jit_build_options, "-D FIXED_LOCAL_SIZE_COMP=%u", &fixed_local_size) == 1) + { + device_param->kernel_threads_min = fixed_local_size; + // device_param->kernel_threads_max = fixed_local_size; + } + } + } + } + + build_options_module_buf[build_options_module_len] = 0; + + #if defined (DEBUG) + if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: build_options_module '%s'", device_id + 1, build_options_module_buf); + #endif + + /** + * device_name_chksum + */ + + char device_name_chksum[HCBUFSIZ_TINY] = { 0 }; + + // The kernel source can depend on some JIT compiler macros which themselves depend on the attack mode. + // ATM this is relevant only for ATTACK_MODE_ASSOCIATION, which slightly modifies ATTACK_MODE_STRAIGHT kernels. + + const u32 extra_value = (user_options->attack_mode == ATTACK_MODE_ASSOCIATION) ? ATTACK_MODE_ASSOCIATION : ATTACK_MODE_NONE; + + const size_t dnclen = snprintf (device_name_chksum, HCBUFSIZ_TINY, "%d-%d-%d-%u-%d-%u-%s-%s-%s-%d-%u-%u-%u-%s", + backend_ctx->comptime, + backend_ctx->cuda_driver_version, + backend_ctx->hip_runtimeVersion, + backend_ctx->metal_runtimeVersion, + device_param->is_opencl, + device_param->opencl_platform_vendor_id, + device_param->device_name, + device_param->opencl_device_version, + device_param->opencl_driver_version, + device_param->vector_width, + hashconfig->kern_type, + extra_value, + (user_options->kernel_threads_chgd == true) ? 
user_options->kernel_threads : device_param->kernel_threads_max, + build_options_module_buf); + + memset (&md5_ctx, 0, sizeof (md5_ctx_t)); + md5_init (&md5_ctx); + md5_update (&md5_ctx, (u32 *) device_name_chksum, dnclen); + md5_final (&md5_ctx); + + snprintf (device_name_chksum, HCBUFSIZ_TINY, "%08x", md5_ctx.h[0]); + + /** + * kernel source filename + */ + + char source_file[256] = { 0 }; + + generate_source_kernel_filename (user_options->slow_candidates, hashconfig->attack_exec, user_options_extra->attack_kern, kern_type, hashconfig->opti_type, folder_config->shared_dir, source_file); + + if (hc_path_read (source_file) == false) + { + event_log_error (hashcat_ctx, "%s: %s", source_file, strerror (errno)); + + return -1; + } + + /** + * kernel cached filename + */ + + char cached_file[256] = { 0 }; + + generate_cached_kernel_filename (user_options->slow_candidates, hashconfig->attack_exec, user_options_extra->attack_kern, kern_type, hashconfig->opti_type, folder_config->cache_dir, device_name_chksum, cached_file, device_param->is_metal); + + /** + * load kernel + */ + + #if defined (__APPLE__) + const bool rc_load_kernel = load_kernel (hashcat_ctx, device_param, "main_kernel", source_file, cached_file, build_options_module_buf, cache_disable, &device_param->opencl_program, &device_param->cuda_module, &device_param->hip_module, &device_param->metal_library); + #else + const bool rc_load_kernel = load_kernel (hashcat_ctx, device_param, "main_kernel", source_file, cached_file, build_options_module_buf, cache_disable, &device_param->opencl_program, &device_param->cuda_module, &device_param->hip_module, NULL); + #endif + + if (rc_load_kernel == false) + { + event_log_error (hashcat_ctx, "* Device #%u: Kernel %s build failed.", device_param->device_id + 1, source_file); + + backend_kernel_build_warnings++; + + device_param->skipped_warning = true; + continue; + } + + hcfree (build_options_module_buf); + } + + /** + * word generator kernel + */ + + if (user_options->slow_candidates == true) + { + } + else + { + if ((user_options->attack_mode != ATTACK_MODE_STRAIGHT) && (user_options->attack_mode != ATTACK_MODE_ASSOCIATION)) + { + /** + * kernel mp source filename + */ + + char source_file[256] = { 0 }; + + generate_source_kernel_mp_filename (hashconfig->opti_type, hashconfig->opts_type, folder_config->shared_dir, source_file); + + if (hc_path_read (source_file) == false) + { + event_log_error (hashcat_ctx, "%s: %s", source_file, strerror (errno)); + + return -1; + } + + /** + * kernel mp cached filename + */ + + char cached_file[256] = { 0 }; + + generate_cached_kernel_mp_filename (hashconfig->opti_type, hashconfig->opts_type, folder_config->cache_dir, device_name_chksum_amp_mp, cached_file, device_param->is_metal); + + #if defined (__APPLE__) + const bool rc_load_kernel = load_kernel (hashcat_ctx, device_param, "mp_kernel", source_file, cached_file, build_options_buf, cache_disable, &device_param->opencl_program_mp, &device_param->cuda_module_mp, &device_param->hip_module_mp, &device_param->metal_library_mp); + #else + const bool rc_load_kernel = load_kernel (hashcat_ctx, device_param, "mp_kernel", source_file, cached_file, build_options_buf, cache_disable, &device_param->opencl_program_mp, &device_param->cuda_module_mp, &device_param->hip_module_mp, NULL); + #endif + + if (rc_load_kernel == false) + { + event_log_error (hashcat_ctx, "* Device #%u: Kernel %s build failed.", device_param->device_id + 1, source_file); + + return -1; + } + } + } + + /** + * amplifier kernel + */ + + if 
(user_options->slow_candidates == true) + { + } + else + { + if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + { + + } + else + { + /** + * kernel amp source filename + */ + + char source_file[256] = { 0 }; + + generate_source_kernel_amp_filename (user_options_extra->attack_kern, folder_config->shared_dir, source_file); + + if (hc_path_read (source_file) == false) + { + event_log_error (hashcat_ctx, "%s: %s", source_file, strerror (errno)); + + return -1; + } + + /** + * kernel amp cached filename + */ + + char cached_file[256] = { 0 }; + + generate_cached_kernel_amp_filename (user_options_extra->attack_kern, folder_config->cache_dir, device_name_chksum_amp_mp, cached_file, device_param->is_metal); + + #if defined (__APPLE__) + const bool rc_load_kernel = load_kernel (hashcat_ctx, device_param, "amp_kernel", source_file, cached_file, build_options_buf, cache_disable, &device_param->opencl_program_amp, &device_param->cuda_module_amp, &device_param->hip_module_amp, &device_param->metal_library_amp); + #else + const bool rc_load_kernel = load_kernel (hashcat_ctx, device_param, "amp_kernel", source_file, cached_file, build_options_buf, cache_disable, &device_param->opencl_program_amp, &device_param->cuda_module_amp, &device_param->hip_module_amp, NULL); + #endif + + if (rc_load_kernel == false) + { + event_log_error (hashcat_ctx, "* Device #%u: Kernel %s build failed.", device_param->device_id + 1, source_file); + + return -1; + } + + hcfree (build_options_buf); + } + } + + /** + * No more need for the compiler. CUDA doesn't offer this function. + * From the OpenCL specs: + * Calls to clBuildProgram, clCompileProgram or clLinkProgram after clUnloadPlatformCompiler will reload the compiler, if necessary, to build the appropriate program executable. 
+ */ + // Disabled after users reported weird errors like CL_OUT_OF_HOST_MEMORY after calling clUnloadPlatformCompiler + /* + if (device_param->is_opencl == true) + { + cl_platform_id platform_id = backend_ctx->opencl_platforms[device_param->opencl_platform_id]; + + if (hc_clUnloadPlatformCompiler (hashcat_ctx, platform_id) == -1) return -1; + } + */ + + // some algorithms collide too fast, so make that impossible + + if (user_options->benchmark == true) + { + ((u32 *) hashes->digests_buf)[0] = -1U; + ((u32 *) hashes->digests_buf)[1] = -1U; + ((u32 *) hashes->digests_buf)[2] = -1U; + ((u32 *) hashes->digests_buf)[3] = -1U; + } + + /** + * global buffers + */ + + const u64 size_total_fixed + = bitmap_ctx->bitmap_size + + bitmap_ctx->bitmap_size + + bitmap_ctx->bitmap_size + + bitmap_ctx->bitmap_size + + bitmap_ctx->bitmap_size + + bitmap_ctx->bitmap_size + + bitmap_ctx->bitmap_size + + bitmap_ctx->bitmap_size + + size_plains + + size_digests + + size_shown + + size_salts + + size_results + + size_extra_buffer + + size_st_digests + + size_st_salts + + size_st_esalts + + size_esalts + + size_markov_css + + size_root_css + + size_rules + + size_rules_c + + size_tm + + size_kernel_params; + + if (size_total_fixed > device_param->device_available_mem) + { + event_log_error (hashcat_ctx, "* Device #%u: Not enough allocatable device memory for this hashlist/ruleset.", device_id + 1); + + backend_memory_hit_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (device_param->is_cuda == true) + { + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_bitmap_s1_a, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_bitmap_s1_b, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_bitmap_s1_c, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_bitmap_s1_d, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_bitmap_s2_a, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_bitmap_s2_b, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_bitmap_s2_c, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_bitmap_s2_d, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_plain_bufs, size_plains) == -1) return -1; + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_digests_buf, size_digests) == -1) return -1; + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_digests_shown, size_shown) == -1) return -1; + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_salt_bufs, size_salts) == -1) return -1; + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_result, size_results) == -1) return -1; + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_extra0_buf, size_extra_buffer / 4) == -1) return -1; + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_extra1_buf, size_extra_buffer / 4) == -1) return -1; + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_extra2_buf, size_extra_buffer / 4) == -1) return -1; + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_extra3_buf, size_extra_buffer / 4) == -1) return -1; + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_st_digests_buf, size_st_digests) == -1) return -1; + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_st_salts_buf, size_st_salts) == -1) return -1; + if 
(hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_kernel_param, size_kernel_params) == -1) return -1; + + if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_bitmap_s1_a, bitmap_ctx->bitmap_s1_a, bitmap_ctx->bitmap_size, device_param->cuda_stream) == -1) return -1; + if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_bitmap_s1_b, bitmap_ctx->bitmap_s1_b, bitmap_ctx->bitmap_size, device_param->cuda_stream) == -1) return -1; + if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_bitmap_s1_c, bitmap_ctx->bitmap_s1_c, bitmap_ctx->bitmap_size, device_param->cuda_stream) == -1) return -1; + if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_bitmap_s1_d, bitmap_ctx->bitmap_s1_d, bitmap_ctx->bitmap_size, device_param->cuda_stream) == -1) return -1; + if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_bitmap_s2_a, bitmap_ctx->bitmap_s2_a, bitmap_ctx->bitmap_size, device_param->cuda_stream) == -1) return -1; + if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_bitmap_s2_b, bitmap_ctx->bitmap_s2_b, bitmap_ctx->bitmap_size, device_param->cuda_stream) == -1) return -1; + if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_bitmap_s2_c, bitmap_ctx->bitmap_s2_c, bitmap_ctx->bitmap_size, device_param->cuda_stream) == -1) return -1; + if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_bitmap_s2_d, bitmap_ctx->bitmap_s2_d, bitmap_ctx->bitmap_size, device_param->cuda_stream) == -1) return -1; + if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_digests_buf, hashes->digests_buf, size_digests, device_param->cuda_stream) == -1) return -1; + if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_salt_bufs, hashes->salts_buf, size_salts, device_param->cuda_stream) == -1) return -1; + + /** + * special buffers + */ + + if (user_options->slow_candidates == true) + { + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_rules_c, size_rules_c) == -1) return -1; + } + else + { + if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT) + { + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_rules, size_rules) == -1) return -1; + + if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + { + size_t dummy = 0; + + if (hc_cuModuleGetGlobal (hashcat_ctx, &device_param->cuda_d_rules_c, &dummy, device_param->cuda_module, "generic_constant") == -1) return -1; + } + else + { + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_rules_c, size_rules_c) == -1) return -1; + } + + if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_rules, straight_ctx->kernel_rules_buf, size_rules, device_param->cuda_stream) == -1) return -1; + } + else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI) + { + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_combs, size_combs) == -1) return -1; + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_combs_c, size_combs) == -1) return -1; + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_root_css_buf, size_root_css) == -1) return -1; + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_markov_css_buf, size_markov_css) == -1) return -1; + } + else if (user_options_extra->attack_kern == ATTACK_KERN_BF) + { + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_bfs, size_bfs) == -1) return -1; + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_root_css_buf, size_root_css) == -1) return -1; + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_markov_css_buf, size_markov_css) == -1) return -1; + + if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + { + size_t dummy 
= 0; + + if (hc_cuModuleGetGlobal (hashcat_ctx, &device_param->cuda_d_bfs_c, &dummy, device_param->cuda_module, "generic_constant") == -1) return -1; + + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_tm_c, size_tm) == -1) return -1; + } + else + { + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_bfs_c, size_bfs) == -1) return -1; + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_tm_c, size_tm) == -1) return -1; + } + } + } + + if (size_esalts) + { + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_esalt_bufs, size_esalts) == -1) return -1; + + if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_esalt_bufs, hashes->esalts_buf, size_esalts, device_param->cuda_stream) == -1) return -1; + } + + if (hashconfig->st_hash != NULL) + { + if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_st_digests_buf, hashes->st_digests_buf, size_st_digests, device_param->cuda_stream) == -1) return -1; + if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_st_salts_buf, hashes->st_salts_buf, size_st_salts, device_param->cuda_stream) == -1) return -1; + + if (size_esalts) + { + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_st_esalts_buf, size_st_esalts) == -1) return -1; + + if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_st_esalts_buf, hashes->st_esalts_buf, size_st_esalts, device_param->cuda_stream) == -1) return -1; + } + } + } + + if (device_param->is_hip == true) + { + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bitmap_s1_a, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bitmap_s1_b, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bitmap_s1_c, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bitmap_s1_d, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bitmap_s2_a, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bitmap_s2_b, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bitmap_s2_c, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bitmap_s2_d, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_plain_bufs, size_plains) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_digests_buf, size_digests) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_digests_shown, size_shown) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_salt_bufs, size_salts) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_result, size_results) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_extra0_buf, size_extra_buffer / 4) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_extra1_buf, size_extra_buffer / 4) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_extra2_buf, size_extra_buffer / 4) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_extra3_buf, size_extra_buffer / 4) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_st_digests_buf, size_st_digests) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_st_salts_buf, size_st_salts) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_kernel_param, 
size_kernel_params) == -1) return -1; + + if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_bitmap_s1_a, bitmap_ctx->bitmap_s1_a, bitmap_ctx->bitmap_size, device_param->hip_stream) == -1) return -1; + if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_bitmap_s1_b, bitmap_ctx->bitmap_s1_b, bitmap_ctx->bitmap_size, device_param->hip_stream) == -1) return -1; + if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_bitmap_s1_c, bitmap_ctx->bitmap_s1_c, bitmap_ctx->bitmap_size, device_param->hip_stream) == -1) return -1; + if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_bitmap_s1_d, bitmap_ctx->bitmap_s1_d, bitmap_ctx->bitmap_size, device_param->hip_stream) == -1) return -1; + if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_bitmap_s2_a, bitmap_ctx->bitmap_s2_a, bitmap_ctx->bitmap_size, device_param->hip_stream) == -1) return -1; + if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_bitmap_s2_b, bitmap_ctx->bitmap_s2_b, bitmap_ctx->bitmap_size, device_param->hip_stream) == -1) return -1; + if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_bitmap_s2_c, bitmap_ctx->bitmap_s2_c, bitmap_ctx->bitmap_size, device_param->hip_stream) == -1) return -1; + if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_bitmap_s2_d, bitmap_ctx->bitmap_s2_d, bitmap_ctx->bitmap_size, device_param->hip_stream) == -1) return -1; + if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_digests_buf, hashes->digests_buf, size_digests, device_param->hip_stream) == -1) return -1; + if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_salt_bufs, hashes->salts_buf, size_salts, device_param->hip_stream) == -1) return -1; + + /** + * special buffers + */ + + if (user_options->slow_candidates == true) + { + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_rules_c, size_rules_c) == -1) return -1; + } + else + { + if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT) + { + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_rules, size_rules) == -1) return -1; + + if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + { + size_t dummy = 0; + + if (hc_hipModuleGetGlobal (hashcat_ctx, &device_param->hip_d_rules_c, &dummy, device_param->hip_module, "generic_constant") == -1) return -1; + } + else + { + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_rules_c, size_rules_c) == -1) return -1; + } + + if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_rules, straight_ctx->kernel_rules_buf, size_rules, device_param->hip_stream) == -1) return -1; + } + else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI) + { + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_combs, size_combs) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_combs_c, size_combs) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_root_css_buf, size_root_css) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_markov_css_buf, size_markov_css) == -1) return -1; + } + else if (user_options_extra->attack_kern == ATTACK_KERN_BF) + { + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bfs, size_bfs) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_root_css_buf, size_root_css) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_markov_css_buf, size_markov_css) == -1) return -1; + + if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + { + size_t dummy = 0; + + if (hc_hipModuleGetGlobal (hashcat_ctx, &device_param->hip_d_bfs_c, 
&dummy, device_param->hip_module, "generic_constant") == -1) return -1; + + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_tm_c, size_tm) == -1) return -1; + } + else + { + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bfs_c, size_bfs) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_tm_c, size_tm) == -1) return -1; + } + } + } + + if (size_esalts) + { + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_esalt_bufs, size_esalts) == -1) return -1; + + if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_esalt_bufs, hashes->esalts_buf, size_esalts, device_param->hip_stream) == -1) return -1; + } + + if (hashconfig->st_hash != NULL) + { + if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_st_digests_buf, hashes->st_digests_buf, size_st_digests, device_param->hip_stream) == -1) return -1; + if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_st_salts_buf, hashes->st_salts_buf, size_st_salts, device_param->hip_stream) == -1) return -1; + + if (size_esalts) + { + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_st_esalts_buf, size_st_esalts) == -1) return -1; + + if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_st_esalts_buf, hashes->st_esalts_buf, size_st_esalts, device_param->hip_stream) == -1) return -1; + } + } + } + + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + // gpu only + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, bitmap_ctx->bitmap_size, NULL, &device_param->metal_d_bitmap_s1_a) == -1) return -1; + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, bitmap_ctx->bitmap_size, NULL, &device_param->metal_d_bitmap_s1_b) == -1) return -1; + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, bitmap_ctx->bitmap_size, NULL, &device_param->metal_d_bitmap_s1_c) == -1) return -1; + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, bitmap_ctx->bitmap_size, NULL, &device_param->metal_d_bitmap_s1_d) == -1) return -1; + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, bitmap_ctx->bitmap_size, NULL, &device_param->metal_d_bitmap_s2_a) == -1) return -1; + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, bitmap_ctx->bitmap_size, NULL, &device_param->metal_d_bitmap_s2_b) == -1) return -1; + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, bitmap_ctx->bitmap_size, NULL, &device_param->metal_d_bitmap_s2_c) == -1) return -1; + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, bitmap_ctx->bitmap_size, NULL, &device_param->metal_d_bitmap_s2_d) == -1) return -1; + + // shared + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_plains, NULL, &device_param->metal_d_plain_bufs) == -1) return -1; + + // gpu only + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_digests, NULL, &device_param->metal_d_digests_buf) == -1) return -1; + + // shared + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_shown, NULL, &device_param->metal_d_digests_shown) == -1) return -1; + + // gpu only + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_salts, NULL, &device_param->metal_d_salt_bufs) == -1) return -1; + + // shared + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_results, NULL, &device_param->metal_d_result) == -1) return -1; + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_extra_buffer / 4, NULL, &device_param->metal_d_extra0_buf) == -1) return -1; + if 
(hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_extra_buffer / 4, NULL, &device_param->metal_d_extra1_buf) == -1) return -1; + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_extra_buffer / 4, NULL, &device_param->metal_d_extra2_buf) == -1) return -1; + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_extra_buffer / 4, NULL, &device_param->metal_d_extra3_buf) == -1) return -1; + + // gpu only + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_st_digests, NULL, &device_param->metal_d_st_digests_buf) == -1) return -1; + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_st_salts, NULL, &device_param->metal_d_st_salts_buf) == -1) return -1; + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_kernel_params, NULL, &device_param->metal_d_kernel_param) == -1) return -1; + + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_bitmap_s1_a, 0, bitmap_ctx->bitmap_s1_a, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_bitmap_s1_b, 0, bitmap_ctx->bitmap_s1_b, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_bitmap_s1_c, 0, bitmap_ctx->bitmap_s1_c, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_bitmap_s1_d, 0, bitmap_ctx->bitmap_s1_d, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_bitmap_s2_a, 0, bitmap_ctx->bitmap_s2_a, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_bitmap_s2_b, 0, bitmap_ctx->bitmap_s2_b, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_bitmap_s2_c, 0, bitmap_ctx->bitmap_s2_c, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_bitmap_s2_d, 0, bitmap_ctx->bitmap_s2_d, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_digests_buf, 0, hashes->digests_buf, size_digests) == -1) return -1; + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_salt_bufs, 0, hashes->salts_buf, size_salts) == -1) return -1; + + /** + * special buffers + */ + + if (user_options->slow_candidates == true) + { + // gpu only + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_rules_c, NULL, &device_param->metal_d_rules_c) == -1) return -1; + } + else + { + if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT) + { + // gpu only + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_rules, NULL, &device_param->metal_d_rules) == -1) return -1; + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_rules_c, NULL, &device_param->metal_d_rules_c) == -1) return -1; + + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_rules, 0, straight_ctx->kernel_rules_buf, size_rules) == -1) return -1; + } + else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI) + { + // gpu only + if (hc_mtlCreateBuffer (hashcat_ctx, 
device_param->metal_device, size_combs, NULL, &device_param->metal_d_combs) == -1) return -1; + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_combs, NULL, &device_param->metal_d_combs_c) == -1) return -1; + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_root_css, NULL, &device_param->metal_d_root_css_buf) == -1) return -1; + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_markov_css, NULL, &device_param->metal_d_markov_css_buf) == -1) return -1; + } + else if (user_options_extra->attack_kern == ATTACK_KERN_BF) + { + // gpu only + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_bfs, NULL, &device_param->metal_d_bfs) == -1) return -1; + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_bfs, NULL, &device_param->metal_d_bfs_c) == -1) return -1; + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_tm, NULL, &device_param->metal_d_tm_c) == -1) return -1; + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_root_css, NULL, &device_param->metal_d_root_css_buf) == -1) return -1; + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_markov_css, NULL, &device_param->metal_d_markov_css_buf) == -1) return -1; + } + } + + if (size_esalts) + { + // gpu only + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_esalts, NULL, &device_param->metal_d_esalt_bufs) == -1) return -1; + + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_esalt_bufs, 0, hashes->esalts_buf, size_esalts) == -1) return -1; + } + + if (hashconfig->st_hash != NULL) + { + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_st_digests_buf, 0, hashes->st_digests_buf, size_st_digests) == -1) return -1; + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_st_salts_buf, 0, hashes->st_salts_buf, size_st_salts) == -1) return -1; + + if (size_esalts) + { + // gpu only + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_st_esalts, NULL, &device_param->metal_d_st_esalts_buf) == -1) return -1; + + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_st_esalts_buf, 0, hashes->st_esalts_buf, size_st_esalts) == -1) return -1; + } + } + } + #endif // __APPLE__ + + if (device_param->is_opencl == true) + { + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, bitmap_ctx->bitmap_size, NULL, &device_param->opencl_d_bitmap_s1_a) == -1) return -1; + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, bitmap_ctx->bitmap_size, NULL, &device_param->opencl_d_bitmap_s1_b) == -1) return -1; + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, bitmap_ctx->bitmap_size, NULL, &device_param->opencl_d_bitmap_s1_c) == -1) return -1; + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, bitmap_ctx->bitmap_size, NULL, &device_param->opencl_d_bitmap_s1_d) == -1) return -1; + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, bitmap_ctx->bitmap_size, NULL, &device_param->opencl_d_bitmap_s2_a) == -1) return -1; + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, bitmap_ctx->bitmap_size, NULL, &device_param->opencl_d_bitmap_s2_b) == -1) return -1; + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, 
CL_MEM_READ_ONLY, bitmap_ctx->bitmap_size, NULL, &device_param->opencl_d_bitmap_s2_c) == -1) return -1; + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, bitmap_ctx->bitmap_size, NULL, &device_param->opencl_d_bitmap_s2_d) == -1) return -1; + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE, size_plains, NULL, &device_param->opencl_d_plain_bufs) == -1) return -1; + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_digests, NULL, &device_param->opencl_d_digests_buf) == -1) return -1; + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE, size_shown, NULL, &device_param->opencl_d_digests_shown) == -1) return -1; + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_salts, NULL, &device_param->opencl_d_salt_bufs) == -1) return -1; + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE, size_results, NULL, &device_param->opencl_d_result) == -1) return -1; + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE, size_extra_buffer / 4, NULL, &device_param->opencl_d_extra0_buf) == -1) return -1; + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE, size_extra_buffer / 4, NULL, &device_param->opencl_d_extra1_buf) == -1) return -1; + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE, size_extra_buffer / 4, NULL, &device_param->opencl_d_extra2_buf) == -1) return -1; + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE, size_extra_buffer / 4, NULL, &device_param->opencl_d_extra3_buf) == -1) return -1; + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_st_digests, NULL, &device_param->opencl_d_st_digests_buf) == -1) return -1; + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_st_salts, NULL, &device_param->opencl_d_st_salts_buf) == -1) return -1; + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_kernel_params, NULL, &device_param->opencl_d_kernel_param) == -1) return -1; + + if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s1_a, CL_FALSE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s1_a, 0, NULL, NULL) == -1) return -1; + if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s1_b, CL_FALSE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s1_b, 0, NULL, NULL) == -1) return -1; + if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s1_c, CL_FALSE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s1_c, 0, NULL, NULL) == -1) return -1; + if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s1_d, CL_FALSE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s1_d, 0, NULL, NULL) == -1) return -1; + if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s2_a, CL_FALSE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s2_a, 0, NULL, NULL) == -1) return -1; + if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s2_b, CL_FALSE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s2_b, 0, NULL, NULL) == -1) return -1; + if (hc_clEnqueueWriteBuffer 
(hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s2_c, CL_FALSE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s2_c, 0, NULL, NULL) == -1) return -1; + if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s2_d, CL_FALSE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s2_d, 0, NULL, NULL) == -1) return -1; + if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_digests_buf, CL_FALSE, 0, size_digests, hashes->digests_buf, 0, NULL, NULL) == -1) return -1; + if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_salt_bufs, CL_FALSE, 0, size_salts, hashes->salts_buf, 0, NULL, NULL) == -1) return -1; + + /** + * special buffers + */ + + if (user_options->slow_candidates == true) + { + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_rules_c, NULL, &device_param->opencl_d_rules_c) == -1) return -1; + } + else + { + if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT) + { + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_rules, NULL, &device_param->opencl_d_rules) == -1) return -1; + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_rules_c, NULL, &device_param->opencl_d_rules_c) == -1) return -1; + + if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_rules, CL_FALSE, 0, size_rules, straight_ctx->kernel_rules_buf, 0, NULL, NULL) == -1) return -1; + } + else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI) + { + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_combs, NULL, &device_param->opencl_d_combs) == -1) return -1; + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_combs, NULL, &device_param->opencl_d_combs_c) == -1) return -1; + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_root_css, NULL, &device_param->opencl_d_root_css_buf) == -1) return -1; + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_markov_css, NULL, &device_param->opencl_d_markov_css_buf) == -1) return -1; + } + else if (user_options_extra->attack_kern == ATTACK_KERN_BF) + { + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_bfs, NULL, &device_param->opencl_d_bfs) == -1) return -1; + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_bfs, NULL, &device_param->opencl_d_bfs_c) == -1) return -1; + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_tm, NULL, &device_param->opencl_d_tm_c) == -1) return -1; + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_root_css, NULL, &device_param->opencl_d_root_css_buf) == -1) return -1; + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_markov_css, NULL, &device_param->opencl_d_markov_css_buf) == -1) return -1; + } + } + + if (size_esalts) + { + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_esalts, NULL, &device_param->opencl_d_esalt_bufs) == -1) return -1; + + if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_esalt_bufs, CL_FALSE, 0, size_esalts, hashes->esalts_buf, 0, NULL, NULL) == -1) 
return -1; + } + + if (hashconfig->st_hash != NULL) + { + if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_st_digests_buf, CL_FALSE, 0, size_st_digests, hashes->st_digests_buf, 0, NULL, NULL) == -1) return -1; + if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_st_salts_buf, CL_FALSE, 0, size_st_salts, hashes->st_salts_buf, 0, NULL, NULL) == -1) return -1; + + if (size_esalts) + { + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_st_esalts, NULL, &device_param->opencl_d_st_esalts_buf) == -1) return -1; + + if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_st_esalts_buf, CL_FALSE, 0, size_st_esalts, hashes->st_esalts_buf, 0, NULL, NULL) == -1) return -1; + } + } + + if (hc_clFlush (hashcat_ctx, device_param->opencl_command_queue) == -1) return -1; + } + + /** + * kernel args + */ + + device_param->kernel_param.bitmap_mask = bitmap_ctx->bitmap_mask; + device_param->kernel_param.bitmap_shift1 = bitmap_ctx->bitmap_shift1; + device_param->kernel_param.bitmap_shift2 = bitmap_ctx->bitmap_shift2; + device_param->kernel_param.salt_pos_host = 0; + device_param->kernel_param.loop_pos = 0; + device_param->kernel_param.loop_cnt = 0; + device_param->kernel_param.il_cnt = 0; + device_param->kernel_param.digests_cnt = 0; + device_param->kernel_param.digests_offset_host = 0; device_param->kernel_param.combs_mode = 0; device_param->kernel_param.salt_repeat = 0; device_param->kernel_param.combs_mode = 0; @@ -9147,357 +10676,1283 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) device_param->kernel_param.pws_pos = 0; device_param->kernel_param.gid_max = 0; - if (device_param->is_cuda == true) - { - device_param->kernel_params[ 0] = NULL; // &device_param->cuda_d_pws_buf; - device_param->kernel_params[ 1] = &device_param->cuda_d_rules_c; - device_param->kernel_params[ 2] = &device_param->cuda_d_combs_c; - device_param->kernel_params[ 3] = &device_param->cuda_d_bfs_c; - device_param->kernel_params[ 4] = NULL; // &device_param->cuda_d_tmps; - device_param->kernel_params[ 5] = NULL; // &device_param->cuda_d_hooks; - device_param->kernel_params[ 6] = &device_param->cuda_d_bitmap_s1_a; - device_param->kernel_params[ 7] = &device_param->cuda_d_bitmap_s1_b; - device_param->kernel_params[ 8] = &device_param->cuda_d_bitmap_s1_c; - device_param->kernel_params[ 9] = &device_param->cuda_d_bitmap_s1_d; - device_param->kernel_params[10] = &device_param->cuda_d_bitmap_s2_a; - device_param->kernel_params[11] = &device_param->cuda_d_bitmap_s2_b; - device_param->kernel_params[12] = &device_param->cuda_d_bitmap_s2_c; - device_param->kernel_params[13] = &device_param->cuda_d_bitmap_s2_d; - device_param->kernel_params[14] = &device_param->cuda_d_plain_bufs; - device_param->kernel_params[15] = &device_param->cuda_d_digests_buf; - device_param->kernel_params[16] = &device_param->cuda_d_digests_shown; - device_param->kernel_params[17] = &device_param->cuda_d_salt_bufs; - device_param->kernel_params[18] = &device_param->cuda_d_esalt_bufs; - device_param->kernel_params[19] = &device_param->cuda_d_result; - device_param->kernel_params[20] = &device_param->cuda_d_extra0_buf; - device_param->kernel_params[21] = &device_param->cuda_d_extra1_buf; - device_param->kernel_params[22] = &device_param->cuda_d_extra2_buf; - device_param->kernel_params[23] = &device_param->cuda_d_extra3_buf; - device_param->kernel_params[24] = 
&device_param->cuda_d_kernel_param; - } + if (device_param->is_cuda == true) + { + device_param->kernel_params[ 0] = NULL; // &device_param->cuda_d_pws_buf; + device_param->kernel_params[ 1] = &device_param->cuda_d_rules_c; + device_param->kernel_params[ 2] = &device_param->cuda_d_combs_c; + device_param->kernel_params[ 3] = &device_param->cuda_d_bfs_c; + device_param->kernel_params[ 4] = NULL; // &device_param->cuda_d_tmps; + device_param->kernel_params[ 5] = NULL; // &device_param->cuda_d_hooks; + device_param->kernel_params[ 6] = &device_param->cuda_d_bitmap_s1_a; + device_param->kernel_params[ 7] = &device_param->cuda_d_bitmap_s1_b; + device_param->kernel_params[ 8] = &device_param->cuda_d_bitmap_s1_c; + device_param->kernel_params[ 9] = &device_param->cuda_d_bitmap_s1_d; + device_param->kernel_params[10] = &device_param->cuda_d_bitmap_s2_a; + device_param->kernel_params[11] = &device_param->cuda_d_bitmap_s2_b; + device_param->kernel_params[12] = &device_param->cuda_d_bitmap_s2_c; + device_param->kernel_params[13] = &device_param->cuda_d_bitmap_s2_d; + device_param->kernel_params[14] = &device_param->cuda_d_plain_bufs; + device_param->kernel_params[15] = &device_param->cuda_d_digests_buf; + device_param->kernel_params[16] = &device_param->cuda_d_digests_shown; + device_param->kernel_params[17] = &device_param->cuda_d_salt_bufs; + device_param->kernel_params[18] = &device_param->cuda_d_esalt_bufs; + device_param->kernel_params[19] = &device_param->cuda_d_result; + device_param->kernel_params[20] = &device_param->cuda_d_extra0_buf; + device_param->kernel_params[21] = &device_param->cuda_d_extra1_buf; + device_param->kernel_params[22] = &device_param->cuda_d_extra2_buf; + device_param->kernel_params[23] = &device_param->cuda_d_extra3_buf; + device_param->kernel_params[24] = &device_param->cuda_d_kernel_param; + } + + if (device_param->is_hip == true) + { + device_param->kernel_params[ 0] = NULL; // &device_param->hip_d_pws_buf; + device_param->kernel_params[ 1] = &device_param->hip_d_rules_c; + device_param->kernel_params[ 2] = &device_param->hip_d_combs_c; + device_param->kernel_params[ 3] = &device_param->hip_d_bfs_c; + device_param->kernel_params[ 4] = NULL; // &device_param->hip_d_tmps; + device_param->kernel_params[ 5] = NULL; // &device_param->hip_d_hooks; + device_param->kernel_params[ 6] = &device_param->hip_d_bitmap_s1_a; + device_param->kernel_params[ 7] = &device_param->hip_d_bitmap_s1_b; + device_param->kernel_params[ 8] = &device_param->hip_d_bitmap_s1_c; + device_param->kernel_params[ 9] = &device_param->hip_d_bitmap_s1_d; + device_param->kernel_params[10] = &device_param->hip_d_bitmap_s2_a; + device_param->kernel_params[11] = &device_param->hip_d_bitmap_s2_b; + device_param->kernel_params[12] = &device_param->hip_d_bitmap_s2_c; + device_param->kernel_params[13] = &device_param->hip_d_bitmap_s2_d; + device_param->kernel_params[14] = &device_param->hip_d_plain_bufs; + device_param->kernel_params[15] = &device_param->hip_d_digests_buf; + device_param->kernel_params[16] = &device_param->hip_d_digests_shown; + device_param->kernel_params[17] = &device_param->hip_d_salt_bufs; + device_param->kernel_params[18] = &device_param->hip_d_esalt_bufs; + device_param->kernel_params[19] = &device_param->hip_d_result; + device_param->kernel_params[20] = &device_param->hip_d_extra0_buf; + device_param->kernel_params[21] = &device_param->hip_d_extra1_buf; + device_param->kernel_params[22] = &device_param->hip_d_extra2_buf; + device_param->kernel_params[23] = &device_param->hip_d_extra3_buf; + 
device_param->kernel_params[24] = &device_param->hip_d_kernel_param; + } + + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + device_param->kernel_params[ 0] = NULL; // device_param->metal_d_pws_buf; + device_param->kernel_params[ 1] = device_param->metal_d_rules_c; + device_param->kernel_params[ 2] = device_param->metal_d_combs_c; + device_param->kernel_params[ 3] = device_param->metal_d_bfs_c; + device_param->kernel_params[ 4] = NULL; // device_param->metal_d_tmps; + device_param->kernel_params[ 5] = NULL; // device_param->metal_d_hooks; + device_param->kernel_params[ 6] = device_param->metal_d_bitmap_s1_a; + device_param->kernel_params[ 7] = device_param->metal_d_bitmap_s1_b; + device_param->kernel_params[ 8] = device_param->metal_d_bitmap_s1_c; + device_param->kernel_params[ 9] = device_param->metal_d_bitmap_s1_d; + device_param->kernel_params[10] = device_param->metal_d_bitmap_s2_a; + device_param->kernel_params[11] = device_param->metal_d_bitmap_s2_b; + device_param->kernel_params[12] = device_param->metal_d_bitmap_s2_c; + device_param->kernel_params[13] = device_param->metal_d_bitmap_s2_d; + device_param->kernel_params[14] = device_param->metal_d_plain_bufs; + device_param->kernel_params[15] = device_param->metal_d_digests_buf; + device_param->kernel_params[16] = device_param->metal_d_digests_shown; + device_param->kernel_params[17] = device_param->metal_d_salt_bufs; + device_param->kernel_params[18] = device_param->metal_d_esalt_bufs; + device_param->kernel_params[19] = device_param->metal_d_result; + device_param->kernel_params[20] = device_param->metal_d_extra0_buf; + device_param->kernel_params[21] = device_param->metal_d_extra1_buf; + device_param->kernel_params[22] = device_param->metal_d_extra2_buf; + device_param->kernel_params[23] = device_param->metal_d_extra3_buf; + device_param->kernel_params[24] = device_param->metal_d_kernel_param; + } + #endif // __APPLE__ + + if (device_param->is_opencl == true) + { + device_param->kernel_params[ 0] = NULL; // &device_param->opencl_d_pws_buf; + device_param->kernel_params[ 1] = &device_param->opencl_d_rules_c; + device_param->kernel_params[ 2] = &device_param->opencl_d_combs_c; + device_param->kernel_params[ 3] = &device_param->opencl_d_bfs_c; + device_param->kernel_params[ 4] = NULL; // &device_param->opencl_d_tmps; + device_param->kernel_params[ 5] = NULL; // &device_param->opencl_d_hooks; + device_param->kernel_params[ 6] = &device_param->opencl_d_bitmap_s1_a; + device_param->kernel_params[ 7] = &device_param->opencl_d_bitmap_s1_b; + device_param->kernel_params[ 8] = &device_param->opencl_d_bitmap_s1_c; + device_param->kernel_params[ 9] = &device_param->opencl_d_bitmap_s1_d; + device_param->kernel_params[10] = &device_param->opencl_d_bitmap_s2_a; + device_param->kernel_params[11] = &device_param->opencl_d_bitmap_s2_b; + device_param->kernel_params[12] = &device_param->opencl_d_bitmap_s2_c; + device_param->kernel_params[13] = &device_param->opencl_d_bitmap_s2_d; + device_param->kernel_params[14] = &device_param->opencl_d_plain_bufs; + device_param->kernel_params[15] = &device_param->opencl_d_digests_buf; + device_param->kernel_params[16] = &device_param->opencl_d_digests_shown; + device_param->kernel_params[17] = &device_param->opencl_d_salt_bufs; + device_param->kernel_params[18] = &device_param->opencl_d_esalt_bufs; + device_param->kernel_params[19] = &device_param->opencl_d_result; + device_param->kernel_params[20] = &device_param->opencl_d_extra0_buf; + device_param->kernel_params[21] = 
&device_param->opencl_d_extra1_buf; + device_param->kernel_params[22] = &device_param->opencl_d_extra2_buf; + device_param->kernel_params[23] = &device_param->opencl_d_extra3_buf; + device_param->kernel_params[24] = &device_param->opencl_d_kernel_param; + } + + if (user_options->slow_candidates == true) + { + } + else + { + device_param->kernel_params_mp_buf64[3] = 0; + device_param->kernel_params_mp_buf32[4] = 0; + device_param->kernel_params_mp_buf32[5] = 0; + device_param->kernel_params_mp_buf32[6] = 0; + device_param->kernel_params_mp_buf32[7] = 0; + device_param->kernel_params_mp_buf64[8] = 0; + + if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL) + { + if (device_param->is_cuda == true) + { + device_param->kernel_params_mp[0] = &device_param->cuda_d_combs; + } + + if (device_param->is_hip == true) + { + device_param->kernel_params_mp[0] = &device_param->hip_d_combs; + } + + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + device_param->kernel_params_mp[0] = device_param->metal_d_combs; + } + #endif + + if (device_param->is_opencl == true) + { + device_param->kernel_params_mp[0] = &device_param->opencl_d_combs; + } + } + else + { + if (user_options->attack_mode == ATTACK_MODE_HYBRID1) + { + if (device_param->is_cuda == true) + { + device_param->kernel_params_mp[0] = &device_param->cuda_d_combs; + } + + if (device_param->is_hip == true) + { + device_param->kernel_params_mp[0] = &device_param->hip_d_combs; + } + + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + device_param->kernel_params_mp[0] = device_param->metal_d_combs; + } + #endif + + if (device_param->is_opencl == true) + { + device_param->kernel_params_mp[0] = &device_param->opencl_d_combs; + } + } + else + { + device_param->kernel_params_mp[0] = NULL; // (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + // ? 
&device_param->opencl_d_pws_buf + // : &device_param->opencl_d_pws_amp_buf; + } + } + + if (device_param->is_cuda == true) + { + device_param->kernel_params_mp[1] = &device_param->cuda_d_root_css_buf; + device_param->kernel_params_mp[2] = &device_param->cuda_d_markov_css_buf; + } + + if (device_param->is_hip == true) + { + device_param->kernel_params_mp[1] = &device_param->hip_d_root_css_buf; + device_param->kernel_params_mp[2] = &device_param->hip_d_markov_css_buf; + } + + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + device_param->kernel_params_mp[1] = device_param->metal_d_root_css_buf; + device_param->kernel_params_mp[2] = device_param->metal_d_markov_css_buf; + } + #endif + + if (device_param->is_opencl == true) + { + device_param->kernel_params_mp[1] = &device_param->opencl_d_root_css_buf; + device_param->kernel_params_mp[2] = &device_param->opencl_d_markov_css_buf; + } + + device_param->kernel_params_mp[3] = &device_param->kernel_params_mp_buf64[3]; + device_param->kernel_params_mp[4] = &device_param->kernel_params_mp_buf32[4]; + device_param->kernel_params_mp[5] = &device_param->kernel_params_mp_buf32[5]; + device_param->kernel_params_mp[6] = &device_param->kernel_params_mp_buf32[6]; + device_param->kernel_params_mp[7] = &device_param->kernel_params_mp_buf32[7]; + device_param->kernel_params_mp[8] = &device_param->kernel_params_mp_buf64[8]; + + device_param->kernel_params_mp_l_buf64[3] = 0; + device_param->kernel_params_mp_l_buf32[4] = 0; + device_param->kernel_params_mp_l_buf32[5] = 0; + device_param->kernel_params_mp_l_buf32[6] = 0; + device_param->kernel_params_mp_l_buf32[7] = 0; + device_param->kernel_params_mp_l_buf32[8] = 0; + device_param->kernel_params_mp_l_buf64[9] = 0; + + device_param->kernel_params_mp_l[0] = NULL; // (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + // ? 
&device_param->opencl_d_pws_buf + // : &device_param->opencl_d_pws_amp_buf; + + if (device_param->is_cuda == true) + { + device_param->kernel_params_mp_l[1] = &device_param->cuda_d_root_css_buf; + device_param->kernel_params_mp_l[2] = &device_param->cuda_d_markov_css_buf; + } + + if (device_param->is_hip == true) + { + device_param->kernel_params_mp_l[1] = &device_param->hip_d_root_css_buf; + device_param->kernel_params_mp_l[2] = &device_param->hip_d_markov_css_buf; + } + + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + device_param->kernel_params_mp_l[1] = device_param->metal_d_root_css_buf; + device_param->kernel_params_mp_l[2] = device_param->metal_d_markov_css_buf; + } + #endif + + if (device_param->is_opencl == true) + { + device_param->kernel_params_mp_l[1] = &device_param->opencl_d_root_css_buf; + device_param->kernel_params_mp_l[2] = &device_param->opencl_d_markov_css_buf; + } + + device_param->kernel_params_mp_l[3] = &device_param->kernel_params_mp_l_buf64[3]; + device_param->kernel_params_mp_l[4] = &device_param->kernel_params_mp_l_buf32[4]; + device_param->kernel_params_mp_l[5] = &device_param->kernel_params_mp_l_buf32[5]; + device_param->kernel_params_mp_l[6] = &device_param->kernel_params_mp_l_buf32[6]; + device_param->kernel_params_mp_l[7] = &device_param->kernel_params_mp_l_buf32[7]; + device_param->kernel_params_mp_l[8] = &device_param->kernel_params_mp_l_buf32[8]; + device_param->kernel_params_mp_l[9] = &device_param->kernel_params_mp_l_buf64[9]; + + device_param->kernel_params_mp_r_buf64[3] = 0; + device_param->kernel_params_mp_r_buf32[4] = 0; + device_param->kernel_params_mp_r_buf32[5] = 0; + device_param->kernel_params_mp_r_buf32[6] = 0; + device_param->kernel_params_mp_r_buf32[7] = 0; + device_param->kernel_params_mp_r_buf64[8] = 0; + + if (device_param->is_cuda == true) + { + device_param->kernel_params_mp_r[0] = &device_param->cuda_d_bfs; + device_param->kernel_params_mp_r[1] = &device_param->cuda_d_root_css_buf; + device_param->kernel_params_mp_r[2] = &device_param->cuda_d_markov_css_buf; + } + + if (device_param->is_hip == true) + { + device_param->kernel_params_mp_r[0] = &device_param->hip_d_bfs; + device_param->kernel_params_mp_r[1] = &device_param->hip_d_root_css_buf; + device_param->kernel_params_mp_r[2] = &device_param->hip_d_markov_css_buf; + } + + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + device_param->kernel_params_mp_r[0] = device_param->metal_d_bfs; + device_param->kernel_params_mp_r[1] = device_param->metal_d_root_css_buf; + device_param->kernel_params_mp_r[2] = device_param->metal_d_markov_css_buf; + } + #endif + + if (device_param->is_opencl == true) + { + device_param->kernel_params_mp_r[0] = &device_param->opencl_d_bfs; + device_param->kernel_params_mp_r[1] = &device_param->opencl_d_root_css_buf; + device_param->kernel_params_mp_r[2] = &device_param->opencl_d_markov_css_buf; + } + + device_param->kernel_params_mp_r[3] = &device_param->kernel_params_mp_r_buf64[3]; + device_param->kernel_params_mp_r[4] = &device_param->kernel_params_mp_r_buf32[4]; + device_param->kernel_params_mp_r[5] = &device_param->kernel_params_mp_r_buf32[5]; + device_param->kernel_params_mp_r[6] = &device_param->kernel_params_mp_r_buf32[6]; + device_param->kernel_params_mp_r[7] = &device_param->kernel_params_mp_r_buf32[7]; + device_param->kernel_params_mp_r[8] = &device_param->kernel_params_mp_r_buf64[8]; + + device_param->kernel_params_amp_buf32[5] = 0; // combs_mode + device_param->kernel_params_amp_buf64[6] = 0; // gid_max + + if 
(device_param->is_cuda == true) + { + device_param->kernel_params_amp[0] = NULL; // &device_param->cuda_d_pws_buf; + device_param->kernel_params_amp[1] = NULL; // &device_param->cuda_d_pws_amp_buf; + device_param->kernel_params_amp[2] = &device_param->cuda_d_rules_c; + device_param->kernel_params_amp[3] = &device_param->cuda_d_combs_c; + device_param->kernel_params_amp[4] = &device_param->cuda_d_bfs_c; + } + + if (device_param->is_hip == true) + { + device_param->kernel_params_amp[0] = NULL; // &device_param->hip_d_pws_buf; + device_param->kernel_params_amp[1] = NULL; // &device_param->hip_d_pws_amp_buf; + device_param->kernel_params_amp[2] = &device_param->hip_d_rules_c; + device_param->kernel_params_amp[3] = &device_param->hip_d_combs_c; + device_param->kernel_params_amp[4] = &device_param->hip_d_bfs_c; + } + + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + device_param->kernel_params_amp[0] = NULL; // device_param->metal_d_pws_buf; + device_param->kernel_params_amp[1] = NULL; // device_param->metal_d_pws_amp_buf; + device_param->kernel_params_amp[2] = device_param->metal_d_rules_c; + device_param->kernel_params_amp[3] = device_param->metal_d_combs_c; + device_param->kernel_params_amp[4] = device_param->metal_d_bfs_c; + } + #endif + + if (device_param->is_opencl == true) + { + device_param->kernel_params_amp[0] = NULL; // &device_param->opencl_d_pws_buf; + device_param->kernel_params_amp[1] = NULL; // &device_param->opencl_d_pws_amp_buf; + device_param->kernel_params_amp[2] = &device_param->opencl_d_rules_c; + device_param->kernel_params_amp[3] = &device_param->opencl_d_combs_c; + device_param->kernel_params_amp[4] = &device_param->opencl_d_bfs_c; + } + + device_param->kernel_params_amp[5] = &device_param->kernel_params_amp_buf32[5]; + device_param->kernel_params_amp[6] = &device_param->kernel_params_amp_buf64[6]; + + if (device_param->is_cuda == true) + { + device_param->kernel_params_tm[0] = &device_param->cuda_d_bfs_c; + device_param->kernel_params_tm[1] = &device_param->cuda_d_tm_c; + } + + if (device_param->is_hip == true) + { + device_param->kernel_params_tm[0] = &device_param->hip_d_bfs_c; + device_param->kernel_params_tm[1] = &device_param->hip_d_tm_c; + } + + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + device_param->kernel_params_tm[0] = device_param->metal_d_bfs_c; + device_param->kernel_params_tm[1] = device_param->metal_d_tm_c; + } + #endif + + if (device_param->is_opencl == true) + { + device_param->kernel_params_tm[0] = &device_param->opencl_d_bfs_c; + device_param->kernel_params_tm[1] = &device_param->opencl_d_tm_c; + } + } + + device_param->kernel_params_memset_buf32[1] = 0; // value + device_param->kernel_params_memset_buf64[2] = 0; // gid_max + + device_param->kernel_params_memset[0] = NULL; + device_param->kernel_params_memset[1] = &device_param->kernel_params_memset_buf32[1]; + device_param->kernel_params_memset[2] = &device_param->kernel_params_memset_buf64[2]; + + device_param->kernel_params_bzero_buf64[1] = 0; // gid_max + + device_param->kernel_params_bzero[0] = NULL; + device_param->kernel_params_bzero[1] = &device_param->kernel_params_bzero_buf64[1]; + + device_param->kernel_params_atinit_buf64[1] = 0; // gid_max + + device_param->kernel_params_atinit[0] = NULL; + device_param->kernel_params_atinit[1] = &device_param->kernel_params_atinit_buf64[1]; + + device_param->kernel_params_utf8toutf16le_buf64[1] = 0; // gid_max + + device_param->kernel_params_utf8toutf16le[0] = NULL; + 
device_param->kernel_params_utf8toutf16le[1] = &device_param->kernel_params_utf8toutf16le_buf64[1]; + + device_param->kernel_params_decompress_buf64[3] = 0; // gid_max + + if (device_param->is_cuda == true) + { + device_param->kernel_params_decompress[0] = NULL; // &device_param->cuda_d_pws_idx; + device_param->kernel_params_decompress[1] = NULL; // &device_param->cuda_d_pws_comp_buf; + device_param->kernel_params_decompress[2] = NULL; // (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + // ? &device_param->cuda_d_pws_buf + // : &device_param->cuda_d_pws_amp_buf; + } + + if (device_param->is_hip == true) + { + device_param->kernel_params_decompress[0] = NULL; // &device_param->hip_d_pws_idx; + device_param->kernel_params_decompress[1] = NULL; // &device_param->hip_d_pws_comp_buf; + device_param->kernel_params_decompress[2] = NULL; // (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + // ? &device_param->hip_d_pws_buf + // : &device_param->hip_d_pws_amp_buf; + } + + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + device_param->kernel_params_decompress[0] = NULL; // device_param->metal_d_pws_idx; + device_param->kernel_params_decompress[1] = NULL; // device_param->metal_d_pws_comp_buf; + device_param->kernel_params_decompress[2] = NULL; // (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + // ? device_param->metal_d_pws_buf + // : device_param->metal_d_pws_amp_buf; + } + #endif + + if (device_param->is_opencl == true) + { + device_param->kernel_params_decompress[0] = NULL; // &device_param->opencl_d_pws_idx; + device_param->kernel_params_decompress[1] = NULL; // &device_param->opencl_d_pws_comp_buf; + device_param->kernel_params_decompress[2] = NULL; // (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + // ? 
&device_param->opencl_d_pws_buf + // : &device_param->opencl_d_pws_amp_buf; + } + + device_param->kernel_params_decompress[3] = &device_param->kernel_params_decompress_buf64[3]; + + /** + * kernel name + */ + + if (device_param->is_cuda == true) + { + char kernel_name[64] = { 0 }; + + if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + { + if (hashconfig->opti_type & OPTI_TYPE_SINGLE_HASH) + { + if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL) + { + // kernel1 + + snprintf (kernel_name, sizeof (kernel_name), "m%05u_s%02d", kern_type, 4); + + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function1, device_param->cuda_module, kernel_name) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function1, &device_param->kernel_wgs1) == -1) return -1; + + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function1, &device_param->kernel_local_mem_size1) == -1) return -1; + + device_param->kernel_dynamic_local_mem_size1 = device_param->device_local_mem_size - device_param->kernel_local_mem_size1; + + device_param->kernel_preferred_wgs_multiple1 = device_param->cuda_warp_size; + + // kernel2 + + snprintf (kernel_name, sizeof (kernel_name), "m%05u_s%02d", kern_type, 8); + + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function2, device_param->cuda_module, kernel_name) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function2, &device_param->kernel_wgs2) == -1) return -1; + + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function2, &device_param->kernel_local_mem_size2) == -1) return -1; + + device_param->kernel_dynamic_local_mem_size2 = device_param->device_local_mem_size - device_param->kernel_local_mem_size2; + + device_param->kernel_preferred_wgs_multiple2 = device_param->cuda_warp_size; + + // kernel3 + + snprintf (kernel_name, sizeof (kernel_name), "m%05u_s%02d", kern_type, 16); + + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function3, device_param->cuda_module, kernel_name) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function3, &device_param->kernel_wgs3) == -1) return -1; + + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function3, &device_param->kernel_local_mem_size3) == -1) return -1; + + device_param->kernel_dynamic_local_mem_size3 = device_param->device_local_mem_size - device_param->kernel_local_mem_size3; + + device_param->kernel_preferred_wgs_multiple3 = device_param->cuda_warp_size; + } + else + { + snprintf (kernel_name, sizeof (kernel_name), "m%05u_sxx", kern_type); + + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function4, device_param->cuda_module, kernel_name) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); + + backend_kernel_create_warnings++; + + 
device_param->skipped_warning = true; + continue; + } + + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function4, &device_param->kernel_wgs4) == -1) return -1; + + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function4, &device_param->kernel_local_mem_size4) == -1) return -1; + + device_param->kernel_dynamic_local_mem_size4 = device_param->device_local_mem_size - device_param->kernel_local_mem_size4; + + device_param->kernel_preferred_wgs_multiple4 = device_param->cuda_warp_size; + } + } + else + { + if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL) + { + // kernel1 + + snprintf (kernel_name, sizeof (kernel_name), "m%05u_m%02d", kern_type, 4); + + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function1, device_param->cuda_module, kernel_name) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function1, &device_param->kernel_wgs1) == -1) return -1; + + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function1, &device_param->kernel_local_mem_size1) == -1) return -1; + + device_param->kernel_dynamic_local_mem_size1 = device_param->device_local_mem_size - device_param->kernel_local_mem_size1; + + device_param->kernel_preferred_wgs_multiple1 = device_param->cuda_warp_size; + + // kernel2 + + snprintf (kernel_name, sizeof (kernel_name), "m%05u_m%02d", kern_type, 8); + + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function2, device_param->cuda_module, kernel_name) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function2, &device_param->kernel_wgs2) == -1) return -1; + + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function2, &device_param->kernel_local_mem_size2) == -1) return -1; + + device_param->kernel_dynamic_local_mem_size2 = device_param->device_local_mem_size - device_param->kernel_local_mem_size2; + + device_param->kernel_preferred_wgs_multiple2 = device_param->cuda_warp_size; + + // kernel3 + + snprintf (kernel_name, sizeof (kernel_name), "m%05u_m%02d", kern_type, 16); + + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function3, device_param->cuda_module, kernel_name) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function3, &device_param->kernel_wgs3) == -1) return -1; + + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function3, &device_param->kernel_local_mem_size3) == -1) return -1; + + device_param->kernel_dynamic_local_mem_size3 = device_param->device_local_mem_size - device_param->kernel_local_mem_size3; + + device_param->kernel_preferred_wgs_multiple3 = device_param->cuda_warp_size; + } + else + { + snprintf (kernel_name, sizeof (kernel_name), "m%05u_mxx", kern_type); + + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function4, device_param->cuda_module, kernel_name) == -1) + { + event_log_warning (hashcat_ctx, 
"* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function4, &device_param->kernel_wgs4) == -1) return -1; + + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function4, &device_param->kernel_local_mem_size4) == -1) return -1; + + device_param->kernel_dynamic_local_mem_size4 = device_param->device_local_mem_size - device_param->kernel_local_mem_size4; + + device_param->kernel_preferred_wgs_multiple4 = device_param->cuda_warp_size; + } + } + + if (user_options->slow_candidates == true) + { + } + else + { + if (user_options->attack_mode == ATTACK_MODE_BF) + { + if (hashconfig->opts_type & OPTS_TYPE_TM_KERNEL) + { + snprintf (kernel_name, sizeof (kernel_name), "m%05u_tm", kern_type); + + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_tm, device_param->cuda_module, kernel_name) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_tm, &device_param->kernel_wgs_tm) == -1) return -1; + + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_tm, &device_param->kernel_local_mem_size_tm) == -1) return -1; + + device_param->kernel_dynamic_local_mem_size_tm = device_param->device_local_mem_size - device_param->kernel_local_mem_size_tm; + + device_param->kernel_preferred_wgs_multiple_tm = device_param->cuda_warp_size; + } + } + } + } + else + { + // kernel1 + + snprintf (kernel_name, sizeof (kernel_name), "m%05u_init", kern_type); + + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function1, device_param->cuda_module, kernel_name) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function1, &device_param->kernel_wgs1) == -1) return -1; + + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function1, &device_param->kernel_local_mem_size1) == -1) return -1; + + device_param->kernel_dynamic_local_mem_size1 = device_param->device_local_mem_size - device_param->kernel_local_mem_size1; + + device_param->kernel_preferred_wgs_multiple1 = device_param->cuda_warp_size; + + // kernel2 + + snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop", kern_type); + + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function2, device_param->cuda_module, kernel_name) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function2, &device_param->kernel_wgs2) == -1) return -1; + + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function2, &device_param->kernel_local_mem_size2) == -1) return -1; + + device_param->kernel_dynamic_local_mem_size2 = device_param->device_local_mem_size - device_param->kernel_local_mem_size2; + + device_param->kernel_preferred_wgs_multiple2 = device_param->cuda_warp_size; + + // 
kernel3 + + snprintf (kernel_name, sizeof (kernel_name), "m%05u_comp", kern_type); + + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function3, device_param->cuda_module, kernel_name) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function3, &device_param->kernel_wgs3) == -1) return -1; + + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function3, &device_param->kernel_local_mem_size3) == -1) return -1; + + device_param->kernel_dynamic_local_mem_size3 = device_param->device_local_mem_size - device_param->kernel_local_mem_size3; + + device_param->kernel_preferred_wgs_multiple3 = device_param->cuda_warp_size; + + if (hashconfig->opts_type & OPTS_TYPE_LOOP_PREPARE) + { + // kernel2p + + snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop_prepare", kern_type); + + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function2p, device_param->cuda_module, kernel_name) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function2p, &device_param->kernel_wgs2p) == -1) return -1; + + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function2p, &device_param->kernel_local_mem_size2p) == -1) return -1; + + device_param->kernel_dynamic_local_mem_size2p = device_param->device_local_mem_size - device_param->kernel_local_mem_size2p; + + device_param->kernel_preferred_wgs_multiple2p = device_param->cuda_warp_size; + } + + if (hashconfig->opts_type & OPTS_TYPE_LOOP_EXTENDED) + { + // kernel2e - if (device_param->is_hip == true) - { - device_param->kernel_params[ 0] = NULL; // &device_param->hip_d_pws_buf; - device_param->kernel_params[ 1] = &device_param->hip_d_rules_c; - device_param->kernel_params[ 2] = &device_param->hip_d_combs_c; - device_param->kernel_params[ 3] = &device_param->hip_d_bfs_c; - device_param->kernel_params[ 4] = NULL; // &device_param->hip_d_tmps; - device_param->kernel_params[ 5] = NULL; // &device_param->hip_d_hooks; - device_param->kernel_params[ 6] = &device_param->hip_d_bitmap_s1_a; - device_param->kernel_params[ 7] = &device_param->hip_d_bitmap_s1_b; - device_param->kernel_params[ 8] = &device_param->hip_d_bitmap_s1_c; - device_param->kernel_params[ 9] = &device_param->hip_d_bitmap_s1_d; - device_param->kernel_params[10] = &device_param->hip_d_bitmap_s2_a; - device_param->kernel_params[11] = &device_param->hip_d_bitmap_s2_b; - device_param->kernel_params[12] = &device_param->hip_d_bitmap_s2_c; - device_param->kernel_params[13] = &device_param->hip_d_bitmap_s2_d; - device_param->kernel_params[14] = &device_param->hip_d_plain_bufs; - device_param->kernel_params[15] = &device_param->hip_d_digests_buf; - device_param->kernel_params[16] = &device_param->hip_d_digests_shown; - device_param->kernel_params[17] = &device_param->hip_d_salt_bufs; - device_param->kernel_params[18] = &device_param->hip_d_esalt_bufs; - device_param->kernel_params[19] = &device_param->hip_d_result; - device_param->kernel_params[20] = &device_param->hip_d_extra0_buf; - device_param->kernel_params[21] = &device_param->hip_d_extra1_buf; - device_param->kernel_params[22] = 
&device_param->hip_d_extra2_buf; - device_param->kernel_params[23] = &device_param->hip_d_extra3_buf; - device_param->kernel_params[24] = &device_param->hip_d_kernel_param; - } + snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop_extended", kern_type); - if (device_param->is_opencl == true) - { - device_param->kernel_params[ 0] = NULL; // &device_param->opencl_d_pws_buf; - device_param->kernel_params[ 1] = &device_param->opencl_d_rules_c; - device_param->kernel_params[ 2] = &device_param->opencl_d_combs_c; - device_param->kernel_params[ 3] = &device_param->opencl_d_bfs_c; - device_param->kernel_params[ 4] = NULL; // &device_param->opencl_d_tmps; - device_param->kernel_params[ 5] = NULL; // &device_param->opencl_d_hooks; - device_param->kernel_params[ 6] = &device_param->opencl_d_bitmap_s1_a; - device_param->kernel_params[ 7] = &device_param->opencl_d_bitmap_s1_b; - device_param->kernel_params[ 8] = &device_param->opencl_d_bitmap_s1_c; - device_param->kernel_params[ 9] = &device_param->opencl_d_bitmap_s1_d; - device_param->kernel_params[10] = &device_param->opencl_d_bitmap_s2_a; - device_param->kernel_params[11] = &device_param->opencl_d_bitmap_s2_b; - device_param->kernel_params[12] = &device_param->opencl_d_bitmap_s2_c; - device_param->kernel_params[13] = &device_param->opencl_d_bitmap_s2_d; - device_param->kernel_params[14] = &device_param->opencl_d_plain_bufs; - device_param->kernel_params[15] = &device_param->opencl_d_digests_buf; - device_param->kernel_params[16] = &device_param->opencl_d_digests_shown; - device_param->kernel_params[17] = &device_param->opencl_d_salt_bufs; - device_param->kernel_params[18] = &device_param->opencl_d_esalt_bufs; - device_param->kernel_params[19] = &device_param->opencl_d_result; - device_param->kernel_params[20] = &device_param->opencl_d_extra0_buf; - device_param->kernel_params[21] = &device_param->opencl_d_extra1_buf; - device_param->kernel_params[22] = &device_param->opencl_d_extra2_buf; - device_param->kernel_params[23] = &device_param->opencl_d_extra3_buf; - device_param->kernel_params[24] = &device_param->opencl_d_kernel_param; - } + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function2e, device_param->cuda_module, kernel_name) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function2e, &device_param->kernel_wgs2e) == -1) return -1; + + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function2e, &device_param->kernel_local_mem_size2e) == -1) return -1; + + device_param->kernel_dynamic_local_mem_size2e = device_param->device_local_mem_size - device_param->kernel_local_mem_size2e; + + device_param->kernel_preferred_wgs_multiple2e = device_param->cuda_warp_size; + } + + // kernel12 + + if (hashconfig->opts_type & OPTS_TYPE_HOOK12) + { + snprintf (kernel_name, sizeof (kernel_name), "m%05u_hook12", kern_type); + + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function12, device_param->cuda_module, kernel_name) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function12, &device_param->kernel_wgs12) == -1) return -1; + + if 
(get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function12, &device_param->kernel_local_mem_size12) == -1) return -1; + + device_param->kernel_dynamic_local_mem_size12 = device_param->device_local_mem_size - device_param->kernel_local_mem_size12; + + device_param->kernel_preferred_wgs_multiple12 = device_param->cuda_warp_size; + } + + // kernel23 + + if (hashconfig->opts_type & OPTS_TYPE_HOOK23) + { + snprintf (kernel_name, sizeof (kernel_name), "m%05u_hook23", kern_type); + + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function23, device_param->cuda_module, kernel_name) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function23, &device_param->kernel_wgs23) == -1) return -1; + + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function23, &device_param->kernel_local_mem_size23) == -1) return -1; + + device_param->kernel_dynamic_local_mem_size23 = device_param->device_local_mem_size - device_param->kernel_local_mem_size23; + + device_param->kernel_preferred_wgs_multiple23 = device_param->cuda_warp_size; + } + + // init2 + + if (hashconfig->opts_type & OPTS_TYPE_INIT2) + { + snprintf (kernel_name, sizeof (kernel_name), "m%05u_init2", kern_type); + + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_init2, device_param->cuda_module, kernel_name) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_init2, &device_param->kernel_wgs_init2) == -1) return -1; + + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_init2, &device_param->kernel_local_mem_size_init2) == -1) return -1; + + device_param->kernel_dynamic_local_mem_size_init2 = device_param->device_local_mem_size - device_param->kernel_local_mem_size_init2; + + device_param->kernel_preferred_wgs_multiple_init2 = device_param->cuda_warp_size; + } + + // loop2 prepare + + if (hashconfig->opts_type & OPTS_TYPE_LOOP2_PREPARE) + { + snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop2_prepare", kern_type); + + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_loop2p, device_param->cuda_module, kernel_name) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_loop2p, &device_param->kernel_wgs_loop2p) == -1) return -1; + + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_loop2p, &device_param->kernel_local_mem_size_loop2p) == -1) return -1; + + device_param->kernel_dynamic_local_mem_size_loop2p = device_param->device_local_mem_size - device_param->kernel_local_mem_size_loop2p; + + device_param->kernel_preferred_wgs_multiple_loop2p = device_param->cuda_warp_size; + } + + // loop2 + + if (hashconfig->opts_type & OPTS_TYPE_LOOP2) + { + snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop2", kern_type); + + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_loop2, 
device_param->cuda_module, kernel_name) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_loop2, &device_param->kernel_wgs_loop2) == -1) return -1; + + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_loop2, &device_param->kernel_local_mem_size_loop2) == -1) return -1; + + device_param->kernel_dynamic_local_mem_size_loop2 = device_param->device_local_mem_size - device_param->kernel_local_mem_size_loop2; + + device_param->kernel_preferred_wgs_multiple_loop2 = device_param->cuda_warp_size; + } + + // aux1 + + if (hashconfig->opts_type & OPTS_TYPE_AUX1) + { + snprintf (kernel_name, sizeof (kernel_name), "m%05u_aux1", kern_type); + + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_aux1, device_param->cuda_module, kernel_name) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_aux1, &device_param->kernel_wgs_aux1) == -1) return -1; + + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_aux1, &device_param->kernel_local_mem_size_aux1) == -1) return -1; + + device_param->kernel_dynamic_local_mem_size_aux1 = device_param->device_local_mem_size - device_param->kernel_local_mem_size_aux1; + + device_param->kernel_preferred_wgs_multiple_aux1 = device_param->cuda_warp_size; + } - if (user_options->slow_candidates == true) - { - } - else - { - device_param->kernel_params_mp_buf64[3] = 0; - device_param->kernel_params_mp_buf32[4] = 0; - device_param->kernel_params_mp_buf32[5] = 0; - device_param->kernel_params_mp_buf32[6] = 0; - device_param->kernel_params_mp_buf32[7] = 0; - device_param->kernel_params_mp_buf64[8] = 0; + // aux2 - if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL) - { - if (device_param->is_cuda == true) + if (hashconfig->opts_type & OPTS_TYPE_AUX2) { - device_param->kernel_params_mp[0] = &device_param->cuda_d_combs; + snprintf (kernel_name, sizeof (kernel_name), "m%05u_aux2", kern_type); + + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_aux2, device_param->cuda_module, kernel_name) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_aux2, &device_param->kernel_wgs_aux2) == -1) return -1; + + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_aux2, &device_param->kernel_local_mem_size_aux2) == -1) return -1; + + device_param->kernel_dynamic_local_mem_size_aux2 = device_param->device_local_mem_size - device_param->kernel_local_mem_size_aux2; + + device_param->kernel_preferred_wgs_multiple_aux2 = device_param->cuda_warp_size; } - if (device_param->is_hip == true) + // aux3 + + if (hashconfig->opts_type & OPTS_TYPE_AUX3) { - device_param->kernel_params_mp[0] = &device_param->hip_d_combs; + snprintf (kernel_name, sizeof (kernel_name), "m%05u_aux3", kern_type); + + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_aux3, 
device_param->cuda_module, kernel_name) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_aux3, &device_param->kernel_wgs_aux3) == -1) return -1; + + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_aux3, &device_param->kernel_local_mem_size_aux3) == -1) return -1; + + device_param->kernel_dynamic_local_mem_size_aux3 = device_param->device_local_mem_size - device_param->kernel_local_mem_size_aux3; + + device_param->kernel_preferred_wgs_multiple_aux3 = device_param->cuda_warp_size; } - if (device_param->is_opencl == true) + // aux4 + + if (hashconfig->opts_type & OPTS_TYPE_AUX4) { - device_param->kernel_params_mp[0] = &device_param->opencl_d_combs; + snprintf (kernel_name, sizeof (kernel_name), "m%05u_aux4", kern_type); + + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_aux4, device_param->cuda_module, kernel_name) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_aux4, &device_param->kernel_wgs_aux4) == -1) return -1; + + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_aux4, &device_param->kernel_local_mem_size_aux4) == -1) return -1; + + device_param->kernel_dynamic_local_mem_size_aux4 = device_param->device_local_mem_size - device_param->kernel_local_mem_size_aux4; + + device_param->kernel_preferred_wgs_multiple_aux4 = device_param->cuda_warp_size; } } + + //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 0, sizeof (cl_mem), device_param->kernel_params_decompress[0]); if (CL_rc == -1) return -1; + //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 1, sizeof (cl_mem), device_param->kernel_params_decompress[1]); if (CL_rc == -1) return -1; + //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 2, sizeof (cl_mem), device_param->kernel_params_decompress[2]); if (CL_rc == -1) return -1; + //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 3, sizeof (cl_ulong), device_param->kernel_params_decompress[3]); if (CL_rc == -1) return -1; + + // MP start + + if (user_options->slow_candidates == true) + { + } else { - if (user_options->attack_mode == ATTACK_MODE_HYBRID1) + if (user_options->attack_mode == ATTACK_MODE_BF) { - if (device_param->is_cuda == true) + // mp_l + + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_mp_l, device_param->cuda_module_mp, "l_markov") == -1) { - device_param->kernel_params_mp[0] = &device_param->cuda_d_combs; + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "l_markov"); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; } - if (device_param->is_hip == true) + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_mp_l, &device_param->kernel_wgs_mp_l) == -1) return -1; + + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_mp_l, &device_param->kernel_local_mem_size_mp_l) == -1) return -1; + + 
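/*
 * Every kernel entry point handled in these hunks (m%05u_loop_extended,
 * _hook12, _hook23, _init2, _loop2_prepare, _loop2, _aux1.._aux4, plus the
 * l_markov / r_markov candidate generators) follows the same five steps:
 * resolve the function in the compiled module, query its maximum workgroup
 * size, query its static local-memory usage, derive the dynamic local-memory
 * headroom from the device total, and take the warp size as the preferred
 * workgroup-size multiple. The hypothetical helper below sketches the query
 * step against the raw CUDA driver API; the hc_cuModuleGetFunction and
 * get_cuda_kernel_* wrappers used above add hashcat's error reporting on top,
 * so their exact internals are an assumption here, not a quote of them.
 */

#include <cuda.h>

// illustrative helper, not part of the hashcat sources
static int query_kernel_limits_sketch (CUmodule module, const char *name, int *max_threads, int *static_lmem)
{
  CUfunction func;

  // resolve the kernel by its generated name, e.g. "m00000_loop2"
  if (cuModuleGetFunction (&func, module, name) != CUDA_SUCCESS) return -1;

  // largest workgroup (thread block) the kernel can be launched with
  if (cuFuncGetAttribute (max_threads, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, func) != CUDA_SUCCESS) return -1;

  // statically allocated shared/local memory of the kernel, in bytes
  if (cuFuncGetAttribute (static_lmem, CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, func) != CUDA_SUCCESS) return -1;

  return 0;
}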
device_param->kernel_dynamic_local_mem_size_mp_l = device_param->device_local_mem_size - device_param->kernel_local_mem_size_mp_l; + + device_param->kernel_preferred_wgs_multiple_mp_l = device_param->cuda_warp_size; + + // mp_r + + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_mp_r, device_param->cuda_module_mp, "r_markov") == -1) { - device_param->kernel_params_mp[0] = &device_param->hip_d_combs; + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "r_markov"); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; } - if (device_param->is_opencl == true) + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_mp_r, &device_param->kernel_wgs_mp_r) == -1) return -1; + + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_mp_r, &device_param->kernel_local_mem_size_mp_r) == -1) return -1; + + device_param->kernel_dynamic_local_mem_size_mp_r = device_param->device_local_mem_size - device_param->kernel_local_mem_size_mp_r; + + device_param->kernel_preferred_wgs_multiple_mp_r = device_param->cuda_warp_size; + + if (user_options->attack_mode == ATTACK_MODE_BF) { - device_param->kernel_params_mp[0] = &device_param->opencl_d_combs; + if (hashconfig->opts_type & OPTS_TYPE_TM_KERNEL) + { + //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_tm, 0, sizeof (cl_mem), device_param->kernel_params_tm[0]); if (CL_rc == -1) return -1; + //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_tm, 1, sizeof (cl_mem), device_param->kernel_params_tm[1]); if (CL_rc == -1) return -1; + } } } - else + else if (user_options->attack_mode == ATTACK_MODE_HYBRID1) { - device_param->kernel_params_mp[0] = NULL; // (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) - // ? 
&device_param->opencl_d_pws_buf - // : &device_param->opencl_d_pws_amp_buf; - } - } + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_mp, device_param->cuda_module_mp, "C_markov") == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "C_markov"); - if (device_param->is_cuda == true) - { - device_param->kernel_params_mp[1] = &device_param->cuda_d_root_css_buf; - device_param->kernel_params_mp[2] = &device_param->cuda_d_markov_css_buf; - } + backend_kernel_create_warnings++; - if (device_param->is_hip == true) - { - device_param->kernel_params_mp[1] = &device_param->hip_d_root_css_buf; - device_param->kernel_params_mp[2] = &device_param->hip_d_markov_css_buf; - } + device_param->skipped_warning = true; + continue; + } - if (device_param->is_opencl == true) - { - device_param->kernel_params_mp[1] = &device_param->opencl_d_root_css_buf; - device_param->kernel_params_mp[2] = &device_param->opencl_d_markov_css_buf; - } + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_mp, &device_param->kernel_wgs_mp) == -1) return -1; - device_param->kernel_params_mp[3] = &device_param->kernel_params_mp_buf64[3]; - device_param->kernel_params_mp[4] = &device_param->kernel_params_mp_buf32[4]; - device_param->kernel_params_mp[5] = &device_param->kernel_params_mp_buf32[5]; - device_param->kernel_params_mp[6] = &device_param->kernel_params_mp_buf32[6]; - device_param->kernel_params_mp[7] = &device_param->kernel_params_mp_buf32[7]; - device_param->kernel_params_mp[8] = &device_param->kernel_params_mp_buf64[8]; + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_mp, &device_param->kernel_local_mem_size_mp) == -1) return -1; - device_param->kernel_params_mp_l_buf64[3] = 0; - device_param->kernel_params_mp_l_buf32[4] = 0; - device_param->kernel_params_mp_l_buf32[5] = 0; - device_param->kernel_params_mp_l_buf32[6] = 0; - device_param->kernel_params_mp_l_buf32[7] = 0; - device_param->kernel_params_mp_l_buf32[8] = 0; - device_param->kernel_params_mp_l_buf64[9] = 0; + device_param->kernel_dynamic_local_mem_size_mp = device_param->device_local_mem_size - device_param->kernel_local_mem_size_mp; - device_param->kernel_params_mp_l[0] = NULL; // (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) - // ? 
&device_param->opencl_d_pws_buf - // : &device_param->opencl_d_pws_amp_buf; + device_param->kernel_preferred_wgs_multiple_mp = device_param->cuda_warp_size; + } + else if (user_options->attack_mode == ATTACK_MODE_HYBRID2) + { + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_mp, device_param->cuda_module_mp, "C_markov") == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "C_markov"); - if (device_param->is_cuda == true) - { - device_param->kernel_params_mp_l[1] = &device_param->cuda_d_root_css_buf; - device_param->kernel_params_mp_l[2] = &device_param->cuda_d_markov_css_buf; - } + backend_kernel_create_warnings++; - if (device_param->is_hip == true) - { - device_param->kernel_params_mp_l[1] = &device_param->hip_d_root_css_buf; - device_param->kernel_params_mp_l[2] = &device_param->hip_d_markov_css_buf; + device_param->skipped_warning = true; + continue; + } + + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_mp, &device_param->kernel_wgs_mp) == -1) return -1; + + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_mp, &device_param->kernel_local_mem_size_mp) == -1) return -1; + + device_param->kernel_dynamic_local_mem_size_mp = device_param->device_local_mem_size - device_param->kernel_local_mem_size_mp; + + device_param->kernel_preferred_wgs_multiple_mp = device_param->cuda_warp_size; + } } - if (device_param->is_opencl == true) + if (user_options->slow_candidates == true) { - device_param->kernel_params_mp_l[1] = &device_param->opencl_d_root_css_buf; - device_param->kernel_params_mp_l[2] = &device_param->opencl_d_markov_css_buf; } + else + { + if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + { + // nothing to do + } + else + { + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_amp, device_param->cuda_module_amp, "amp") == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "amp"); - device_param->kernel_params_mp_l[3] = &device_param->kernel_params_mp_l_buf64[3]; - device_param->kernel_params_mp_l[4] = &device_param->kernel_params_mp_l_buf32[4]; - device_param->kernel_params_mp_l[5] = &device_param->kernel_params_mp_l_buf32[5]; - device_param->kernel_params_mp_l[6] = &device_param->kernel_params_mp_l_buf32[6]; - device_param->kernel_params_mp_l[7] = &device_param->kernel_params_mp_l_buf32[7]; - device_param->kernel_params_mp_l[8] = &device_param->kernel_params_mp_l_buf32[8]; - device_param->kernel_params_mp_l[9] = &device_param->kernel_params_mp_l_buf64[9]; + backend_kernel_create_warnings++; - device_param->kernel_params_mp_r_buf64[3] = 0; - device_param->kernel_params_mp_r_buf32[4] = 0; - device_param->kernel_params_mp_r_buf32[5] = 0; - device_param->kernel_params_mp_r_buf32[6] = 0; - device_param->kernel_params_mp_r_buf32[7] = 0; - device_param->kernel_params_mp_r_buf64[8] = 0; + device_param->skipped_warning = true; + continue; + } + + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_amp, &device_param->kernel_wgs_amp) == -1) return -1; - if (device_param->is_cuda == true) - { - device_param->kernel_params_mp_r[0] = &device_param->cuda_d_bfs; - device_param->kernel_params_mp_r[1] = &device_param->cuda_d_root_css_buf; - device_param->kernel_params_mp_r[2] = &device_param->cuda_d_markov_css_buf; - } + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_amp, &device_param->kernel_local_mem_size_amp) == -1) return -1; - if 
(device_param->is_hip == true) - { - device_param->kernel_params_mp_r[0] = &device_param->hip_d_bfs; - device_param->kernel_params_mp_r[1] = &device_param->hip_d_root_css_buf; - device_param->kernel_params_mp_r[2] = &device_param->hip_d_markov_css_buf; - } + device_param->kernel_dynamic_local_mem_size_amp = device_param->device_local_mem_size - device_param->kernel_local_mem_size_amp; - if (device_param->is_opencl == true) - { - device_param->kernel_params_mp_r[0] = &device_param->opencl_d_bfs; - device_param->kernel_params_mp_r[1] = &device_param->opencl_d_root_css_buf; - device_param->kernel_params_mp_r[2] = &device_param->opencl_d_markov_css_buf; - } + device_param->kernel_preferred_wgs_multiple_amp = device_param->cuda_warp_size; + } - device_param->kernel_params_mp_r[3] = &device_param->kernel_params_mp_r_buf64[3]; - device_param->kernel_params_mp_r[4] = &device_param->kernel_params_mp_r_buf32[4]; - device_param->kernel_params_mp_r[5] = &device_param->kernel_params_mp_r_buf32[5]; - device_param->kernel_params_mp_r[6] = &device_param->kernel_params_mp_r_buf32[6]; - device_param->kernel_params_mp_r[7] = &device_param->kernel_params_mp_r_buf32[7]; - device_param->kernel_params_mp_r[8] = &device_param->kernel_params_mp_r_buf64[8]; + /* + if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + { + // nothing to do + } + else + { + for (u32 i = 0; i < 5; i++) + { + //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_amp, i, sizeof (cl_mem), device_param->kernel_params_amp[i]); - device_param->kernel_params_amp_buf32[5] = 0; // combs_mode - device_param->kernel_params_amp_buf64[6] = 0; // gid_max + //if (CL_rc == -1) return -1; + } - if (device_param->is_cuda == true) - { - device_param->kernel_params_amp[0] = NULL; // &device_param->cuda_d_pws_buf; - device_param->kernel_params_amp[1] = NULL; // &device_param->cuda_d_pws_amp_buf; - device_param->kernel_params_amp[2] = &device_param->cuda_d_rules_c; - device_param->kernel_params_amp[3] = &device_param->cuda_d_combs_c; - device_param->kernel_params_amp[4] = &device_param->cuda_d_bfs_c; - } + for (u32 i = 5; i < 6; i++) + { + //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_amp, i, sizeof (cl_uint), device_param->kernel_params_amp[i]); - if (device_param->is_hip == true) - { - device_param->kernel_params_amp[0] = NULL; // &device_param->hip_d_pws_buf; - device_param->kernel_params_amp[1] = NULL; // &device_param->hip_d_pws_amp_buf; - device_param->kernel_params_amp[2] = &device_param->hip_d_rules_c; - device_param->kernel_params_amp[3] = &device_param->hip_d_combs_c; - device_param->kernel_params_amp[4] = &device_param->hip_d_bfs_c; - } + //if (CL_rc == -1) return -1; + } - if (device_param->is_opencl == true) - { - device_param->kernel_params_amp[0] = NULL; // &device_param->opencl_d_pws_buf; - device_param->kernel_params_amp[1] = NULL; // &device_param->opencl_d_pws_amp_buf; - device_param->kernel_params_amp[2] = &device_param->opencl_d_rules_c; - device_param->kernel_params_amp[3] = &device_param->opencl_d_combs_c; - device_param->kernel_params_amp[4] = &device_param->opencl_d_bfs_c; + for (u32 i = 6; i < 7; i++) + { + //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_amp, i, sizeof (cl_ulong), device_param->kernel_params_amp[i]); + + //if (CL_rc == -1) return -1; + } + } + */ } - device_param->kernel_params_amp[5] = &device_param->kernel_params_amp_buf32[5]; - device_param->kernel_params_amp[6] = &device_param->kernel_params_amp_buf64[6]; + // zero some data buffers - if 
(device_param->is_cuda == true) + if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_plain_bufs, device_param->size_plains) == -1) return -1; + if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_digests_shown, device_param->size_shown) == -1) return -1; + if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_result, device_param->size_results) == -1) return -1; + + /** + * special buffers + */ + + if (user_options->slow_candidates == true) { - device_param->kernel_params_tm[0] = &device_param->cuda_d_bfs_c; - device_param->kernel_params_tm[1] = &device_param->cuda_d_tm_c; + if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_rules_c, size_rules_c) == -1) return -1; } - - if (device_param->is_hip == true) + else { - device_param->kernel_params_tm[0] = &device_param->hip_d_bfs_c; - device_param->kernel_params_tm[1] = &device_param->hip_d_tm_c; + if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT) + { + if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_rules_c, size_rules_c) == -1) return -1; + } + else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI) + { + if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_combs, size_combs) == -1) return -1; + if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_combs_c, size_combs) == -1) return -1; + if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_root_css_buf, size_root_css) == -1) return -1; + if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_markov_css_buf, size_markov_css) == -1) return -1; + } + else if (user_options_extra->attack_kern == ATTACK_KERN_BF) + { + if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_bfs, size_bfs) == -1) return -1; + if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_bfs_c, size_bfs) == -1) return -1; + if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_tm_c, size_tm) == -1) return -1; + if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_root_css_buf, size_root_css) == -1) return -1; + if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_markov_css_buf, size_markov_css) == -1) return -1; + } } - if (device_param->is_opencl == true) + if (user_options->slow_candidates == true) { - device_param->kernel_params_tm[0] = &device_param->opencl_d_bfs_c; - device_param->kernel_params_tm[1] = &device_param->opencl_d_tm_c; } - } - - device_param->kernel_params_memset_buf32[1] = 0; // value - device_param->kernel_params_memset_buf64[2] = 0; // gid_max - - device_param->kernel_params_memset[0] = NULL; - device_param->kernel_params_memset[1] = &device_param->kernel_params_memset_buf32[1]; - device_param->kernel_params_memset[2] = &device_param->kernel_params_memset_buf64[2]; - - device_param->kernel_params_bzero_buf64[1] = 0; // gid_max - - device_param->kernel_params_bzero[0] = NULL; - device_param->kernel_params_bzero[1] = &device_param->kernel_params_bzero_buf64[1]; + else + { + if ((user_options->attack_mode == ATTACK_MODE_HYBRID1) || (user_options->attack_mode == ATTACK_MODE_HYBRID2)) + { + /** + * prepare mp + */ - device_param->kernel_params_atinit_buf64[1] = 0; // gid_max + if (user_options->attack_mode == ATTACK_MODE_HYBRID1) + { + device_param->kernel_params_mp_buf32[5] = 0; + device_param->kernel_params_mp_buf32[6] = 0; + device_param->kernel_params_mp_buf32[7] = 0; - 
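/*
 * Before the first launch, backend_session_begin () zeroes the result-carrying
 * buffers (plain bufs, digests_shown, result) and, depending on the selected
 * attack kernel, the rules/combs/bfs and Markov css buffers, so stale device
 * memory can never surface as a false crack. run_cuda_kernel_bzero () routes
 * this through a device-side bzero kernel; functionally it amounts to a
 * byte-wise clear of the allocation, roughly like the hypothetical driver-API
 * sketch below (an illustration only, not the wrapper's actual implementation).
 */

#include <cuda.h>

// illustrative helper, not part of the hashcat sources
static int bzero_device_buffer_sketch (CUdeviceptr buf, size_t size)
{
  // clear the whole device allocation to zero
  if (cuMemsetD8 (buf, 0, size) != CUDA_SUCCESS) return -1;

  return 0;
}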
device_param->kernel_params_atinit[0] = NULL; - device_param->kernel_params_atinit[1] = &device_param->kernel_params_atinit_buf64[1]; + if (hashconfig->opts_type & OPTS_TYPE_PT_ADD01) device_param->kernel_params_mp_buf32[5] = full01; + if (hashconfig->opts_type & OPTS_TYPE_PT_ADD06) device_param->kernel_params_mp_buf32[5] = full06; + if (hashconfig->opts_type & OPTS_TYPE_PT_ADD80) device_param->kernel_params_mp_buf32[5] = full80; + if (hashconfig->opts_type & OPTS_TYPE_PT_ADDBITS14) device_param->kernel_params_mp_buf32[6] = 1; + if (hashconfig->opts_type & OPTS_TYPE_PT_ADDBITS15) device_param->kernel_params_mp_buf32[7] = 1; + } + else if (user_options->attack_mode == ATTACK_MODE_HYBRID2) + { + device_param->kernel_params_mp_buf32[5] = 0; + device_param->kernel_params_mp_buf32[6] = 0; + device_param->kernel_params_mp_buf32[7] = 0; + } - device_param->kernel_params_utf8toutf16le_buf64[1] = 0; // gid_max + //for (u32 i = 0; i < 3; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp, i, sizeof (cl_mem), device_param->kernel_params_mp[i]); if (CL_rc == -1) return -1; } + } + else if (user_options->attack_mode == ATTACK_MODE_BF) + { + /** + * prepare mp_r and mp_l + */ - device_param->kernel_params_utf8toutf16le[0] = NULL; - device_param->kernel_params_utf8toutf16le[1] = &device_param->kernel_params_utf8toutf16le_buf64[1]; + device_param->kernel_params_mp_l_buf32[6] = 0; + device_param->kernel_params_mp_l_buf32[7] = 0; + device_param->kernel_params_mp_l_buf32[8] = 0; - device_param->kernel_params_decompress_buf64[3] = 0; // gid_max + if (hashconfig->opts_type & OPTS_TYPE_PT_ADD01) device_param->kernel_params_mp_l_buf32[6] = full01; + if (hashconfig->opts_type & OPTS_TYPE_PT_ADD06) device_param->kernel_params_mp_l_buf32[6] = full06; + if (hashconfig->opts_type & OPTS_TYPE_PT_ADD80) device_param->kernel_params_mp_l_buf32[6] = full80; + if (hashconfig->opts_type & OPTS_TYPE_PT_ADDBITS14) device_param->kernel_params_mp_l_buf32[7] = 1; + if (hashconfig->opts_type & OPTS_TYPE_PT_ADDBITS15) device_param->kernel_params_mp_l_buf32[8] = 1; - if (device_param->is_cuda == true) - { - device_param->kernel_params_decompress[0] = NULL; // &device_param->cuda_d_pws_idx; - device_param->kernel_params_decompress[1] = NULL; // &device_param->cuda_d_pws_comp_buf; - device_param->kernel_params_decompress[2] = NULL; // (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) - // ? &device_param->cuda_d_pws_buf - // : &device_param->cuda_d_pws_amp_buf; + //for (u32 i = 0; i < 3; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_l, i, sizeof (cl_mem), device_param->kernel_params_mp_l[i]); if (CL_rc == -1) return -1; } + //for (u32 i = 0; i < 3; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_r, i, sizeof (cl_mem), device_param->kernel_params_mp_r[i]); if (CL_rc == -1) return -1; } + } + } } if (device_param->is_hip == true) - { - device_param->kernel_params_decompress[0] = NULL; // &device_param->hip_d_pws_idx; - device_param->kernel_params_decompress[1] = NULL; // &device_param->hip_d_pws_comp_buf; - device_param->kernel_params_decompress[2] = NULL; // (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) - // ? 
&device_param->hip_d_pws_buf - // : &device_param->hip_d_pws_amp_buf; - } - - if (device_param->is_opencl == true) - { - device_param->kernel_params_decompress[0] = NULL; // &device_param->opencl_d_pws_idx; - device_param->kernel_params_decompress[1] = NULL; // &device_param->opencl_d_pws_comp_buf; - device_param->kernel_params_decompress[2] = NULL; // (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) - // ? &device_param->opencl_d_pws_buf - // : &device_param->opencl_d_pws_amp_buf; - } - - device_param->kernel_params_decompress[3] = &device_param->kernel_params_decompress_buf64[3]; - - /** - * kernel name - */ - - if (device_param->is_cuda == true) { char kernel_name[64] = { 0 }; @@ -9511,7 +11966,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) snprintf (kernel_name, sizeof (kernel_name), "m%05u_s%02d", kern_type, 4); - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function1, device_param->cuda_module, kernel_name) == -1) + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function1, device_param->hip_module, kernel_name) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -9521,19 +11976,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function1, &device_param->kernel_wgs1) == -1) return -1; + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function1, &device_param->kernel_wgs1) == -1) return -1; - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function1, &device_param->kernel_local_mem_size1) == -1) return -1; + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function1, &device_param->kernel_local_mem_size1) == -1) return -1; device_param->kernel_dynamic_local_mem_size1 = device_param->device_local_mem_size - device_param->kernel_local_mem_size1; - device_param->kernel_preferred_wgs_multiple1 = device_param->cuda_warp_size; + device_param->kernel_preferred_wgs_multiple1 = device_param->hip_warp_size; // kernel2 snprintf (kernel_name, sizeof (kernel_name), "m%05u_s%02d", kern_type, 8); - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function2, device_param->cuda_module, kernel_name) == -1) + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function2, device_param->hip_module, kernel_name) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -9543,19 +11998,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function2, &device_param->kernel_wgs2) == -1) return -1; + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function2, &device_param->kernel_wgs2) == -1) return -1; - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function2, &device_param->kernel_local_mem_size2) == -1) return -1; + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function2, &device_param->kernel_local_mem_size2) == -1) return -1; device_param->kernel_dynamic_local_mem_size2 = device_param->device_local_mem_size - device_param->kernel_local_mem_size2; - device_param->kernel_preferred_wgs_multiple2 = device_param->cuda_warp_size; + device_param->kernel_preferred_wgs_multiple2 = device_param->hip_warp_size; // kernel3 snprintf (kernel_name, sizeof (kernel_name), "m%05u_s%02d", kern_type, 16); - if (hc_cuModuleGetFunction (hashcat_ctx, 
&device_param->cuda_function3, device_param->cuda_module, kernel_name) == -1) + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function3, device_param->hip_module, kernel_name) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -9565,19 +12020,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function3, &device_param->kernel_wgs3) == -1) return -1; + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function3, &device_param->kernel_wgs3) == -1) return -1; - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function3, &device_param->kernel_local_mem_size3) == -1) return -1; + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function3, &device_param->kernel_local_mem_size3) == -1) return -1; device_param->kernel_dynamic_local_mem_size3 = device_param->device_local_mem_size - device_param->kernel_local_mem_size3; - device_param->kernel_preferred_wgs_multiple3 = device_param->cuda_warp_size; + device_param->kernel_preferred_wgs_multiple3 = device_param->hip_warp_size; } else { snprintf (kernel_name, sizeof (kernel_name), "m%05u_sxx", kern_type); - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function4, device_param->cuda_module, kernel_name) == -1) + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function4, device_param->hip_module, kernel_name) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -9587,13 +12042,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function4, &device_param->kernel_wgs4) == -1) return -1; + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function4, &device_param->kernel_wgs4) == -1) return -1; - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function4, &device_param->kernel_local_mem_size4) == -1) return -1; + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function4, &device_param->kernel_local_mem_size4) == -1) return -1; device_param->kernel_dynamic_local_mem_size4 = device_param->device_local_mem_size - device_param->kernel_local_mem_size4; - device_param->kernel_preferred_wgs_multiple4 = device_param->cuda_warp_size; + device_param->kernel_preferred_wgs_multiple4 = device_param->hip_warp_size; } } else @@ -9604,7 +12059,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) snprintf (kernel_name, sizeof (kernel_name), "m%05u_m%02d", kern_type, 4); - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function1, device_param->cuda_module, kernel_name) == -1) + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function1, device_param->hip_module, kernel_name) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -9614,19 +12069,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function1, &device_param->kernel_wgs1) == -1) return -1; + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function1, &device_param->kernel_wgs1) == -1) return -1; - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function1, &device_param->kernel_local_mem_size1) == -1) return -1; + if (get_hip_kernel_local_mem_size (hashcat_ctx, 
device_param->hip_function1, &device_param->kernel_local_mem_size1) == -1) return -1; device_param->kernel_dynamic_local_mem_size1 = device_param->device_local_mem_size - device_param->kernel_local_mem_size1; - device_param->kernel_preferred_wgs_multiple1 = device_param->cuda_warp_size; + device_param->kernel_preferred_wgs_multiple1 = device_param->hip_warp_size; // kernel2 snprintf (kernel_name, sizeof (kernel_name), "m%05u_m%02d", kern_type, 8); - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function2, device_param->cuda_module, kernel_name) == -1) + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function2, device_param->hip_module, kernel_name) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -9636,19 +12091,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function2, &device_param->kernel_wgs2) == -1) return -1; + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function2, &device_param->kernel_wgs2) == -1) return -1; - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function2, &device_param->kernel_local_mem_size2) == -1) return -1; + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function2, &device_param->kernel_local_mem_size2) == -1) return -1; device_param->kernel_dynamic_local_mem_size2 = device_param->device_local_mem_size - device_param->kernel_local_mem_size2; - device_param->kernel_preferred_wgs_multiple2 = device_param->cuda_warp_size; + device_param->kernel_preferred_wgs_multiple2 = device_param->hip_warp_size; // kernel3 snprintf (kernel_name, sizeof (kernel_name), "m%05u_m%02d", kern_type, 16); - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function3, device_param->cuda_module, kernel_name) == -1) + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function3, device_param->hip_module, kernel_name) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -9658,19 +12113,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function3, &device_param->kernel_wgs3) == -1) return -1; + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function3, &device_param->kernel_wgs3) == -1) return -1; - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function3, &device_param->kernel_local_mem_size3) == -1) return -1; + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function3, &device_param->kernel_local_mem_size3) == -1) return -1; device_param->kernel_dynamic_local_mem_size3 = device_param->device_local_mem_size - device_param->kernel_local_mem_size3; - device_param->kernel_preferred_wgs_multiple3 = device_param->cuda_warp_size; + device_param->kernel_preferred_wgs_multiple3 = device_param->hip_warp_size; } else { snprintf (kernel_name, sizeof (kernel_name), "m%05u_mxx", kern_type); - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function4, device_param->cuda_module, kernel_name) == -1) + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function4, device_param->hip_module, kernel_name) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -9680,13 +12135,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if 
(get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function4, &device_param->kernel_wgs4) == -1) return -1; + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function4, &device_param->kernel_wgs4) == -1) return -1; - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function4, &device_param->kernel_local_mem_size4) == -1) return -1; + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function4, &device_param->kernel_local_mem_size4) == -1) return -1; device_param->kernel_dynamic_local_mem_size4 = device_param->device_local_mem_size - device_param->kernel_local_mem_size4; - device_param->kernel_preferred_wgs_multiple4 = device_param->cuda_warp_size; + device_param->kernel_preferred_wgs_multiple4 = device_param->hip_warp_size; } } @@ -9701,7 +12156,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) { snprintf (kernel_name, sizeof (kernel_name), "m%05u_tm", kern_type); - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_tm, device_param->cuda_module, kernel_name) == -1) + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_tm, device_param->hip_module, kernel_name) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -9711,13 +12166,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_tm, &device_param->kernel_wgs_tm) == -1) return -1; + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_tm, &device_param->kernel_wgs_tm) == -1) return -1; - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_tm, &device_param->kernel_local_mem_size_tm) == -1) return -1; + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_tm, &device_param->kernel_local_mem_size_tm) == -1) return -1; device_param->kernel_dynamic_local_mem_size_tm = device_param->device_local_mem_size - device_param->kernel_local_mem_size_tm; - device_param->kernel_preferred_wgs_multiple_tm = device_param->cuda_warp_size; + device_param->kernel_preferred_wgs_multiple_tm = device_param->hip_warp_size; } } } @@ -9728,7 +12183,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) snprintf (kernel_name, sizeof (kernel_name), "m%05u_init", kern_type); - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function1, device_param->cuda_module, kernel_name) == -1) + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function1, device_param->hip_module, kernel_name) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -9738,19 +12193,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function1, &device_param->kernel_wgs1) == -1) return -1; + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function1, &device_param->kernel_wgs1) == -1) return -1; - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function1, &device_param->kernel_local_mem_size1) == -1) return -1; + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function1, &device_param->kernel_local_mem_size1) == -1) return -1; device_param->kernel_dynamic_local_mem_size1 = device_param->device_local_mem_size - device_param->kernel_local_mem_size1; - device_param->kernel_preferred_wgs_multiple1 = device_param->cuda_warp_size; + 
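/*
 * These hunks, and the ones that follow, mechanically mirror the CUDA block
 * for the HIP backend: hc_hipModuleGetFunction / get_hip_kernel_wgs /
 * get_hip_kernel_local_mem_size replace their cuda_ counterparts, the
 * function and module handles move to the hip_* fields, and the preferred
 * workgroup multiple comes from hip_warp_size instead of cuda_warp_size,
 * while the warning-and-skip control flow stays identical. The HIP module
 * API mirrors the CUDA driver API almost one-to-one, so the query sketch
 * shown earlier translates as in the hypothetical helper below (attribute
 * names assumed to be the usual HIP equivalents).
 */

#include <hip/hip_runtime.h>

// illustrative helper, not part of the hashcat sources
static int query_hip_kernel_limits_sketch (hipModule_t module, const char *name, int *max_threads, int *static_lmem)
{
  hipFunction_t func;

  // resolve the kernel by its generated name
  if (hipModuleGetFunction (&func, module, name) != hipSuccess) return -1;

  // largest workgroup the kernel can be launched with
  if (hipFuncGetAttribute (max_threads, HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, func) != hipSuccess) return -1;

  // statically allocated local memory of the kernel, in bytes
  if (hipFuncGetAttribute (static_lmem, HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, func) != hipSuccess) return -1;

  return 0;
}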
device_param->kernel_preferred_wgs_multiple1 = device_param->hip_warp_size; // kernel2 snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop", kern_type); - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function2, device_param->cuda_module, kernel_name) == -1) + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function2, device_param->hip_module, kernel_name) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -9760,19 +12215,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function2, &device_param->kernel_wgs2) == -1) return -1; + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function2, &device_param->kernel_wgs2) == -1) return -1; - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function2, &device_param->kernel_local_mem_size2) == -1) return -1; + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function2, &device_param->kernel_local_mem_size2) == -1) return -1; device_param->kernel_dynamic_local_mem_size2 = device_param->device_local_mem_size - device_param->kernel_local_mem_size2; - device_param->kernel_preferred_wgs_multiple2 = device_param->cuda_warp_size; + device_param->kernel_preferred_wgs_multiple2 = device_param->hip_warp_size; // kernel3 snprintf (kernel_name, sizeof (kernel_name), "m%05u_comp", kern_type); - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function3, device_param->cuda_module, kernel_name) == -1) + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function3, device_param->hip_module, kernel_name) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -9782,13 +12237,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function3, &device_param->kernel_wgs3) == -1) return -1; + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function3, &device_param->kernel_wgs3) == -1) return -1; - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function3, &device_param->kernel_local_mem_size3) == -1) return -1; + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function3, &device_param->kernel_local_mem_size3) == -1) return -1; device_param->kernel_dynamic_local_mem_size3 = device_param->device_local_mem_size - device_param->kernel_local_mem_size3; - device_param->kernel_preferred_wgs_multiple3 = device_param->cuda_warp_size; + device_param->kernel_preferred_wgs_multiple3 = device_param->hip_warp_size; if (hashconfig->opts_type & OPTS_TYPE_LOOP_PREPARE) { @@ -9796,7 +12251,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop_prepare", kern_type); - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function2p, device_param->cuda_module, kernel_name) == -1) + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function2p, device_param->hip_module, kernel_name) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -9806,13 +12261,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function2p, &device_param->kernel_wgs2p) == -1) return -1; + if (get_hip_kernel_wgs (hashcat_ctx, 
device_param->hip_function2p, &device_param->kernel_wgs2p) == -1) return -1; - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function2p, &device_param->kernel_local_mem_size2p) == -1) return -1; + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function2p, &device_param->kernel_local_mem_size2p) == -1) return -1; device_param->kernel_dynamic_local_mem_size2p = device_param->device_local_mem_size - device_param->kernel_local_mem_size2p; - device_param->kernel_preferred_wgs_multiple2p = device_param->cuda_warp_size; + device_param->kernel_preferred_wgs_multiple2p = device_param->hip_warp_size; } if (hashconfig->opts_type & OPTS_TYPE_LOOP_EXTENDED) @@ -9821,7 +12276,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop_extended", kern_type); - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function2e, device_param->cuda_module, kernel_name) == -1) + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function2e, device_param->hip_module, kernel_name) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -9831,13 +12286,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function2e, &device_param->kernel_wgs2e) == -1) return -1; + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function2e, &device_param->kernel_wgs2e) == -1) return -1; - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function2e, &device_param->kernel_local_mem_size2e) == -1) return -1; + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function2e, &device_param->kernel_local_mem_size2e) == -1) return -1; device_param->kernel_dynamic_local_mem_size2e = device_param->device_local_mem_size - device_param->kernel_local_mem_size2e; - device_param->kernel_preferred_wgs_multiple2e = device_param->cuda_warp_size; + device_param->kernel_preferred_wgs_multiple2e = device_param->hip_warp_size; } // kernel12 @@ -9846,7 +12301,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) { snprintf (kernel_name, sizeof (kernel_name), "m%05u_hook12", kern_type); - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function12, device_param->cuda_module, kernel_name) == -1) + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function12, device_param->hip_module, kernel_name) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -9856,13 +12311,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function12, &device_param->kernel_wgs12) == -1) return -1; + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function12, &device_param->kernel_wgs12) == -1) return -1; - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function12, &device_param->kernel_local_mem_size12) == -1) return -1; + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function12, &device_param->kernel_local_mem_size12) == -1) return -1; device_param->kernel_dynamic_local_mem_size12 = device_param->device_local_mem_size - device_param->kernel_local_mem_size12; - device_param->kernel_preferred_wgs_multiple12 = device_param->cuda_warp_size; + device_param->kernel_preferred_wgs_multiple12 = device_param->hip_warp_size; } // kernel23 @@ -9871,7 +12326,7 
@@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) { snprintf (kernel_name, sizeof (kernel_name), "m%05u_hook23", kern_type); - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function23, device_param->cuda_module, kernel_name) == -1) + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function23, device_param->hip_module, kernel_name) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -9881,13 +12336,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function23, &device_param->kernel_wgs23) == -1) return -1; + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function23, &device_param->kernel_wgs23) == -1) return -1; - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function23, &device_param->kernel_local_mem_size23) == -1) return -1; + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function23, &device_param->kernel_local_mem_size23) == -1) return -1; device_param->kernel_dynamic_local_mem_size23 = device_param->device_local_mem_size - device_param->kernel_local_mem_size23; - device_param->kernel_preferred_wgs_multiple23 = device_param->cuda_warp_size; + device_param->kernel_preferred_wgs_multiple23 = device_param->hip_warp_size; } // init2 @@ -9896,7 +12351,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) { snprintf (kernel_name, sizeof (kernel_name), "m%05u_init2", kern_type); - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_init2, device_param->cuda_module, kernel_name) == -1) + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_init2, device_param->hip_module, kernel_name) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -9906,13 +12361,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_init2, &device_param->kernel_wgs_init2) == -1) return -1; + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_init2, &device_param->kernel_wgs_init2) == -1) return -1; - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_init2, &device_param->kernel_local_mem_size_init2) == -1) return -1; + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_init2, &device_param->kernel_local_mem_size_init2) == -1) return -1; device_param->kernel_dynamic_local_mem_size_init2 = device_param->device_local_mem_size - device_param->kernel_local_mem_size_init2; - device_param->kernel_preferred_wgs_multiple_init2 = device_param->cuda_warp_size; + device_param->kernel_preferred_wgs_multiple_init2 = device_param->hip_warp_size; } // loop2 prepare @@ -9921,7 +12376,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) { snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop2_prepare", kern_type); - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_loop2p, device_param->cuda_module, kernel_name) == -1) + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_loop2p, device_param->hip_module, kernel_name) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -9931,13 +12386,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_cuda_kernel_wgs (hashcat_ctx, 
device_param->cuda_function_loop2p, &device_param->kernel_wgs_loop2p) == -1) return -1; + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_loop2p, &device_param->kernel_wgs_loop2p) == -1) return -1; - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_loop2p, &device_param->kernel_local_mem_size_loop2p) == -1) return -1; + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_loop2p, &device_param->kernel_local_mem_size_loop2p) == -1) return -1; device_param->kernel_dynamic_local_mem_size_loop2p = device_param->device_local_mem_size - device_param->kernel_local_mem_size_loop2p; - device_param->kernel_preferred_wgs_multiple_loop2p = device_param->cuda_warp_size; + device_param->kernel_preferred_wgs_multiple_loop2p = device_param->hip_warp_size; } // loop2 @@ -9946,7 +12401,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) { snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop2", kern_type); - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_loop2, device_param->cuda_module, kernel_name) == -1) + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_loop2, device_param->hip_module, kernel_name) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -9956,13 +12411,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_loop2, &device_param->kernel_wgs_loop2) == -1) return -1; + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_loop2, &device_param->kernel_wgs_loop2) == -1) return -1; - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_loop2, &device_param->kernel_local_mem_size_loop2) == -1) return -1; + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_loop2, &device_param->kernel_local_mem_size_loop2) == -1) return -1; device_param->kernel_dynamic_local_mem_size_loop2 = device_param->device_local_mem_size - device_param->kernel_local_mem_size_loop2; - device_param->kernel_preferred_wgs_multiple_loop2 = device_param->cuda_warp_size; + device_param->kernel_preferred_wgs_multiple_loop2 = device_param->hip_warp_size; } // aux1 @@ -9971,7 +12426,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) { snprintf (kernel_name, sizeof (kernel_name), "m%05u_aux1", kern_type); - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_aux1, device_param->cuda_module, kernel_name) == -1) + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_aux1, device_param->hip_module, kernel_name) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -9981,13 +12436,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_aux1, &device_param->kernel_wgs_aux1) == -1) return -1; + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_aux1, &device_param->kernel_wgs_aux1) == -1) return -1; - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_aux1, &device_param->kernel_local_mem_size_aux1) == -1) return -1; + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_aux1, &device_param->kernel_local_mem_size_aux1) == -1) return -1; device_param->kernel_dynamic_local_mem_size_aux1 = device_param->device_local_mem_size - 
device_param->kernel_local_mem_size_aux1; - device_param->kernel_preferred_wgs_multiple_aux1 = device_param->cuda_warp_size; + device_param->kernel_preferred_wgs_multiple_aux1 = device_param->hip_warp_size; } // aux2 @@ -9996,7 +12451,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) { snprintf (kernel_name, sizeof (kernel_name), "m%05u_aux2", kern_type); - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_aux2, device_param->cuda_module, kernel_name) == -1) + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_aux2, device_param->hip_module, kernel_name) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -10006,13 +12461,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_aux2, &device_param->kernel_wgs_aux2) == -1) return -1; + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_aux2, &device_param->kernel_wgs_aux2) == -1) return -1; - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_aux2, &device_param->kernel_local_mem_size_aux2) == -1) return -1; + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_aux2, &device_param->kernel_local_mem_size_aux2) == -1) return -1; device_param->kernel_dynamic_local_mem_size_aux2 = device_param->device_local_mem_size - device_param->kernel_local_mem_size_aux2; - device_param->kernel_preferred_wgs_multiple_aux2 = device_param->cuda_warp_size; + device_param->kernel_preferred_wgs_multiple_aux2 = device_param->hip_warp_size; } // aux3 @@ -10021,7 +12476,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) { snprintf (kernel_name, sizeof (kernel_name), "m%05u_aux3", kern_type); - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_aux3, device_param->cuda_module, kernel_name) == -1) + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_aux3, device_param->hip_module, kernel_name) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -10031,13 +12486,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_aux3, &device_param->kernel_wgs_aux3) == -1) return -1; + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_aux3, &device_param->kernel_wgs_aux3) == -1) return -1; - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_aux3, &device_param->kernel_local_mem_size_aux3) == -1) return -1; + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_aux3, &device_param->kernel_local_mem_size_aux3) == -1) return -1; device_param->kernel_dynamic_local_mem_size_aux3 = device_param->device_local_mem_size - device_param->kernel_local_mem_size_aux3; - device_param->kernel_preferred_wgs_multiple_aux3 = device_param->cuda_warp_size; + device_param->kernel_preferred_wgs_multiple_aux3 = device_param->hip_warp_size; } // aux4 @@ -10046,7 +12501,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) { snprintf (kernel_name, sizeof (kernel_name), "m%05u_aux4", kern_type); - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_aux4, device_param->cuda_module, kernel_name) == -1) + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_aux4, device_param->hip_module, kernel_name) == -1) { event_log_warning 
(hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -10056,13 +12511,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_aux4, &device_param->kernel_wgs_aux4) == -1) return -1; + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_aux4, &device_param->kernel_wgs_aux4) == -1) return -1; - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_aux4, &device_param->kernel_local_mem_size_aux4) == -1) return -1; + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_aux4, &device_param->kernel_local_mem_size_aux4) == -1) return -1; device_param->kernel_dynamic_local_mem_size_aux4 = device_param->device_local_mem_size - device_param->kernel_local_mem_size_aux4; - device_param->kernel_preferred_wgs_multiple_aux4 = device_param->cuda_warp_size; + device_param->kernel_preferred_wgs_multiple_aux4 = device_param->hip_warp_size; } } @@ -10082,7 +12537,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) { // mp_l - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_mp_l, device_param->cuda_module_mp, "l_markov") == -1) + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_mp_l, device_param->hip_module_mp, "l_markov") == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "l_markov"); @@ -10092,17 +12547,17 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_mp_l, &device_param->kernel_wgs_mp_l) == -1) return -1; + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_mp_l, &device_param->kernel_wgs_mp_l) == -1) return -1; - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_mp_l, &device_param->kernel_local_mem_size_mp_l) == -1) return -1; + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_mp_l, &device_param->kernel_local_mem_size_mp_l) == -1) return -1; device_param->kernel_dynamic_local_mem_size_mp_l = device_param->device_local_mem_size - device_param->kernel_local_mem_size_mp_l; - device_param->kernel_preferred_wgs_multiple_mp_l = device_param->cuda_warp_size; + device_param->kernel_preferred_wgs_multiple_mp_l = device_param->hip_warp_size; // mp_r - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_mp_r, device_param->cuda_module_mp, "r_markov") == -1) + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_mp_r, device_param->hip_module_mp, "r_markov") == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "r_markov"); @@ -10112,13 +12567,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_mp_r, &device_param->kernel_wgs_mp_r) == -1) return -1; + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_mp_r, &device_param->kernel_wgs_mp_r) == -1) return -1; - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_mp_r, &device_param->kernel_local_mem_size_mp_r) == -1) return -1; + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_mp_r, &device_param->kernel_local_mem_size_mp_r) == -1) return -1; device_param->kernel_dynamic_local_mem_size_mp_r = device_param->device_local_mem_size - device_param->kernel_local_mem_size_mp_r; - 
device_param->kernel_preferred_wgs_multiple_mp_r = device_param->cuda_warp_size; + device_param->kernel_preferred_wgs_multiple_mp_r = device_param->hip_warp_size; if (user_options->attack_mode == ATTACK_MODE_BF) { @@ -10131,7 +12586,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) } else if (user_options->attack_mode == ATTACK_MODE_HYBRID1) { - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_mp, device_param->cuda_module_mp, "C_markov") == -1) + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_mp, device_param->hip_module_mp, "C_markov") == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "C_markov"); @@ -10141,17 +12596,17 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_mp, &device_param->kernel_wgs_mp) == -1) return -1; + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_mp, &device_param->kernel_wgs_mp) == -1) return -1; - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_mp, &device_param->kernel_local_mem_size_mp) == -1) return -1; + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_mp, &device_param->kernel_local_mem_size_mp) == -1) return -1; device_param->kernel_dynamic_local_mem_size_mp = device_param->device_local_mem_size - device_param->kernel_local_mem_size_mp; - device_param->kernel_preferred_wgs_multiple_mp = device_param->cuda_warp_size; + device_param->kernel_preferred_wgs_multiple_mp = device_param->hip_warp_size; } else if (user_options->attack_mode == ATTACK_MODE_HYBRID2) { - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_mp, device_param->cuda_module_mp, "C_markov") == -1) + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_mp, device_param->hip_module_mp, "C_markov") == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "C_markov"); @@ -10161,13 +12616,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_mp, &device_param->kernel_wgs_mp) == -1) return -1; + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_mp, &device_param->kernel_wgs_mp) == -1) return -1; - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_mp, &device_param->kernel_local_mem_size_mp) == -1) return -1; + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_mp, &device_param->kernel_local_mem_size_mp) == -1) return -1; device_param->kernel_dynamic_local_mem_size_mp = device_param->device_local_mem_size - device_param->kernel_local_mem_size_mp; - device_param->kernel_preferred_wgs_multiple_mp = device_param->cuda_warp_size; + device_param->kernel_preferred_wgs_multiple_mp = device_param->hip_warp_size; } } @@ -10182,7 +12637,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) } else { - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_amp, device_param->cuda_module_amp, "amp") == -1) + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_amp, device_param->hip_module_amp, "amp") == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "amp"); @@ -10192,13 +12647,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_cuda_kernel_wgs (hashcat_ctx, 
device_param->cuda_function_amp, &device_param->kernel_wgs_amp) == -1) return -1; + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_amp, &device_param->kernel_wgs_amp) == -1) return -1; - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_amp, &device_param->kernel_local_mem_size_amp) == -1) return -1; + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_amp, &device_param->kernel_local_mem_size_amp) == -1) return -1; device_param->kernel_dynamic_local_mem_size_amp = device_param->device_local_mem_size - device_param->kernel_local_mem_size_amp; - device_param->kernel_preferred_wgs_multiple_amp = device_param->cuda_warp_size; + device_param->kernel_preferred_wgs_multiple_amp = device_param->hip_warp_size; } /* @@ -10234,9 +12689,9 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) // zero some data buffers - if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_plain_bufs, device_param->size_plains) == -1) return -1; - if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_digests_shown, device_param->size_shown) == -1) return -1; - if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_result, device_param->size_results) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_plain_bufs, device_param->size_plains) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_digests_shown, device_param->size_shown) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_result, device_param->size_results) == -1) return -1; /** * special buffers @@ -10244,28 +12699,28 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) if (user_options->slow_candidates == true) { - if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_rules_c, size_rules_c) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_rules_c, size_rules_c) == -1) return -1; } else { if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT) { - if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_rules_c, size_rules_c) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_rules_c, size_rules_c) == -1) return -1; } else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI) { - if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_combs, size_combs) == -1) return -1; - if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_combs_c, size_combs) == -1) return -1; - if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_root_css_buf, size_root_css) == -1) return -1; - if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_markov_css_buf, size_markov_css) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_combs, size_combs) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_combs_c, size_combs) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_root_css_buf, size_root_css) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_markov_css_buf, size_markov_css) == -1) return -1; } else if (user_options_extra->attack_kern == ATTACK_KERN_BF) { - if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_bfs, size_bfs) 
== -1) return -1; - if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_bfs_c, size_bfs) == -1) return -1; - if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_tm_c, size_tm) == -1) return -1; - if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_root_css_buf, size_root_css) == -1) return -1; - if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_markov_css_buf, size_markov_css) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_bfs, size_bfs) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_bfs_c, size_bfs) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_tm_c, size_tm) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_root_css_buf, size_root_css) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_markov_css_buf, size_markov_css) == -1) return -1; } } @@ -10323,7 +12778,8 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) } } - if (device_param->is_hip == true) + #if defined (__APPLE__) + if (device_param->is_metal == true) { char kernel_name[64] = { 0 }; @@ -10333,11 +12789,11 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) { if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL) { - // kernel1 + // kernel1: m%05u_s%02d snprintf (kernel_name, sizeof (kernel_name), "m%05u_s%02d", kern_type, 4); - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function1, device_param->hip_module, kernel_name) == -1) + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library, kernel_name, &device_param->metal_function1, &device_param->metal_pipeline1) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -10347,19 +12803,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function1, &device_param->kernel_wgs1) == -1) return -1; + if (hc_mtlGetMaxTotalThreadsPerThreadgroup (hashcat_ctx, device_param->metal_pipeline1, &device_param->kernel_wgs1) == -1) return -1; - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function1, &device_param->kernel_local_mem_size1) == -1) return -1; + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline1, &device_param->kernel_preferred_wgs_multiple1) == -1) return -1; - device_param->kernel_dynamic_local_mem_size1 = device_param->device_local_mem_size - device_param->kernel_local_mem_size1; + device_param->kernel_local_mem_size1 = 0; - device_param->kernel_preferred_wgs_multiple1 = device_param->hip_warp_size; + device_param->kernel_dynamic_local_mem_size1 = 0; - // kernel2 + // kernel2: m%05u_s%02d snprintf (kernel_name, sizeof (kernel_name), "m%05u_s%02d", kern_type, 8); - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function2, device_param->hip_module, kernel_name) == -1) + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library, kernel_name, &device_param->metal_function2, &device_param->metal_pipeline2) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -10369,19 +12825,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_hip_kernel_wgs (hashcat_ctx, 
device_param->hip_function2, &device_param->kernel_wgs2) == -1) return -1; + if (hc_mtlGetMaxTotalThreadsPerThreadgroup (hashcat_ctx, device_param->metal_pipeline2, &device_param->kernel_wgs2) == -1) return -1; - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function2, &device_param->kernel_local_mem_size2) == -1) return -1; + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline2, &device_param->kernel_preferred_wgs_multiple2) == -1) return -1; - device_param->kernel_dynamic_local_mem_size2 = device_param->device_local_mem_size - device_param->kernel_local_mem_size2; + device_param->kernel_local_mem_size2 = 0; - device_param->kernel_preferred_wgs_multiple2 = device_param->hip_warp_size; + device_param->kernel_dynamic_local_mem_size2 = 0; - // kernel3 + // kernel3: m%05u_s%02d snprintf (kernel_name, sizeof (kernel_name), "m%05u_s%02d", kern_type, 16); - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function3, device_param->hip_module, kernel_name) == -1) + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library, kernel_name, &device_param->metal_function3, &device_param->metal_pipeline3) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -10391,19 +12847,21 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function3, &device_param->kernel_wgs3) == -1) return -1; + if (hc_mtlGetMaxTotalThreadsPerThreadgroup (hashcat_ctx, device_param->metal_pipeline3, &device_param->kernel_wgs3) == -1) return -1; - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function3, &device_param->kernel_local_mem_size3) == -1) return -1; + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline3, &device_param->kernel_preferred_wgs_multiple3) == -1) return -1; - device_param->kernel_dynamic_local_mem_size3 = device_param->device_local_mem_size - device_param->kernel_local_mem_size3; + device_param->kernel_local_mem_size3 = 0; - device_param->kernel_preferred_wgs_multiple3 = device_param->hip_warp_size; + device_param->kernel_dynamic_local_mem_size3 = 0; } else { + // kernel4: m%05u_sxx + snprintf (kernel_name, sizeof (kernel_name), "m%05u_sxx", kern_type); - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function4, device_param->hip_module, kernel_name) == -1) + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library, kernel_name, &device_param->metal_function4, &device_param->metal_pipeline4) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -10413,16 +12871,16 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function4, &device_param->kernel_wgs4) == -1) return -1; + if (hc_mtlGetMaxTotalThreadsPerThreadgroup (hashcat_ctx, device_param->metal_pipeline4, &device_param->kernel_wgs4) == -1) return -1; - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function4, &device_param->kernel_local_mem_size4) == -1) return -1; + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline4, &device_param->kernel_preferred_wgs_multiple4) == -1) return -1; - device_param->kernel_dynamic_local_mem_size4 = device_param->device_local_mem_size - device_param->kernel_local_mem_size4; + 
device_param->kernel_local_mem_size4 = 0; - device_param->kernel_preferred_wgs_multiple4 = device_param->hip_warp_size; + device_param->kernel_dynamic_local_mem_size4 = 0; } } - else + else // multi { if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL) { @@ -10430,7 +12888,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) snprintf (kernel_name, sizeof (kernel_name), "m%05u_m%02d", kern_type, 4); - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function1, device_param->hip_module, kernel_name) == -1) + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library, kernel_name, &device_param->metal_function1, &device_param->metal_pipeline1) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -10440,19 +12898,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function1, &device_param->kernel_wgs1) == -1) return -1; + if (hc_mtlGetMaxTotalThreadsPerThreadgroup (hashcat_ctx, device_param->metal_pipeline1, &device_param->kernel_wgs1) == -1) return -1; - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function1, &device_param->kernel_local_mem_size1) == -1) return -1; + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline1, &device_param->kernel_preferred_wgs_multiple1) == -1) return -1; - device_param->kernel_dynamic_local_mem_size1 = device_param->device_local_mem_size - device_param->kernel_local_mem_size1; + device_param->kernel_local_mem_size1 = 0; - device_param->kernel_preferred_wgs_multiple1 = device_param->hip_warp_size; + device_param->kernel_dynamic_local_mem_size1 = 0; // kernel2 snprintf (kernel_name, sizeof (kernel_name), "m%05u_m%02d", kern_type, 8); - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function2, device_param->hip_module, kernel_name) == -1) + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library, kernel_name, &device_param->metal_function2, &device_param->metal_pipeline2) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -10462,19 +12920,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function2, &device_param->kernel_wgs2) == -1) return -1; + if (hc_mtlGetMaxTotalThreadsPerThreadgroup (hashcat_ctx, device_param->metal_pipeline2, &device_param->kernel_wgs2) == -1) return -1; - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function2, &device_param->kernel_local_mem_size2) == -1) return -1; + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline2, &device_param->kernel_preferred_wgs_multiple2) == -1) return -1; - device_param->kernel_dynamic_local_mem_size2 = device_param->device_local_mem_size - device_param->kernel_local_mem_size2; + device_param->kernel_local_mem_size2 = 0; - device_param->kernel_preferred_wgs_multiple2 = device_param->hip_warp_size; + device_param->kernel_dynamic_local_mem_size2 = 0; // kernel3 snprintf (kernel_name, sizeof (kernel_name), "m%05u_m%02d", kern_type, 16); - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function3, device_param->hip_module, kernel_name) == -1) + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library, kernel_name, &device_param->metal_function3, 
&device_param->metal_pipeline3) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -10484,19 +12942,21 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function3, &device_param->kernel_wgs3) == -1) return -1; + if (hc_mtlGetMaxTotalThreadsPerThreadgroup (hashcat_ctx, device_param->metal_pipeline3, &device_param->kernel_wgs3) == -1) return -1; - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function3, &device_param->kernel_local_mem_size3) == -1) return -1; + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline3, &device_param->kernel_preferred_wgs_multiple3) == -1) return -1; - device_param->kernel_dynamic_local_mem_size3 = device_param->device_local_mem_size - device_param->kernel_local_mem_size3; + device_param->kernel_local_mem_size3 = 0; - device_param->kernel_preferred_wgs_multiple3 = device_param->hip_warp_size; + device_param->kernel_dynamic_local_mem_size3 = 0; } else { + // kernel4 + snprintf (kernel_name, sizeof (kernel_name), "m%05u_mxx", kern_type); - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function4, device_param->hip_module, kernel_name) == -1) + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library, kernel_name, &device_param->metal_function4, &device_param->metal_pipeline4) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -10506,13 +12966,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function4, &device_param->kernel_wgs4) == -1) return -1; + if (hc_mtlGetMaxTotalThreadsPerThreadgroup (hashcat_ctx, device_param->metal_pipeline4, &device_param->kernel_wgs4) == -1) return -1; - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function4, &device_param->kernel_local_mem_size4) == -1) return -1; + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline4, &device_param->kernel_preferred_wgs_multiple4) == -1) return -1; - device_param->kernel_dynamic_local_mem_size4 = device_param->device_local_mem_size - device_param->kernel_local_mem_size4; + device_param->kernel_local_mem_size4 = 0; - device_param->kernel_preferred_wgs_multiple4 = device_param->hip_warp_size; + device_param->kernel_dynamic_local_mem_size4 = 0; } } @@ -10527,7 +12987,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) { snprintf (kernel_name, sizeof (kernel_name), "m%05u_tm", kern_type); - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_tm, device_param->hip_module, kernel_name) == -1) + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library, kernel_name, &device_param->metal_function_tm, &device_param->metal_pipeline_tm) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -10537,24 +12997,24 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_tm, &device_param->kernel_wgs_tm) == -1) return -1; + if (hc_mtlGetMaxTotalThreadsPerThreadgroup (hashcat_ctx, device_param->metal_pipeline_tm, &device_param->kernel_wgs_tm) == -1) return -1; - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_tm, 
&device_param->kernel_local_mem_size_tm) == -1) return -1; + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline_tm, &device_param->kernel_preferred_wgs_multiple_tm) == -1) return -1; - device_param->kernel_dynamic_local_mem_size_tm = device_param->device_local_mem_size - device_param->kernel_local_mem_size_tm; + device_param->kernel_local_mem_size_tm = 0; - device_param->kernel_preferred_wgs_multiple_tm = device_param->hip_warp_size; + device_param->kernel_dynamic_local_mem_size_tm = 0; } } } } else { - // kernel1 + // kernel1: m%05u_init snprintf (kernel_name, sizeof (kernel_name), "m%05u_init", kern_type); - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function1, device_param->hip_module, kernel_name) == -1) + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library, kernel_name, &device_param->metal_function1, &device_param->metal_pipeline1) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -10564,19 +13024,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function1, &device_param->kernel_wgs1) == -1) return -1; + if (hc_mtlGetMaxTotalThreadsPerThreadgroup (hashcat_ctx, device_param->metal_pipeline1, &device_param->kernel_wgs1) == -1) return -1; - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function1, &device_param->kernel_local_mem_size1) == -1) return -1; + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline1, &device_param->kernel_preferred_wgs_multiple1) == -1) return -1; - device_param->kernel_dynamic_local_mem_size1 = device_param->device_local_mem_size - device_param->kernel_local_mem_size1; + device_param->kernel_local_mem_size1 = 0; - device_param->kernel_preferred_wgs_multiple1 = device_param->hip_warp_size; + device_param->kernel_dynamic_local_mem_size1 = 0; - // kernel2 + // kernel2: m%05u_loop snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop", kern_type); - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function2, device_param->hip_module, kernel_name) == -1) + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library, kernel_name, &device_param->metal_function2, &device_param->metal_pipeline2) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -10586,19 +13046,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function2, &device_param->kernel_wgs2) == -1) return -1; + if (hc_mtlGetMaxTotalThreadsPerThreadgroup (hashcat_ctx, device_param->metal_pipeline2, &device_param->kernel_wgs2) == -1) return -1; - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function2, &device_param->kernel_local_mem_size2) == -1) return -1; + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline2, &device_param->kernel_preferred_wgs_multiple2) == -1) return -1; - device_param->kernel_dynamic_local_mem_size2 = device_param->device_local_mem_size - device_param->kernel_local_mem_size2; + device_param->kernel_local_mem_size2 = 0; - device_param->kernel_preferred_wgs_multiple2 = device_param->hip_warp_size; + device_param->kernel_dynamic_local_mem_size2 = 0; - // kernel3 + // kernel3: m%05u_comp snprintf (kernel_name, sizeof (kernel_name), "m%05u_comp", kern_type); - if 
(hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function3, device_param->hip_module, kernel_name) == -1) + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library, kernel_name, &device_param->metal_function3, &device_param->metal_pipeline3) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -10608,21 +13068,21 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function3, &device_param->kernel_wgs3) == -1) return -1; + if (hc_mtlGetMaxTotalThreadsPerThreadgroup (hashcat_ctx, device_param->metal_pipeline3, &device_param->kernel_wgs3) == -1) return -1; - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function3, &device_param->kernel_local_mem_size3) == -1) return -1; + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline3, &device_param->kernel_preferred_wgs_multiple3) == -1) return -1; - device_param->kernel_dynamic_local_mem_size3 = device_param->device_local_mem_size - device_param->kernel_local_mem_size3; + device_param->kernel_local_mem_size3 = 0; - device_param->kernel_preferred_wgs_multiple3 = device_param->hip_warp_size; + device_param->kernel_dynamic_local_mem_size3 = 0; if (hashconfig->opts_type & OPTS_TYPE_LOOP_PREPARE) { - // kernel2p + // kernel2p: m%05u_loop_prepare snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop_prepare", kern_type); - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function2p, device_param->hip_module, kernel_name) == -1) + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library, kernel_name, &device_param->metal_function2p, &device_param->metal_pipeline2p) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -10632,22 +13092,22 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function2p, &device_param->kernel_wgs2p) == -1) return -1; + if (hc_mtlGetMaxTotalThreadsPerThreadgroup (hashcat_ctx, device_param->metal_pipeline2p, &device_param->kernel_wgs2p) == -1) return -1; - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function2p, &device_param->kernel_local_mem_size2p) == -1) return -1; + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline2p, &device_param->kernel_preferred_wgs_multiple2p) == -1) return -1; - device_param->kernel_dynamic_local_mem_size2p = device_param->device_local_mem_size - device_param->kernel_local_mem_size2p; + device_param->kernel_local_mem_size2p = 0; - device_param->kernel_preferred_wgs_multiple2p = device_param->hip_warp_size; + device_param->kernel_dynamic_local_mem_size2p = 0; } if (hashconfig->opts_type & OPTS_TYPE_LOOP_EXTENDED) { - // kernel2e + // kernel2e: m%05u_loop_extended snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop_extended", kern_type); - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function2e, device_param->hip_module, kernel_name) == -1) + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library, kernel_name, &device_param->metal_function2e, &device_param->metal_pipeline2e) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -10657,22 +13117,22 @@ int backend_session_begin (hashcat_ctx_t 
*hashcat_ctx) continue; } - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function2e, &device_param->kernel_wgs2e) == -1) return -1; + if (hc_mtlGetMaxTotalThreadsPerThreadgroup (hashcat_ctx, device_param->metal_pipeline2e, &device_param->kernel_wgs2e) == -1) return -1; - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function2e, &device_param->kernel_local_mem_size2e) == -1) return -1; + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline2e, &device_param->kernel_preferred_wgs_multiple2e) == -1) return -1; - device_param->kernel_dynamic_local_mem_size2e = device_param->device_local_mem_size - device_param->kernel_local_mem_size2e; + device_param->kernel_local_mem_size2e = 0; - device_param->kernel_preferred_wgs_multiple2e = device_param->hip_warp_size; + device_param->kernel_dynamic_local_mem_size2e = 0; } - // kernel12 - if (hashconfig->opts_type & OPTS_TYPE_HOOK12) { + // kernel12: m%05u_hook12 + snprintf (kernel_name, sizeof (kernel_name), "m%05u_hook12", kern_type); - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function12, device_param->hip_module, kernel_name) == -1) + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library, kernel_name, &device_param->metal_function12, &device_param->metal_pipeline12) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -10682,22 +13142,22 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function12, &device_param->kernel_wgs12) == -1) return -1; + if (hc_mtlGetMaxTotalThreadsPerThreadgroup (hashcat_ctx, device_param->metal_pipeline12, &device_param->kernel_wgs12) == -1) return -1; - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function12, &device_param->kernel_local_mem_size12) == -1) return -1; + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline12, &device_param->kernel_preferred_wgs_multiple12) == -1) return -1; - device_param->kernel_dynamic_local_mem_size12 = device_param->device_local_mem_size - device_param->kernel_local_mem_size12; + device_param->kernel_local_mem_size12 = 0; - device_param->kernel_preferred_wgs_multiple12 = device_param->hip_warp_size; + device_param->kernel_dynamic_local_mem_size12 = 0; } - // kernel23 - if (hashconfig->opts_type & OPTS_TYPE_HOOK23) { + // kernel23: m%05u_hook23 + snprintf (kernel_name, sizeof (kernel_name), "m%05u_hook23", kern_type); - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function23, device_param->hip_module, kernel_name) == -1) + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library, kernel_name, &device_param->metal_function23, &device_param->metal_pipeline23) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -10707,22 +13167,22 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function23, &device_param->kernel_wgs23) == -1) return -1; + if (hc_mtlGetMaxTotalThreadsPerThreadgroup (hashcat_ctx, device_param->metal_pipeline23, &device_param->kernel_wgs23) == -1) return -1; - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function23, &device_param->kernel_local_mem_size23) == -1) return -1; + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, 
device_param->metal_pipeline23, &device_param->kernel_preferred_wgs_multiple23) == -1) return -1; - device_param->kernel_dynamic_local_mem_size23 = device_param->device_local_mem_size - device_param->kernel_local_mem_size23; + device_param->kernel_local_mem_size23 = 0; - device_param->kernel_preferred_wgs_multiple23 = device_param->hip_warp_size; + device_param->kernel_dynamic_local_mem_size23 = 0; } - // init2 - if (hashconfig->opts_type & OPTS_TYPE_INIT2) { + // init2: m%05u_init2 + snprintf (kernel_name, sizeof (kernel_name), "m%05u_init2", kern_type); - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_init2, device_param->hip_module, kernel_name) == -1) + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library, kernel_name, &device_param->metal_function_init2, &device_param->metal_pipeline_init2) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -10732,22 +13192,22 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_init2, &device_param->kernel_wgs_init2) == -1) return -1; + if (hc_mtlGetMaxTotalThreadsPerThreadgroup (hashcat_ctx, device_param->metal_pipeline_init2, &device_param->kernel_wgs_init2) == -1) return -1; - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_init2, &device_param->kernel_local_mem_size_init2) == -1) return -1; + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline_init2, &device_param->kernel_preferred_wgs_multiple_init2) == -1) return -1; - device_param->kernel_dynamic_local_mem_size_init2 = device_param->device_local_mem_size - device_param->kernel_local_mem_size_init2; + device_param->kernel_local_mem_size_init2 = 0; - device_param->kernel_preferred_wgs_multiple_init2 = device_param->hip_warp_size; + device_param->kernel_dynamic_local_mem_size_init2 = 0; } - // loop2 prepare - if (hashconfig->opts_type & OPTS_TYPE_LOOP2_PREPARE) { + // loop2 prepare: m%05u_loop2_prepare + snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop2_prepare", kern_type); - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_loop2p, device_param->hip_module, kernel_name) == -1) + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library, kernel_name, &device_param->metal_function_loop2p, &device_param->metal_pipeline_loop2p) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -10757,22 +13217,22 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_loop2p, &device_param->kernel_wgs_loop2p) == -1) return -1; + if (hc_mtlGetMaxTotalThreadsPerThreadgroup (hashcat_ctx, device_param->metal_pipeline_loop2p, &device_param->kernel_wgs_loop2p) == -1) return -1; - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_loop2p, &device_param->kernel_local_mem_size_loop2p) == -1) return -1; + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline_loop2p, &device_param->kernel_preferred_wgs_multiple_loop2p) == -1) return -1; - device_param->kernel_dynamic_local_mem_size_loop2p = device_param->device_local_mem_size - device_param->kernel_local_mem_size_loop2p; + device_param->kernel_local_mem_size_loop2p = 0; - device_param->kernel_preferred_wgs_multiple_loop2p = 
device_param->hip_warp_size; + device_param->kernel_dynamic_local_mem_size_loop2p = 0; } - // loop2 - if (hashconfig->opts_type & OPTS_TYPE_LOOP2) { + // loop2: m%05u_loop2 + snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop2", kern_type); - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_loop2, device_param->hip_module, kernel_name) == -1) + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library, kernel_name, &device_param->metal_function_loop2, &device_param->metal_pipeline_loop2) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -10782,22 +13242,22 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_loop2, &device_param->kernel_wgs_loop2) == -1) return -1; + if (hc_mtlGetMaxTotalThreadsPerThreadgroup (hashcat_ctx, device_param->metal_pipeline_loop2, &device_param->kernel_wgs_loop2) == -1) return -1; - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_loop2, &device_param->kernel_local_mem_size_loop2) == -1) return -1; + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline_loop2, &device_param->kernel_preferred_wgs_multiple_loop2) == -1) return -1; - device_param->kernel_dynamic_local_mem_size_loop2 = device_param->device_local_mem_size - device_param->kernel_local_mem_size_loop2; + device_param->kernel_local_mem_size_loop2 = 0; - device_param->kernel_preferred_wgs_multiple_loop2 = device_param->hip_warp_size; + device_param->kernel_dynamic_local_mem_size_loop2 = 0; } - // aux1 - if (hashconfig->opts_type & OPTS_TYPE_AUX1) { + // aux1: m%05u_aux1 + snprintf (kernel_name, sizeof (kernel_name), "m%05u_aux1", kern_type); - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_aux1, device_param->hip_module, kernel_name) == -1) + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library, kernel_name, &device_param->metal_function_aux1, &device_param->metal_pipeline_aux1) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -10807,22 +13267,22 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_aux1, &device_param->kernel_wgs_aux1) == -1) return -1; + if (hc_mtlGetMaxTotalThreadsPerThreadgroup (hashcat_ctx, device_param->metal_pipeline_aux1, &device_param->kernel_wgs_aux1) == -1) return -1; - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_aux1, &device_param->kernel_local_mem_size_aux1) == -1) return -1; + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline_aux1, &device_param->kernel_preferred_wgs_multiple_aux1) == -1) return -1; - device_param->kernel_dynamic_local_mem_size_aux1 = device_param->device_local_mem_size - device_param->kernel_local_mem_size_aux1; + device_param->kernel_local_mem_size_aux1 = 0; - device_param->kernel_preferred_wgs_multiple_aux1 = device_param->hip_warp_size; + device_param->kernel_dynamic_local_mem_size_aux1 = 0; } - // aux2 - if (hashconfig->opts_type & OPTS_TYPE_AUX2) { + // aux2: m%05u_aux2 + snprintf (kernel_name, sizeof (kernel_name), "m%05u_aux2", kern_type); - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_aux2, device_param->hip_module, kernel_name) == -1) + if (hc_mtlCreateKernel (hashcat_ctx, 
device_param->metal_device, device_param->metal_library, kernel_name, &device_param->metal_function_aux2, &device_param->metal_pipeline_aux2) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -10832,22 +13292,22 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_aux2, &device_param->kernel_wgs_aux2) == -1) return -1; + if (hc_mtlGetMaxTotalThreadsPerThreadgroup (hashcat_ctx, device_param->metal_pipeline_aux2, &device_param->kernel_wgs_aux2) == -1) return -1; - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_aux2, &device_param->kernel_local_mem_size_aux2) == -1) return -1; + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline_aux2, &device_param->kernel_preferred_wgs_multiple_aux2) == -1) return -1; - device_param->kernel_dynamic_local_mem_size_aux2 = device_param->device_local_mem_size - device_param->kernel_local_mem_size_aux2; + device_param->kernel_local_mem_size_aux2 = 0; - device_param->kernel_preferred_wgs_multiple_aux2 = device_param->hip_warp_size; + device_param->kernel_dynamic_local_mem_size_aux2 = 0; } - // aux3 - if (hashconfig->opts_type & OPTS_TYPE_AUX3) { + // aux3: m%05u_aux3 + snprintf (kernel_name, sizeof (kernel_name), "m%05u_aux3", kern_type); - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_aux3, device_param->hip_module, kernel_name) == -1) + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library, kernel_name, &device_param->metal_function_aux3, &device_param->metal_pipeline_aux3) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -10857,22 +13317,22 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_aux3, &device_param->kernel_wgs_aux3) == -1) return -1; + if (hc_mtlGetMaxTotalThreadsPerThreadgroup (hashcat_ctx, device_param->metal_pipeline_aux3, &device_param->kernel_wgs_aux3) == -1) return -1; - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_aux3, &device_param->kernel_local_mem_size_aux3) == -1) return -1; + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline_aux3, &device_param->kernel_preferred_wgs_multiple_aux3) == -1) return -1; - device_param->kernel_dynamic_local_mem_size_aux3 = device_param->device_local_mem_size - device_param->kernel_local_mem_size_aux3; + device_param->kernel_local_mem_size_aux3 = 0; - device_param->kernel_preferred_wgs_multiple_aux3 = device_param->hip_warp_size; + device_param->kernel_dynamic_local_mem_size_aux3 = 0; } - // aux4 - if (hashconfig->opts_type & OPTS_TYPE_AUX4) { + // aux4: m%05u_aux4 + snprintf (kernel_name, sizeof (kernel_name), "m%05u_aux4", kern_type); - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_aux4, device_param->hip_module, kernel_name) == -1) + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library, kernel_name, &device_param->metal_function_aux4, &device_param->metal_pipeline_aux4) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -10882,21 +13342,16 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_hip_kernel_wgs (hashcat_ctx, 
device_param->hip_function_aux4, &device_param->kernel_wgs_aux4) == -1) return -1; + if (hc_mtlGetMaxTotalThreadsPerThreadgroup (hashcat_ctx, device_param->metal_pipeline_aux4, &device_param->kernel_wgs_aux4) == -1) return -1; - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_aux4, &device_param->kernel_local_mem_size_aux4) == -1) return -1; + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline_aux4, &device_param->kernel_preferred_wgs_multiple_aux4) == -1) return -1; - device_param->kernel_dynamic_local_mem_size_aux4 = device_param->device_local_mem_size - device_param->kernel_local_mem_size_aux4; + device_param->kernel_local_mem_size_aux4 = 0; - device_param->kernel_preferred_wgs_multiple_aux4 = device_param->hip_warp_size; + device_param->kernel_dynamic_local_mem_size_aux4 = 0; } } - //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 0, sizeof (cl_mem), device_param->kernel_params_decompress[0]); if (CL_rc == -1) return -1; - //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 1, sizeof (cl_mem), device_param->kernel_params_decompress[1]); if (CL_rc == -1) return -1; - //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 2, sizeof (cl_mem), device_param->kernel_params_decompress[2]); if (CL_rc == -1) return -1; - //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 3, sizeof (cl_ulong), device_param->kernel_params_decompress[3]); if (CL_rc == -1) return -1; - // MP start if (user_options->slow_candidates == true) @@ -10906,9 +13361,9 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) { if (user_options->attack_mode == ATTACK_MODE_BF) { - // mp_l + // mp_l: l_markov - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_mp_l, device_param->hip_module_mp, "l_markov") == -1) + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library_mp, "l_markov", &device_param->metal_function_mp_l, &device_param->metal_pipeline_mp_l) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "l_markov"); @@ -10918,17 +13373,17 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_mp_l, &device_param->kernel_wgs_mp_l) == -1) return -1; + if (hc_mtlGetMaxTotalThreadsPerThreadgroup (hashcat_ctx, device_param->metal_pipeline_mp_l, &device_param->kernel_wgs_mp_l) == -1) return -1; - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_mp_l, &device_param->kernel_local_mem_size_mp_l) == -1) return -1; + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline_mp_l, &device_param->kernel_preferred_wgs_multiple_mp_l) == -1) return -1; - device_param->kernel_dynamic_local_mem_size_mp_l = device_param->device_local_mem_size - device_param->kernel_local_mem_size_mp_l; + device_param->kernel_local_mem_size_mp_l = 0; - device_param->kernel_preferred_wgs_multiple_mp_l = device_param->hip_warp_size; + device_param->kernel_dynamic_local_mem_size_mp_l = 0; - // mp_r + // mp_r: r_markov - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_mp_r, device_param->hip_module_mp, "r_markov") == -1) + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library_mp, "r_markov", &device_param->metal_function_mp_r, &device_param->metal_pipeline_mp_r) == -1) { event_log_warning 
(hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "r_markov"); @@ -10938,26 +13393,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_mp_r, &device_param->kernel_wgs_mp_r) == -1) return -1; - - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_mp_r, &device_param->kernel_local_mem_size_mp_r) == -1) return -1; + if (hc_mtlGetMaxTotalThreadsPerThreadgroup (hashcat_ctx, device_param->metal_pipeline_mp_r, &device_param->kernel_wgs_mp_r) == -1) return -1; - device_param->kernel_dynamic_local_mem_size_mp_r = device_param->device_local_mem_size - device_param->kernel_local_mem_size_mp_r; + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline_mp_r, &device_param->kernel_preferred_wgs_multiple_mp_r) == -1) return -1; - device_param->kernel_preferred_wgs_multiple_mp_r = device_param->hip_warp_size; + device_param->kernel_local_mem_size_mp_r = 0; - if (user_options->attack_mode == ATTACK_MODE_BF) - { - if (hashconfig->opts_type & OPTS_TYPE_TM_KERNEL) - { - //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_tm, 0, sizeof (cl_mem), device_param->kernel_params_tm[0]); if (CL_rc == -1) return -1; - //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_tm, 1, sizeof (cl_mem), device_param->kernel_params_tm[1]); if (CL_rc == -1) return -1; - } - } + device_param->kernel_dynamic_local_mem_size_mp_r = 0; } else if (user_options->attack_mode == ATTACK_MODE_HYBRID1) { - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_mp, device_param->hip_module_mp, "C_markov") == -1) + // mp_c: C_markov + + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library_mp, "C_markov", &device_param->metal_function_mp, &device_param->metal_pipeline_mp) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "C_markov"); @@ -10967,17 +13415,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_mp, &device_param->kernel_wgs_mp) == -1) return -1; + if (hc_mtlGetMaxTotalThreadsPerThreadgroup (hashcat_ctx, device_param->metal_pipeline_mp, &device_param->kernel_wgs_mp) == -1) return -1; - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_mp, &device_param->kernel_local_mem_size_mp) == -1) return -1; + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline_mp, &device_param->kernel_preferred_wgs_multiple_mp) == -1) return -1; - device_param->kernel_dynamic_local_mem_size_mp = device_param->device_local_mem_size - device_param->kernel_local_mem_size_mp; + device_param->kernel_local_mem_size_mp = 0; - device_param->kernel_preferred_wgs_multiple_mp = device_param->hip_warp_size; + device_param->kernel_dynamic_local_mem_size_mp = 0; } else if (user_options->attack_mode == ATTACK_MODE_HYBRID2) { - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_mp, device_param->hip_module_mp, "C_markov") == -1) + // mp_c: C_markov + + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library_mp, "C_markov", &device_param->metal_function_mp, &device_param->metal_pipeline_mp) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "C_markov"); @@ -10987,13 +13437,13 @@ int backend_session_begin 
(hashcat_ctx_t *hashcat_ctx) continue; } - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_mp, &device_param->kernel_wgs_mp) == -1) return -1; + if (hc_mtlGetMaxTotalThreadsPerThreadgroup (hashcat_ctx, device_param->metal_pipeline_mp, &device_param->kernel_wgs_mp) == -1) return -1; - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_mp, &device_param->kernel_local_mem_size_mp) == -1) return -1; + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline_mp, &device_param->kernel_preferred_wgs_multiple_mp) == -1) return -1; - device_param->kernel_dynamic_local_mem_size_mp = device_param->device_local_mem_size - device_param->kernel_local_mem_size_mp; + device_param->kernel_local_mem_size_mp = 0; - device_param->kernel_preferred_wgs_multiple_mp = device_param->hip_warp_size; + device_param->kernel_dynamic_local_mem_size_mp = 0; } } @@ -11008,7 +13458,9 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) } else { - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_amp, device_param->hip_module_amp, "amp") == -1) + // amp + + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library_amp, "amp", &device_param->metal_function_amp, &device_param->metal_pipeline_amp) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "amp"); @@ -11018,51 +13470,21 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_amp, &device_param->kernel_wgs_amp) == -1) return -1; - - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_amp, &device_param->kernel_local_mem_size_amp) == -1) return -1; - - device_param->kernel_dynamic_local_mem_size_amp = device_param->device_local_mem_size - device_param->kernel_local_mem_size_amp; - - device_param->kernel_preferred_wgs_multiple_amp = device_param->hip_warp_size; - } - - /* - if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) - { - // nothing to do - } - else - { - for (u32 i = 0; i < 5; i++) - { - //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_amp, i, sizeof (cl_mem), device_param->kernel_params_amp[i]); - - //if (CL_rc == -1) return -1; - } - - for (u32 i = 5; i < 6; i++) - { - //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_amp, i, sizeof (cl_uint), device_param->kernel_params_amp[i]); + if (hc_mtlGetMaxTotalThreadsPerThreadgroup (hashcat_ctx, device_param->metal_pipeline_amp, &device_param->kernel_wgs_amp) == -1) return -1; - //if (CL_rc == -1) return -1; - } + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline_amp, &device_param->kernel_preferred_wgs_multiple_amp) == -1) return -1; - for (u32 i = 6; i < 7; i++) - { - //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_amp, i, sizeof (cl_ulong), device_param->kernel_params_amp[i]); + device_param->kernel_local_mem_size_amp = 0; - //if (CL_rc == -1) return -1; - } + device_param->kernel_dynamic_local_mem_size_amp = 0; } - */ } // zero some data buffers - if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_plain_bufs, device_param->size_plains) == -1) return -1; - if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_digests_shown, device_param->size_shown) == -1) return -1; - if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_result, device_param->size_results) == -1) return -1; + if 
(run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_plain_bufs, device_param->size_plains) == -1) return -1; + if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_digests_shown, device_param->size_shown) == -1) return -1; + if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_result, device_param->size_results) == -1) return -1; /** * special buffers @@ -11070,28 +13492,28 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) if (user_options->slow_candidates == true) { - if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_rules_c, size_rules_c) == -1) return -1; + if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_rules_c, size_rules_c) == -1) return -1; } else { if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT) { - if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_rules_c, size_rules_c) == -1) return -1; + if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_rules_c, size_rules_c) == -1) return -1; } else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI) { - if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_combs, size_combs) == -1) return -1; - if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_combs_c, size_combs) == -1) return -1; - if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_root_css_buf, size_root_css) == -1) return -1; - if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_markov_css_buf, size_markov_css) == -1) return -1; + if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_combs, size_combs) == -1) return -1; + if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_combs_c, size_combs) == -1) return -1; + if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_root_css_buf, size_root_css) == -1) return -1; + if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_markov_css_buf, size_markov_css) == -1) return -1; } else if (user_options_extra->attack_kern == ATTACK_KERN_BF) { - if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_bfs, size_bfs) == -1) return -1; - if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_bfs_c, size_bfs) == -1) return -1; - if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_tm_c, size_tm) == -1) return -1; - if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_root_css_buf, size_root_css) == -1) return -1; - if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_markov_css_buf, size_markov_css) == -1) return -1; + if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_bfs, size_bfs) == -1) return -1; + if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_bfs_c, size_bfs) == -1) return -1; + if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_tm_c, size_tm) == -1) return -1; + if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_root_css_buf, size_root_css) == -1) return -1; + if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_markov_css_buf, size_markov_css) == -1) return -1; } } @@ -11124,8 +13546,6 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) device_param->kernel_params_mp_buf32[6] = 0; device_param->kernel_params_mp_buf32[7] = 0; } - - //for (u32 i = 0; 
i < 3; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp, i, sizeof (cl_mem), device_param->kernel_params_mp[i]); if (CL_rc == -1) return -1; } } else if (user_options->attack_mode == ATTACK_MODE_BF) { @@ -11142,12 +13562,10 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) if (hashconfig->opts_type & OPTS_TYPE_PT_ADD80) device_param->kernel_params_mp_l_buf32[6] = full80; if (hashconfig->opts_type & OPTS_TYPE_PT_ADDBITS14) device_param->kernel_params_mp_l_buf32[7] = 1; if (hashconfig->opts_type & OPTS_TYPE_PT_ADDBITS15) device_param->kernel_params_mp_l_buf32[8] = 1; - - //for (u32 i = 0; i < 3; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_l, i, sizeof (cl_mem), device_param->kernel_params_mp_l[i]); if (CL_rc == -1) return -1; } - //for (u32 i = 0; i < 3; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_r, i, sizeof (cl_mem), device_param->kernel_params_mp_r[i]); if (CL_rc == -1) return -1; } } } } + #endif // __APPLE__ if (device_param->is_opencl == true) { @@ -12164,13 +14582,6 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) { const size_t undocumented_single_allocation_apple = 0x7fffffff; - if (bitmap_ctx->bitmap_size > undocumented_single_allocation_apple) memory_limit_hit = 1; - if (bitmap_ctx->bitmap_size > undocumented_single_allocation_apple) memory_limit_hit = 1; - if (bitmap_ctx->bitmap_size > undocumented_single_allocation_apple) memory_limit_hit = 1; - if (bitmap_ctx->bitmap_size > undocumented_single_allocation_apple) memory_limit_hit = 1; - if (bitmap_ctx->bitmap_size > undocumented_single_allocation_apple) memory_limit_hit = 1; - if (bitmap_ctx->bitmap_size > undocumented_single_allocation_apple) memory_limit_hit = 1; - if (bitmap_ctx->bitmap_size > undocumented_single_allocation_apple) memory_limit_hit = 1; if (bitmap_ctx->bitmap_size > undocumented_single_allocation_apple) memory_limit_hit = 1; if (size_bfs > undocumented_single_allocation_apple) memory_limit_hit = 1; if (size_combs > undocumented_single_allocation_apple) memory_limit_hit = 1; @@ -12337,6 +14748,25 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_hooks, device_param->size_hooks) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_pws, NULL, &device_param->metal_d_pws_buf) == -1) return -1; + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_pws_amp, NULL, &device_param->metal_d_pws_amp_buf) == -1) return -1; + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_pws_comp, NULL, &device_param->metal_d_pws_comp_buf) == -1) return -1; + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_pws_idx, NULL, &device_param->metal_d_pws_idx) == -1) return -1; + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_tmps, NULL, &device_param->metal_d_tmps) == -1) return -1; + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_hooks, NULL, &device_param->metal_d_hooks) == -1) return -1; + + if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_pws_buf, device_param->size_pws) == -1) return -1; + if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_pws_amp_buf, device_param->size_pws_amp) == -1) return -1; + if (run_metal_kernel_bzero (hashcat_ctx, device_param, 
device_param->metal_d_pws_comp_buf, device_param->size_pws_comp) == -1) return -1; + if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_pws_idx, device_param->size_pws_idx) == -1) return -1; + if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_tmps, device_param->size_tmps) == -1) return -1; + if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_hooks, device_param->size_hooks) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE, size_pws, NULL, &device_param->opencl_d_pws_buf) == -1) return -1; @@ -12379,7 +14809,6 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) device_param->scratch_buf = scratch_buf; #ifdef WITH_BRAIN - u8 *brain_link_in_buf = (u8 *) hcmalloc (size_brain_link_in); device_param->brain_link_in_buf = brain_link_in_buf; @@ -12415,6 +14844,15 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) device_param->kernel_params[ 5] = &device_param->hip_d_hooks; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + device_param->kernel_params[ 0] = device_param->metal_d_pws_buf; + device_param->kernel_params[ 4] = device_param->metal_d_tmps; + device_param->kernel_params[ 5] = device_param->metal_d_hooks; + } + #endif + if (device_param->is_opencl == true) { device_param->kernel_params[ 0] = &device_param->opencl_d_pws_buf; @@ -12453,6 +14891,15 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp, 0, sizeof (cl_mem), device_param->kernel_params_mp[0]); if (CL_rc == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + device_param->kernel_params_mp[0] = (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + ? device_param->metal_d_pws_buf + : device_param->metal_d_pws_amp_buf; + } + #endif + if (device_param->is_opencl == true) { device_param->kernel_params_mp[0] = (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) @@ -12484,6 +14931,15 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_l, 0, sizeof (cl_mem), device_param->kernel_params_mp_l[0]); if (CL_rc == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + device_param->kernel_params_mp_l[0] = (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + ? 
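For context, the hc_mtlCreateBuffer() calls above parallel the hc_clCreateBuffer() calls in the OpenCL branch that follows; the wrapper itself (defined later in this patch, in src/ext_metal.m) requests MTLResourceStorageModeShared. A reduced sketch of what a single allocation amounts to, with the function and variable names below being placeholders:

#import <Metal/Metal.h>

// Sketch: what one metal_d_pws_buf-style allocation reduces to.
// 'device' stands for device_param->metal_device, 'size_pws' for the same
// size value used by the CUDA/HIP/OpenCL branches.
static id<MTLBuffer> example_alloc_pws (id<MTLDevice> device, const size_t size_pws)
{
  // Shared storage keeps the buffer visible to both CPU and GPU, which is
  // what hc_mtlCreateBuffer() asks for.
  return [device newBufferWithLength: size_pws
                             options: MTLResourceStorageModeShared];
}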
device_param->metal_d_pws_buf + : device_param->metal_d_pws_amp_buf; + } + #endif + if (device_param->is_opencl == true) { device_param->kernel_params_mp_l[0] = (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) @@ -12518,6 +14974,14 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_amp, 1, sizeof (cl_mem), device_param->kernel_params_amp[1]); if (CL_rc == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + device_param->kernel_params_amp[0] = device_param->metal_d_pws_buf; + device_param->kernel_params_amp[1] = device_param->metal_d_pws_amp_buf; + } + #endif + if (device_param->is_opencl == true) { device_param->kernel_params_amp[0] = &device_param->opencl_d_pws_buf; @@ -12555,6 +15019,17 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 2, sizeof (cl_mem), device_param->kernel_params_decompress[2]); if (CL_rc == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + device_param->kernel_params_decompress[0] = device_param->metal_d_pws_idx; + device_param->kernel_params_decompress[1] = device_param->metal_d_pws_comp_buf; + device_param->kernel_params_decompress[2] = (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + ? device_param->metal_d_pws_buf + : device_param->metal_d_pws_amp_buf; + } + #endif + if (device_param->is_opencl == true) { device_param->kernel_params_decompress[0] = &device_param->opencl_d_pws_idx; @@ -12919,6 +15394,153 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx) device_param->hip_context = NULL; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (device_param->metal_d_pws_buf) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_pws_buf); + if (device_param->metal_d_pws_amp_buf) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_pws_amp_buf); + if (device_param->metal_d_pws_comp_buf) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_pws_comp_buf); + if (device_param->metal_d_pws_idx) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_pws_idx); + if (device_param->metal_d_rules) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_rules); + if (device_param->metal_d_rules_c) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_rules_c); + if (device_param->metal_d_combs) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_combs); + if (device_param->metal_d_combs_c) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_combs_c); + if (device_param->metal_d_bfs) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_bfs); + if (device_param->metal_d_bfs_c) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_bfs_c); + if (device_param->metal_d_bitmap_s1_a) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_bitmap_s1_a); + if (device_param->metal_d_bitmap_s1_b) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_bitmap_s1_b); + if (device_param->metal_d_bitmap_s1_c) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_bitmap_s1_c); + if (device_param->metal_d_bitmap_s1_d) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_bitmap_s1_d); + if (device_param->metal_d_bitmap_s2_a) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_bitmap_s2_a); + if (device_param->metal_d_bitmap_s2_b) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_bitmap_s2_b); + if (device_param->metal_d_bitmap_s2_c) 
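One difference worth noting against the CUDA/HIP/OpenCL branches: the Metal blocks store the mtl_mem handle itself in kernel_params[], kernel_params_mp*[], kernel_params_amp[] and kernel_params_decompress[], not its address. That is consistent with hc_mtlSetCommandEncoderArg() (later in this patch) taking the buffer handle directly and binding it with setBuffer:offset:atIndex:. A minimal sketch of that binding, with hypothetical names:

#import <Metal/Metal.h>

// Sketch: bind the pws buffer (stored directly in kernel_params[0] by the
// hunk above) to compute-argument slot 0. 'enc' would come from
// hc_mtlEncodeComputeCommand_pre().
static void example_bind_pws (id<MTLComputeCommandEncoder> enc, id<MTLBuffer> pws_buf)
{
  [enc setBuffer: pws_buf offset: 0 atIndex: 0];
}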
hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_bitmap_s2_c); + if (device_param->metal_d_bitmap_s2_d) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_bitmap_s2_d); + if (device_param->metal_d_plain_bufs) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_plain_bufs); + if (device_param->metal_d_digests_buf) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_digests_buf); + if (device_param->metal_d_digests_shown) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_digests_shown); + if (device_param->metal_d_salt_bufs) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_salt_bufs); + if (device_param->metal_d_esalt_bufs) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_esalt_bufs); + if (device_param->metal_d_tmps) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_tmps); + if (device_param->metal_d_hooks) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_hooks); + if (device_param->metal_d_result) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_result); + if (device_param->metal_d_extra0_buf) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_extra0_buf); + if (device_param->metal_d_extra1_buf) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_extra1_buf); + if (device_param->metal_d_extra2_buf) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_extra2_buf); + if (device_param->metal_d_extra3_buf) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_extra3_buf); + if (device_param->metal_d_root_css_buf) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_root_css_buf); + if (device_param->metal_d_markov_css_buf) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_markov_css_buf); + if (device_param->metal_d_tm_c) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_tm_c); + if (device_param->metal_d_st_digests_buf) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_st_digests_buf); + if (device_param->metal_d_st_salts_buf) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_st_salts_buf); + if (device_param->metal_d_st_esalts_buf) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_st_esalts_buf); + if (device_param->metal_d_kernel_param) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_kernel_param); + + if (device_param->metal_function1) hc_mtlReleaseFunction (hashcat_ctx, device_param->metal_function1); + if (device_param->metal_function12) hc_mtlReleaseFunction (hashcat_ctx, device_param->metal_function12); + if (device_param->metal_function2p) hc_mtlReleaseFunction (hashcat_ctx, device_param->metal_function2p); + if (device_param->metal_function2) hc_mtlReleaseFunction (hashcat_ctx, device_param->metal_function2); + if (device_param->metal_function2e) hc_mtlReleaseFunction (hashcat_ctx, device_param->metal_function2e); + if (device_param->metal_function23) hc_mtlReleaseFunction (hashcat_ctx, device_param->metal_function23); + if (device_param->metal_function3) hc_mtlReleaseFunction (hashcat_ctx, device_param->metal_function3); + if (device_param->metal_function4) hc_mtlReleaseFunction (hashcat_ctx, device_param->metal_function4); + if (device_param->metal_function_init2) hc_mtlReleaseFunction (hashcat_ctx, device_param->metal_function_init2); + if (device_param->metal_function_loop2p) hc_mtlReleaseFunction (hashcat_ctx, device_param->metal_function_loop2p); + if (device_param->metal_function_loop2) hc_mtlReleaseFunction (hashcat_ctx, device_param->metal_function_loop2); + if (device_param->metal_function_mp) 
hc_mtlReleaseFunction (hashcat_ctx, device_param->metal_function_mp); + if (device_param->metal_function_mp_l) hc_mtlReleaseFunction (hashcat_ctx, device_param->metal_function_mp_l); + if (device_param->metal_function_mp_r) hc_mtlReleaseFunction (hashcat_ctx, device_param->metal_function_mp_r); + if (device_param->metal_function_tm) hc_mtlReleaseFunction (hashcat_ctx, device_param->metal_function_tm); + if (device_param->metal_function_amp) hc_mtlReleaseFunction (hashcat_ctx, device_param->metal_function_amp); + if (device_param->metal_function_memset) hc_mtlReleaseFunction (hashcat_ctx, device_param->metal_function_memset); + if (device_param->metal_function_bzero) hc_mtlReleaseFunction (hashcat_ctx, device_param->metal_function_bzero); + if (device_param->metal_function_atinit) hc_mtlReleaseFunction (hashcat_ctx, device_param->metal_function_atinit); + if (device_param->metal_function_utf8toutf16le) hc_mtlReleaseFunction (hashcat_ctx, device_param->metal_function_utf8toutf16le); + if (device_param->metal_function_decompress) hc_mtlReleaseFunction (hashcat_ctx, device_param->metal_function_decompress); + if (device_param->metal_function_aux1) hc_mtlReleaseFunction (hashcat_ctx, device_param->metal_function_aux1); + if (device_param->metal_function_aux2) hc_mtlReleaseFunction (hashcat_ctx, device_param->metal_function_aux2); + if (device_param->metal_function_aux3) hc_mtlReleaseFunction (hashcat_ctx, device_param->metal_function_aux3); + if (device_param->metal_function_aux4) hc_mtlReleaseFunction (hashcat_ctx, device_param->metal_function_aux4); + + if (device_param->metal_library) hc_mtlReleaseLibrary (hashcat_ctx, device_param->metal_library); + if (device_param->metal_library_mp) hc_mtlReleaseLibrary (hashcat_ctx, device_param->metal_library_mp); + if (device_param->metal_library_amp) hc_mtlReleaseLibrary (hashcat_ctx, device_param->metal_library_amp); + if (device_param->metal_library_shared) hc_mtlReleaseLibrary (hashcat_ctx, device_param->metal_library_shared); + + if (device_param->metal_command_queue) hc_mtlReleaseCommandQueue (hashcat_ctx, device_param->metal_command_queue); + + //if (device_param->metal_device) hc_mtlReleaseDevice (hashcat_ctx, device_param->metal_device); + + device_param->metal_d_pws_buf = NULL; + device_param->metal_d_pws_amp_buf = NULL; + device_param->metal_d_pws_comp_buf = NULL; + device_param->metal_d_pws_idx = NULL; + device_param->metal_d_rules = NULL; + device_param->metal_d_rules_c = NULL; + device_param->metal_d_combs = NULL; + device_param->metal_d_combs_c = NULL; + device_param->metal_d_bfs = NULL; + device_param->metal_d_bfs_c = NULL; + device_param->metal_d_bitmap_s1_a = NULL; + device_param->metal_d_bitmap_s1_b = NULL; + device_param->metal_d_bitmap_s1_c = NULL; + device_param->metal_d_bitmap_s1_d = NULL; + device_param->metal_d_bitmap_s2_a = NULL; + device_param->metal_d_bitmap_s2_b = NULL; + device_param->metal_d_bitmap_s2_c = NULL; + device_param->metal_d_bitmap_s2_d = NULL; + device_param->metal_d_plain_bufs = NULL; + device_param->metal_d_digests_buf = NULL; + device_param->metal_d_digests_shown = NULL; + device_param->metal_d_salt_bufs = NULL; + device_param->metal_d_esalt_bufs = NULL; + device_param->metal_d_tmps = NULL; + device_param->metal_d_hooks = NULL; + device_param->metal_d_result = NULL; + device_param->metal_d_extra0_buf = NULL; + device_param->metal_d_extra1_buf = NULL; + device_param->metal_d_extra2_buf = NULL; + device_param->metal_d_extra3_buf = NULL; + device_param->metal_d_root_css_buf = NULL; + 
device_param->metal_d_markov_css_buf = NULL; + device_param->metal_d_tm_c = NULL; + device_param->metal_d_st_digests_buf = NULL; + device_param->metal_d_st_salts_buf = NULL; + device_param->metal_d_st_esalts_buf = NULL; + device_param->metal_d_kernel_param = NULL; + device_param->metal_function1 = NULL; + device_param->metal_function12 = NULL; + device_param->metal_function2p = NULL; + device_param->metal_function2 = NULL; + device_param->metal_function2e = NULL; + device_param->metal_function23 = NULL; + device_param->metal_function3 = NULL; + device_param->metal_function4 = NULL; + device_param->metal_function_init2 = NULL; + device_param->metal_function_loop2p = NULL; + device_param->metal_function_loop2 = NULL; + device_param->metal_function_mp = NULL; + device_param->metal_function_mp_l = NULL; + device_param->metal_function_mp_r = NULL; + device_param->metal_function_tm = NULL; + device_param->metal_function_amp = NULL; + device_param->metal_function_memset = NULL; + device_param->metal_function_bzero = NULL; + device_param->metal_function_atinit = NULL; + device_param->metal_function_utf8toutf16le = NULL; + device_param->metal_function_decompress = NULL; + device_param->metal_function_aux1 = NULL; + device_param->metal_function_aux2 = NULL; + device_param->metal_function_aux3 = NULL; + device_param->metal_function_aux4 = NULL; + device_param->metal_library = NULL; + device_param->metal_library_mp = NULL; + device_param->metal_library_amp = NULL; + device_param->metal_library_shared = NULL; + device_param->metal_command_queue = NULL; + //device_param->metal_device = NULL; + } + #endif // __APPLE__ + if (device_param->is_opencl == true) { if (device_param->opencl_d_pws_buf) hc_clReleaseMemObject (hashcat_ctx, device_param->opencl_d_pws_buf); @@ -13207,6 +15829,14 @@ int backend_session_update_mp (hashcat_ctx_t *hashcat_ctx) if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_markov_css_buf, mask_ctx->markov_css_buf, device_param->size_markov_css, device_param->hip_stream) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_root_css_buf, 0, mask_ctx->root_css_buf, device_param->size_root_css) == -1) return -1; + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_markov_css_buf, 0, mask_ctx->markov_css_buf, device_param->size_markov_css) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_root_css_buf, CL_FALSE, 0, device_param->size_root_css, mask_ctx->root_css_buf, 0, NULL, NULL) == -1) return -1; @@ -13255,6 +15885,14 @@ int backend_session_update_mp_rl (hashcat_ctx_t *hashcat_ctx, const u32 css_cnt_ if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_markov_css_buf, mask_ctx->markov_css_buf, device_param->size_markov_css, device_param->hip_stream) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_root_css_buf, 0, mask_ctx->root_css_buf, device_param->size_root_css) == -1) return -1; + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_markov_css_buf, 0, mask_ctx->markov_css_buf, device_param->size_markov_css) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer 
(hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_root_css_buf, CL_FALSE, 0, device_param->size_root_css, mask_ctx->root_css_buf, 0, NULL, NULL) == -1) return -1; diff --git a/src/ext_metal.m b/src/ext_metal.m new file mode 100644 index 000000000..c4e8a3a03 --- /dev/null +++ b/src/ext_metal.m @@ -0,0 +1,1416 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#include "common.h" +#include "types.h" +#include "memory.h" +#include "event.h" +#include "timer.h" +#include "ext_metal.h" + +#include + +#include +#include +#include + +typedef NS_ENUM(NSUInteger, hc_mtlFeatureSet) +{ + MTL_FEATURESET_MACOS_GPUFAMILY_1_V1 = 10000, + MTL_FEATURESET_MACOS_GPUFAMILY_1_V2 = 10001, + MTL_FEATURESET_MACOS_GPUFAMILY_1_V3 = 10003, + MTL_FEATURESET_MACOS_GPUFAMILY_1_V4 = 10004, + MTL_FEATURESET_MACOS_GPUFAMILY_2_V1 = 10005, + +} metalDeviceFeatureSet_macOS_t; + +typedef NS_ENUM(NSUInteger, hc_mtlLanguageVersion) +{ + MTL_LANGUAGEVERSION_1_0 = (1 << 16), + MTL_LANGUAGEVERSION_1_1 = (1 << 16) + 1, + MTL_LANGUAGEVERSION_1_2 = (1 << 16) + 2, + MTL_LANGUAGEVERSION_2_0 = (2 << 16), + MTL_LANGUAGEVERSION_2_1 = (2 << 16) + 1, + MTL_LANGUAGEVERSION_2_2 = (2 << 16) + 2, + MTL_LANGUAGEVERSION_2_3 = (2 << 16) + 3, + MTL_LANGUAGEVERSION_2_4 = (2 << 16) + 4, + +} metalLanguageVersion_t; + +static bool iokit_getGPUCore (void *hashcat_ctx, int *gpu_core) +{ + bool rc = false; + + CFMutableDictionaryRef matching = IOServiceMatching ("IOAccelerator"); + + io_service_t service = IOServiceGetMatchingService (kIOMasterPortDefault, matching); + + if (!service) + { + event_log_error (hashcat_ctx, "IOServiceGetMatchingService(): %08x", service); + + return rc; + } + + // "gpu-core-count" is present only on Apple Silicon + + CFNumberRef num = IORegistryEntryCreateCFProperty(service, CFSTR("gpu-core-count"), kCFAllocatorDefault, 0); + + int gc = 0; + + if (num == nil || CFNumberGetValue (num, kCFNumberIntType, &gc) == false) + { + //event_log_error (hashcat_ctx, "IORegistryEntryCreateCFProperty(): 'gpu-core-count' entry not found"); + } + else + { + *gpu_core = gc; + + rc = true; + } + + IOObjectRelease (service); + + return rc; +} + +static int hc_mtlInvocationHelper (id target, SEL selector, void *returnValue) +{ + if (target == nil) return -1; + if (selector == nil) return -1; + + if ([target respondsToSelector: selector]) + { + NSMethodSignature *signature = [object_getClass (target) instanceMethodSignatureForSelector: selector]; + NSInvocation *invocation = [NSInvocation invocationWithMethodSignature: signature]; + [invocation setTarget: target]; + [invocation setSelector: selector]; + [invocation invoke]; + [invocation getReturnValue: returnValue]; + + return 0; + } + + return -1; +} + +static int hc_mtlBuildOptionsToDict (void *hashcat_ctx, const char *build_options_buf, const char *include_path, NSMutableDictionary *build_options_dict) +{ + if (build_options_buf == nil) + { + event_log_error (hashcat_ctx, "%s(): build_options_buf is NULL", __func__); + return -1; + } + + if (build_options_dict == nil) + { + event_log_error (hashcat_ctx, "%s(): build_options_dict is NULL", __func__); + return -1; + } + + // NSString from build_options_buf + + NSString *options = [NSString stringWithCString: build_options_buf encoding: NSUTF8StringEncoding]; + + if (options == nil) + { + event_log_error (hashcat_ctx, "%s(): stringWithCString failed", __func__); + return -1; + } + + // replace '-D ' to '' + + options = [options stringByReplacingOccurrencesOfString:@"-D " withString:@""]; + + if 
(options == nil) + { + event_log_error (hashcat_ctx, "%s(): stringByReplacingOccurrencesOfString(-D) failed", __func__); + return -1; + } + + // replace '-I OpenCL ' to '' + + options = [options stringByReplacingOccurrencesOfString:@"-I OpenCL " withString:@""]; + + if (options == nil) + { + event_log_error (hashcat_ctx, "%s(): stringByReplacingOccurrencesOfString(-I OpenCL) failed", __func__); + return -1; + } + + //NSLog(@"options: '%@'", options); + + // creating NSDictionary from options + + NSArray *lines = [options componentsSeparatedByCharactersInSet:[NSCharacterSet whitespaceCharacterSet]]; + + for (NSString *aKeyValue in lines) + { + NSArray *components = [aKeyValue componentsSeparatedByString:@"="]; + + NSString *key = [components[0] stringByTrimmingCharactersInSet:[NSCharacterSet whitespaceCharacterSet]]; + NSString *value = nil; + + if ([components count] != 2) + { + if ([key isEqualToString:[NSString stringWithUTF8String:"KERNEL_STATIC"]] || + [key isEqualToString:[NSString stringWithUTF8String:"IS_APPLE_SILICON"]] || + [key isEqualToString:[NSString stringWithUTF8String:"DYNAMIC_LOCAL"]] || + [key isEqualToString:[NSString stringWithUTF8String:"_unroll"]] || + [key isEqualToString:[NSString stringWithUTF8String:"NO_UNROLL"]] || + [key isEqualToString:[NSString stringWithUTF8String:"FORCE_DISABLE_SHM"]]) + { + value = @"1"; + } + else + { + //event_log_warning (hashcat_ctx, "%s(): skipping malformed build option: %s", __func__, [key UTF8String]); + + continue; + } + } + else + { + value = [components[1] stringByTrimmingCharactersInSet:[NSCharacterSet whitespaceCharacterSet]]; + } + + [build_options_dict setObject:value forKey:key]; + } + + // if set, add INCLUDE_PATH to hack Apple kernel build from source limitation on -I usage + if (include_path != nil) + { + NSString *path_key = @"INCLUDE_PATH"; + NSString *path_value = [NSString stringWithCString: include_path encoding: NSUTF8StringEncoding]; + + [build_options_dict setObject:path_value forKey:path_key]; + } + + //NSLog(@"Dict:\n%@", build_options_dict); + + return 0; +} + +int mtl_init (void *hashcat_ctx) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + MTL_PTR *mtl = (MTL_PTR *) backend_ctx->mtl; + + memset (mtl, 0, sizeof (MTL_PTR)); + + mtl->devices = nil; + + if (MTLCreateSystemDefaultDevice() == nil) + { + event_log_error (hashcat_ctx, "Metal is not supported on this computer"); + + return -1; + } + + return 0; +} + +void mtl_close (void *hashcat_ctx) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + MTL_PTR *mtl = (MTL_PTR *) backend_ctx->mtl; + + if (mtl) + { + if (mtl->devices) + { + int count = (int) CFArrayGetCount (mtl->devices); + for (int i = 0; i < count; i++) + { + mtl_device_id device = (mtl_device_id) CFArrayGetValueAtIndex (mtl->devices, i); + if (device != nil) + { + hc_mtlReleaseDevice (hashcat_ctx, device); + } + } + mtl->devices = nil; + } + + hcfree (backend_ctx->mtl); + + backend_ctx->mtl = NULL; + } +} + +int hc_mtlDeviceGetCount (void *hashcat_ctx, int *count) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + MTL_PTR *mtl = (MTL_PTR *) backend_ctx->mtl; + + if (mtl == nil) return -1; + + CFArrayRef devices = (CFArrayRef) MTLCopyAllDevices(); + + if (devices == nil) + { + event_log_error (hashcat_ctx, "metalDeviceGetCount(): empty device objects"); + + return -1; + } + + mtl->devices = devices; + + *count = CFArrayGetCount (devices); + + return 0; +} + +int hc_mtlDeviceGet (void *hashcat_ctx, 
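The hc_mtlBuildOptionsToDict() routine above converts an OpenCL-style option string into the preprocessorMacros dictionary that MTLCompileOptions expects: "-D " and "-I OpenCL " are stripped, KEY=VALUE tokens are split on '=', the known value-less flags map to "1", and the optional include path is carried as INCLUDE_PATH. A worked example under those rules (the option string, path and function name are illustrative only):

#import <Foundation/Foundation.h>
#import <Metal/Metal.h>

// Worked example: the option string
//   "-D KERN_TYPE=1500 -D _unroll -I OpenCL -D KERNEL_STATIC"
// plus an include path is reduced to the equivalent of:
static MTLCompileOptions *example_compile_options (void)
{
  MTLCompileOptions *opts = [MTLCompileOptions new];

  opts.preprocessorMacros = @{ @"KERN_TYPE"     : @"1500",
                               @"_unroll"       : @"1",
                               @"KERNEL_STATIC" : @"1",
                               @"INCLUDE_PATH"  : @"/path/to/OpenCL" }; // cpath argument, if given

  return opts;
}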
mtl_device_id *metal_device, int ordinal) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + MTL_PTR *mtl = (MTL_PTR *) backend_ctx->mtl; + + if (mtl == nil) return -1; + + if (mtl->devices == nil) + { + event_log_error (hashcat_ctx, "%s(): invalid devices pointer", __func__); + + return -1; + } + + mtl_device_id device = (mtl_device_id) CFArrayGetValueAtIndex (mtl->devices, ordinal); + + if (device == nil) + { + event_log_error (hashcat_ctx, "metalDeviceGet(): invalid index"); + + return -1; + } + + *metal_device = device; + + return 0; +} + +int hc_mtlDeviceGetName (void *hashcat_ctx, char *name, size_t len, mtl_device_id metal_device) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + MTL_PTR *mtl = (MTL_PTR *) backend_ctx->mtl; + + if (mtl == NULL) return -1; + + if (metal_device == nil) + { + event_log_error (hashcat_ctx, "%s(): invalid device", __func__); + + return -1; + } + + if (len <= 0) + { + event_log_error (hashcat_ctx, "%s(): buffer length", __func__); + + return -1; + } + + id device_name_ptr = [metal_device name]; + + if (device_name_ptr == nil) + { + event_log_error (hashcat_ctx, "%s(): failed to get device name", __func__); + + return -1; + } + + const char *device_name_str = [device_name_ptr UTF8String]; + + if (device_name_str == nil) + { + event_log_error (hashcat_ctx, "%s(): failed to get UTF8String from device name", __func__); + + return -1; + } + + const size_t device_name_len = strlen (device_name_str); + + if (device_name_len <= 0) + { + event_log_error (hashcat_ctx, "%s(): invalid device name length", __func__); + + return -1; + } + + if (strncpy (name, device_name_str, (device_name_len > len) ? len : device_name_len) != name) + { + event_log_error (hashcat_ctx, "%s(): strncpy failed", __func__); + + return -1; + } + + return 0; +} + +int hc_mtlDeviceGetAttribute (void *hashcat_ctx, int *pi, metalDeviceAttribute_t attrib, mtl_device_id metal_device) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + MTL_PTR *mtl = (MTL_PTR *) backend_ctx->mtl; + + if (mtl == NULL) return -1; + + if (metal_device == nil) + { + event_log_error (hashcat_ctx, "%s(): invalid device", __func__); + + return -1; + } + + uint64_t val64 = 0; + bool valBool = false; + int valInt = 0; + + switch (attrib) + { + case MTL_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT: + // works only with Apple Silicon + if (iokit_getGPUCore (hashcat_ctx, pi) == false) *pi = 1; + break; + + case MTL_DEVICE_ATTRIBUTE_UNIFIED_MEMORY: + *pi = 0; + + SEL hasUnifiedMemorySelector = NSSelectorFromString(@"hasUnifiedMemory"); + + hc_mtlInvocationHelper (metal_device, hasUnifiedMemorySelector, &valBool); + + *pi = (valBool == true) ? 
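The UNIFIED_MEMORY case above goes through hc_mtlInvocationHelper() rather than referencing the property directly, presumably so the file still compiles against SDKs whose headers do not declare the selector. Expanded into a standalone sketch (hypothetical function name, same NSInvocation pattern as the helper defined earlier in this file):

#import <Foundation/Foundation.h>
#import <Metal/Metal.h>
#import <objc/runtime.h>
#include <stdbool.h>

// Sketch: query hasUnifiedMemory without a compile-time dependency on the
// property declaration; returns 0 when the selector is unavailable.
static int example_has_unified_memory (id<MTLDevice> device)
{
  SEL sel = NSSelectorFromString (@"hasUnifiedMemory");

  if ([device respondsToSelector: sel] == NO) return 0;

  bool val = false;

  NSMethodSignature *sig = [object_getClass (device) instanceMethodSignatureForSelector: sel];
  NSInvocation      *inv = [NSInvocation invocationWithMethodSignature: sig];

  [inv setTarget: device];
  [inv setSelector: sel];
  [inv invoke];
  [inv getReturnValue: &val];

  return (val == true) ? 1 : 0;
}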
1 : 0; + + break; + + case MTL_DEVICE_ATTRIBUTE_WARP_SIZE: + // return a fake size of 32, it will be updated later + *pi = 32; + break; + + case MTL_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR: + *pi = 0; + + if (*pi == 0 && [metal_device supportsFeatureSet:MTL_FEATURESET_MACOS_GPUFAMILY_2_V1] == true) *pi = 2; + if (*pi == 0 && [metal_device supportsFeatureSet:MTL_FEATURESET_MACOS_GPUFAMILY_1_V4] == true) *pi = 1; + if (*pi == 0 && [metal_device supportsFeatureSet:MTL_FEATURESET_MACOS_GPUFAMILY_1_V3] == true) *pi = 1; + if (*pi == 0 && [metal_device supportsFeatureSet:MTL_FEATURESET_MACOS_GPUFAMILY_1_V2] == true) *pi = 1; + if (*pi == 0 && [metal_device supportsFeatureSet:MTL_FEATURESET_MACOS_GPUFAMILY_1_V1] == true) *pi = 1; + + if (*pi == 0) + { + //event_log_error (hashcat_ctx, "%s(): no feature sets supported", __func__); + return -1; + } + + break; + + case MTL_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR: + *pi = 0; + + if (*pi == 0 && [metal_device supportsFeatureSet:MTL_FEATURESET_MACOS_GPUFAMILY_2_V1] == true) *pi = 1; + if (*pi == 0 && [metal_device supportsFeatureSet:MTL_FEATURESET_MACOS_GPUFAMILY_1_V4] == true) *pi = 4; + if (*pi == 0 && [metal_device supportsFeatureSet:MTL_FEATURESET_MACOS_GPUFAMILY_1_V3] == true) *pi = 3; + if (*pi == 0 && [metal_device supportsFeatureSet:MTL_FEATURESET_MACOS_GPUFAMILY_1_V2] == true) *pi = 2; + if (*pi == 0 && [metal_device supportsFeatureSet:MTL_FEATURESET_MACOS_GPUFAMILY_1_V1] == true) *pi = 1; + + if (*pi == 0) + { + //event_log_error (hashcat_ctx, "%s(): no feature sets supported", __func__); + return -1; + } + + break; + + case MTL_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK: + // M1 max is 1024 + // [MTLComputePipelineState maxTotalThreadsPerThreadgroup] + *pi = 1024; + break; + + case MTL_DEVICE_ATTRIBUTE_CLOCK_RATE: + // unknown + *pi = 1000000; + break; + + case MTL_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK: + // 32k + *pi = 32768; + break; + + case MTL_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY: + // Maximum function memory allocation for a buffer in the constant address space + // 64k + *pi = 64 * 1024; + break; + + case MTL_DEVICE_ATTRIBUTE_MAX_TRANSFER_RATE: + val64 = 0; + + SEL maxTransferRateSelector = NSSelectorFromString(@"maxTransferRate"); + + hc_mtlInvocationHelper (metal_device, maxTransferRateSelector, &val64); + + *pi = (val64 == 0) ? 0 : val64 / 125; // kb/s + + break; + + case MTL_DEVICE_ATTRIBUTE_HEADLESS: + valBool = [metal_device isHeadless]; + *pi = (valBool == true) ? 1 : 0; + break; + + case MTL_DEVICE_ATTRIBUTE_LOW_POWER: + valBool = [metal_device isLowPower]; + *pi = (valBool == true) ? 1 : 0; + break; + + case MTL_DEVICE_ATTRIBUTE_REMOVABLE: + valBool = [metal_device isRemovable]; + *pi = (valBool == true) ? 
1 : 0; + break; + + case MTL_DEVICE_ATTRIBUTE_REGISTRY_ID: + *pi = (int) [metal_device registryID]; + break; + + case MTL_DEVICE_ATTRIBUTE_PHYSICAL_LOCATION: + *pi = 0; + + SEL locationSelector = NSSelectorFromString(@"location"); + valInt = 0; + + hc_mtlInvocationHelper (metal_device, locationSelector, &valInt); + + *pi = valInt; + + break; + + case MTL_DEVICE_ATTRIBUTE_LOCATION_NUMBER: + *pi = 0; + + SEL locationNumberSelector = NSSelectorFromString(@"locationNumber"); + + valInt = 0; + hc_mtlInvocationHelper (metal_device, locationNumberSelector, &valInt); + + *pi = valInt; + + break; + + default: + event_log_error (hashcat_ctx, "%s(): unknown attribute (%d)", __func__, attrib); + return -1; + } + + return 0; +} + +int hc_mtlMemGetInfo (void *hashcat_ctx, size_t *mem_free, size_t *mem_total) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + MTL_PTR *mtl = (MTL_PTR *) backend_ctx->mtl; + + if (mtl == NULL) return -1; + + struct vm_statistics64 vm_stats; + vm_size_t page_size = 0; + unsigned int count = HOST_VM_INFO64_COUNT; + + mach_port_t port = mach_host_self(); + + if (host_page_size (port, &page_size) != KERN_SUCCESS) + { + event_log_error (hashcat_ctx, "metalMemGetInfo(): cannot get page_size"); + + return -1; + } + + if (host_statistics64 (port, HOST_VM_INFO64, (host_info64_t) &vm_stats, &count) != KERN_SUCCESS) + { + event_log_error (hashcat_ctx, "metalMemGetInfo(): cannot get vm_stats"); + + return -1; + } + + uint64_t mem_free_tmp = (uint64_t) (vm_stats.free_count - vm_stats.speculative_count) * page_size; + uint64_t mem_used_tmp = (uint64_t) (vm_stats.active_count + vm_stats.inactive_count + vm_stats.wire_count) * page_size; + + *mem_free = (size_t) mem_free_tmp; + *mem_total = (size_t) (mem_free_tmp + mem_used_tmp); + + return 0; +} + +int hc_mtlDeviceMaxMemAlloc (void *hashcat_ctx, size_t *bytes, mtl_device_id metal_device) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + MTL_PTR *mtl = (MTL_PTR *) backend_ctx->mtl; + + if (mtl == NULL) return -1; + + if (metal_device == nil) + { + event_log_error (hashcat_ctx, "%s(): invalid device", __func__); + + return -1; + } + + uint64_t memsize = 0; + + SEL maxBufferLengthSelector = NSSelectorFromString(@"maxBufferLength"); + + if (hc_mtlInvocationHelper (metal_device, maxBufferLengthSelector, &memsize) == -1) return -1; + + if (memsize == 0) + { + event_log_error (hashcat_ctx, "%s(): invalid maxBufferLength", __func__); + + return -1; + } + + *bytes = (size_t) memsize; + + return 0; +} + +int hc_mtlDeviceTotalMem (void *hashcat_ctx, size_t *bytes, mtl_device_id metal_device) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + MTL_PTR *mtl = (MTL_PTR *) backend_ctx->mtl; + + if (mtl == NULL) return -1; + + if (metal_device == nil) + { + event_log_error (hashcat_ctx, "%s(): invalid device", __func__); + + return -1; + } + + uint64_t memsize = 0; + + if (true) + { + memsize = [metal_device recommendedMaxWorkingSetSize]; + } + else + { + size_t len = sizeof (memsize); + + if (sysctlbyname ("hw.memsize", &memsize, &len, NULL, 0) != 0) + { + event_log_error (hashcat_ctx, "%s(): sysctlbyname(hw.memsize) failed", __func__); + + return -1; + } + } + + if (memsize == 0) + { + event_log_error (hashcat_ctx, "%s(): invalid memory size", __func__); + + return -1; + } + + *bytes = (size_t) memsize; + + return 0; +} + +int hc_mtlCreateCommandQueue (void *hashcat_ctx, mtl_device_id metal_device, mtl_command_queue *command_queue) +{ + backend_ctx_t 
*backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + MTL_PTR *mtl = (MTL_PTR *) backend_ctx->mtl; + + if (mtl == NULL) return -1; + + if (metal_device == nil) + { + event_log_error (hashcat_ctx, "%s(): invalid device", __func__); + + return -1; + } + + mtl_command_queue queue = [metal_device newCommandQueue]; + + if (queue == nil) + { + event_log_error (hashcat_ctx, "%s(): failed to create newCommandQueue", __func__); + + return -1; + } + + *command_queue = queue; + + return 0; + +} + +int hc_mtlCreateKernel (void *hashcat_ctx, mtl_device_id metal_device, mtl_library metal_library, const char *func_name, mtl_function *metal_function, mtl_pipeline *metal_pipeline) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + MTL_PTR *mtl = (MTL_PTR *) backend_ctx->mtl; + + if (mtl == NULL) return -1; + + if (metal_device == nil) + { + event_log_error (hashcat_ctx, "%s(): invalid device", __func__); + + return -1; + } + + if (metal_library == nil) + { + event_log_error (hashcat_ctx, "%s(): invalid library", __func__); + + return -1; + } + + if (func_name == nil) + { + event_log_error (hashcat_ctx, "%s(): invalid function name", __func__); + + return -1; + } + + NSError *error = nil; + + NSString *f_name = [NSString stringWithCString: func_name encoding: NSUTF8StringEncoding]; + + mtl_function mtl_func = [metal_library newFunctionWithName: f_name]; + + if (mtl_func == nil) + { + event_log_error (hashcat_ctx, "%s(): failed to create '%s' function", __func__, func_name); + + return -1; + } + + mtl_pipeline mtl_pipe = [metal_device newComputePipelineStateWithFunction: mtl_func error: &error]; + + if (error != nil) + { + event_log_error (hashcat_ctx, "%s(): failed to create '%s' pipeline, %s", __func__, func_name, [[error localizedDescription] UTF8String]); + + return -1; + } + + if (mtl_pipe == nil) + { + event_log_error (hashcat_ctx, "%s(): failed to create '%s' pipeline", __func__, func_name); + + return -1; + } + + *metal_function = mtl_func; + *metal_pipeline = mtl_pipe; + + return 0; +} + +int hc_mtlGetMaxTotalThreadsPerThreadgroup (void *hashcat_ctx, mtl_pipeline metal_pipeline, unsigned int *maxTotalThreadsPerThreadgroup) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + MTL_PTR *mtl = (MTL_PTR *) backend_ctx->mtl; + + if (mtl == NULL) return -1; + + if (metal_pipeline == nil) + { + event_log_error (hashcat_ctx, "%s(): invalid pipeline", __func__); + + return -1; + } + + *maxTotalThreadsPerThreadgroup = [metal_pipeline maxTotalThreadsPerThreadgroup]; + + return 0; +} + +int hc_mtlGetThreadExecutionWidth (void *hashcat_ctx, mtl_pipeline metal_pipeline, unsigned int *threadExecutionWidth) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + MTL_PTR *mtl = (MTL_PTR *) backend_ctx->mtl; + + if (mtl == NULL) return -1; + + if (metal_pipeline == nil) + { + event_log_error (hashcat_ctx, "%s(): invalid pipeline", __func__); + + return -1; + } + + *threadExecutionWidth = [metal_pipeline threadExecutionWidth]; + + return 0; +} + +int hc_mtlCreateBuffer (void *hashcat_ctx, mtl_device_id metal_device, size_t size, void *ptr, mtl_mem *metal_buffer) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + MTL_PTR *mtl = (MTL_PTR *) backend_ctx->mtl; + + if (mtl == NULL) return -1; + + if (metal_device == nil) + { + event_log_error (hashcat_ctx, "%s(): invalid device", __func__); + + return -1; + } + + mtl_mem buf = NULL; + + MTLResourceOptions bufferOptions = 
MTLResourceStorageModeShared; + + if (ptr == NULL) + { + buf = [metal_device newBufferWithLength:size options:bufferOptions]; + } + else + { + buf = [metal_device newBufferWithBytes:ptr length:size options:bufferOptions]; + } + + if (buf == nil) + { + event_log_error (hashcat_ctx, "%s(): %s failed (size: %zu)", __func__, (ptr == NULL) ? "newBufferWithLength" : "newBufferWithBytes", size); + + return -1; + } + + *metal_buffer = buf; + + return 0; +} + +int hc_mtlReleaseMemObject (void *hashcat_ctx, mtl_mem metal_buffer) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + MTL_PTR *mtl = (MTL_PTR *) backend_ctx->mtl; + + if (mtl == NULL) return -1; + + if (metal_buffer == nil) + { + event_log_error (hashcat_ctx, "%s(): invalid metal buffer", __func__); + + return -1; + } + + [metal_buffer setPurgeableState:MTLPurgeableStateEmpty]; + [metal_buffer release]; + + return 0; +} + +int hc_mtlReleaseFunction (void *hashcat_ctx, mtl_function metal_function) +{ + if (metal_function == nil) + { + event_log_error (hashcat_ctx, "%s(): invalid metal function", __func__); + + return -1; + } + + [metal_function release]; + + return 0; +} + +int hc_mtlReleaseLibrary (void *hashcat_ctx, mtl_library metal_library) +{ + if (metal_library == nil) + { + event_log_error (hashcat_ctx, "%s(): invalid metal library", __func__); + + return -1; + } + + [metal_library release]; + metal_library = nil; + + return 0; +} + +int hc_mtlReleaseCommandQueue (void *hashcat_ctx, mtl_command_queue command_queue) +{ + if (command_queue == nil) + { + event_log_error (hashcat_ctx, "%s(): invalid metal command queue", __func__); + + return -1; + } + + [command_queue release]; + command_queue = nil; + + return 0; +} + +int hc_mtlReleaseDevice (void *hashcat_ctx, mtl_device_id metal_device) +{ + if (metal_device == nil) + { + event_log_error (hashcat_ctx, "%s(): invalid metal device", __func__); + + return -1; + } + + [metal_device release]; + metal_device = nil; + + return 0; +} + +// device to device + +int hc_mtlMemcpyDtoD (void *hashcat_ctx, mtl_command_queue command_queue, mtl_mem buf_dst, size_t buf_dst_off, mtl_mem buf_src, size_t buf_src_off, size_t buf_size) +{ + if (command_queue == nil) + { + event_log_error (hashcat_ctx, "%s(): metal command queue is invalid", __func__); + return -1; + } + + if (buf_src == nil) + { + event_log_error (hashcat_ctx, "%s(): metal src buffer is invalid", __func__); + return -1; + } + + if (buf_src_off < 0) + { + event_log_error (hashcat_ctx, "%s(): src buffer offset is invalid", __func__); + return -1; + } + + if (buf_dst == nil) + { + event_log_error (hashcat_ctx, "%s(): metal dst buffer is invalid", __func__); + return -1; + } + + if (buf_dst_off < 0) + { + event_log_error (hashcat_ctx, "%s(): dst buffer offset is invalid", __func__); + return -1; + } + + if (buf_size <= 0) + { + event_log_error (hashcat_ctx, "%s(): buffer size is invalid", __func__); + return -1; + } + + id command_buffer = [command_queue commandBuffer]; + + if (command_buffer == nil) + { + event_log_error (hashcat_ctx, "%s(): failed to create a new command buffer", __func__); + return -1; + } + + id blit_encoder = [command_buffer blitCommandEncoder]; + + if (blit_encoder == nil) + { + event_log_error (hashcat_ctx, "%s(): failed to create a blit command encoder", __func__); + return -1; + } + + // copy + + [blit_encoder copyFromBuffer: buf_src sourceOffset: buf_src_off toBuffer: buf_dst destinationOffset: buf_dst_off size: buf_size]; + + // finish encoding and start the data transfer + + 
[blit_encoder endEncoding]; + [command_buffer commit]; + + // Wait for complete + + [command_buffer waitUntilCompleted]; + + return 0; +} + +// host to device + +int hc_mtlMemcpyHtoD (void *hashcat_ctx, mtl_command_queue command_queue, mtl_mem buf_dst, size_t buf_dst_off, const void *buf_src, size_t buf_size) +{ + if (command_queue == nil) + { + event_log_error (hashcat_ctx, "%s(): metal command queue is invalid", __func__); + return -1; + } + + if (buf_src == nil) + { + event_log_error (hashcat_ctx, "%s(): metal src buffer is invalid", __func__); + return -1; + } + + if (buf_dst == nil) + { + event_log_error (hashcat_ctx, "%s(): host dst buffer is invalid", __func__); + return -1; + } + + if (buf_size <= 0) + { + event_log_error (hashcat_ctx, "%s(): buffer size is invalid", __func__); + return -1; + } + + if (buf_dst_off < 0) + { + event_log_error (hashcat_ctx, "%s(): buffer dst offset is invalid", __func__); + return -1; + } + + void *buf_dst_ptr = [buf_dst contents]; + + if (buf_dst_ptr == nil) + { + event_log_error (hashcat_ctx, "%s(): failed to get metal buffer data pointer", __func__); + + return -1; + } + + if (memcpy (buf_dst_ptr + buf_dst_off, buf_src, buf_size) != buf_dst_ptr + buf_dst_off) + { + event_log_error (hashcat_ctx, "%s(): memcpy failed", __func__); + + return -1; + } + + [buf_dst didModifyRange: NSMakeRange (buf_dst_off, buf_size)]; + + return 0; +} + +// device to host + +int hc_mtlMemcpyDtoH (void *hashcat_ctx, mtl_command_queue command_queue, void *buf_dst, mtl_mem buf_src, size_t buf_src_off, size_t buf_size) +{ + if (command_queue == nil) + { + event_log_error (hashcat_ctx, "%s(): metal command queue is invalid", __func__); + return -1; + } + + if (buf_src == nil) + { + event_log_error (hashcat_ctx, "%s(): metal src buffer is invalid", __func__); + return -1; + } + + if (buf_dst == nil) + { + event_log_error (hashcat_ctx, "%s(): host dst buffer is invalid", __func__); + return -1; + } + + if (buf_size <= 0) + { + event_log_error (hashcat_ctx, "%s(): buffer size is invalid", __func__); + return -1; + } + + id command_buffer = [command_queue commandBuffer]; + + if (command_buffer == nil) + { + event_log_error (hashcat_ctx, "%s(): failed to create a new command buffer", __func__); + return -1; + } + + id blit_encoder = [command_buffer blitCommandEncoder]; + + [blit_encoder synchronizeResource: buf_src]; + + // Finish encoding and start the data transfer to the CPU + + [blit_encoder endEncoding]; + [command_buffer commit]; + + // Wait for complete + + [command_buffer waitUntilCompleted]; + + // get src buf ptr + + void *buf_src_ptr = [buf_src contents]; + + if (buf_src_ptr == nil) + { + event_log_error (hashcat_ctx, "%s(): failed to get metal buffer data pointer", __func__); + + return -1; + } + + if (memcpy (buf_dst, buf_src_ptr + buf_src_off, buf_size) != buf_dst) + { + event_log_error (hashcat_ctx, "%s(): memcpy failed", __func__); + + return -1; + } + + return 0; +} + +int hc_mtlRuntimeGetVersionString (void *hashcat_ctx, char *runtimeVersion_str, size_t *size) +{ + CFURLRef plist_url = CFURLCreateWithFileSystemPath (kCFAllocatorDefault, CFSTR("/System/Library/Frameworks/Metal.framework/Versions/Current/Resources/version.plist"), kCFURLPOSIXPathStyle, false); + + if (plist_url == NULL) + { + event_log_error (hashcat_ctx, "%s(): CFURLCreateWithFileSystemPath() failed\n", __func__); + + return -1; + } + + CFReadStreamRef plist_stream = CFReadStreamCreateWithFile (NULL, plist_url); + + if (plist_stream == NULL) + { + event_log_error (hashcat_ctx, "%s(): 
CFReadStreamCreateWithFile() failed\n", __func__); + + CFRelease (plist_url); + + return -1; + } + + if (CFReadStreamOpen (plist_stream) == false) + { + event_log_error (hashcat_ctx, "%s(): CFReadStreamOpen() failed\n", __func__); + + CFRelease (plist_stream); + CFRelease (plist_url); + + return -1; + } + + CFPropertyListRef plist_prop = CFPropertyListCreateWithStream (NULL, plist_stream, 0, kCFPropertyListImmutable, NULL, NULL); + + if (plist_prop == NULL) + { + event_log_error (hashcat_ctx, "%s(): CFPropertyListCreateWithStream() failed\n", __func__); + + CFReadStreamClose (plist_stream); + + CFRelease (plist_stream); + CFRelease (plist_url); + return -1; + } + + CFStringRef runtime_version_str = CFRetain (CFDictionaryGetValue (plist_prop, CFSTR("CFBundleVersion"))); + + if (runtime_version_str != NULL) + { + if (runtimeVersion_str == NULL) + { + CFIndex len = CFStringGetLength (runtime_version_str); + CFIndex maxSize = CFStringGetMaximumSizeForEncoding (len, kCFStringEncodingUTF8) + 1; + *size = maxSize; + return 0; + } + + CFIndex maxSize = *size; + + if (CFStringGetCString (runtime_version_str, runtimeVersion_str, maxSize, kCFStringEncodingUTF8) == false) + { + event_log_error (hashcat_ctx, "%s(): CFStringGetCString() failed\n", __func__); + + hcfree (runtimeVersion_str); + + return -1; + } + + return 0; + } + + return -1; +} + +int hc_mtlEncodeComputeCommand_pre (void *hashcat_ctx, mtl_pipeline metal_pipeline, mtl_command_queue metal_command_queue, mtl_command_buffer *metal_command_buffer, mtl_command_encoder *metal_command_encoder) +{ + if (metal_pipeline == nil) + { + event_log_error (hashcat_ctx, "%s(): invalid metal_pipeline", __func__); + + return -1; + } + + if (metal_command_queue == nil) + { + event_log_error (hashcat_ctx, "%s(): invalid metal_command_queue", __func__); + + return -1; + } + + id metal_commandBuffer = [metal_command_queue commandBuffer]; + + if (metal_commandBuffer == nil) + { + event_log_error (hashcat_ctx, "%s(): invalid metal_commandBuffer", __func__); + + return -1; + } + + id metal_commandEncoder = [metal_commandBuffer computeCommandEncoder]; + + if (metal_commandEncoder == nil) + { + event_log_error (hashcat_ctx, "%s(): invalid metal_commandBuffer", __func__); + + return -1; + } + + [metal_commandEncoder setComputePipelineState: metal_pipeline]; + + *metal_command_buffer = metal_commandBuffer; + *metal_command_encoder = metal_commandEncoder; + + return 0; +} + +int hc_mtlSetCommandEncoderArg (void *hashcat_ctx, mtl_command_encoder metal_command_encoder, size_t off, size_t idx, mtl_mem buf, void *host_data, size_t host_data_size) +{ + if (metal_command_encoder == nil) + { + event_log_error (hashcat_ctx, "%s(): invalid metal_command_encoder", __func__); + + return -1; + } + + if (buf == nil && host_data == nil) + { + event_log_error (hashcat_ctx, "%s(): invalid buf/host_data", __func__); + + return -1; + } + + if (buf == nil) + { + if (host_data_size <= 0) + { + event_log_error (hashcat_ctx, "%s(): invalid host_data size", __func__); + + return -1; + } + } + else + { + if (off < 0) + { + event_log_error (hashcat_ctx, "%s(): invalid buf off", __func__); + + return -1; + } + } + + if (idx < 0) + { + event_log_error (hashcat_ctx, "%s(): invalid buf/host_data idx", __func__); + + return -1; + } + + if (host_data == nil) + { + [metal_command_encoder setBuffer: buf offset: off atIndex: idx]; + } + else + { + [metal_command_encoder setBytes: host_data length: host_data_size atIndex: idx]; + } + + return 0; +} + +int hc_mtlEncodeComputeCommand (void *hashcat_ctx, 
mtl_command_encoder metal_command_encoder, mtl_command_buffer metal_command_buffer, size_t global_work_size, size_t local_work_size, double *ms) +{ + MTLSize numThreadgroups = {local_work_size, 1, 1}; + MTLSize threadsGroup = {global_work_size, 1, 1}; + + if (metal_command_encoder == nil) + { + event_log_error (hashcat_ctx, "%s(): invalid metal_command_encoder", __func__); + + return -1; + } + + if (metal_command_buffer == nil) + { + event_log_error (hashcat_ctx, "%s(): invalid metal_command_buffer", __func__); + + return -1; + } + + [metal_command_encoder dispatchThreadgroups: threadsGroup threadsPerThreadgroup: numThreadgroups]; + + [metal_command_encoder endEncoding]; + [metal_command_buffer commit]; + [metal_command_buffer waitUntilCompleted]; + + CFTimeInterval myGPUStartTime = 0; + CFTimeInterval myGPUEndTime = 0; + + SEL myGPUStartTimeSelector = NSSelectorFromString(@"GPUStartTime"); + SEL myGPUEndTimeSelector = NSSelectorFromString(@"GPUEndTime"); + + if (hc_mtlInvocationHelper (metal_command_buffer, myGPUStartTimeSelector, &myGPUStartTime) == -1) return -1; + if (hc_mtlInvocationHelper (metal_command_buffer, myGPUEndTimeSelector, &myGPUEndTime) == -1) return -1; + + CFTimeInterval elapsed = myGPUEndTime - myGPUStartTime; + + *ms = (1000.0 * elapsed); + + return 0; +} + +int hc_mtlCreateLibraryWithFile (void *hashcat_ctx, mtl_device_id metal_device, const char *cached_file, mtl_library *metal_library) +{ + NSError *error = nil; + + if (metal_device == nil) + { + event_log_error (hashcat_ctx, "%s(): invalid metal device", __func__); + + return -1; + } + + if (cached_file == nil) + { + event_log_error (hashcat_ctx, "%s(): invalid metallib", __func__); + + return -1; + } + + NSString *k_string = [NSString stringWithCString: cached_file encoding: NSUTF8StringEncoding]; + + if (k_string != nil) + { + id r = [metal_device newLibraryWithFile: k_string error: &error]; + + if (error != nil) + { + event_log_error (hashcat_ctx, "%s(): failed to create metal library from metallib, %s", __func__, [[error localizedDescription] UTF8String]); + return -1; + } + + *metal_library = r; + + return 0; + } + + return -1; +} + +int hc_mtlCreateLibraryWithSource (void *hashcat_ctx, mtl_device_id metal_device, const char *kernel_sources, const char *build_options_buf, const char *cpath, mtl_library *metal_library) +{ + NSError *error = nil; + + NSString *k_string = [NSString stringWithCString: kernel_sources encoding: NSUTF8StringEncoding]; + + if (k_string != nil) + { + MTLCompileOptions *compileOptions = [MTLCompileOptions new]; + + NSMutableDictionary *build_options_dict = nil; + + if (build_options_buf != nil) + { + //printf("using build_opts from arg:\n%s\n", build_options_buf); + + build_options_dict = [NSMutableDictionary dictionary]; //[[NSMutableDictionary alloc] init]; + + if (hc_mtlBuildOptionsToDict (hashcat_ctx, build_options_buf, cpath, build_options_dict) == -1) + { + event_log_error (hashcat_ctx, "%s(): failed to build options dictionary", __func__); + + [build_options_dict release]; + return -1; + } + + compileOptions.preprocessorMacros = build_options_dict; + } + + // todo: detect current os version and choose the right +// compileOptions.languageVersion = MTL_LANGUAGEVERSION_2_3; +/* + if (@available(macOS 12.0, *)) + { + compileOptions.languageVersion = MTL_LANGUAGEVERSION_2_4; + } + else if (@available(macOS 11.0, *)) + { + compileOptions.languageVersion = MTL_LANGUAGEVERSION_2_3; + } + else if (@available(macOS 10.15, *)) + { + compileOptions.languageVersion = MTL_LANGUAGEVERSION_2_2; 
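Taken together, hc_mtlEncodeComputeCommand_pre(), hc_mtlSetCommandEncoderArg() and hc_mtlEncodeComputeCommand() form the per-launch dispatch path. A reduced usage sketch under the signatures shown above (the backend.c call sites are not part of this section, so the variable names and work sizes are illustrative):

#include "ext_metal.h" // mtl_* typedefs and the hc_mtl* prototypes from this patch
#include <stddef.h>
#include <stdint.h>

// Sketch: one kernel launch through the wrappers defined in this file.
// 'pipeline', 'queue' and 'd_pws_buf' stand in for device_param fields.
static int example_launch (void *hashcat_ctx, mtl_pipeline pipeline, mtl_command_queue queue, mtl_mem d_pws_buf, uint32_t kernel_param, size_t work_size, size_t threads)
{
  mtl_command_buffer  cb = NULL;
  mtl_command_encoder ce = NULL;

  double ms = 0;

  if (hc_mtlEncodeComputeCommand_pre (hashcat_ctx, pipeline, queue, &cb, &ce) == -1) return -1;

  // slot 0: a device buffer; slot 1: a small host-side constant block
  if (hc_mtlSetCommandEncoderArg (hashcat_ctx, ce, 0, 0, d_pws_buf, NULL, 0) == -1) return -1;
  if (hc_mtlSetCommandEncoderArg (hashcat_ctx, ce, 0, 1, NULL, &kernel_param, sizeof (kernel_param)) == -1) return -1;

  // dispatches 'work_size' threadgroups of 'threads' threads each and
  // returns the GPUStartTime/GPUEndTime delta in 'ms'
  if (hc_mtlEncodeComputeCommand (hashcat_ctx, ce, cb, work_size, threads, &ms) == -1) return -1;

  return 0;
}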
+ } + else if (@available(macOS 10.14, *)) + { + compileOptions.languageVersion = MTL_LANGUAGEVERSION_2_1; + } + else if (@available(macOS 10.13, *)) + { + compileOptions.languageVersion = MTL_LANGUAGEVERSION_2_0; + } + else if (@available(macOS 10.12, *)) + { + compileOptions.languageVersion = MTL_LANGUAGEVERSION_1_2; + } + else if (@available(macOS 10.11, *)) + { + compileOptions.languageVersion = MTL_LANGUAGEVERSION_1_1; + } +*/ + id r = [metal_device newLibraryWithSource: k_string options: compileOptions error: &error]; + + [compileOptions release]; + compileOptions = nil; + + if (build_options_dict != nil) + { + [build_options_dict release]; + build_options_dict = nil; + } + + if (error != nil) + { + event_log_error (hashcat_ctx, "%s(): failed to create metal library, %s", __func__, [[error localizedDescription] UTF8String]); + + return -1; + } + + *metal_library = r; + + return 0; + } + + return -1; +} diff --git a/src/hashes.c b/src/hashes.c index ffb47b182..3ec190812 100644 --- a/src/hashes.c +++ b/src/hashes.c @@ -355,6 +355,20 @@ int check_hash (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, pla } } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + rc = hc_mtlMemcpyDtoH (hashcat_ctx, device_param->metal_command_queue, tmps, device_param->metal_d_tmps, plain->gidvid * hashconfig->tmp_size, hashconfig->tmp_size); + + if (rc == -1) + { + hcfree (tmps); + + return -1; + } + } + #endif + if (device_param->is_opencl == true) { rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_tmps, CL_FALSE, plain->gidvid * hashconfig->tmp_size, hashconfig->tmp_size, tmps, 0, NULL, &opencl_event); @@ -574,6 +588,13 @@ int check_cracked (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param) if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyDtoH (hashcat_ctx, device_param->metal_command_queue, &num_cracked, device_param->metal_d_result, 0, sizeof (u32)) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { /* blocking */ @@ -624,6 +645,20 @@ int check_cracked (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param) } } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + rc = hc_mtlMemcpyDtoH (hashcat_ctx, device_param->metal_command_queue, cracked, device_param->metal_d_plain_bufs, 0, num_cracked * sizeof (plain_t)); + + if (rc == -1) + { + hcfree (cracked); + + return -1; + } + } + #endif + if (device_param->is_opencl == true) { /* blocking */ @@ -703,6 +738,18 @@ int check_cracked (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param) } } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + rc = run_metal_kernel_memset32 (hashcat_ctx, device_param, device_param->metal_d_digests_shown, salt_buf->digests_offset * sizeof (u32), 0, salt_buf->digests_cnt * sizeof (u32)); + + if (rc == -1) + { + break; + } + } + #endif + if (device_param->is_opencl == true) { /* NOTE: run_opencl_kernel_bzero() does not handle buffer offset */ @@ -751,6 +798,13 @@ int check_cracked (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param) if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_result, sizeof (u32)) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_result, sizeof (u32)) == -1) return -1; + } + 
#endif + if (device_param->is_opencl == true) { if (run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_result, sizeof (u32)) == -1) return -1; diff --git a/src/modules/module_01500.c b/src/modules/module_01500.c index 35edcf92b..c4ccbc51c 100644 --- a/src/modules/module_01500.c +++ b/src/modules/module_01500.c @@ -151,7 +151,7 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY { if ((user_options->attack_mode == ATTACK_MODE_BF) && (hashes->salts_cnt == 1) && (user_options->slow_candidates == false)) { - hc_asprintf (&jit_build_options, "-DDESCRYPT_SALT=%u", hashes->salts_buf[0].salt_buf[0] & 0xfff); + hc_asprintf (&jit_build_options, "-D DESCRYPT_SALT=%u", hashes->salts_buf[0].salt_buf[0] & 0xfff); } return jit_build_options; @@ -161,7 +161,7 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY { if ((user_options->attack_mode == ATTACK_MODE_BF) && (hashes->salts_cnt == 1) && (user_options->slow_candidates == false)) { - hc_asprintf (&jit_build_options, "-DDESCRYPT_SALT=%u -D _unroll", hashes->salts_buf[0].salt_buf[0] & 0xfff); + hc_asprintf (&jit_build_options, "-D DESCRYPT_SALT=%u -D _unroll", hashes->salts_buf[0].salt_buf[0] & 0xfff); } } // ROCM @@ -169,7 +169,7 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY { if ((user_options->attack_mode == ATTACK_MODE_BF) && (hashes->salts_cnt == 1) && (user_options->slow_candidates == false)) { - hc_asprintf (&jit_build_options, "-DDESCRYPT_SALT=%u -D _unroll", hashes->salts_buf[0].salt_buf[0] & 0xfff); + hc_asprintf (&jit_build_options, "-D DESCRYPT_SALT=%u -D _unroll", hashes->salts_buf[0].salt_buf[0] & 0xfff); } } // ROCM @@ -177,7 +177,7 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY { if ((user_options->attack_mode == ATTACK_MODE_BF) && (hashes->salts_cnt == 1) && (user_options->slow_candidates == false)) { - hc_asprintf (&jit_build_options, "-DDESCRYPT_SALT=%u -D _unroll -fno-experimental-new-pass-manager", hashes->salts_buf[0].salt_buf[0] & 0xfff); + hc_asprintf (&jit_build_options, "-D DESCRYPT_SALT=%u -D _unroll -fno-experimental-new-pass-manager", hashes->salts_buf[0].salt_buf[0] & 0xfff); } else { @@ -188,7 +188,7 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY { if ((user_options->attack_mode == ATTACK_MODE_BF) && (hashes->salts_cnt == 1) && (user_options->slow_candidates == false)) { - hc_asprintf (&jit_build_options, "-DDESCRYPT_SALT=%u", hashes->salts_buf[0].salt_buf[0] & 0xfff); + hc_asprintf (&jit_build_options, "-D DESCRYPT_SALT=%u", hashes->salts_buf[0].salt_buf[0] & 0xfff); } } diff --git a/src/modules/module_06211.c b/src/modules/module_06211.c index 4217f28ed..43b455dba 100644 --- a/src/modules/module_06211.c +++ b/src/modules/module_06211.c @@ -77,7 +77,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_06212.c b/src/modules/module_06212.c index 3d482b1b4..f610dc775 100644 --- a/src/modules/module_06212.c +++ b/src/modules/module_06212.c @@ -77,7 +77,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X 
Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_06213.c b/src/modules/module_06213.c index d6c164198..04803cc70 100644 --- a/src/modules/module_06213.c +++ b/src/modules/module_06213.c @@ -77,7 +77,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_06241.c b/src/modules/module_06241.c index 752cbf39c..8189b3fc6 100644 --- a/src/modules/module_06241.c +++ b/src/modules/module_06241.c @@ -78,7 +78,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_06242.c b/src/modules/module_06242.c index ee63223ce..6f93913dc 100644 --- a/src/modules/module_06242.c +++ b/src/modules/module_06242.c @@ -78,7 +78,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_06243.c b/src/modules/module_06243.c index acd26004a..164fe982f 100644 --- a/src/modules/module_06243.c +++ b/src/modules/module_06243.c @@ -78,7 +78,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_06800.c b/src/modules/module_06800.c index a897b4f69..d76376921 100644 --- a/src/modules/module_06800.c +++ b/src/modules/module_06800.c @@ -56,7 +56,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_08900.c b/src/modules/module_08900.c index bca25fc39..06507a856 100644 --- a/src/modules/module_08900.c +++ b/src/modules/module_08900.c @@ -56,7 +56,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == 
VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } // amdgpu-pro-20.50-1234664-ubuntu-20.04 (legacy) @@ -266,7 +269,7 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY char *jit_build_options = NULL; - hc_asprintf (&jit_build_options, "-DSCRYPT_N=%u -DSCRYPT_R=%u -DSCRYPT_P=%u -DSCRYPT_TMTO=%" PRIu64 " -DSCRYPT_TMP_ELEM=%" PRIu64, + hc_asprintf (&jit_build_options, "-D SCRYPT_N=%u -D SCRYPT_R=%u -D SCRYPT_P=%u -D SCRYPT_TMTO=%" PRIu64 " -D SCRYPT_TMP_ELEM=%" PRIu64, hashes->salts_buf[0].scrypt_N, hashes->salts_buf[0].scrypt_r, hashes->salts_buf[0].scrypt_p, diff --git a/src/modules/module_09300.c b/src/modules/module_09300.c index cb61a0f6c..74c4fe913 100644 --- a/src/modules/module_09300.c +++ b/src/modules/module_09300.c @@ -56,7 +56,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; @@ -258,7 +261,7 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY char *jit_build_options = NULL; - hc_asprintf (&jit_build_options, "-DSCRYPT_N=%u -DSCRYPT_R=%u -DSCRYPT_P=%u -DSCRYPT_TMTO=%" PRIu64 " -DSCRYPT_TMP_ELEM=%" PRIu64, + hc_asprintf (&jit_build_options, "-D SCRYPT_N=%u -D SCRYPT_R=%u -D SCRYPT_P=%u -D SCRYPT_TMTO=%" PRIu64 " -D SCRYPT_TMP_ELEM=%" PRIu64, hashes->salts_buf[0].scrypt_N, hashes->salts_buf[0].scrypt_r, hashes->salts_buf[0].scrypt_p, diff --git a/src/modules/module_09500.c b/src/modules/module_09500.c index cbbaca530..5fb4be868 100644 --- a/src/modules/module_09500.c +++ b/src/modules/module_09500.c @@ -62,7 +62,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_09600.c b/src/modules/module_09600.c index a9831b9aa..d9753cbe2 100644 --- a/src/modules/module_09600.c +++ b/src/modules/module_09600.c @@ -65,7 +65,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_12700.c b/src/modules/module_12700.c index 419ab38e9..f69858246 100644 --- a/src/modules/module_12700.c +++ b/src/modules/module_12700.c @@ -60,7 +60,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_13711.c 
b/src/modules/module_13711.c index df4b038ef..7d956350f 100644 --- a/src/modules/module_13711.c +++ b/src/modules/module_13711.c @@ -88,7 +88,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_13712.c b/src/modules/module_13712.c index 585fde139..3093e80e2 100644 --- a/src/modules/module_13712.c +++ b/src/modules/module_13712.c @@ -88,7 +88,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_13713.c b/src/modules/module_13713.c index c2d573f4d..3a53a4ef5 100644 --- a/src/modules/module_13713.c +++ b/src/modules/module_13713.c @@ -88,7 +88,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_13721.c b/src/modules/module_13721.c index 2b54cf267..9db377617 100644 --- a/src/modules/module_13721.c +++ b/src/modules/module_13721.c @@ -89,7 +89,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } if (device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) diff --git a/src/modules/module_13722.c b/src/modules/module_13722.c index 4b3242ca4..4a899279e 100644 --- a/src/modules/module_13722.c +++ b/src/modules/module_13722.c @@ -89,7 +89,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } if (device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) diff --git a/src/modules/module_13723.c b/src/modules/module_13723.c index 3d28e8437..4871ca55a 100644 --- a/src/modules/module_13723.c +++ b/src/modules/module_13723.c @@ -89,7 +89,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } if (device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) diff 
--git a/src/modules/module_13733.c b/src/modules/module_13733.c index ac8b7c030..da0dbc473 100644 --- a/src/modules/module_13733.c +++ b/src/modules/module_13733.c @@ -89,7 +89,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AppleM1, OpenCL, MTLCompilerService never-end (pure/optimized kernel) if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_13741.c b/src/modules/module_13741.c index f6df663e8..74c0283fd 100644 --- a/src/modules/module_13741.c +++ b/src/modules/module_13741.c @@ -89,7 +89,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_13742.c b/src/modules/module_13742.c index 59aca8d53..0206b424b 100644 --- a/src/modules/module_13742.c +++ b/src/modules/module_13742.c @@ -89,7 +89,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_13743.c b/src/modules/module_13743.c index cf58c6d44..093fbfd1f 100644 --- a/src/modules/module_13743.c +++ b/src/modules/module_13743.c @@ -89,7 +89,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_13751.c b/src/modules/module_13751.c index fb999023a..57435bca3 100644 --- a/src/modules/module_13751.c +++ b/src/modules/module_13751.c @@ -88,7 +88,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_13752.c b/src/modules/module_13752.c index 02fe05693..6cc3b9153 100644 --- a/src/modules/module_13752.c +++ b/src/modules/module_13752.c @@ -88,7 +88,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_13753.c b/src/modules/module_13753.c index 443ba0274..f5fefbdc1 100644 --- 
a/src/modules/module_13753.c +++ b/src/modules/module_13753.c @@ -88,7 +88,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_13761.c b/src/modules/module_13761.c index dd686887a..bb18d91f8 100644 --- a/src/modules/module_13761.c +++ b/src/modules/module_13761.c @@ -89,7 +89,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_13762.c b/src/modules/module_13762.c index 116db35af..c75b98a49 100644 --- a/src/modules/module_13762.c +++ b/src/modules/module_13762.c @@ -89,7 +89,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_13763.c b/src/modules/module_13763.c index 070b8df21..43fac2ef3 100644 --- a/src/modules/module_13763.c +++ b/src/modules/module_13763.c @@ -89,7 +89,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_14700.c b/src/modules/module_14700.c index 5ce1ee308..8c447a784 100644 --- a/src/modules/module_14700.c +++ b/src/modules/module_14700.c @@ -66,7 +66,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_15200.c b/src/modules/module_15200.c index 976f6e349..66a994216 100644 --- a/src/modules/module_15200.c +++ b/src/modules/module_15200.c @@ -56,7 +56,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_15700.c b/src/modules/module_15700.c index 5f97873af..c448572e1 100644 --- a/src/modules/module_15700.c +++ b/src/modules/module_15700.c @@ -279,7 +279,7 @@ char *module_jit_build_options 
(MAYBE_UNUSED const hashconfig_t *hashconfig, MAY char *jit_build_options = NULL; - hc_asprintf (&jit_build_options, "-DSCRYPT_N=%u -DSCRYPT_R=%u -DSCRYPT_P=%u -DSCRYPT_TMTO=%" PRIu64 " -DSCRYPT_TMP_ELEM=%" PRIu64, + hc_asprintf (&jit_build_options, "-D SCRYPT_N=%u -D SCRYPT_R=%u -D SCRYPT_P=%u -D SCRYPT_TMTO=%" PRIu64 " -D SCRYPT_TMP_ELEM=%" PRIu64, hashes->salts_buf[0].scrypt_N, hashes->salts_buf[0].scrypt_r, hashes->salts_buf[0].scrypt_p, diff --git a/src/modules/module_18900.c b/src/modules/module_18900.c index 436afdfd8..246ec4b46 100644 --- a/src/modules/module_18900.c +++ b/src/modules/module_18900.c @@ -71,7 +71,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_19600.c b/src/modules/module_19600.c index 29ccfe263..1e9861ad7 100644 --- a/src/modules/module_19600.c +++ b/src/modules/module_19600.c @@ -71,7 +71,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_19700.c b/src/modules/module_19700.c index beab1009b..b47ec6e40 100644 --- a/src/modules/module_19700.c +++ b/src/modules/module_19700.c @@ -71,7 +71,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_19800.c b/src/modules/module_19800.c index c7a50985a..5cab711b1 100644 --- a/src/modules/module_19800.c +++ b/src/modules/module_19800.c @@ -71,7 +71,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_19900.c b/src/modules/module_19900.c index 7b56ae41e..7a09d0f68 100644 --- a/src/modules/module_19900.c +++ b/src/modules/module_19900.c @@ -71,7 +71,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_20011.c b/src/modules/module_20011.c index 32b4c2d29..6a6f917d3 100644 --- a/src/modules/module_20011.c +++ b/src/modules/module_20011.c @@ -68,7 +68,10 @@ bool module_unstable_warning (MAYBE_UNUSED const 
hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_20012.c b/src/modules/module_20012.c index 4c81a7ccc..f6ef86149 100644 --- a/src/modules/module_20012.c +++ b/src/modules/module_20012.c @@ -68,7 +68,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_20013.c b/src/modules/module_20013.c index f45e68ee7..d92ad4ca8 100644 --- a/src/modules/module_20013.c +++ b/src/modules/module_20013.c @@ -68,7 +68,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_22700.c b/src/modules/module_22700.c index 0aae58e78..462128712 100644 --- a/src/modules/module_22700.c +++ b/src/modules/module_22700.c @@ -266,7 +266,7 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY char *jit_build_options = NULL; - hc_asprintf (&jit_build_options, "-DSCRYPT_N=%u -DSCRYPT_R=%u -DSCRYPT_P=%u -DSCRYPT_TMTO=%" PRIu64 " -DSCRYPT_TMP_ELEM=%" PRIu64, + hc_asprintf (&jit_build_options, "-D SCRYPT_N=%u -D SCRYPT_R=%u -D SCRYPT_P=%u -D SCRYPT_TMTO=%" PRIu64 " -D SCRYPT_TMP_ELEM=%" PRIu64, hashes->salts_buf[0].scrypt_N, hashes->salts_buf[0].scrypt_r, hashes->salts_buf[0].scrypt_p, diff --git a/src/modules/module_23100.c b/src/modules/module_23100.c index db69c4947..ad5b36b95 100644 --- a/src/modules/module_23100.c +++ b/src/modules/module_23100.c @@ -67,7 +67,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_24500.c b/src/modules/module_24500.c index b6d7c380f..3dad9ce8d 100644 --- a/src/modules/module_24500.c +++ b/src/modules/module_24500.c @@ -68,7 +68,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_27700.c b/src/modules/module_27700.c index 346b9f73d..ce33d36cb 100644 --- a/src/modules/module_27700.c +++ b/src/modules/module_27700.c @@ -264,7 +264,7 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t 
*hashconfig, MAY char *jit_build_options = NULL; - hc_asprintf (&jit_build_options, "-DSCRYPT_N=%u -DSCRYPT_R=%u -DSCRYPT_P=%u -DSCRYPT_TMTO=%" PRIu64 " -DSCRYPT_TMP_ELEM=%" PRIu64, + hc_asprintf (&jit_build_options, "-D SCRYPT_N=%u -D SCRYPT_R=%u -D SCRYPT_P=%u -D SCRYPT_TMTO=%" PRIu64 " -D SCRYPT_TMP_ELEM=%" PRIu64, hashes->salts_buf[0].scrypt_N, hashes->salts_buf[0].scrypt_r, hashes->salts_buf[0].scrypt_p, diff --git a/src/modules/module_28200.c b/src/modules/module_28200.c index 648a26403..c262bb55f 100644 --- a/src/modules/module_28200.c +++ b/src/modules/module_28200.c @@ -258,7 +258,7 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY char *jit_build_options = NULL; - hc_asprintf (&jit_build_options, "-DSCRYPT_N=%u -DSCRYPT_R=%u -DSCRYPT_P=%u -DSCRYPT_TMTO=%" PRIu64 " -DSCRYPT_TMP_ELEM=%" PRIu64, + hc_asprintf (&jit_build_options, "-D SCRYPT_N=%u -D SCRYPT_R=%u -D SCRYPT_P=%u -D SCRYPT_TMTO=%" PRIu64 " -D SCRYPT_TMP_ELEM=%" PRIu64, hashes->salts_buf[0].scrypt_N, hashes->salts_buf[0].scrypt_r, hashes->salts_buf[0].scrypt_p, diff --git a/src/selftest.c b/src/selftest.c index 28b7b6d4f..977c38682 100644 --- a/src/selftest.c +++ b/src/selftest.c @@ -39,6 +39,15 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param device_param->kernel_params[18] = &device_param->hip_d_st_esalts_buf; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + device_param->kernel_params[15] = device_param->metal_d_st_digests_buf; + device_param->kernel_params[17] = device_param->metal_d_st_salts_buf; + device_param->kernel_params[18] = device_param->metal_d_st_esalts_buf; + } + #endif + if (device_param->is_opencl == true) { device_param->kernel_params[15] = &device_param->opencl_d_st_digests_buf; @@ -105,6 +114,13 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_pws_buf, &pw, 1 * sizeof (pw_t), device_param->hip_stream) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_pws_buf, 0, &pw, 1 * sizeof (pw_t)) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_FALSE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL) == -1) return -1; @@ -143,6 +159,13 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_pws_buf, &pw, 1 * sizeof (pw_t), device_param->hip_stream) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_pws_buf, 0, &pw, 1 * sizeof (pw_t)) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_FALSE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL) == -1) return -1; @@ -210,6 +233,15 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_pws_buf, &pw, 1 * sizeof (pw_t), device_param->hip_stream) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, 
device_param->metal_d_combs_c, 0, &comb, 1 * sizeof (pw_t)) == -1) return -1; + + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_pws_buf, 0, &pw, 1 * sizeof (pw_t)) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_combs_c, CL_FALSE, 0, 1 * sizeof (pw_t), &comb, 0, NULL, NULL) == -1) return -1; @@ -248,6 +280,13 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_pws_buf, &pw, 1 * sizeof (pw_t), device_param->hip_stream) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_pws_buf, 0, &pw, 1 * sizeof (pw_t)) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_FALSE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL) == -1) return -1; @@ -302,6 +341,13 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_bfs_c, &bf, 1 * sizeof (bf_t), device_param->hip_stream) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_bfs_c, 0, &bf, 1 * sizeof (bf_t)) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bfs_c, CL_FALSE, 0, 1 * sizeof (bf_t), &bf, 0, NULL, NULL) == -1) return -1; @@ -401,6 +447,13 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_pws_buf, &pw, 1 * sizeof (pw_t), device_param->hip_stream) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_pws_buf, 0, &pw, 1 * sizeof (pw_t)) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_FALSE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL) == -1) return -1; @@ -432,6 +485,13 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_pws_buf, &pw, 1 * sizeof (pw_t), device_param->hip_stream) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_pws_buf, 0, &pw, 1 * sizeof (pw_t)) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_FALSE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL) == -1) return -1; @@ -487,6 +547,13 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param if (run_hip_kernel_utf8toutf16le (hashcat_ctx, device_param, device_param->hip_d_pws_buf, 1) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (run_metal_kernel_utf8toutf16le 
(hashcat_ctx, device_param, device_param->metal_d_pws_buf, 1) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { if (run_opencl_kernel_utf8toutf16le (hashcat_ctx, device_param, device_param->opencl_d_pws_buf, 1) == -1) return -1; @@ -513,6 +580,13 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyDtoH (hashcat_ctx, device_param->metal_command_queue, device_param->hooks_buf, device_param->metal_d_hooks, 0, device_param->size_hooks) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { /* blocking */ @@ -531,6 +605,13 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_hooks, device_param->hooks_buf, device_param->size_hooks, device_param->hip_stream) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_hooks, 0, device_param->hooks_buf, device_param->size_hooks) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_FALSE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL) == -1) return -1; @@ -591,6 +672,13 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyDtoH (hashcat_ctx, device_param->metal_command_queue, device_param->hooks_buf, device_param->metal_d_hooks, 0, device_param->size_hooks) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { /* blocking */ @@ -609,6 +697,13 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_hooks, device_param->hooks_buf, device_param->size_hooks, device_param->hip_stream) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_hooks, 0, device_param->hooks_buf, device_param->size_hooks) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_FALSE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL) == -1) return -1; @@ -701,6 +796,13 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param if (hc_hipEventRecord (hashcat_ctx, device_param->hip_event3, device_param->hip_stream) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyDtoH (hashcat_ctx, device_param->metal_command_queue, &num_cracked, device_param->metal_d_result, 0, sizeof (u32)) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { if (hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_result, CL_FALSE, 0, sizeof (u32), &num_cracked, 0, NULL, &opencl_event) == -1) return -1; @@ -747,6 +849,22 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t 
*device_param if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_result, device_param->size_results) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + device_param->kernel_params[15] = device_param->metal_d_digests_buf; + device_param->kernel_params[17] = device_param->metal_d_salt_bufs; + device_param->kernel_params[18] = device_param->metal_d_esalt_bufs; + + if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_pws_buf, device_param->size_pws) == -1) return -1; + if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_tmps, device_param->size_tmps) == -1) return -1; + if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_hooks, device_param->size_hooks) == -1) return -1; + if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_plain_bufs, device_param->size_plains) == -1) return -1; + if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_digests_shown, device_param->size_shown) == -1) return -1; + if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_result, device_param->size_results) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { device_param->kernel_params[15] = &device_param->opencl_d_digests_buf; @@ -773,6 +891,13 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_rules_c, device_param->size_rules_c) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_rules_c, device_param->size_rules_c) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { if (run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_rules_c, device_param->size_rules_c) == -1) return -1; @@ -792,6 +917,13 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_rules_c, device_param->size_rules_c) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_rules_c, device_param->size_rules_c) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { if (run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_rules_c, device_param->size_rules_c) == -1) return -1; @@ -809,6 +941,13 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_combs_c, device_param->size_combs) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_combs_c, device_param->size_combs) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { if (run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_combs_c, device_param->size_combs) == -1) return -1; @@ -826,6 +965,13 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_bfs_c, device_param->size_bfs) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (run_metal_kernel_bzero (hashcat_ctx, device_param, 
device_param->metal_d_bfs_c, device_param->size_bfs) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { if (run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_bfs_c, device_param->size_bfs) == -1) return -1; @@ -866,6 +1012,13 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param event_log_error (hashcat_ctx, "* Device #%u: ATTENTION! HIP kernel self-test failed.", device_param->device_id + 1); } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + event_log_error (hashcat_ctx, "* Device #%u: ATTENTION! Metal kernel self-test failed.", device_param->device_id + 1); + } + #endif + if (device_param->is_opencl == true) { event_log_error (hashcat_ctx, "* Device #%u: ATTENTION! OpenCL kernel self-test failed.", device_param->device_id + 1); diff --git a/src/terminal.c b/src/terminal.c index e40ae8b11..aab94cc84 100644 --- a/src/terminal.c +++ b/src/terminal.c @@ -972,6 +972,112 @@ void backend_info (hashcat_ctx_t *hashcat_ctx) } } + #if defined (__APPLE__) + if (backend_ctx->mtl) + { + event_log_info (hashcat_ctx, "Metal Info:"); + event_log_info (hashcat_ctx, "==========="); + event_log_info (hashcat_ctx, NULL); + + int metal_devices_cnt = backend_ctx->metal_devices_cnt; + int metal_runtimeVersion = backend_ctx->metal_runtimeVersion; + char *metal_runtimeVersionStr = backend_ctx->metal_runtimeVersionStr; + + if (metal_runtimeVersionStr != NULL) + { + event_log_info (hashcat_ctx, "Metal.Version.: %s", metal_runtimeVersionStr); + } + else + { + event_log_info (hashcat_ctx, "Metal.Version.: %d", metal_runtimeVersion); + } + + event_log_info (hashcat_ctx, NULL); + + for (int metal_devices_idx = 0; metal_devices_idx < metal_devices_cnt; metal_devices_idx++) + { + const int backend_devices_idx = backend_ctx->backend_device_from_metal[metal_devices_idx]; + + const hc_device_param_t *device_param = backend_ctx->devices_param + backend_devices_idx; + + int device_id = device_param->device_id; + int device_mtl_maj = device_param->mtl_major; + int device_mtl_min = device_param->mtl_minor; + int device_max_transfer_rate = device_param->device_max_transfer_rate; + int device_physical_location = device_param->device_physical_location; + int device_location_number = device_param->device_location_number; + int device_registryID = device_param->device_registryID; + int device_is_headless = device_param->device_is_headless; + int device_is_low_power = device_param->device_is_low_power; + int device_is_removable = device_param->device_is_removable; + + char *device_name = device_param->device_name; + + u32 device_processors = device_param->device_processors; + + u64 device_global_mem = device_param->device_global_mem; + u64 device_maxmem_alloc = device_param->device_maxmem_alloc; + u64 device_available_mem = device_param->device_available_mem; + u64 device_local_mem_size = device_param->device_local_mem_size; + + cl_device_type opencl_device_type = device_param->opencl_device_type; + cl_uint opencl_device_vendor_id = device_param->opencl_device_vendor_id; + char *opencl_device_vendor = device_param->opencl_device_vendor; + + if (device_param->device_id_alias_cnt) + { + event_log_info (hashcat_ctx, "Backend Device ID #%d (Alias: #%d)", device_id + 1, device_param->device_id_alias_buf[0] + 1); + } + else + { + event_log_info (hashcat_ctx, "Backend Device ID #%d", device_id + 1); + } + + event_log_info (hashcat_ctx, " Type...........: %s", ((opencl_device_type & CL_DEVICE_TYPE_CPU) ? 
"CPU" : ((opencl_device_type & CL_DEVICE_TYPE_GPU) ? "GPU" : "Accelerator"))); + event_log_info (hashcat_ctx, " Vendor.ID......: %u", opencl_device_vendor_id); + event_log_info (hashcat_ctx, " Vendor.........: %s", opencl_device_vendor); + event_log_info (hashcat_ctx, " Name...........: %s", device_name); + event_log_info (hashcat_ctx, " Processor(s)...: %u", device_processors); + event_log_info (hashcat_ctx, " Clock..........: N/A"); + event_log_info (hashcat_ctx, " Memory.Total...: %" PRIu64 " MB (limited to %" PRIu64 " MB allocatable in one block)", device_global_mem / 1024 / 1024, device_maxmem_alloc / 1024 / 1024); + event_log_info (hashcat_ctx, " Memory.Free....: %" PRIu64 " MB", device_available_mem / 1024 / 1024); + event_log_info (hashcat_ctx, " Local.Memory...: %" PRIu64 " KB", device_local_mem_size / 1024); + + switch (device_physical_location) + { + case MTL_DEVICE_LOCATION_BUILTIN: event_log_info (hashcat_ctx, " Phys.Location..: built-in"); break; + case MTL_DEVICE_LOCATION_SLOT: event_log_info (hashcat_ctx, " Phys.Location..: connected to slot %d", device_location_number); break; + case MTL_DEVICE_LOCATION_EXTERNAL: event_log_info (hashcat_ctx, " Phys.Location..: connected via an external interface (port %d)", device_location_number); break; + case MTL_DEVICE_LOCATION_UNSPECIFIED: event_log_info (hashcat_ctx, " Phys.Location..: unspecified"); break; + default: event_log_info (hashcat_ctx, " Phys.Location..: N/A"); break; + } + + if (device_mtl_maj > 0 && device_mtl_min > 0) + { + event_log_info (hashcat_ctx, " Feature.Set....: macOS GPU Family %d v%d", device_mtl_maj, device_mtl_min); + } + else + { + event_log_info (hashcat_ctx, " Feature.Set....: N/A"); + } + + event_log_info (hashcat_ctx, " Registry.ID....: %d", device_registryID); + + if (device_physical_location != MTL_DEVICE_LOCATION_BUILTIN) + { + event_log_info (hashcat_ctx, " Max.TX.Rate....: %d MB/sec", device_max_transfer_rate); + } + else + { + event_log_info (hashcat_ctx, " Max.TX.Rate....: N/A"); + } + + event_log_info (hashcat_ctx, " GPU.Properties.: headless %d, low-power %d, removable %d", device_is_headless, device_is_low_power, device_is_removable); + event_log_info (hashcat_ctx, NULL); + } + } + #endif + if (backend_ctx->ocl) { event_log_info (hashcat_ctx, "OpenCL Info:"); @@ -1187,6 +1293,59 @@ void backend_info_compact (hashcat_ctx_t *hashcat_ctx) event_log_info (hashcat_ctx, NULL); } + #if defined (__APPLE__) + /** + * Metal + */ + + if (backend_ctx->mtl) + { + int metal_devices_cnt = backend_ctx->metal_devices_cnt; + char *metal_runtimeVersionStr = backend_ctx->metal_runtimeVersionStr; + + size_t len = event_log_info (hashcat_ctx, "METAL API (Metal %s)", metal_runtimeVersionStr); + + char line[HCBUFSIZ_TINY] = { 0 }; + + memset (line, '=', len); + + line[len] = 0; + + event_log_info (hashcat_ctx, "%s", line); + + for (int metal_devices_idx = 0; metal_devices_idx < metal_devices_cnt; metal_devices_idx++) + { + const int backend_devices_idx = backend_ctx->backend_device_from_metal[metal_devices_idx]; + + const hc_device_param_t *device_param = backend_ctx->devices_param + backend_devices_idx; + + int device_id = device_param->device_id; + char *device_name = device_param->device_name; + u32 device_processors = device_param->device_processors; + u64 device_global_mem = device_param->device_global_mem; + u64 device_available_mem = device_param->device_available_mem; + + if ((device_param->skipped == false) && (device_param->skipped_warning == false)) + { + event_log_info (hashcat_ctx, "* Device #%u: %s, %" 
PRIu64 "/%" PRIu64 " MB, %uMCU", + device_id + 1, + device_name, + device_available_mem / 1024 / 1024, + device_global_mem / 1024 / 1024, + device_processors); + } + else + { + event_log_info (hashcat_ctx, "* Device #%u: %s, skipped", + device_id + 1, + device_name); + } + } + + event_log_info (hashcat_ctx, NULL); + } + #endif + /** * OpenCL */ diff --git a/src/usage.c b/src/usage.c index 69c35485a..514728fd2 100644 --- a/src/usage.c +++ b/src/usage.c @@ -95,6 +95,7 @@ static const char *const USAGE_BIG_PRE_HASHMODES[] = " --example-hashes | | Alias of --hash-info |", " --backend-ignore-cuda | | Do not try to open CUDA interface on startup |", " --backend-ignore-hip | | Do not try to open HIP interface on startup |", + " --backend-ignore-metal | | Do not try to open Metal interface on startup |", " --backend-ignore-opencl | | Do not try to open OpenCL interface on startup |", " -I, --backend-info | | Show info about detected backend API devices | -I", " -d, --backend-devices | Str | Backend devices to use, separated with commas | -d 1", diff --git a/src/user_options.c b/src/user_options.c index b55f4d83a..15076a3fe 100644 --- a/src/user_options.c +++ b/src/user_options.c @@ -35,6 +35,9 @@ static const struct option long_options[] = {"backend-devices", required_argument, NULL, IDX_BACKEND_DEVICES}, {"backend-ignore-cuda", no_argument, NULL, IDX_BACKEND_IGNORE_CUDA}, {"backend-ignore-hip", no_argument, NULL, IDX_BACKEND_IGNORE_HIP}, + #if defined (__APPLE__) + {"backend-ignore-metal", no_argument, NULL, IDX_BACKEND_IGNORE_METAL}, + #endif {"backend-ignore-opencl", no_argument, NULL, IDX_BACKEND_IGNORE_OPENCL}, {"backend-info", no_argument, NULL, IDX_BACKEND_INFO}, {"backend-vector-width", required_argument, NULL, IDX_BACKEND_VECTOR_WIDTH}, @@ -170,6 +173,9 @@ int user_options_init (hashcat_ctx_t *hashcat_ctx) user_options->backend_devices = NULL; user_options->backend_ignore_cuda = BACKEND_IGNORE_CUDA; user_options->backend_ignore_hip = BACKEND_IGNORE_HIP; + #if defined (__APPLE__) + user_options->backend_ignore_metal = BACKEND_IGNORE_METAL; + #endif user_options->backend_ignore_opencl = BACKEND_IGNORE_OPENCL; user_options->backend_info = BACKEND_INFO; user_options->backend_vector_width = BACKEND_VECTOR_WIDTH; @@ -455,6 +461,9 @@ int user_options_getopt (hashcat_ctx_t *hashcat_ctx, int argc, char **argv) case IDX_CPU_AFFINITY: user_options->cpu_affinity = optarg; break; case IDX_BACKEND_IGNORE_CUDA: user_options->backend_ignore_cuda = true; break; case IDX_BACKEND_IGNORE_HIP: user_options->backend_ignore_hip = true; break; + #if defined (__APPLE__) + case IDX_BACKEND_IGNORE_METAL: user_options->backend_ignore_metal = true; break; + #endif case IDX_BACKEND_IGNORE_OPENCL: user_options->backend_ignore_opencl = true; break; case IDX_BACKEND_INFO: user_options->backend_info = true; break; case IDX_BACKEND_DEVICES: user_options->backend_devices = optarg; break;