diff --git a/docs/changes.txt b/docs/changes.txt
index a914583f5..115489cb5 100644
--- a/docs/changes.txt
+++ b/docs/changes.txt
@@ -13,10 +13,11 @@
 ## Features
 ##
 
-- Added support to building Universal macOS binary on Apple Silicon
-- Added support to use --debug-mode in attack-mode 9 (Association Attack)
 - Added guess data to --status-json output
 - Added hex format for --separator option
+- Added new backend support for Metal, the OpenCL replacement API on Apple
+- Added support to building Universal macOS binary on Apple Silicon
+- Added support to use --debug-mode in attack-mode 9 (Association Attack)
 
 ##
 ## Bugs
diff --git a/docs/credits.txt b/docs/credits.txt
index 268719f2c..bb0f95a7d 100644
--- a/docs/credits.txt
+++ b/docs/credits.txt
@@ -21,6 +21,7 @@ Gabriele "matrix" Gristina (@gm4tr1x)
 * Multiple kernel modules
 * Compressed wordlist feature
 * OpenCL Info feature
+* Apple Metal Runtime API feature
 * Apple macOS port
 * Apple Silicon support
 * Universal binary on Apple Silicon
diff --git a/include/backend.h b/include/backend.h
index c7e7de0a1..2f2a58f9c 100644
--- a/include/backend.h
+++ b/include/backend.h
@@ -40,43 +40,51 @@ int backend_session_update_mp (hashcat_ctx_t *hashcat_ctx);
 int backend_session_update_mp_rl (hashcat_ctx_t *hashcat_ctx, const u32 css_cnt_l, const u32 css_cnt_r);
 
 void generate_source_kernel_filename (const bool slow_candidates, const u32 attack_exec, const u32 attack_kern, const u32 kern_type, const u32 opti_type, char *shared_dir, char *source_file);
-void generate_cached_kernel_filename (const bool slow_candidates, const u32 attack_exec, const u32 attack_kern, const u32 kern_type, const u32 opti_type, char *cache_dir, const char *device_name_chksum, char *cached_file);
+void generate_cached_kernel_filename (const bool slow_candidates, const u32 attack_exec, const u32 attack_kern, const u32 kern_type, const u32 opti_type, char *cache_dir, const char *device_name_chksum, char *cached_file, bool is_metal);
 
 void generate_source_kernel_shared_filename (char *shared_dir, char *source_file);
-void generate_cached_kernel_shared_filename (char *cache_dir, const char *device_name_chksum, char *cached_file);
+void generate_cached_kernel_shared_filename (char *cache_dir, const char *device_name_chksum, char *cached_file, bool is_metal);
 
 void generate_source_kernel_mp_filename (const u32 opti_type, const u64 opts_type, char *shared_dir, char *source_file);
-void generate_cached_kernel_mp_filename (const u32 opti_type, const u64 opts_type, char *cache_dir, const char *device_name_chksum, char *cached_file);
+void generate_cached_kernel_mp_filename (const u32 opti_type, const u64 opts_type, char *cache_dir, const char *device_name_chksum, char *cached_file, bool is_metal);
 
 void generate_source_kernel_amp_filename (const u32 attack_kern, char *shared_dir, char *source_file);
-void generate_cached_kernel_amp_filename (const u32 attack_kern, char *cache_dir, const char *device_name_chksum, char *cached_file);
-
-int gidd_to_pw_t (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 gidd, pw_t *pw);
-
-int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u32 highest_pw_len, const u64 pws_pos, const u64 pws_cnt, const u32 fast_iteration, const u32 salt_pos);
-
-int run_cuda_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 num);
-int run_cuda_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 num);
-int run_cuda_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 offset, const u8 value, const u64 size);
-int run_cuda_kernel_memset32 (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 offset, const u32 value, const u64 size);
-int run_cuda_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 size);
-
-int run_hip_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, hipDeviceptr_t buf, const u64 num);
-int run_hip_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, hipDeviceptr_t buf, const u64 num);
-int run_hip_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, hipDeviceptr_t buf, const u64 offset, const u8 value, const u64 size);
-int run_hip_kernel_memset32 (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, hipDeviceptr_t buf, const u64 offset, const u32 value, const u64 size);
-int run_hip_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, hipDeviceptr_t buf, const u64 size);
-
-int run_opencl_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 num);
-int run_opencl_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 num);
-int run_opencl_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 offset, const u8 value, const u64 size);
-int run_opencl_kernel_memset32 (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 offset, const u32 value, const u64 size);
-int run_opencl_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 size);
-
-int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u32 kern_run, const u64 pws_pos, const u64 num, const u32 event_update, const u32 iteration);
-int run_kernel_mp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u32 kern_run, const u64 num);
-int run_kernel_tm (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param);
-int run_kernel_amp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 num);
-int run_kernel_decompress (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 num);
-int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 pws_cnt);
-int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 pws_pos, const u64 pws_cnt);
+void generate_cached_kernel_amp_filename (const u32 attack_kern, char *cache_dir, const char *device_name_chksum, char *cached_file, bool is_metal);
+
+int gidd_to_pw_t (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 gidd, pw_t *pw);
+
+int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u32 highest_pw_len, const u64 pws_pos, const u64 pws_cnt, const u32 fast_iteration, const u32 salt_pos);
+
+int run_cuda_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 num);
+int run_cuda_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 num);
+int run_cuda_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 offset, const u8 value, const u64 size);
+int run_cuda_kernel_memset32 (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 offset, const u32 value, const u64 size);
+int run_cuda_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 size);
+
+int run_hip_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, hipDeviceptr_t buf, const u64 num);
+int run_hip_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, hipDeviceptr_t buf, const u64 num);
+int run_hip_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, hipDeviceptr_t buf, const u64 offset, const u8 value, const u64 size);
+int run_hip_kernel_memset32 (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, hipDeviceptr_t buf, const u64 offset, const u32 value, const u64 size);
+int run_hip_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, hipDeviceptr_t buf, const u64 size);
+
+#if defined (__APPLE__)
+int run_metal_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, mtl_mem buf, const u64 num);
+int run_metal_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, mtl_mem buf, const u64 num);
+int run_metal_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, mtl_mem buf, const u64 offset, const u8 value, const u64 size);
+int run_metal_kernel_memset32 (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, mtl_mem buf, const u64 offset, const u32 value, const u64 size);
+int run_metal_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, mtl_mem buf, const u64 size);
+#endif
+
+int run_opencl_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 num);
+int run_opencl_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 num);
+int run_opencl_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 offset, const u8 value, const u64 size);
+int run_opencl_kernel_memset32 (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 offset, const u32 value, const u64 size);
+int run_opencl_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 size);
+
+int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u32 kern_run, const u64 pws_pos, const u64 num, const u32 event_update, const u32 iteration);
+int run_kernel_mp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u32 kern_run, const u64 num);
+int run_kernel_tm (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param);
+int run_kernel_amp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 num);
+int run_kernel_decompress (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 num);
+int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 pws_cnt);
+int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 pws_pos, const u64 pws_cnt);
 
 void *hook12_thread (void *p);
 void *hook23_thread (void *p);
diff --git a/include/ext_metal.h b/include/ext_metal.h
new file mode 100644
index 000000000..1bb1425f8
--- /dev/null
+++ b/include/ext_metal.h
@@ -0,0 +1,118 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+#ifndef _EXT_METAL_H
+#define _EXT_METAL_H
+
+#if defined (__APPLE__)
+
+#include
+#include
+
+#define mtl_device_id id
+#define mtl_command_queue id
+#define mtl_function id
+#define mtl_pipeline id
+#define mtl_mem id
+#define mtl_library id
+#define mtl_command_buffer id
+#define mtl_command_encoder id
+#define mtl_blit_command_encoder id
+#define mtl_compute_command_encoder id
+
+typedef enum metalDeviceAttribute
+{
+  MTL_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 1,
+  MTL_DEVICE_ATTRIBUTE_UNIFIED_MEMORY,
+  MTL_DEVICE_ATTRIBUTE_WARP_SIZE,
+  MTL_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
+  MTL_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR,
+  MTL_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK,
+  MTL_DEVICE_ATTRIBUTE_CLOCK_RATE,
+  MTL_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK,
+  MTL_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY,
+  MTL_DEVICE_ATTRIBUTE_MAX_TRANSFER_RATE,
+  MTL_DEVICE_ATTRIBUTE_HEADLESS,
+  MTL_DEVICE_ATTRIBUTE_LOW_POWER,
+  MTL_DEVICE_ATTRIBUTE_REMOVABLE,
+  MTL_DEVICE_ATTRIBUTE_REGISTRY_ID,
+  MTL_DEVICE_ATTRIBUTE_PHYSICAL_LOCATION,
+  MTL_DEVICE_ATTRIBUTE_LOCATION_NUMBER,
+
+} metalDeviceAttribute_t;
+
+typedef enum metalDeviceLocation
+{
+  // MTLDeviceLocationBuiltIn
+  // The GPU is built into the device
+  MTL_DEVICE_LOCATION_BUILTIN = 0,
+
+  // MTLDeviceLocationSlot
+  // The GPU is connected to a slot inside the computer
+  MTL_DEVICE_LOCATION_SLOT = 1,
+
+  // MTLDeviceLocationExternal
+  // The GPU is connected via an external interface, such as Thunderbolt
+  MTL_DEVICE_LOCATION_EXTERNAL = 2,
+
+  // MTLDeviceLocationUnspecified
+  // The GPU's location is not specified or cannot be determined
+  MTL_DEVICE_LOCATION_UNSPECIFIED = 4294967295,
+
+} metalDeviceLocation_t;
+
+typedef struct hc_metal
+{
+  CFArrayRef devices;
+
+} hc_metal_t;
+
+typedef hc_metal_t MTL_PTR;
+
+int mtl_init (void *hashcat_ctx);
+void mtl_close (void *hashcat_ctx);
+
+int hc_mtlRuntimeGetVersionString (void *hashcat_ctx, char *runtimeVersion_str, size_t *size);
+
+int hc_mtlDeviceGetCount (void *hashcat_ctx, int *count);
+int hc_mtlDeviceGet (void *hashcat_ctx, mtl_device_id *metal_device, int ordinal);
+int hc_mtlDeviceGetName (void *hashcat_ctx, char *name, size_t len, mtl_device_id metal_device);
+int hc_mtlDeviceGetAttribute (void *hashcat_ctx, int *pi, metalDeviceAttribute_t attrib, mtl_device_id metal_device);
+int hc_mtlDeviceTotalMem (void *hashcat_ctx, size_t *bytes, mtl_device_id metal_device);
+int hc_mtlDeviceMaxMemAlloc (void *hashcat_ctx, size_t *bytes, mtl_device_id metal_device);
+int hc_mtlMemGetInfo (void *hashcat_ctx, size_t *mem_free, size_t *mem_total);
+
+int hc_mtlCreateCommandQueue (void *hashcat_ctx, mtl_device_id metal_device, mtl_command_queue *command_queue);
+int hc_mtlCreateBuffer (void *hashcat_ctx, mtl_device_id metal_device, size_t size, void *ptr, mtl_mem *metal_buffer);
+
+int hc_mtlCreateKernel (void *hashcat_ctx, mtl_device_id metal_device, mtl_library metal_library, const char *func_name, mtl_function *metal_function, mtl_pipeline *metal_pipeline);
+
+int hc_mtlGetMaxTotalThreadsPerThreadgroup (void *hashcat_ctx, mtl_pipeline metal_pipeline, unsigned int *maxTotalThreadsPerThreadgroup);
+int hc_mtlGetThreadExecutionWidth (void *hashcat_ctx, mtl_pipeline metal_pipeline, unsigned int *threadExecutionWidth);
+
+// copy buffer
+int hc_mtlMemcpyDtoD (void *hashcat_ctx, mtl_command_queue command_queue, mtl_mem buf_dst, size_t buf_dst_off, mtl_mem buf_src, size_t buf_src_off, size_t buf_size);
+// write
+int hc_mtlMemcpyHtoD (void *hashcat_ctx, mtl_command_queue command_queue, mtl_mem buf_dst, size_t buf_dst_off, const void *buf_src, size_t buf_size);
+// read
+int hc_mtlMemcpyDtoH (void *hashcat_ctx, mtl_command_queue command_queue, void *buf_dst, mtl_mem buf_src, size_t buf_src_off, size_t buf_size);
+
+int hc_mtlReleaseMemObject (void *hashcat_ctx, mtl_mem metal_buffer);
+int hc_mtlReleaseFunction (void *hashcat_ctx, mtl_function metal_function);
+int hc_mtlReleaseLibrary (void *hashcat_ctx, mtl_function metal_library);
+int hc_mtlReleaseCommandQueue (void *hashcat_ctx, mtl_command_queue command_queue);
+int hc_mtlReleaseDevice (void *hashcat_ctx, mtl_device_id metal_device);
+
+int hc_mtlCreateLibraryWithSource (void *hashcat_ctx, mtl_device_id metal_device, const char *kernel_sources, const char *build_options_buf, const char *include_path, mtl_library *metal_library);
+int hc_mtlCreateLibraryWithFile (void *hashcat_ctx, mtl_device_id metal_device, const char *cached_file, mtl_library *metal_library);
+
+int hc_mtlEncodeComputeCommand_pre (void *hashcat_ctx, mtl_pipeline metal_pipeline, mtl_command_queue metal_command_queue, mtl_command_buffer *metal_command_buffer, mtl_command_encoder *metal_command_encoder);
+int hc_mtlSetCommandEncoderArg (void *hashcat_ctx, mtl_command_encoder metal_command_encoder, size_t off, size_t idx, mtl_mem buf, void *host_data, size_t host_data_size);
+
+int hc_mtlEncodeComputeCommand (void *hashcat_ctx, mtl_command_encoder metal_command_encoder, mtl_command_buffer metal_command_buffer, size_t global_work_size, size_t local_work_size, double *ms);
+
+#endif // __APPLE__
+
+#endif // _EXT_METAL_H
diff --git a/include/types.h b/include/types.h
index 94d2f1505..9fe03e76d 100644
--- a/include/types.h
+++ b/include/types.h
@@ -660,6 +660,9 @@ typedef enum user_options_defaults
   NONCE_ERROR_CORRECTIONS = 8,
   BACKEND_IGNORE_CUDA = false,
   BACKEND_IGNORE_HIP = false,
+  #if defined (__APPLE__)
+  BACKEND_IGNORE_METAL = false,
+  #endif
   BACKEND_IGNORE_OPENCL = false,
   BACKEND_INFO = false,
   BACKEND_VECTOR_WIDTH = 0,
@@ -711,116 +714,117 @@ typedef enum user_options_map
   IDX_BACKEND_DEVICES = 'd',
   IDX_BACKEND_IGNORE_CUDA = 0xff01,
   IDX_BACKEND_IGNORE_HIP = 0xff02,
-  IDX_BACKEND_IGNORE_OPENCL = 0xff03,
+  IDX_BACKEND_IGNORE_METAL = 0xff03,
+  IDX_BACKEND_IGNORE_OPENCL = 0xff04,
   IDX_BACKEND_INFO = 'I',
-  IDX_BACKEND_VECTOR_WIDTH = 0xff04,
-  IDX_BENCHMARK_ALL = 0xff05,
+  IDX_BACKEND_VECTOR_WIDTH = 0xff05,
+  IDX_BENCHMARK_ALL = 0xff06,
   IDX_BENCHMARK = 'b',
-  IDX_BITMAP_MAX = 0xff06,
-  IDX_BITMAP_MIN = 0xff07,
+  IDX_BITMAP_MAX = 0xff07,
+  IDX_BITMAP_MIN = 0xff08,
   #ifdef WITH_BRAIN
   IDX_BRAIN_CLIENT = 'z',
-  IDX_BRAIN_CLIENT_FEATURES = 0xff08,
-  IDX_BRAIN_HOST = 0xff09,
-  IDX_BRAIN_PASSWORD = 0xff0a,
-  IDX_BRAIN_PORT = 0xff0b,
-  IDX_BRAIN_SERVER = 0xff0c,
-  IDX_BRAIN_SERVER_TIMER = 0xff0d,
-  IDX_BRAIN_SESSION = 0xff0e,
-  IDX_BRAIN_SESSION_WHITELIST = 0xff0f,
+  IDX_BRAIN_CLIENT_FEATURES = 0xff09,
+  IDX_BRAIN_HOST = 0xff0a,
+  IDX_BRAIN_PASSWORD = 0xff0b,
+  IDX_BRAIN_PORT = 0xff0c,
+  IDX_BRAIN_SERVER = 0xff0d,
+  IDX_BRAIN_SERVER_TIMER = 0xff0e,
+  IDX_BRAIN_SESSION = 0xff0f,
+  IDX_BRAIN_SESSION_WHITELIST = 0xff10,
   #endif
-  IDX_CPU_AFFINITY = 0xff10,
+  IDX_CPU_AFFINITY = 0xff11,
   IDX_CUSTOM_CHARSET_1 = '1',
   IDX_CUSTOM_CHARSET_2 = '2',
   IDX_CUSTOM_CHARSET_3 = '3',
   IDX_CUSTOM_CHARSET_4 = '4',
-  IDX_DEBUG_FILE = 0xff11,
-  IDX_DEBUG_MODE = 0xff12,
-  IDX_DEPRECATED_CHECK_DISABLE = 0xff13,
-  IDX_ENCODING_FROM = 0xff14,
-  IDX_ENCODING_TO = 0xff15,
-  IDX_HASH_INFO = 0xff16,
-  IDX_FORCE = 0xff17,
-  IDX_HWMON_DISABLE = 0xff18,
-  IDX_HWMON_TEMP_ABORT = 0xff19,
+  IDX_DEBUG_FILE = 0xff12,
+  IDX_DEBUG_MODE = 0xff13,
+  IDX_DEPRECATED_CHECK_DISABLE = 0xff14,
+  IDX_ENCODING_FROM = 0xff15,
+  IDX_ENCODING_TO = 0xff16,
+  IDX_HASH_INFO = 0xff17,
+  IDX_FORCE = 0xff18,
+  IDX_HWMON_DISABLE = 0xff19,
+  IDX_HWMON_TEMP_ABORT = 0xff1a,
   IDX_HASH_MODE = 'm',
-  IDX_HCCAPX_MESSAGE_PAIR = 0xff1a,
+  IDX_HCCAPX_MESSAGE_PAIR = 0xff1b,
   IDX_HELP = 'h',
-  IDX_HEX_CHARSET = 0xff1b,
-  IDX_HEX_SALT = 0xff1c,
-  IDX_HEX_WORDLIST = 0xff1d,
-  IDX_HOOK_THREADS = 0xff1e,
-  IDX_IDENTIFY = 0xff1f,
+  IDX_HEX_CHARSET = 0xff1c,
+  IDX_HEX_SALT = 0xff1d,
+  IDX_HEX_WORDLIST = 0xff1e,
+  IDX_HOOK_THREADS = 0xff1f,
+  IDX_IDENTIFY = 0xff20,
   IDX_INCREMENT = 'i',
-  IDX_INCREMENT_MAX = 0xff20,
-  IDX_INCREMENT_MIN = 0xff21,
-  IDX_INDUCTION_DIR = 0xff22,
-  IDX_KEEP_GUESSING = 0xff23,
+  IDX_INCREMENT_MAX = 0xff21,
+  IDX_INCREMENT_MIN = 0xff22,
+  IDX_INDUCTION_DIR = 0xff23,
+  IDX_KEEP_GUESSING = 0xff24,
   IDX_KERNEL_ACCEL = 'n',
   IDX_KERNEL_LOOPS = 'u',
   IDX_KERNEL_THREADS = 'T',
-  IDX_KEYBOARD_LAYOUT_MAPPING = 0xff24,
-  IDX_KEYSPACE = 0xff25,
-  IDX_LEFT = 0xff26,
+  IDX_KEYBOARD_LAYOUT_MAPPING = 0xff25,
+  IDX_KEYSPACE = 0xff26,
+  IDX_LEFT = 0xff27,
   IDX_LIMIT = 'l',
-  IDX_LOGFILE_DISABLE = 0xff27,
-  IDX_LOOPBACK = 0xff28,
-  IDX_MACHINE_READABLE = 0xff29,
-  IDX_MARKOV_CLASSIC = 0xff2a,
-  IDX_MARKOV_DISABLE = 0xff2b,
-  IDX_MARKOV_HCSTAT2 = 0xff2c,
-  IDX_MARKOV_INVERSE = 0xff2d,
+  IDX_LOGFILE_DISABLE = 0xff28,
+  IDX_LOOPBACK = 0xff29,
+  IDX_MACHINE_READABLE = 0xff2a,
+  IDX_MARKOV_CLASSIC = 0xff2b,
+  IDX_MARKOV_DISABLE = 0xff2c,
+  IDX_MARKOV_HCSTAT2 = 0xff2d,
+  IDX_MARKOV_INVERSE = 0xff2e,
   IDX_MARKOV_THRESHOLD = 't',
-  IDX_NONCE_ERROR_CORRECTIONS = 0xff2e,
+  IDX_NONCE_ERROR_CORRECTIONS = 0xff2f,
   IDX_OPENCL_DEVICE_TYPES = 'D',
   IDX_OPTIMIZED_KERNEL_ENABLE = 'O',
   IDX_MULTIPLY_ACCEL_DISABLE = 'M',
-  IDX_OUTFILE_AUTOHEX_DISABLE = 0xff2f,
-  IDX_OUTFILE_CHECK_DIR = 0xff30,
-  IDX_OUTFILE_CHECK_TIMER = 0xff31,
-  IDX_OUTFILE_FORMAT = 0xff32,
+  IDX_OUTFILE_AUTOHEX_DISABLE = 0xff30,
+  IDX_OUTFILE_CHECK_DIR = 0xff31,
+  IDX_OUTFILE_CHECK_TIMER = 0xff32,
+  IDX_OUTFILE_FORMAT = 0xff33,
   IDX_OUTFILE = 'o',
-  IDX_POTFILE_DISABLE = 0xff33,
-  IDX_POTFILE_PATH = 0xff34,
-  IDX_PROGRESS_ONLY = 0xff35,
-  IDX_QUIET = 0xff36,
-  IDX_REMOVE = 0xff37,
-  IDX_REMOVE_TIMER = 0xff38,
-  IDX_RESTORE = 0xff39,
-  IDX_RESTORE_DISABLE = 0xff3a,
-  IDX_RESTORE_FILE_PATH = 0xff3b,
+  IDX_POTFILE_DISABLE = 0xff34,
+  IDX_POTFILE_PATH = 0xff35,
+  IDX_PROGRESS_ONLY = 0xff36,
+  IDX_QUIET = 0xff37,
+  IDX_REMOVE = 0xff38,
+  IDX_REMOVE_TIMER = 0xff39,
+  IDX_RESTORE = 0xff3a,
+  IDX_RESTORE_DISABLE = 0xff3b,
+  IDX_RESTORE_FILE_PATH = 0xff3c,
   IDX_RP_FILE = 'r',
-  IDX_RP_GEN_FUNC_MAX = 0xff3c,
-  IDX_RP_GEN_FUNC_MIN = 0xff3d,
-  IDX_RP_GEN_FUNC_SEL = 0xff3e,
+  IDX_RP_GEN_FUNC_MAX = 0xff3d,
+  IDX_RP_GEN_FUNC_MIN = 0xff3e,
+  IDX_RP_GEN_FUNC_SEL = 0xff3f,
   IDX_RP_GEN = 'g',
-  IDX_RP_GEN_SEED = 0xff3f,
+  IDX_RP_GEN_SEED = 0xff40,
   IDX_RULE_BUF_L = 'j',
   IDX_RULE_BUF_R = 'k',
-  IDX_RUNTIME = 0xff40,
-  IDX_SCRYPT_TMTO = 0xff41,
+  IDX_RUNTIME = 0xff41,
+  IDX_SCRYPT_TMTO = 0xff42,
   IDX_SEGMENT_SIZE = 'c',
-  IDX_SELF_TEST_DISABLE = 0xff42,
+  IDX_SELF_TEST_DISABLE = 0xff43,
   IDX_SEPARATOR = 'p',
-  IDX_SESSION = 0xff43,
-  IDX_SHOW = 0xff44,
+  IDX_SESSION = 0xff44,
+  IDX_SHOW = 0xff45,
   IDX_SKIP = 's',
   IDX_SLOW_CANDIDATES = 'S',
-  IDX_SPEED_ONLY = 0xff45,
-  IDX_SPIN_DAMP = 0xff46,
-  IDX_STATUS = 0xff47,
-  IDX_STATUS_JSON = 0xff48,
-  IDX_STATUS_TIMER = 0xff49,
-  IDX_STDOUT_FLAG = 0xff4a,
-  IDX_STDIN_TIMEOUT_ABORT = 0xff4b,
-  IDX_TRUECRYPT_KEYFILES = 0xff4c,
-  IDX_USERNAME = 0xff4d,
-  IDX_VERACRYPT_KEYFILES = 0xff4e,
-  IDX_VERACRYPT_PIM_START = 0xff4f,
-  IDX_VERACRYPT_PIM_STOP = 0xff50,
+  IDX_SPEED_ONLY = 0xff46,
+  IDX_SPIN_DAMP = 0xff47,
+  IDX_STATUS = 0xff48,
+  IDX_STATUS_JSON = 0xff49,
+  IDX_STATUS_TIMER = 0xff4a,
+  IDX_STDOUT_FLAG = 0xff4b,
+  IDX_STDIN_TIMEOUT_ABORT = 0xff4c,
+  IDX_TRUECRYPT_KEYFILES = 0xff4d,
+  IDX_USERNAME = 0xff4e,
+  IDX_VERACRYPT_KEYFILES = 0xff4f,
+  IDX_VERACRYPT_PIM_START = 0xff50,
+  IDX_VERACRYPT_PIM_STOP = 0xff51,
   IDX_VERSION_LOWER = 'v',
   IDX_VERSION = 'V',
-  IDX_WORDLIST_AUTOHEX_DISABLE = 0xff51,
+  IDX_WORDLIST_AUTOHEX_DISABLE = 0xff52,
   IDX_WORKLOAD_PROFILE = 'w',
 
 } user_options_map_t;
@@ -1100,6 +1104,7 @@ typedef struct hc_fp
 #include "ext_cuda.h"
 #include "ext_hip.h"
 #include "ext_OpenCL.h"
+#include "ext_metal.h"
 
 typedef struct hc_device_param
 {
@@ -1601,6 +1606,129 @@ typedef struct hc_device_param
   hipDeviceptr_t hip_d_st_esalts_buf;
   hipDeviceptr_t hip_d_kernel_param;
 
+  // API: opencl and metal
+
+  bool is_apple_silicon;
+
+  // API: metal
+
+  bool is_metal;
+
+  #if defined (__APPLE__)
+
+  int mtl_major;
+  int mtl_minor;
+
+  int device_physical_location;
+  int device_location_number;
+  int device_registryID;
+  int device_max_transfer_rate;
+  int device_is_headless;
+  int device_is_low_power;
+  int device_is_removable;
+
+  int metal_warp_size;
+
+  mtl_device_id metal_device;
+  mtl_command_queue metal_command_queue;
+
+  mtl_library metal_library;
+  mtl_library metal_library_shared;
+  mtl_library metal_library_mp;
+  mtl_library metal_library_amp;
+
+  mtl_function metal_function1;
+  mtl_function metal_function12;
+  mtl_function metal_function2p;
+  mtl_function metal_function2;
+  mtl_function metal_function2e;
+  mtl_function metal_function23;
+  mtl_function metal_function3;
+  mtl_function metal_function4;
+  mtl_function metal_function_init2;
+  mtl_function metal_function_loop2p;
+  mtl_function metal_function_loop2;
+  mtl_function metal_function_mp;
+  mtl_function metal_function_mp_l;
+  mtl_function metal_function_mp_r;
+  mtl_function metal_function_amp;
+  mtl_function metal_function_tm;
+  mtl_function metal_function_memset;
+  mtl_function metal_function_bzero;
+  mtl_function metal_function_atinit;
+  mtl_function metal_function_utf8toutf16le;
+  mtl_function metal_function_decompress;
+  mtl_function metal_function_aux1;
+  mtl_function metal_function_aux2;
+  mtl_function metal_function_aux3;
+  mtl_function metal_function_aux4;
+
+  mtl_pipeline metal_pipeline1;
+  mtl_pipeline metal_pipeline12;
+  mtl_pipeline metal_pipeline2p;
+  mtl_pipeline metal_pipeline2;
+  mtl_pipeline metal_pipeline2e;
+  mtl_pipeline metal_pipeline23;
+  mtl_pipeline metal_pipeline3;
+  mtl_pipeline metal_pipeline4;
+  mtl_pipeline metal_pipeline_init2;
+  mtl_pipeline metal_pipeline_loop2p;
+  mtl_pipeline metal_pipeline_loop2;
+  mtl_pipeline metal_pipeline_mp;
+  mtl_pipeline metal_pipeline_mp_l;
+  mtl_pipeline metal_pipeline_mp_r;
+  mtl_pipeline metal_pipeline_amp;
+  mtl_pipeline metal_pipeline_tm;
+  mtl_pipeline metal_pipeline_memset;
+  mtl_pipeline metal_pipeline_bzero;
+  mtl_pipeline metal_pipeline_atinit;
+  mtl_pipeline metal_pipeline_utf8toutf16le;
+  mtl_pipeline metal_pipeline_decompress;
+  mtl_pipeline metal_pipeline_aux1;
+  mtl_pipeline metal_pipeline_aux2;
+  mtl_pipeline metal_pipeline_aux3;
+  mtl_pipeline metal_pipeline_aux4;
+
+  mtl_mem metal_d_pws_buf;
+  mtl_mem metal_d_pws_amp_buf;
+  mtl_mem metal_d_pws_comp_buf;
+  mtl_mem metal_d_pws_idx;
+  mtl_mem metal_d_rules;
+  mtl_mem metal_d_rules_c;
+  mtl_mem metal_d_combs;
+  mtl_mem metal_d_combs_c;
+  mtl_mem metal_d_bfs;
+  mtl_mem metal_d_bfs_c;
+  mtl_mem metal_d_tm_c;
+  mtl_mem metal_d_bitmap_s1_a;
+  mtl_mem metal_d_bitmap_s1_b;
+  mtl_mem metal_d_bitmap_s1_c;
+  mtl_mem metal_d_bitmap_s1_d;
+  mtl_mem metal_d_bitmap_s2_a;
+  mtl_mem metal_d_bitmap_s2_b;
+  mtl_mem metal_d_bitmap_s2_c;
+  mtl_mem metal_d_bitmap_s2_d;
+  mtl_mem metal_d_plain_bufs;
+  mtl_mem metal_d_digests_buf;
+  mtl_mem metal_d_digests_shown;
+  mtl_mem metal_d_salt_bufs;
+  mtl_mem metal_d_esalt_bufs;
+  mtl_mem metal_d_tmps;
+  mtl_mem metal_d_hooks;
+  mtl_mem metal_d_result;
+  mtl_mem metal_d_extra0_buf;
+  mtl_mem metal_d_extra1_buf;
+  mtl_mem metal_d_extra2_buf;
+  mtl_mem metal_d_extra3_buf;
+  mtl_mem metal_d_root_css_buf;
+  mtl_mem metal_d_markov_css_buf;
+  mtl_mem metal_d_st_digests_buf;
+  mtl_mem metal_d_st_salts_buf;
+  mtl_mem metal_d_st_esalts_buf;
+  mtl_mem metal_d_kernel_param;
+
+  #endif // __APPLE__
+
   // API: opencl
 
   bool is_opencl;
@@ -1708,6 +1836,7 @@ typedef struct backend_ctx
 
   void *cuda;
   void *hip;
+  void *mtl;
   void *ocl;
 
   void *nvrtc;
@@ -1715,6 +1844,7 @@
   int backend_device_from_cuda[DEVICES_MAX]; // from cuda device index to backend device index
   int backend_device_from_hip[DEVICES_MAX]; // from hip device index to backend device index
+  int backend_device_from_metal[DEVICES_MAX]; // from metal device index to backend device index
   int backend_device_from_opencl[DEVICES_MAX]; // from opencl device index to backend device index
   int backend_device_from_opencl_platform[CL_PLATFORMS_MAX][DEVICES_MAX]; // from opencl device index to backend device index (by platform)
 
@@ -1725,6 +1855,8 @@
   int cuda_devices_cnt;
   int cuda_devices_active;
   int hip_devices_cnt;
   int hip_devices_active;
+  int metal_devices_cnt;
+  int metal_devices_active;
   int opencl_devices_cnt;
   int opencl_devices_active;
@@ -1766,6 +1898,13 @@
   int hip_runtimeVersion;
   int hip_driverVersion;
 
+  // metal
+
+  int rc_metal_init;
+
+  unsigned int metal_runtimeVersion;
+  char *metal_runtimeVersionStr;
+
   // opencl
 
   cl_platform_id *opencl_platforms;
@@ -2169,6 +2308,7 @@ typedef struct user_options
   bool markov_inverse;
   bool backend_ignore_cuda;
   bool backend_ignore_hip;
+  bool backend_ignore_metal;
   bool backend_ignore_opencl;
   bool backend_info;
   bool optimized_kernel_enable;
diff --git a/src/Makefile b/src/Makefile
index 4a5d419da..a26f5598c 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -331,7 +331,11 @@
 CFLAGS_NATIVE += -DMISSING_CLOCK_GETTIME
 endif
 
 LFLAGS_NATIVE := $(LFLAGS)
+LFLAGS_NATIVE += -framework CoreFoundation
+LFLAGS_NATIVE += -framework CoreGraphics
+LFLAGS_NATIVE += -framework Foundation
 LFLAGS_NATIVE += -framework IOKit
+LFLAGS_NATIVE += -framework Metal
 LFLAGS_NATIVE += -lpthread
 LFLAGS_NATIVE += -liconv
@@ -385,6 +389,10 @@ EMU_OBJS_ALL += emu_inc_cipher_aes emu_inc_cipher_camellia emu_inc_ci
 
 OBJS_ALL := affinity autotune backend benchmark bitmap bitops combinator common convert cpt cpu_crc32 debugfile dictstat dispatch dynloader event ext_ADL ext_cuda ext_hip ext_nvapi ext_nvml ext_nvrtc ext_hiprtc ext_OpenCL ext_sysfs_amdgpu ext_sysfs_cpu ext_iokit ext_lzma filehandling folder hashcat hashes hlfmt hwmon induct interface keyboard_layout locking logfile loopback memory monitor mpsp outfile_check outfile pidfile potfile restore rp rp_cpu selftest slow_candidates shared status stdout straight terminal thread timer tuningdb usage user_options wordlist $(EMU_OBJS_ALL)
 
+ifeq ($(UNAME),Darwin)
+OBJS_ALL += ext_metal
+endif
+
 ifeq ($(ENABLE_BRAIN),1)
 OBJS_ALL += brain
 endif
@@ -585,6 +593,9 @@ uninstall:
 obj/%.NATIVE.o: src/%.c
 	$(CC) -c $(CCFLAGS) $(CFLAGS_NATIVE) $< -o $@ -fpic
 
+obj/%.NATIVE.o: src/%.m
+	$(CC) -c $(CCFLAGS) $(CFLAGS_NATIVE) $< -o $@ -fpic
+
 ifeq ($(USE_SYSTEM_LZMA),0)
 obj/%.LZMA.NATIVE.o: $(DEPS_LZMA_PATH)/%.c
 	$(CC) -c $(CCFLAGS) $(CFLAGS_NATIVE) $(CFLAGS_LZMA) $< -o $@ -fpic
diff --git a/src/autotune.c b/src/autotune.c
index bcbdd0eb3..55c829802 100644
--- a/src/autotune.c
+++ b/src/autotune.c
@@ -238,6 +238,13 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
     if (run_hip_kernel_atinit (hashcat_ctx, device_param, device_param->hip_d_pws_buf, kernel_power_max) == -1) return -1;
   }
 
+  #if defined (__APPLE__)
+  if (device_param->is_metal == true)
+  {
+    if (run_metal_kernel_atinit (hashcat_ctx, device_param, device_param->metal_d_pws_buf, kernel_power_max) == -1) return -1;
+  }
+  #endif
+
   if (device_param->is_opencl == true)
   {
     if (run_opencl_kernel_atinit (hashcat_ctx, device_param, device_param->opencl_d_pws_buf, kernel_power_max) == -1) return -1;
@@ -264,6 +271,13 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
       if (hc_hipMemcpyDtoDAsync (hashcat_ctx, device_param->hip_d_rules_c, device_param->hip_d_rules, MIN (kernel_loops_max, KERNEL_RULES) * sizeof (kernel_rule_t), device_param->hip_stream) == -1) return -1;
     }
 
+    #if defined (__APPLE__)
+    if (device_param->is_metal == true)
+    {
+      if (hc_mtlMemcpyDtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_rules_c, 0, device_param->metal_d_rules, 0, MIN (kernel_loops_max, KERNEL_RULES) * sizeof (kernel_rule_t)) == -1) return -1;
+    }
+    #endif
+
     if (device_param->is_opencl == true)
     {
       if (hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_rules, device_param->opencl_d_rules_c, 0, 0, MIN (kernel_loops_max, KERNEL_RULES) * sizeof (kernel_rule_t), 0, NULL, NULL) == -1) return -1;
@@ -477,6 +491,17 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
     if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_tmps, device_param->size_tmps) == -1) return -1;
   }
 
+  #if defined (__APPLE__)
+  if (device_param->is_metal == true)
+  {
+    if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_pws_buf, device_param->size_pws) == -1) return -1;
+    if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_plain_bufs, device_param->size_plains) == -1) return -1;
+    if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_digests_shown, device_param->size_shown) == -1) return -1;
+    if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_result, device_param->size_results) == -1) return -1;
+    if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_tmps, device_param->size_tmps) == -1) return -1;
+  }
+  #endif
+
   if (device_param->is_opencl == true)
   {
     if (run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_pws_buf, device_param->size_pws) == -1) return -1;
diff --git a/src/backend.c b/src/backend.c
index 0aa098130..bfd092931 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -65,6 +65,12 @@ static bool is_same_device (const hc_device_param_t *src, const hc_device_param_
 
   if ((src->is_hip == true) && (dst->is_hip == true)) return false;
 
+  #if defined (__APPLE__)
+  // Metal can't have aliases
+
+  if ((src->is_metal == true) && (dst->is_metal == true)) return false;
+  #endif
+
   // But OpenCL can have aliases
 
   if ((src->is_opencl == true) && (dst->is_opencl == true))
@@ -131,7 +137,13 @@ static int backend_ctx_find_alias_devices (hashcat_ctx_t *hashcat_ctx)
 
       if (alias_device->is_hip == true) continue;
 
-      // this lets native OpenCL runtime survive over generic OpenCL runtime
+      #if defined (__APPLE__)
+      // this lets Metal devices survive over OpenCL
+
+      if (alias_device->is_metal == true) continue;
+      #endif
+
+      // this lets native OpenCL runtime survive over generic OpenCL runtime
 
       if (alias_device->opencl_device_type & CL_DEVICE_TYPE_CPU)
       {
@@ -164,6 +176,9 @@ static bool is_same_device_type (const hc_device_param_t *src, const hc_device_p
 {
   if (src->is_cuda != dst->is_cuda) return false;
   if (src->is_hip != dst->is_hip) return false;
+  #if defined (__APPLE__)
+  if (src->is_metal != dst->is_metal) return false;
+  #endif
   if (src->is_opencl != dst->is_opencl) return false;
 
   if (strcmp (src->device_name, dst->device_name) != 0) return false;
@@ -655,7 +670,7 @@ void generate_source_kernel_filename (const bool slow_candidates, const u32 atta
   }
 }
 
-void generate_cached_kernel_filename (const bool slow_candidates, const u32 attack_exec, const u32 attack_kern, const u32 kern_type, const u32 opti_type, char *cache_dir, const char *device_name_chksum, char *cached_file)
+void generate_cached_kernel_filename (const bool slow_candidates, const u32 attack_exec, const u32 attack_kern, const u32 kern_type, const u32 opti_type, char *cache_dir, const char *device_name_chksum, char *cached_file, bool is_metal)
 {
   if (opti_type & OPTI_TYPE_OPTIMIZED_KERNEL)
   {
@@ -663,23 +678,23 @@ void generate_cached_kernel_filename (const bool slow_candidates, const u32 atta
     {
       if (slow_candidates == true)
       {
-        snprintf (cached_file, 255, "%s/kernels/m%05d_a0-optimized.%s.kernel", cache_dir, (int) kern_type, device_name_chksum);
+        snprintf (cached_file, 255, "%s/kernels/m%05d_a0-optimized.%s.%s", cache_dir, (int) kern_type, device_name_chksum, (is_metal == true) ? "metallib" : "kernel");
       }
       else
       {
         if (attack_kern == ATTACK_KERN_STRAIGHT)
-          snprintf (cached_file, 255, "%s/kernels/m%05d_a0-optimized.%s.kernel", cache_dir, (int) kern_type, device_name_chksum);
+          snprintf (cached_file, 255, "%s/kernels/m%05d_a0-optimized.%s.%s", cache_dir, (int) kern_type, device_name_chksum, (is_metal == true) ? "metallib" : "kernel");
         else if (attack_kern == ATTACK_KERN_COMBI)
-          snprintf (cached_file, 255, "%s/kernels/m%05d_a1-optimized.%s.kernel", cache_dir, (int) kern_type, device_name_chksum);
+          snprintf (cached_file, 255, "%s/kernels/m%05d_a1-optimized.%s.%s", cache_dir, (int) kern_type, device_name_chksum, (is_metal == true) ? "metallib" : "kernel");
        else if (attack_kern == ATTACK_KERN_BF)
-          snprintf (cached_file, 255, "%s/kernels/m%05d_a3-optimized.%s.kernel", cache_dir, (int) kern_type, device_name_chksum);
+          snprintf (cached_file, 255, "%s/kernels/m%05d_a3-optimized.%s.%s", cache_dir, (int) kern_type, device_name_chksum, (is_metal == true) ? "metallib" : "kernel");
        else if (attack_kern == ATTACK_KERN_NONE)
-          snprintf (cached_file, 255, "%s/kernels/m%05d_a0-optimized.%s.kernel", cache_dir, (int) kern_type, device_name_chksum);
      }
     }
     else
     {
-      snprintf (cached_file, 255, "%s/kernels/m%05d-optimized.%s.kernel", cache_dir, (int) kern_type, device_name_chksum);
"metallib" : "kernel"); } } else @@ -688,23 +703,23 @@ void generate_cached_kernel_filename (const bool slow_candidates, const u32 atta { if (slow_candidates == true) { - snprintf (cached_file, 255, "%s/kernels/m%05d_a0-pure.%s.kernel", cache_dir, (int) kern_type, device_name_chksum); + snprintf (cached_file, 255, "%s/kernels/m%05d_a0-pure.%s.%s", cache_dir, (int) kern_type, device_name_chksum, (is_metal == true) ? "metallib" : "kernel"); } else { if (attack_kern == ATTACK_KERN_STRAIGHT) - snprintf (cached_file, 255, "%s/kernels/m%05d_a0-pure.%s.kernel", cache_dir, (int) kern_type, device_name_chksum); + snprintf (cached_file, 255, "%s/kernels/m%05d_a0-pure.%s.%s", cache_dir, (int) kern_type, device_name_chksum, (is_metal == true) ? "metallib" : "kernel"); else if (attack_kern == ATTACK_KERN_COMBI) - snprintf (cached_file, 255, "%s/kernels/m%05d_a1-pure.%s.kernel", cache_dir, (int) kern_type, device_name_chksum); + snprintf (cached_file, 255, "%s/kernels/m%05d_a1-pure.%s.%s", cache_dir, (int) kern_type, device_name_chksum, (is_metal == true) ? "metallib" : "kernel"); else if (attack_kern == ATTACK_KERN_BF) - snprintf (cached_file, 255, "%s/kernels/m%05d_a3-pure.%s.kernel", cache_dir, (int) kern_type, device_name_chksum); + snprintf (cached_file, 255, "%s/kernels/m%05d_a3-pure.%s.%s", cache_dir, (int) kern_type, device_name_chksum, (is_metal == true) ? "metallib" : "kernel"); else if (attack_kern == ATTACK_KERN_NONE) - snprintf (cached_file, 255, "%s/kernels/m%05d_a0-pure.%s.kernel", cache_dir, (int) kern_type, device_name_chksum); + snprintf (cached_file, 255, "%s/kernels/m%05d_a0-pure.%s.%s", cache_dir, (int) kern_type, device_name_chksum, (is_metal == true) ? "metallib" : "kernel"); } } else { - snprintf (cached_file, 255, "%s/kernels/m%05d-pure.%s.kernel", cache_dir, (int) kern_type, device_name_chksum); + snprintf (cached_file, 255, "%s/kernels/m%05d-pure.%s.%s", cache_dir, (int) kern_type, device_name_chksum, (is_metal == true) ? "metallib" : "kernel"); } } } @@ -714,9 +729,9 @@ void generate_source_kernel_shared_filename (char *shared_dir, char *source_file snprintf (source_file, 255, "%s/OpenCL/shared.cl", shared_dir); } -void generate_cached_kernel_shared_filename (char *cache_dir, const char *device_name_chksum_amp_mp, char *cached_file) +void generate_cached_kernel_shared_filename (char *cache_dir, const char *device_name_chksum_amp_mp, char *cached_file, bool is_metal) { - snprintf (cached_file, 255, "%s/kernels/shared.%s.kernel", cache_dir, device_name_chksum_amp_mp); + snprintf (cached_file, 255, "%s/kernels/shared.%s.%s", cache_dir, device_name_chksum_amp_mp, (is_metal == true) ? "metallib" : "kernel"); } void generate_source_kernel_mp_filename (const u32 opti_type, const u64 opts_type, char *shared_dir, char *source_file) @@ -731,15 +746,15 @@ void generate_source_kernel_mp_filename (const u32 opti_type, const u64 opts_typ } } -void generate_cached_kernel_mp_filename (const u32 opti_type, const u64 opts_type, char *cache_dir, const char *device_name_chksum_amp_mp, char *cached_file) +void generate_cached_kernel_mp_filename (const u32 opti_type, const u64 opts_type, char *cache_dir, const char *device_name_chksum_amp_mp, char *cached_file, bool is_metal) { if ((opti_type & OPTI_TYPE_BRUTE_FORCE) && (opts_type & OPTS_TYPE_PT_GENERATE_BE)) { - snprintf (cached_file, 255, "%s/kernels/markov_be.%s.kernel", cache_dir, device_name_chksum_amp_mp); + snprintf (cached_file, 255, "%s/kernels/markov_be.%s.%s", cache_dir, device_name_chksum_amp_mp, (is_metal == true) ? 
"metallib" : "kernel"); } else { - snprintf (cached_file, 255, "%s/kernels/markov_le.%s.kernel", cache_dir, device_name_chksum_amp_mp); + snprintf (cached_file, 255, "%s/kernels/markov_le.%s.%s", cache_dir, device_name_chksum_amp_mp, (is_metal == true) ? "metallib" : "kernel"); } } @@ -748,9 +763,9 @@ void generate_source_kernel_amp_filename (const u32 attack_kern, char *shared_di snprintf (source_file, 255, "%s/OpenCL/amp_a%u.cl", shared_dir, attack_kern); } -void generate_cached_kernel_amp_filename (const u32 attack_kern, char *cache_dir, const char *device_name_chksum_amp_mp, char *cached_file) +void generate_cached_kernel_amp_filename (const u32 attack_kern, char *cache_dir, const char *device_name_chksum_amp_mp, char *cached_file, bool is_metal) { - snprintf (cached_file, 255, "%s/kernels/amp_a%u.%s.kernel", cache_dir, attack_kern, device_name_chksum_amp_mp); + snprintf (cached_file, 255, "%s/kernels/amp_a%u.%s.%s", cache_dir, attack_kern, device_name_chksum_amp_mp, (is_metal == true) ? "metallib" : "kernel"); } int gidd_to_pw_t (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 gidd, pw_t *pw) @@ -779,6 +794,13 @@ int gidd_to_pw_t (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, c if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyDtoH (hashcat_ctx, device_param->metal_command_queue, &pw_idx, device_param->metal_d_pws_idx, gidd * sizeof (pw_idx_t), sizeof (pw_idx_t)) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { /* blocking */ @@ -805,6 +827,13 @@ int gidd_to_pw_t (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, c if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyDtoH (hashcat_ctx, device_param->metal_command_queue, pw->i, device_param->metal_d_pws_comp_buf, off * sizeof (u32), cnt * sizeof (u32)) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { /* blocking */ @@ -868,6 +897,13 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_tm_c, size_tm) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_tm_c, size_tm) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { if (run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_tm_c, size_tm) == -1) return -1; @@ -885,6 +921,13 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, if (hc_hipMemcpyDtoDAsync (hashcat_ctx, device_param->hip_d_bfs_c, device_param->hip_d_tm_c, size_tm, device_param->hip_stream) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyDtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_bfs_c, 0, device_param->metal_d_tm_c, 0, size_tm) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { if (hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_tm_c, device_param->opencl_d_bfs_c, 0, 0, size_tm, 0, NULL, NULL) == -1) return -1; @@ -953,6 +996,13 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, if (hc_hipMemcpyDtoDAsync (hashcat_ctx, 
       if (hc_hipMemcpyDtoDAsync (hashcat_ctx, device_param->hip_d_pws_buf, device_param->hip_d_pws_amp_buf, pws_cnt * sizeof (pw_t), device_param->hip_stream) == -1) return -1;
     }
 
+    #if defined (__APPLE__)
+    if (device_param->is_metal == true)
+    {
+      if (hc_mtlMemcpyDtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_pws_buf, 0, device_param->metal_d_pws_amp_buf, 0, pws_cnt * sizeof (pw_t)) == -1) return -1;
+    }
+    #endif
+
     if (device_param->is_opencl == true)
     {
       if (hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_amp_buf, device_param->opencl_d_pws_buf, 0, 0, pws_cnt * sizeof (pw_t), 0, NULL, NULL) == -1) return -1;
@@ -978,6 +1028,13 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
       if (run_hip_kernel_utf8toutf16le (hashcat_ctx, device_param, device_param->hip_d_pws_buf, pws_cnt) == -1) return -1;
     }
 
+    #if defined (__APPLE__)
+    if (device_param->is_metal == true)
+    {
+      if (run_metal_kernel_utf8toutf16le (hashcat_ctx, device_param, device_param->metal_d_pws_buf, pws_cnt) == -1) return -1;
+    }
+    #endif
+
     if (device_param->is_opencl == true)
     {
       if (run_opencl_kernel_utf8toutf16le (hashcat_ctx, device_param, device_param->opencl_d_pws_buf, pws_cnt) == -1) return -1;
@@ -1004,6 +1061,13 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
         if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1;
       }
 
+      #if defined (__APPLE__)
+      if (device_param->is_metal == true)
+      {
+        if (hc_mtlMemcpyDtoH (hashcat_ctx, device_param->metal_command_queue, device_param->hooks_buf, device_param->metal_d_hooks, 0, pws_cnt * hashconfig->hook_size) == -1) return -1;
+      }
+      #endif
+
       if (device_param->is_opencl == true)
       {
         /* blocking */
@@ -1052,6 +1116,13 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
         if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_hooks, device_param->hooks_buf, pws_cnt * hashconfig->hook_size, device_param->hip_stream) == -1) return -1;
       }
 
+      #if defined (__APPLE__)
+      if (device_param->is_metal == true)
+      {
+        if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_hooks, 0, device_param->hooks_buf, pws_cnt * hashconfig->hook_size) == -1) return -1;
+      }
+      #endif
+
       if (device_param->is_opencl == true)
       {
         if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_FALSE, 0, pws_cnt * hashconfig->hook_size, device_param->hooks_buf, 0, NULL, NULL) == -1) return -1;
@@ -1150,6 +1221,13 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
         if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1;
       }
 
+      #if defined (__APPLE__)
+      if (device_param->is_metal == true)
+      {
+        if (hc_mtlMemcpyDtoH (hashcat_ctx, device_param->metal_command_queue, device_param->hooks_buf, device_param->metal_d_hooks, 0, pws_cnt * hashconfig->hook_size) == -1) return -1;
+      }
+      #endif
+
       if (device_param->is_opencl == true)
       {
        /* blocking */
@@ -1198,6 +1276,13 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
         if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_hooks, device_param->hooks_buf, pws_cnt * hashconfig->hook_size, device_param->hip_stream) == -1) return -1;
       }
 
+      #if defined (__APPLE__)
+      if (device_param->is_metal == true)
+      {
+        if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_hooks, 0, device_param->hooks_buf, pws_cnt * hashconfig->hook_size) == -1) return -1;
+      }
+      #endif
+
       if (device_param->is_opencl == true)
       {
         if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_FALSE, 0, pws_cnt * hashconfig->hook_size, device_param->hooks_buf, 0, NULL, NULL) == -1) return -1;
@@ -1372,6 +1457,13 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
       if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_hooks, pws_cnt * hashconfig->hook_size) == -1) return -1;
     }
 
+    #if defined (__APPLE__)
+    if (device_param->is_metal == true)
+    {
+      if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_hooks, pws_cnt * hashconfig->hook_size) == -1) return -1;
+    }
+    #endif
+
     if (device_param->is_opencl == true)
     {
       if (run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_hooks, pws_cnt * hashconfig->hook_size) == -1) return -1;
@@ -1591,6 +1683,140 @@ int run_hip_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_
   return 0;
 }
 
+#if defined (__APPLE__)
+int run_metal_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, id buf, const u64 num)
+{
+  u64 num_elements = num;
+
+  device_param->kernel_params_atinit_buf64[1] = num_elements;
+
+  const u64 kernel_threads = device_param->kernel_wgs_atinit;
+
+  num_elements = round_up_multiple_32 (num_elements, kernel_threads);
+
+  const size_t global_work_size[3] = { num_elements, 1, 1 };
+  const size_t local_work_size[3] = { kernel_threads, 1, 1 };
+
+  id metal_command_buffer = NULL;
+  id metal_command_encoder = NULL;
+
+  if (hc_mtlEncodeComputeCommand_pre (hashcat_ctx, device_param->metal_pipeline_atinit, device_param->metal_command_queue, &metal_command_buffer, &metal_command_encoder) == -1) return -1;
+
+  if (hc_mtlSetCommandEncoderArg (hashcat_ctx, metal_command_encoder, 0, 0, buf, NULL, 0) == -1) return -1;
+  if (hc_mtlSetCommandEncoderArg (hashcat_ctx, metal_command_encoder, 0, 1, NULL, device_param->kernel_params_atinit[1], sizeof (u64)) == -1) return -1;
+
+  double ms = 0;
+
+  if (hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, global_work_size[0], local_work_size[0], &ms) == -1) return -1;
+
+  return 0;
+}
+
+int run_metal_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, id buf, const u64 num)
+{
+  u64 num_elements = num;
+
+  device_param->kernel_params_utf8toutf16le_buf64[1] = num_elements;
+
+  const u64 kernel_threads = device_param->kernel_wgs_utf8toutf16le;
+
+  num_elements = round_up_multiple_32 (num_elements, kernel_threads);
+
+  const size_t global_work_size[3] = { num_elements, 1, 1 };
+  const size_t local_work_size[3] = { kernel_threads, 1, 1 };
+
+  id metal_command_buffer = NULL;
+  id metal_command_encoder = NULL;
+
+  if (hc_mtlEncodeComputeCommand_pre (hashcat_ctx, device_param->metal_pipeline_utf8toutf16le, device_param->metal_command_queue, &metal_command_buffer, &metal_command_encoder) == -1) return -1;
+
+  if (hc_mtlSetCommandEncoderArg (hashcat_ctx, metal_command_encoder, 0, 0, buf, NULL, 0) == -1) return -1;
+  if (hc_mtlSetCommandEncoderArg (hashcat_ctx, metal_command_encoder, 0, 1, NULL, device_param->kernel_params_utf8toutf16le[1], sizeof (u64)) == -1) return -1;
+
+  double ms = 0;
+
+  if (hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, global_work_size[0], local_work_size[0], &ms) == -1) return -1;
+
+  return 0;
+}
+
+int run_metal_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, id buf, const u64 size)
+{
+  const u64 num16d = size / 16;
+  const u64 num16m = size % 16;
+
+  // with apple GPU clEnqueueWriteBuffer() return CL_INVALID_VALUE, workaround
+
+  if (num16d)
+  {
+    const u64 kernel_threads = device_param->kernel_wgs_bzero;
+
+    u64 num_elements = round_up_multiple_32 (num16d, kernel_threads);
+
+    id metal_command_buffer = NULL;
+    id metal_command_encoder = NULL;
+
+    if (hc_mtlEncodeComputeCommand_pre (hashcat_ctx, device_param->metal_pipeline_bzero, device_param->metal_command_queue, &metal_command_buffer, &metal_command_encoder) == -1) return -1;
+
+    if (hc_mtlSetCommandEncoderArg (hashcat_ctx, metal_command_encoder, 0, 0, buf, NULL, 0) == -1) return -1;
+    if (hc_mtlSetCommandEncoderArg (hashcat_ctx, metal_command_encoder, 0, 1, NULL, (void *) &num16d, sizeof (u64)) == -1) return -1;
+
+    const size_t global_work_size[3] = { num_elements, 1, 1 };
+    const size_t local_work_size[3] = { kernel_threads, 1, 1 };
+
+    double ms = 0;
+
+    if (hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, global_work_size[0], local_work_size[0], &ms) == -1) return -1;
+  }
+
+  if (num16m)
+  {
+    if (device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE && \
+        (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK || device_param->opencl_device_vendor_id == VENDOR_ID_APPLE) && \
+        device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
+    {
+      u8 *bzeros_apple = (u8 *) hccalloc (num16m, sizeof (u8));
+
+      if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, buf, num16d * 16, bzeros_apple, num16m) == -1) return -1;
+
+      hcfree (bzeros_apple);
+    }
+    else
+    {
+      if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, buf, num16d * 16, bzeros, num16m) == -1) return -1;
+    }
+  }
+
+  return 0;
+}
+
+int run_metal_kernel_memset32 (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, mtl_mem buf, const u64 offset, const u32 value, const u64 size)
+{
+  int rc;
+
+  const u64 N = size / 4;
+
+  /* check that the size is multiple of element size */
+  if (size % 4 != 0)
+  {
+    return CL_INVALID_VALUE;
+  }
+
+  u32 *tmp = (u32 *) hcmalloc (size);
+
+  for (u64 i = 0; i < N; i++)
+  {
+    tmp[i] = value;
+  }
+
+  rc = hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, buf, offset, tmp, size);
+
+  hcfree (tmp);
+
+  return rc;
+}
+#endif // __APPLE__
+
 int run_opencl_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 num)
 {
   u64 num_elements = num;
@@ -2037,6 +2263,137 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
     }
   }
 
+  #if defined (__APPLE__)
+  if (device_param->is_metal == true)
+  {
+    mtl_command_encoder metal_command_encoder = NULL;
+    mtl_command_buffer metal_command_buffer = NULL;
+    mtl_pipeline metal_pipeline = NULL;
+
+    switch (kern_run)
+    {
+      case KERN_RUN_1: metal_pipeline = device_param->metal_pipeline1; break;
+      case KERN_RUN_12: metal_pipeline = device_param->metal_pipeline12; break;
+      case KERN_RUN_2P: metal_pipeline = device_param->metal_pipeline2p; break;
+      case KERN_RUN_2: metal_pipeline = device_param->metal_pipeline2; break;
+      case KERN_RUN_2E: metal_pipeline = device_param->metal_pipeline2e; break;
+      case KERN_RUN_23: metal_pipeline = device_param->metal_pipeline23; break;
+      case KERN_RUN_3: metal_pipeline = device_param->metal_pipeline3; break;
+      case KERN_RUN_4: metal_pipeline = device_param->metal_pipeline4; break;
+      case KERN_RUN_INIT2: metal_pipeline = device_param->metal_pipeline_init2; break;
+      case KERN_RUN_LOOP2P: metal_pipeline = device_param->metal_pipeline_loop2p; break;
+      case KERN_RUN_LOOP2: metal_pipeline = device_param->metal_pipeline_loop2; break;
+      case KERN_RUN_AUX1: metal_pipeline = device_param->metal_pipeline_aux1; break;
+      case KERN_RUN_AUX2: metal_pipeline = device_param->metal_pipeline_aux2; break;
+      case KERN_RUN_AUX3: metal_pipeline = device_param->metal_pipeline_aux3; break;
+      case KERN_RUN_AUX4: metal_pipeline = device_param->metal_pipeline_aux4; break;
+    }
+
+    if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_kernel_param, 0, &device_param->kernel_param, device_param->size_kernel_params) == -1) return -1;
+
+    if (hc_mtlEncodeComputeCommand_pre (hashcat_ctx, metal_pipeline, device_param->metal_command_queue, &metal_command_buffer, &metal_command_encoder) == -1) return -1;
+
+    // all buffers must be allocated
+    int tmp_buf_cnt = 0;
+    mtl_mem tmp_buf[25] = { 0 };
+
+    for (u32 i = 0; i <= 24; i++)
+    {
+      // allocate fake buffer if NULL
+      if (device_param->kernel_params[i] == NULL)
+      {
+        if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, sizeof (u8), NULL, &tmp_buf[tmp_buf_cnt]) == -1) return -1;
+        if (hc_mtlSetCommandEncoderArg (hashcat_ctx, metal_command_encoder, 0, i, tmp_buf[tmp_buf_cnt], NULL, 0) == -1) return -1;
+        tmp_buf_cnt++;
+      }
+      else
+      {
+        if (hc_mtlSetCommandEncoderArg (hashcat_ctx, metal_command_encoder, 0, i, device_param->kernel_params[i], NULL, 0) == -1) return -1;
+      }
+    }
+
+    if (kernel_threads == 0) kernel_threads = 1;
+
+    num_elements = round_up_multiple_32 (num_elements, kernel_threads);
+
+    if (kern_run == KERN_RUN_1)
+    {
+      if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_INIT)
+      {
+        num_elements = CEILDIV (num_elements, device_param->vector_width);
+      }
+    }
+    else if (kern_run == KERN_RUN_2)
+    {
+      if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_LOOP)
+      {
+        num_elements = CEILDIV (num_elements, device_param->vector_width);
+      }
+    }
+    else if (kern_run == KERN_RUN_3)
+    {
+      if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_COMP)
+      {
+        num_elements = CEILDIV (num_elements, device_param->vector_width);
+      }
+    }
+    else if (kern_run == KERN_RUN_INIT2)
+    {
+      if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_INIT2)
+      {
+        num_elements = CEILDIV (num_elements, device_param->vector_width);
+      }
+    }
+    else if (kern_run == KERN_RUN_LOOP2)
+    {
+      if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_LOOP2)
+      {
+        num_elements = CEILDIV (num_elements, device_param->vector_width);
+      }
+    }
+
+    num_elements = round_up_multiple_32 (num_elements, kernel_threads);
+
+    const size_t global_work_size[3] = { num_elements, 1, 1 };
+    const size_t local_work_size[3] = { kernel_threads, 1, 1 };
+
+    double ms = 0;
+
+    const int rc_cc = hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, global_work_size[0], local_work_size[0], &ms);
+
+    if (rc_cc != -1)
+    {
+      float exec_ms = (float) ms;
+
+      if (event_update)
+      {
+        u32 exec_pos = device_param->exec_pos;
+
+        device_param->exec_msec[exec_pos] = exec_ms;
+
+        exec_pos++;
+
+        if (exec_pos == EXEC_CACHE)
+        {
+          exec_pos = 0;
+        }
+
+        device_param->exec_pos = exec_pos;
+      }
+    }
+
+    // release tmp_buf
+
+    for (int i = 0; i < tmp_buf_cnt; i++)
+    {
+      hc_mtlReleaseMemObject (hashcat_ctx, tmp_buf[i]);
+      tmp_buf[i] = NULL;
+    }
+
+    if (rc_cc == -1) return -1;
+  }
+  #endif // __APPLE__
+
   if (device_param->is_opencl == true)
   {
     cl_kernel opencl_kernel = NULL;
@@ -2294,32 +2651,108 @@ int run_kernel_mp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
     if (hc_hipLaunchKernel (hashcat_ctx, hip_function, num_elements, 1, 1, kernel_threads, 1, 1, 0, device_param->hip_stream, hip_args, NULL) == -1) return -1;
   }
 
-  if (device_param->is_opencl == true)
+  #if defined (__APPLE__)
+  if (device_param->is_metal == true)
   {
-    cl_kernel opencl_kernel = NULL;
+    id metal_command_encoder = NULL;
+    id metal_command_buffer = NULL;
+    id metal_pipeline = NULL;
 
     switch (kern_run)
     {
-      case KERN_RUN_MP: opencl_kernel = device_param->opencl_kernel_mp; break;
-      case KERN_RUN_MP_R: opencl_kernel = device_param->opencl_kernel_mp_r; break;
-      case KERN_RUN_MP_L: opencl_kernel = device_param->opencl_kernel_mp_l; break;
+      case KERN_RUN_MP: metal_pipeline = device_param->metal_pipeline_mp; break;
+      case KERN_RUN_MP_R: metal_pipeline = device_param->metal_pipeline_mp_r; break;
+      case KERN_RUN_MP_L: metal_pipeline = device_param->metal_pipeline_mp_l; break;
     }
 
-    switch (kern_run)
+    if (hc_mtlEncodeComputeCommand_pre (hashcat_ctx, metal_pipeline, device_param->metal_command_queue, &metal_command_buffer, &metal_command_encoder) == -1) return -1;
+
+    if (kern_run == KERN_RUN_MP)
     {
-      case KERN_RUN_MP: if (hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 3, sizeof (cl_ulong), device_param->kernel_params_mp[3]) == -1) return -1;
-                        if (hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 4, sizeof (cl_uint), device_param->kernel_params_mp[4]) == -1) return -1;
-                        if (hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 5, sizeof (cl_uint), device_param->kernel_params_mp[5]) == -1) return -1;
-                        if (hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 6, sizeof (cl_uint), device_param->kernel_params_mp[6]) == -1) return -1;
-                        if (hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 7, sizeof (cl_uint), device_param->kernel_params_mp[7]) == -1) return -1;
-                        if (hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 8, sizeof (cl_ulong), device_param->kernel_params_mp[8]) == -1) return -1;
-                        break;
-      case KERN_RUN_MP_R: if (hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 3, sizeof (cl_ulong), device_param->kernel_params_mp_r[3]) == -1) return -1;
-                          if (hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 4, sizeof (cl_uint), device_param->kernel_params_mp_r[4]) == -1) return -1;
-                          if (hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 5, sizeof (cl_uint), device_param->kernel_params_mp_r[5]) == -1) return -1;
-                          if (hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 6, sizeof (cl_uint), device_param->kernel_params_mp_r[6]) == -1) return -1;
-                          if (hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 7, sizeof (cl_uint), device_param->kernel_params_mp_r[7]) == -1) return -1;
-                          if (hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 8, sizeof (cl_ulong), device_param->kernel_params_mp_r[8]) == -1) return -1;
+      for (int i = 0; i < 3; i++)
+      {
+        if (hc_mtlSetCommandEncoderArg (hashcat_ctx, metal_command_encoder, 0, i, device_param->kernel_params_mp[i], NULL, 0) == -1) return -1;
+      }
+
+      if (hc_mtlSetCommandEncoderArg (hashcat_ctx, metal_command_encoder, 0, 3, NULL, device_param->kernel_params_mp[3], sizeof (u64)) == -1) return -1;
+
+      for (int i = 4; i < 8; i++)
+      {
+        if (hc_mtlSetCommandEncoderArg (hashcat_ctx, metal_command_encoder, 0, i, NULL, device_param->kernel_params_mp[i], sizeof (u32)) == -1) return -1;
+      }
+
+      if (hc_mtlSetCommandEncoderArg (hashcat_ctx, metal_command_encoder, 0, 8, NULL, device_param->kernel_params_mp[8], sizeof (u64)) == -1) return -1;
+    }
+    else if (kern_run == KERN_RUN_MP_R)
+    {
+      for (int i = 0; i < 3; i++)
+      {
+        if (hc_mtlSetCommandEncoderArg (hashcat_ctx, metal_command_encoder, 0, i, device_param->kernel_params_mp_r[i], NULL, 0) == -1) return -1;
+      }
+
+      if (hc_mtlSetCommandEncoderArg (hashcat_ctx, metal_command_encoder, 0, 3, NULL, device_param->kernel_params_mp_r[3], sizeof (u64)) == -1) return -1;
+
+      for (int i = 4; i < 8; i++)
+      {
+        if (hc_mtlSetCommandEncoderArg (hashcat_ctx, metal_command_encoder, 0, i, NULL, device_param->kernel_params_mp_r[i], sizeof (u32)) == -1) return -1;
+      }
+
+      if (hc_mtlSetCommandEncoderArg (hashcat_ctx, metal_command_encoder, 0, 8, NULL, device_param->kernel_params_mp_r[8], sizeof (u64)) == -1) return -1;
+    }
+    else if (kern_run == KERN_RUN_MP_L)
+    {
+      for (int i = 0; i < 3; i++)
+      {
+        if (hc_mtlSetCommandEncoderArg (hashcat_ctx, metal_command_encoder, 0, i, device_param->kernel_params_mp_l[i], NULL, 0) == -1) return -1;
+      }
+
+      if (hc_mtlSetCommandEncoderArg (hashcat_ctx, metal_command_encoder, 0, 3, NULL, device_param->kernel_params_mp_l[3], sizeof (u64)) == -1) return -1;
+
+      for (int i = 4; i < 9; i++)
+      {
+        if (hc_mtlSetCommandEncoderArg (hashcat_ctx, metal_command_encoder, 0, i, NULL, device_param->kernel_params_mp_l[i], sizeof (u32)) == -1) return -1;
+      }
+
+      if (hc_mtlSetCommandEncoderArg (hashcat_ctx, metal_command_encoder, 0, 9, NULL, device_param->kernel_params_mp_l[9], sizeof (u64)) == -1) return -1;
+    }
+
+    num_elements = round_up_multiple_32 (num_elements, kernel_threads);
+
+    const size_t global_work_size[3] = { num_elements, 1, 1 };
+    const size_t local_work_size[3] = { kernel_threads, 1, 1 };
+
+    double ms = 0;
+
+    if (hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, global_work_size[0], local_work_size[0], &ms) == -1) return -1;
+  }
+  #endif // __APPLE__
+
+  if (device_param->is_opencl == true)
+  {
+    cl_kernel opencl_kernel = NULL;
+
+    switch (kern_run)
+    {
+      case KERN_RUN_MP: opencl_kernel = device_param->opencl_kernel_mp; break;
+      case KERN_RUN_MP_R: opencl_kernel = device_param->opencl_kernel_mp_r; break;
+      case KERN_RUN_MP_L: opencl_kernel = device_param->opencl_kernel_mp_l; break;
+    }
+
+    switch (kern_run)
+    {
+      case KERN_RUN_MP: if (hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 3, sizeof (cl_ulong), device_param->kernel_params_mp[3]) == -1) return -1;
+                        if (hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 4, sizeof (cl_uint), device_param->kernel_params_mp[4]) == -1) return -1;
+                        if (hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 5, sizeof (cl_uint), device_param->kernel_params_mp[5]) == -1) return -1;
+                        if (hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 6, sizeof (cl_uint), device_param->kernel_params_mp[6]) == -1) return -1;
+                        if (hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 7, sizeof (cl_uint), device_param->kernel_params_mp[7]) == -1) return -1;
+                        if (hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 8, sizeof (cl_ulong), device_param->kernel_params_mp[8]) == -1) return -1;
+                        break;
+      case KERN_RUN_MP_R: if (hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 3, sizeof (cl_ulong), device_param->kernel_params_mp_r[3]) == -1) return -1;
+                          if (hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 4, sizeof (cl_uint), device_param->kernel_params_mp_r[4]) == -1) return -1;
+                          if (hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 5, sizeof (cl_uint), device_param->kernel_params_mp_r[5]) == -1) return -1;
+                          if (hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 6, sizeof (cl_uint), device_param->kernel_params_mp_r[6]) == -1) return -1;
+                          if (hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 7, sizeof (cl_uint), device_param->kernel_params_mp_r[7]) == -1) return -1;
+                          if (hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 8, sizeof (cl_ulong), device_param->kernel_params_mp_r[8]) == -1) return -1;
                           break;
       case KERN_RUN_MP_L: if (hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 3, sizeof (cl_ulong), device_param->kernel_params_mp_l[3]) == -1) return -1;
                           if (hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 4, sizeof (cl_uint), device_param->kernel_params_mp_l[4]) == -1) return -1;
@@ -2362,6 +2795,29 @@ int run_kernel_tm (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
     if (hc_hipLaunchKernel (hashcat_ctx, hip_function, num_elements / kernel_threads, 1, 1, kernel_threads, 1, 1, 0, device_param->hip_stream, device_param->kernel_params_tm, NULL) == -1) return -1;
   }
 
+  #if defined (__APPLE__)
+  if (device_param->is_metal == true)
+  {
+    const size_t global_work_size[3] = { num_elements, 1, 1 };
+    const size_t local_work_size[3] = { kernel_threads, 1, 1 };
+
+    id metal_command_encoder = NULL;
+    id metal_command_buffer = NULL;
+    id metal_pipeline = device_param->metal_pipeline_tm;
+
+    if (hc_mtlEncodeComputeCommand_pre (hashcat_ctx, metal_pipeline, device_param->metal_command_queue, &metal_command_buffer, &metal_command_encoder) == -1) return -1;
+
+    for (int i = 0; i < 2; i++)
+    {
+      if (hc_mtlSetCommandEncoderArg (hashcat_ctx, metal_command_encoder, 0, i, device_param->kernel_params_tm[i], NULL, 0) == -1) return -1;
+    }
+
+    double ms = 0;
+
+    if (hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, global_work_size[0], local_work_size[0], &ms) == -1) return -1;
+  }
+  #endif // __APPLE__
+
   if (device_param->is_opencl == true)
   {
     cl_kernel cuda_kernel = device_param->opencl_kernel_tm;
@@ -2401,6 +2857,62 @@ int run_kernel_amp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
     if (hc_hipLaunchKernel (hashcat_ctx, hip_function, num_elements, 1, 1, kernel_threads, 1, 1, 0, device_param->hip_stream, device_param->kernel_params_amp, NULL) == -1) return -1;
   }
 
+  #if defined (__APPLE__)
+  if (device_param->is_metal == true)
+  {
+    num_elements = round_up_multiple_32 (num_elements, kernel_threads);
+
+    const size_t global_work_size[3] = { num_elements, 1, 1 };
+    const size_t local_work_size[3] = { kernel_threads, 1, 1 };
+
+    id metal_command_encoder = NULL;
+    id metal_command_buffer = NULL;
+    id metal_pipeline = device_param->metal_pipeline_amp;
+
+    if (hc_mtlEncodeComputeCommand_pre (hashcat_ctx, metal_pipeline, device_param->metal_command_queue, &metal_command_buffer, &metal_command_encoder) == -1) return -1;
+
+    // all buffers must be allocated
+    int tmp_buf_cnt = 0;
+
+    mtl_mem tmp_buf[5] = { 0 };
+
+    for (int i = 0; i < 5; i++)
+    {
+      // allocate fake buffer if NULL
+      if (device_param->kernel_params_amp[i] == NULL)
+      {
+        if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, sizeof (u8), NULL, &tmp_buf[tmp_buf_cnt]) == -1) return -1;
+
+        if (hc_mtlSetCommandEncoderArg (hashcat_ctx, metal_command_encoder, 0, i, tmp_buf[tmp_buf_cnt], NULL, 0) == -1) return -1;
+
+        tmp_buf_cnt++;
+      }
+      else
+      {
+        if (hc_mtlSetCommandEncoderArg (hashcat_ctx, metal_command_encoder, 0, i, device_param->kernel_params_amp[i], NULL, 0) == -1) return -1;
+      }
+    }
+
+    if (hc_mtlSetCommandEncoderArg (hashcat_ctx, metal_command_encoder, 0, 5, NULL, device_param->kernel_params_amp[5], sizeof (u32)) == -1) return -1;
+    if (hc_mtlSetCommandEncoderArg (hashcat_ctx, metal_command_encoder, 0, 6, NULL, device_param->kernel_params_amp[6], sizeof (u64)) == -1) return -1;
+
+    double ms = 0;
+
+    const int rc_cc = hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder,
metal_command_buffer, global_work_size[0], local_work_size[0], &ms); + + // release tmp_buf + + for (int i = 0; i < tmp_buf_cnt; i++) + { + hc_mtlReleaseMemObject (hashcat_ctx, tmp_buf[i]); + + tmp_buf[i] = NULL; + } + + if (rc_cc == -1) return -1; + } + #endif // __APPLE__ + if (device_param->is_opencl == true) { num_elements = round_up_multiple_64 (num_elements, kernel_threads); @@ -2444,6 +2956,32 @@ int run_kernel_decompress (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device if (hc_hipLaunchKernel (hashcat_ctx, hip_function, num_elements, 1, 1, kernel_threads, 1, 1, 0, device_param->hip_stream, device_param->kernel_params_decompress, NULL) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + num_elements = round_up_multiple_32 (num_elements, kernel_threads); + + const size_t global_work_size[3] = { num_elements, 1, 1 }; + const size_t local_work_size[3] = { kernel_threads, 1, 1 }; + + id metal_command_buffer = NULL; + id metal_command_encoder = NULL; + + if (hc_mtlEncodeComputeCommand_pre (hashcat_ctx, device_param->metal_pipeline_decompress, device_param->metal_command_queue, &metal_command_buffer, &metal_command_encoder) == -1) return -1; + + for (int i = 0; i < 3; i++) + { + if (hc_mtlSetCommandEncoderArg (hashcat_ctx, metal_command_encoder, 0, i, device_param->kernel_params_decompress[i], NULL, 0) == -1) return -1; + } + + if (hc_mtlSetCommandEncoderArg (hashcat_ctx, metal_command_encoder, 0, 3, NULL, device_param->kernel_params_decompress[3], sizeof (u64)) == -1) return -1; + + double ms = 0; + + if (hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, global_work_size[0], local_work_size[0], &ms) == -1) return -1; + } + #endif // __APPLE__ + if (device_param->is_opencl == true) { num_elements = round_up_multiple_64 (num_elements, kernel_threads); @@ -2512,6 +3050,22 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const } } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_pws_idx, 0, device_param->pws_idx, pws_cnt * sizeof (pw_idx_t)) == -1) return -1; + + const pw_idx_t *pw_idx = device_param->pws_idx + pws_cnt; + + const u32 off = pw_idx->off; + + if (off) + { + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_pws_comp_buf, 0, device_param->pws_comp, off * sizeof (u32)) == -1) return -1; + } + } + #endif + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_idx, CL_FALSE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL) == -1) return -1; @@ -2560,6 +3114,22 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const } } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_pws_idx, 0, device_param->pws_idx, pws_cnt * sizeof (pw_idx_t)) == -1) return -1; + + const pw_idx_t *pw_idx = device_param->pws_idx + pws_cnt; + + const u32 off = pw_idx->off; + + if (off) + { + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_pws_comp_buf, 0, device_param->pws_comp, off * sizeof (u32)) == -1) return -1; + } + } + #endif + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, 
device_param->opencl_d_pws_idx, CL_FALSE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL) == -1) return -1; @@ -2642,6 +3212,22 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const } } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_pws_idx, 0, device_param->pws_idx, pws_cnt * sizeof (pw_idx_t)) == -1) return -1; + + const pw_idx_t *pw_idx = device_param->pws_idx + pws_cnt; + + const u32 off = pw_idx->off; + + if (off) + { + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_pws_comp_buf, 0, device_param->pws_comp, off * sizeof (u32)) == -1) return -1; + } + } + #endif + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_idx, CL_FALSE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL) == -1) return -1; @@ -2690,6 +3276,22 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const } } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_pws_idx, 0, device_param->pws_idx, pws_cnt * sizeof (pw_idx_t)) == -1) return -1; + + const pw_idx_t *pw_idx = device_param->pws_idx + pws_cnt; + + const u32 off = pw_idx->off; + + if (off) + { + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_pws_comp_buf, 0, device_param->pws_comp, off * sizeof (u32)) == -1) return -1; + } + } + #endif + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_idx, CL_FALSE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL) == -1) return -1; @@ -2736,6 +3338,22 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const } } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_pws_idx, 0, device_param->pws_idx, pws_cnt * sizeof (pw_idx_t)) == -1) return -1; + + const pw_idx_t *pw_idx = device_param->pws_idx + pws_cnt; + + const u32 off = pw_idx->off; + + if (off) + { + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_pws_comp_buf, 0, device_param->pws_comp, off * sizeof (u32)) == -1) return -1; + } + } + #endif + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_idx, CL_FALSE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL) == -1) return -1; @@ -2967,6 +3585,13 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co if (hc_hipMemcpyDtoDAsync (hashcat_ctx, device_param->hip_d_rules_c, device_param->hip_d_rules + (innerloop_pos * sizeof (kernel_rule_t)), innerloop_left * sizeof (kernel_rule_t), device_param->hip_stream) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyDtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_rules_c, 0, device_param->metal_d_rules, innerloop_pos * sizeof (kernel_rule_t), innerloop_left * sizeof (kernel_rule_t)) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { if (hc_clEnqueueCopyBuffer 
(hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_rules, device_param->opencl_d_rules_c, innerloop_pos * sizeof (kernel_rule_t), 0, innerloop_left * sizeof (kernel_rule_t), 0, NULL, NULL) == -1) return -1; @@ -3092,6 +3717,13 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_combs_c, device_param->combs_buf, innerloop_left * sizeof (pw_t), device_param->hip_stream) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_combs_c, 0, device_param->combs_buf, innerloop_left * sizeof (pw_t)) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_combs_c, CL_FALSE, 0, innerloop_left * sizeof (pw_t), device_param->combs_buf, 0, NULL, NULL) == -1) return -1; @@ -3115,6 +3747,13 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co if (hc_hipMemcpyDtoDAsync (hashcat_ctx, device_param->hip_d_combs_c, device_param->hip_d_combs, innerloop_left * sizeof (pw_t), device_param->hip_stream) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyDtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_combs_c, 0, device_param->metal_d_combs, 0, innerloop_left * sizeof (pw_t)) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { if (hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_combs, device_param->opencl_d_combs_c, 0, 0, innerloop_left * sizeof (pw_t), 0, NULL, NULL) == -1) return -1; @@ -3138,6 +3777,13 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co if (hc_hipMemcpyDtoDAsync (hashcat_ctx, device_param->hip_d_combs_c, device_param->hip_d_combs, innerloop_left * sizeof (pw_t), device_param->hip_stream) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyDtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_combs_c, 0, device_param->metal_d_combs, 0, innerloop_left * sizeof (pw_t)) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { if (hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_combs, device_param->opencl_d_combs_c, 0, 0, innerloop_left * sizeof (pw_t), 0, NULL, NULL) == -1) return -1; @@ -3264,6 +3910,13 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_combs_c, device_param->combs_buf, innerloop_left * sizeof (pw_t), device_param->hip_stream) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_combs_c, 0, device_param->combs_buf, innerloop_left * sizeof (pw_t)) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_combs_c, CL_FALSE, 0, innerloop_left * sizeof (pw_t), device_param->combs_buf, 0, NULL, NULL) == -1) return -1; @@ -3287,6 +3940,13 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co if (hc_hipMemcpyDtoDAsync (hashcat_ctx, 
device_param->hip_d_combs_c, device_param->hip_d_combs, innerloop_left * sizeof (pw_t), device_param->hip_stream) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyDtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_combs_c, 0, device_param->metal_d_combs, 0, innerloop_left * sizeof (pw_t)) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { if (hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_combs, device_param->opencl_d_combs_c, 0, 0, innerloop_left * sizeof (pw_t), 0, NULL, NULL) == -1) return -1; @@ -3312,6 +3972,13 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co if (hc_hipMemcpyDtoDAsync (hashcat_ctx, device_param->hip_d_bfs_c, device_param->hip_d_bfs, innerloop_left * sizeof (bf_t), device_param->hip_stream) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyDtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_bfs_c, 0, device_param->metal_d_bfs, 0, innerloop_left * sizeof (bf_t)) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { if (hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bfs, device_param->opencl_d_bfs_c, 0, 0, innerloop_left * sizeof (bf_t), 0, NULL, NULL) == -1) return -1; @@ -3720,6 +4387,44 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx) } } + /** + * Init Metal runtime + */ + + int rc_metal_init = -1; + + #if defined (__APPLE__) + if (user_options->backend_ignore_metal == false) + { + MTL_PTR *mtl = (MTL_PTR *) hcmalloc (sizeof (MTL_PTR)); + + backend_ctx->mtl = mtl; + + rc_metal_init = mtl_init (hashcat_ctx); + + if (rc_metal_init == 0) + { + size_t version_len = 0; + + if (hc_mtlRuntimeGetVersionString (hashcat_ctx, NULL, &version_len) == -1) return -1; + + if (version_len == 0) return -1; + + backend_ctx->metal_runtimeVersionStr = (char *) hcmalloc (version_len + 1); + + if (hc_mtlRuntimeGetVersionString (hashcat_ctx, backend_ctx->metal_runtimeVersionStr, &version_len) == -1) return -1; + } + else + { + rc_metal_init = -1; + + backend_ctx->rc_metal_init = rc_metal_init; + + mtl_close (hashcat_ctx); + } + } + #endif // __APPLE__ + /** * Load and map OpenCL library calls */ @@ -3743,9 +4448,9 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx) * return if both CUDA and OpenCL initialization failed */ - if ((rc_cuda_init == -1) && (rc_hip_init == -1) && (rc_ocl_init == -1)) + if ((rc_cuda_init == -1) && (rc_hip_init == -1) && (rc_ocl_init == -1) && (rc_metal_init == -1)) { - event_log_error (hashcat_ctx, "ATTENTION! No OpenCL, HIP or CUDA installation found."); + event_log_error (hashcat_ctx, "ATTENTION! 
No OpenCL, Metal, HIP or CUDA installation found."); event_log_warning (hashcat_ctx, "You are probably missing the CUDA, HIP or OpenCL runtime installation."); event_log_warning (hashcat_ctx, NULL); @@ -3973,7 +4678,7 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx) if (IGNORE_DEVICE_NOT_FOUND) { - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + //backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; OCL_PTR *ocl = (OCL_PTR *) backend_ctx->ocl; @@ -4068,9 +4773,13 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx) * Final checks */ - if ((backend_ctx->cuda == NULL) && (backend_ctx->hip == NULL) && (backend_ctx->ocl == NULL)) + if ((backend_ctx->cuda == NULL) && (backend_ctx->hip == NULL) && (backend_ctx->ocl == NULL) && (backend_ctx->mtl == NULL)) { - event_log_error (hashcat_ctx, "ATTENTION! No OpenCL-compatible, HIP-compatible or CUDA-compatible platform found."); + #if defined (__APPLE__) + event_log_error (hashcat_ctx, "ATTENTION! No OpenCL, Metal, HIP or CUDA compatible platform found."); + #else + event_log_error (hashcat_ctx, "ATTENTION! No OpenCL, HIP or CUDA compatible platform found."); + #endif event_log_warning (hashcat_ctx, "You are probably missing the OpenCL, CUDA or HIP runtime installation."); event_log_warning (hashcat_ctx, NULL); @@ -4187,9 +4896,9 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) device_param->is_cuda = true; device_param->is_hip = false; + device_param->is_metal = false; device_param->is_opencl = false; - device_param->use_opencl12 = false; device_param->use_opencl20 = false; device_param->use_opencl21 = false; @@ -4442,16 +5151,22 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) if (device_param->sm_major < 5) { - if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: This hardware has outdated CUDA compute capability (%u.%u).", device_id + 1, device_param->sm_major, device_param->sm_minor); - if (user_options->quiet == false) event_log_warning (hashcat_ctx, " For modern OpenCL performance, upgrade to hardware that supports"); - if (user_options->quiet == false) event_log_warning (hashcat_ctx, " CUDA compute capability version 5.0 (Maxwell) or higher."); + if (user_options->quiet == false) + { + event_log_warning (hashcat_ctx, "* Device #%u: This hardware has outdated CUDA compute capability (%u.%u).", device_id + 1, device_param->sm_major, device_param->sm_minor); + event_log_warning (hashcat_ctx, " For modern OpenCL performance, upgrade to hardware that supports"); + event_log_warning (hashcat_ctx, " CUDA compute capability version 5.0 (Maxwell) or higher."); + } } if (device_param->kernel_exec_timeout != 0) { - if (user_options->quiet == false) event_log_advice (hashcat_ctx, "* Device #%u: WARNING! Kernel exec timeout is not disabled.", device_id + 1); - if (user_options->quiet == false) event_log_advice (hashcat_ctx, " This may cause \"CL_OUT_OF_RESOURCES\" or related errors."); - if (user_options->quiet == false) event_log_advice (hashcat_ctx, " To disable the timeout, see: https://hashcat.net/q/timeoutpatch"); + if (user_options->quiet == false) + { + event_log_advice (hashcat_ctx, "* Device #%u: WARNING! 
Kernel exec timeout is not disabled.", device_id + 1); + event_log_advice (hashcat_ctx, " This may cause \"CL_OUT_OF_RESOURCES\" or related errors."); + event_log_advice (hashcat_ctx, " To disable the timeout, see: https://hashcat.net/q/timeoutpatch"); + } } } @@ -4566,6 +5281,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) device_param->is_cuda = false; device_param->is_hip = true; + device_param->is_metal = false; device_param->is_opencl = false; device_param->use_opencl12 = false; @@ -4826,16 +5542,22 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) if (device_param->sm_major < 5) { - if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: This hardware has outdated CUDA compute capability (%u.%u).", device_id + 1, device_param->sm_major, device_param->sm_minor); - if (user_options->quiet == false) event_log_warning (hashcat_ctx, " For modern OpenCL performance, upgrade to hardware that supports"); - if (user_options->quiet == false) event_log_warning (hashcat_ctx, " CUDA compute capability version 5.0 (Maxwell) or higher."); + if (user_options->quiet == false) + { + event_log_warning (hashcat_ctx, "* Device #%u: This hardware has outdated CUDA compute capability (%u.%u).", device_id + 1, device_param->sm_major, device_param->sm_minor); + event_log_warning (hashcat_ctx, " For modern OpenCL performance, upgrade to hardware that supports"); + event_log_warning (hashcat_ctx, " CUDA compute capability version 5.0 (Maxwell) or higher."); + } } if (device_param->kernel_exec_timeout != 0) { - if (user_options->quiet == false) event_log_advice (hashcat_ctx, "* Device #%u: WARNING! Kernel exec timeout is not disabled.", device_id + 1); - if (user_options->quiet == false) event_log_advice (hashcat_ctx, " This may cause \"CL_OUT_OF_RESOURCES\" or related errors."); - if (user_options->quiet == false) event_log_advice (hashcat_ctx, " To disable the timeout, see: https://hashcat.net/q/timeoutpatch"); + if (user_options->quiet == false) + { + event_log_advice (hashcat_ctx, "* Device #%u: WARNING! 
Kernel exec timeout is not disabled.", device_id + 1); + event_log_advice (hashcat_ctx, " This may cause \"CL_OUT_OF_RESOURCES\" or related errors."); + event_log_advice (hashcat_ctx, " To disable the timeout, see: https://hashcat.net/q/timeoutpatch"); + } } } @@ -4903,4243 +5625,5050 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) backend_ctx->hip_devices_cnt = hip_devices_cnt; backend_ctx->hip_devices_active = hip_devices_active; - // OCL + // Metal - int opencl_devices_cnt = 0; - int opencl_devices_active = 0; + int metal_devices_cnt = 0; + int metal_devices_active = 0; - if (backend_ctx->ocl) + #if defined (__APPLE__) + if (backend_ctx->mtl) { - /** - * OpenCL devices: simply push all devices from all platforms into the same device array - */ - - cl_uint opencl_platforms_cnt = backend_ctx->opencl_platforms_cnt; - cl_device_id **opencl_platforms_devices = backend_ctx->opencl_platforms_devices; - cl_uint *opencl_platforms_devices_cnt = backend_ctx->opencl_platforms_devices_cnt; - cl_uint *opencl_platforms_vendor_id = backend_ctx->opencl_platforms_vendor_id; - char **opencl_platforms_version = backend_ctx->opencl_platforms_version; + // device count - for (u32 opencl_platforms_idx = 0; opencl_platforms_idx < opencl_platforms_cnt; opencl_platforms_idx++) + if (hc_mtlDeviceGetCount (hashcat_ctx, &metal_devices_cnt) == -1) { - cl_device_id *opencl_platform_devices = opencl_platforms_devices[opencl_platforms_idx]; - cl_uint opencl_platform_devices_cnt = opencl_platforms_devices_cnt[opencl_platforms_idx]; - cl_uint opencl_platform_vendor_id = opencl_platforms_vendor_id[opencl_platforms_idx]; - char *opencl_platform_version = opencl_platforms_version[opencl_platforms_idx]; + mtl_close (hashcat_ctx); + } - for (u32 opencl_platform_devices_idx = 0; opencl_platform_devices_idx < opencl_platform_devices_cnt; opencl_platform_devices_idx++, backend_devices_idx++, opencl_devices_cnt++) - { - const u32 device_id = backend_devices_idx; + backend_ctx->metal_devices_cnt = metal_devices_cnt; - hc_device_param_t *device_param = &devices_param[device_id]; + // device specific - device_param->device_id = device_id; + for (int metal_devices_idx = 0; metal_devices_idx < metal_devices_cnt; metal_devices_idx++, backend_devices_idx++) + { + const u32 device_id = backend_devices_idx; - backend_ctx->backend_device_from_opencl[opencl_devices_cnt] = backend_devices_idx; + hc_device_param_t *device_param = &devices_param[backend_devices_idx]; - backend_ctx->backend_device_from_opencl_platform[opencl_platforms_idx][opencl_platform_devices_idx] = backend_devices_idx; + device_param->device_id = device_id; - device_param->opencl_platform_vendor_id = opencl_platform_vendor_id; + backend_ctx->backend_device_from_metal[metal_devices_idx] = backend_devices_idx; - device_param->opencl_device = opencl_platform_devices[opencl_platform_devices_idx]; + mtl_device_id metal_device = NULL; - //device_param->opencl_platform = opencl_platform; + if (hc_mtlDeviceGet (hashcat_ctx, &metal_device, metal_devices_idx) == -1) + { + device_param->skipped = true; + continue; + } - device_param->is_cuda = false; - device_param->is_hip = false; - device_param->is_opencl = true; + device_param->metal_device = metal_device; - // store opencl platform i + device_param->is_cuda = false; + device_param->is_hip = false; + device_param->is_metal = true; + device_param->is_opencl = false; - device_param->opencl_platform_id = opencl_platforms_idx; + device_param->use_opencl12 = false; + device_param->use_opencl20 = false; 
+ device_param->use_opencl21 = false; - // check OpenCL version + device_param->is_apple_silicon = is_apple_silicon(); - device_param->use_opencl12 = false; - device_param->use_opencl20 = false; - device_param->use_opencl21 = false; + // some attributes have to be hardcoded values because they are used for instance in the build options - int opencl_version_min = 0; - int opencl_version_maj = 0; + device_param->device_local_mem_type = CL_LOCAL; + device_param->opencl_device_type = CL_DEVICE_TYPE_GPU; + device_param->opencl_device_vendor_id = VENDOR_ID_APPLE; + device_param->opencl_platform_vendor_id = VENDOR_ID_APPLE; - if (sscanf (opencl_platform_version, "OpenCL %d.%d", &opencl_version_min, &opencl_version_maj) == 2) - { - if ((opencl_version_min == 1) && (opencl_version_maj == 2)) - { - device_param->use_opencl12 = true; - } - else if ((opencl_version_min == 2) && (opencl_version_maj == 0)) - { - device_param->use_opencl20 = true; - } - else if ((opencl_version_min == 2) && (opencl_version_maj == 1)) - { - device_param->use_opencl21 = true; - } - } + // or in the cached kernel checksum - size_t param_value_size = 0; + device_param->opencl_device_version = ""; + device_param->opencl_driver_version = ""; - // opencl_device_type + // or just to make sure they are not NULL - cl_device_type opencl_device_type; + device_param->opencl_device_vendor = strdup ("Apple"); + device_param->opencl_device_c_version = ""; - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_TYPE, sizeof (opencl_device_type), &opencl_device_type, NULL) == -1) - { - device_param->skipped = true; - continue; - } + // sm_minor, sm_major - opencl_device_type &= ~CL_DEVICE_TYPE_DEFAULT; + int mtl_major = 0; + int mtl_minor = 0; - device_param->opencl_device_type = opencl_device_type; + if (hc_mtlDeviceGetAttribute (hashcat_ctx, &mtl_major, MTL_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, metal_device) == -1) + { + device_param->skipped = true; + continue; + } - // device_name + if (hc_mtlDeviceGetAttribute (hashcat_ctx, &mtl_minor, MTL_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, metal_device) == -1) + { + device_param->skipped = true; + continue; + } - // try CL_DEVICE_BOARD_NAME_AMD first, if it fails fall back to CL_DEVICE_NAME - // since AMD ROCm does not identify itself at this stage we simply check for return code from clGetDeviceInfo() + device_param->mtl_major = mtl_major; + device_param->mtl_minor = mtl_minor; - #define CHECK_BOARD_NAME_AMD 1 + // device_name - cl_int rc_board_name_amd = CL_INVALID_VALUE; + char *device_name = (char *) hcmalloc (HCBUFSIZ_TINY); - if (CHECK_BOARD_NAME_AMD) - { - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + if (hc_mtlDeviceGetName (hashcat_ctx, device_name, HCBUFSIZ_TINY, metal_device) == -1) + { + device_param->skipped = true; + hcfree (device_name); + continue; + } - OCL_PTR *ocl = (OCL_PTR *) backend_ctx->ocl; + device_param->device_name = device_name; - rc_board_name_amd = ocl->clGetDeviceInfo (device_param->opencl_device, CL_DEVICE_BOARD_NAME_AMD, 0, NULL, NULL); - } + hc_string_trim_leading (device_name); - if (rc_board_name_amd == CL_SUCCESS) - { - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_BOARD_NAME_AMD, 0, NULL, ¶m_value_size) == -1) - { - device_param->skipped = true; - continue; - } + hc_string_trim_trailing (device_name); - char *device_name = (char *) hcmalloc (param_value_size); + // device_processors - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_BOARD_NAME_AMD, 
param_value_size, device_name, NULL) == -1) - { - device_param->skipped = true; - hcfree (device_name); - continue; - } + int device_processors = 0; - device_param->device_name = device_name; - } - else - { - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_NAME, 0, NULL, ¶m_value_size) == -1) - { - device_param->skipped = true; - continue; - } + if (hc_mtlDeviceGetAttribute (hashcat_ctx, &device_processors, MTL_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, metal_device) == -1) + { + device_param->skipped = true; + continue; + } - char *device_name = (char *) hcmalloc (param_value_size); + device_param->device_processors = device_processors; - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_NAME, param_value_size, device_name, NULL) == -1) - { - device_param->skipped = true; - hcfree (device_name); - continue; - } + // device_host_unified_memory - device_param->device_name = device_name; - } + int device_host_unified_memory = 0; - hc_string_trim_leading (device_param->device_name); + if (hc_mtlDeviceGetAttribute (hashcat_ctx, &device_host_unified_memory, MTL_DEVICE_ATTRIBUTE_UNIFIED_MEMORY, metal_device) == -1) + { + device_param->skipped = true; + continue; + } - hc_string_trim_trailing (device_param->device_name); + device_param->device_host_unified_memory = device_host_unified_memory; - // device_vendor + // device_global_mem, device_available_mem - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_VENDOR, 0, NULL, ¶m_value_size) == -1) - { - device_param->skipped = true; - continue; - } + size_t bytes = 0; - char *opencl_device_vendor = (char *) hcmalloc (param_value_size); + if (hc_mtlDeviceTotalMem (hashcat_ctx, &bytes, metal_device) == -1) + { + device_param->skipped = true; + continue; + } - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_VENDOR, param_value_size, opencl_device_vendor, NULL) == -1) - { - device_param->skipped = true; - hcfree (opencl_device_vendor); - continue; - } + device_param->device_global_mem = (u64) bytes; - device_param->opencl_device_vendor = opencl_device_vendor; + device_param->device_available_mem = 0; - cl_uint opencl_device_vendor_id = 0; + // device_maxmem_alloc - if (strcmp (opencl_device_vendor, CL_VENDOR_AMD1) == 0) - { - opencl_device_vendor_id = VENDOR_ID_AMD; - } - else if (strcmp (opencl_device_vendor, CL_VENDOR_AMD2) == 0) - { - opencl_device_vendor_id = VENDOR_ID_AMD; - } - else if (strcmp (opencl_device_vendor, CL_VENDOR_AMD_USE_INTEL) == 0) - { - opencl_device_vendor_id = VENDOR_ID_AMD_USE_INTEL; - } - else if (strcmp (opencl_device_vendor, CL_VENDOR_APPLE) == 0) - { - opencl_device_vendor_id = VENDOR_ID_APPLE; - } - else if (strcmp (opencl_device_vendor, CL_VENDOR_APPLE_USE_AMD) == 0) - { - opencl_device_vendor_id = VENDOR_ID_AMD; - } - else if (strcmp (opencl_device_vendor, CL_VENDOR_APPLE_USE_NV) == 0) - { - opencl_device_vendor_id = VENDOR_ID_NV; - } - else if (strcmp (opencl_device_vendor, CL_VENDOR_APPLE_USE_INTEL) == 0) - { - opencl_device_vendor_id = VENDOR_ID_INTEL_SDK; - } - else if (strcmp (opencl_device_vendor, CL_VENDOR_APPLE_USE_INTEL2) == 0) - { - opencl_device_vendor_id = VENDOR_ID_INTEL_SDK; - } - else if (strcmp (opencl_device_vendor, CL_VENDOR_INTEL_BEIGNET) == 0) - { - opencl_device_vendor_id = VENDOR_ID_INTEL_BEIGNET; - } - else if (strcmp (opencl_device_vendor, CL_VENDOR_INTEL_SDK) == 0) - { - opencl_device_vendor_id = VENDOR_ID_INTEL_SDK; - } - else if (strcmp (opencl_device_vendor, CL_VENDOR_MESA) == 0) - { - 
opencl_device_vendor_id = VENDOR_ID_MESA; - } - else if (strcmp (opencl_device_vendor, CL_VENDOR_NV) == 0) - { - opencl_device_vendor_id = VENDOR_ID_NV; - } - else if (strcmp (opencl_device_vendor, CL_VENDOR_POCL) == 0) - { - opencl_device_vendor_id = VENDOR_ID_POCL; - } - else - { - opencl_device_vendor_id = VENDOR_ID_GENERIC; - } + size_t device_maxmem_alloc = 0; - device_param->opencl_device_vendor_id = opencl_device_vendor_id; + if (hc_mtlDeviceMaxMemAlloc (hashcat_ctx, &device_maxmem_alloc, metal_device) == -1) + { + device_param->skipped = true; + continue; + } - // device_version + device_param->device_maxmem_alloc = device_maxmem_alloc; - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_VERSION, 0, NULL, ¶m_value_size) == -1) - { - device_param->skipped = true; - continue; - } + if (device_host_unified_memory == 1) device_param->device_maxmem_alloc /= 2; - char *opencl_device_version = (char *) hcmalloc (param_value_size); + // warp size - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_VERSION, param_value_size, opencl_device_version, NULL) == -1) - { - device_param->skipped = true; - hcfree (opencl_device_version); - continue; - } + int metal_warp_size = 0; - device_param->opencl_device_version = opencl_device_version; + if (hc_mtlDeviceGetAttribute (hashcat_ctx, &metal_warp_size, MTL_DEVICE_ATTRIBUTE_WARP_SIZE, metal_device) == -1) + { + device_param->skipped = true; + continue; + } - // opencl_device_c_version + device_param->metal_warp_size = metal_warp_size; - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_OPENCL_C_VERSION, 0, NULL, ¶m_value_size) == -1) - { - device_param->skipped = true; - continue; - } + // device_maxworkgroup_size - char *opencl_device_c_version = (char *) hcmalloc (param_value_size); + int device_maxworkgroup_size = 0; - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_OPENCL_C_VERSION, param_value_size, opencl_device_c_version, NULL) == -1) - { - device_param->skipped = true; - hcfree (opencl_device_c_version); - continue; - } - - device_param->opencl_device_c_version = opencl_device_c_version; + if (hc_mtlDeviceGetAttribute (hashcat_ctx, &device_maxworkgroup_size, MTL_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, metal_device) == -1) + { + device_param->skipped = true; + continue; + } - // max_compute_units + device_param->device_maxworkgroup_size = device_maxworkgroup_size; - cl_uint device_processors = 0; + // max_clock_frequency - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof (device_processors), &device_processors, NULL) == -1) - { - device_param->skipped = true; - continue; - } + int device_maxclock_frequency = 0; - device_param->device_processors = device_processors; + if (hc_mtlDeviceGetAttribute (hashcat_ctx, &device_maxclock_frequency, MTL_DEVICE_ATTRIBUTE_CLOCK_RATE, metal_device) == -1) + { + device_param->skipped = true; + continue; + } - // device_host_unified_memory + device_param->device_maxclock_frequency = device_maxclock_frequency / 1000; - cl_bool device_host_unified_memory = false; + // pcie_bus, pcie_device, pcie_function - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof (device_host_unified_memory), &device_host_unified_memory, NULL) == -1) - { - device_param->skipped = true; - continue; - } + device_param->pcie_domain = 0; + device_param->pcie_bus = 0; + device_param->pcie_device = 0; + device_param->pcie_function = 
0; - device_param->device_host_unified_memory = (device_host_unified_memory == CL_TRUE) ? 1 : 0; + int device_physical_location = 0; - // device_global_mem + if (hc_mtlDeviceGetAttribute (hashcat_ctx, &device_physical_location, MTL_DEVICE_ATTRIBUTE_PHYSICAL_LOCATION, metal_device) == -1) + { + device_param->skipped = true; + continue; + } - cl_ulong device_global_mem = 0; + device_param->device_physical_location = device_physical_location; - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof (device_global_mem), &device_global_mem, NULL) == -1) - { - device_param->skipped = true; - continue; - } + int device_location_number = 0; - device_param->device_global_mem = device_global_mem; + if (hc_mtlDeviceGetAttribute (hashcat_ctx, &device_location_number, MTL_DEVICE_ATTRIBUTE_LOCATION_NUMBER, metal_device) == -1) + { + device_param->skipped = true; + continue; + } - device_param->device_available_mem = 0; + device_param->device_location_number = device_location_number; - // device_maxmem_alloc + int device_max_transfer_rate = 0; - cl_ulong device_maxmem_alloc = 0; + if (hc_mtlDeviceGetAttribute (hashcat_ctx, &device_max_transfer_rate, MTL_DEVICE_ATTRIBUTE_MAX_TRANSFER_RATE, metal_device) == -1) + { + device_param->skipped = true; + continue; + } - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof (device_maxmem_alloc), &device_maxmem_alloc, NULL) == -1) - { - device_param->skipped = true; - continue; - } + device_param->device_max_transfer_rate = device_max_transfer_rate; - device_param->device_maxmem_alloc = device_maxmem_alloc; + int device_registryID = 0; - if (device_param->device_host_unified_memory == 1) - { - // so, we actually have only half the memory because we need the same buffers on host side + if (hc_mtlDeviceGetAttribute (hashcat_ctx, &device_registryID, MTL_DEVICE_ATTRIBUTE_REGISTRY_ID, metal_device) == -1) + { + device_param->skipped = true; + continue; + } - device_param->device_maxmem_alloc /= 2; - } + device_param->device_registryID = device_registryID; - // note we'll limit to 2gb, otherwise this causes all kinds of weird errors because of possible integer overflows in opencl runtimes - // testwise disabling that - //device_param->device_maxmem_alloc = MIN (device_maxmem_alloc, 0x7fffffff); + // kernel_exec_timeout - // max_work_group_size + device_param->kernel_exec_timeout = 0; - size_t device_maxworkgroup_size = 0; + // wgs_multiple - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof (device_maxworkgroup_size), &device_maxworkgroup_size, NULL) == -1) - { - device_param->skipped = true; - continue; - } + device_param->kernel_preferred_wgs_multiple = metal_warp_size; - device_param->device_maxworkgroup_size = device_maxworkgroup_size; + // max_shared_memory_per_block - // max_clock_frequency + int max_shared_memory_per_block = 0; - cl_uint device_maxclock_frequency = 0; + if (hc_mtlDeviceGetAttribute (hashcat_ctx, &max_shared_memory_per_block, MTL_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK, metal_device) == -1) + { + device_param->skipped = true; + continue; + } - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof (device_maxclock_frequency), &device_maxclock_frequency, NULL) == -1) - { - device_param->skipped = true; - continue; - } + if (max_shared_memory_per_block < 32768) + { + event_log_error (hashcat_ctx, "* Device #%u: This device's shared buffer size is too 
small.", device_id + 1); - device_param->device_maxclock_frequency = device_maxclock_frequency; + device_param->skipped = true; + } - // device_endian_little + device_param->device_local_mem_size = max_shared_memory_per_block; - cl_bool device_endian_little = CL_FALSE; + // device_max_constant_buffer_size - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_ENDIAN_LITTLE, sizeof (device_endian_little), &device_endian_little, NULL) == -1) - { - device_param->skipped = true; - continue; - } + int device_max_constant_buffer_size = 0; - if (device_endian_little == CL_FALSE) - { - event_log_error (hashcat_ctx, "* Device #%u: This device is not little-endian.", device_id + 1); + if (hc_mtlDeviceGetAttribute (hashcat_ctx, &device_max_constant_buffer_size, MTL_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY, metal_device) == -1) + { + device_param->skipped = true; + continue; + } - device_param->skipped = true; - } + if (device_max_constant_buffer_size < 65536) + { + event_log_error (hashcat_ctx, "* Device #%u: This device's local mem size is too small.", device_id + 1); - // device_available + device_param->skipped = true; + } - cl_bool device_available = CL_FALSE; + // gpu properties - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_AVAILABLE, sizeof (device_available), &device_available, NULL) == -1) - { - device_param->skipped = true; - continue; - } + int device_is_headless = 0; - if (device_available == CL_FALSE) - { - event_log_error (hashcat_ctx, "* Device #%u: This device is not available.", device_id + 1); + if (hc_mtlDeviceGetAttribute (hashcat_ctx, &device_is_headless, MTL_DEVICE_ATTRIBUTE_HEADLESS, metal_device) == -1) + { + device_param->skipped = true; + continue; + } - device_param->skipped = true; - } + device_param->device_is_headless = device_is_headless; - // device_compiler_available + int device_is_low_power = 0; - cl_bool device_compiler_available = CL_FALSE; + if (hc_mtlDeviceGetAttribute (hashcat_ctx, &device_is_low_power, MTL_DEVICE_ATTRIBUTE_LOW_POWER, metal_device) == -1) + { + device_param->skipped = true; + continue; + } - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_COMPILER_AVAILABLE, sizeof (device_compiler_available), &device_compiler_available, NULL) == -1) - { - device_param->skipped = true; - continue; - } + device_param->device_is_low_power = device_is_low_power; - if (device_compiler_available == CL_FALSE) - { - event_log_error (hashcat_ctx, "* Device #%u: No compiler is available for this device.", device_id + 1); + int device_is_removable = 0; - device_param->skipped = true; - } + if (hc_mtlDeviceGetAttribute (hashcat_ctx, &device_is_removable, MTL_DEVICE_ATTRIBUTE_REMOVABLE, metal_device) == -1) + { + device_param->skipped = true; + continue; + } - // device_execution_capabilities + device_param->device_is_removable = device_is_removable; - cl_device_exec_capabilities device_execution_capabilities; + // skipped - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_EXECUTION_CAPABILITIES, sizeof (device_execution_capabilities), &device_execution_capabilities, NULL) == -1) - { - device_param->skipped = true; - continue; - } + if ((backend_ctx->backend_devices_filter & (1ULL << device_id)) == 0) + { + device_param->skipped = true; + } - if ((device_execution_capabilities & CL_EXEC_KERNEL) == 0) - { - event_log_error (hashcat_ctx, "* Device #%u: This device does not support executing kernels.", device_id + 1); + if ((backend_ctx->opencl_device_types_filter & 
CL_DEVICE_TYPE_GPU) == 0) + { + device_param->skipped = true; + } - device_param->skipped = true; - } + if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_vendor_id == VENDOR_ID_APPLE)) + { + need_iokit = true; + } - // device_extensions + // CPU burning loop damper + // Value is given as number between 0-100 + // By default 8% + // in theory not needed with Metal - size_t device_extensions_size; + device_param->spin_damp = 0; - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_EXTENSIONS, 0, NULL, &device_extensions_size) == -1) + // common driver check + /* + if (device_param->skipped == false) + { + if ((user_options->force == false) && (user_options->backend_info == false)) { - device_param->skipped = true; - continue; } - char *device_extensions = (char *) hcmalloc (device_extensions_size + 1); + // activate device moved below, at end + }*/ - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_EXTENSIONS, device_extensions_size, device_extensions, NULL) == -1) - { - device_param->skipped = true; - hcfree (device_extensions); - continue; - } + // instruction set - if (strstr (device_extensions, "base_atomics") == 0) - { - event_log_error (hashcat_ctx, "* Device #%u: This device does not support base atomics.", device_id + 1); + device_param->has_add = false; + device_param->has_addc = false; + device_param->has_sub = false; + device_param->has_subc = false; + device_param->has_bfe = false; + device_param->has_lop3 = false; + device_param->has_mov64 = false; + device_param->has_prmt = false; - device_param->skipped = true; - } + // check if we need skip device - if (strstr (device_extensions, "byte_addressable_store") == 0) - { - event_log_error (hashcat_ctx, "* Device #%u: This device does not support byte-addressable store.", device_id + 1); + if (device_param->device_processors == 1) device_param->skipped = true; - device_param->skipped = true; - } + /** + * activate device + */ - hcfree (device_extensions); + if (device_param->skipped == false) metal_devices_active++; + } + } + #endif // __APPLE__ - // kernel_preferred_wgs_multiple + backend_ctx->metal_devices_cnt = metal_devices_cnt; + backend_ctx->metal_devices_active = metal_devices_active; - // There is global query for this attribute on OpenCL that is not linked to a specific kernel, so we set it to a fixed value - // Later in the code, we add vendor specific extensions to query it + // OCL - device_param->kernel_preferred_wgs_multiple = 8; + int opencl_devices_cnt = 0; + int opencl_devices_active = 0; - // device_local_mem_type + if (backend_ctx->ocl) + { + /** + * OpenCL devices: simply push all devices from all platforms into the same device array + */ - cl_device_local_mem_type device_local_mem_type; + cl_uint opencl_platforms_cnt = backend_ctx->opencl_platforms_cnt; + cl_device_id **opencl_platforms_devices = backend_ctx->opencl_platforms_devices; + cl_uint *opencl_platforms_devices_cnt = backend_ctx->opencl_platforms_devices_cnt; + cl_uint *opencl_platforms_vendor_id = backend_ctx->opencl_platforms_vendor_id; + char **opencl_platforms_version = backend_ctx->opencl_platforms_version; - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_LOCAL_MEM_TYPE, sizeof (device_local_mem_type), &device_local_mem_type, NULL) == -1) - { - device_param->skipped = true; - continue; - } + for (u32 opencl_platforms_idx = 0; opencl_platforms_idx < opencl_platforms_cnt; opencl_platforms_idx++) + { + cl_device_id 
*opencl_platform_devices = opencl_platforms_devices[opencl_platforms_idx]; + cl_uint opencl_platform_devices_cnt = opencl_platforms_devices_cnt[opencl_platforms_idx]; + cl_uint opencl_platform_vendor_id = opencl_platforms_vendor_id[opencl_platforms_idx]; + char *opencl_platform_version = opencl_platforms_version[opencl_platforms_idx]; - device_param->device_local_mem_type = device_local_mem_type; + for (u32 opencl_platform_devices_idx = 0; opencl_platform_devices_idx < opencl_platform_devices_cnt; opencl_platform_devices_idx++, backend_devices_idx++, opencl_devices_cnt++) + { + const u32 device_id = backend_devices_idx; - // device_max_constant_buffer_size + hc_device_param_t *device_param = &devices_param[device_id]; - cl_ulong device_max_constant_buffer_size; + device_param->device_id = device_id; - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof (device_max_constant_buffer_size), &device_max_constant_buffer_size, NULL) == -1) - { - device_param->skipped = true; - continue; - } + backend_ctx->backend_device_from_opencl[opencl_devices_cnt] = backend_devices_idx; - if (device_local_mem_type == CL_LOCAL) - { - if (device_max_constant_buffer_size < 65536) - { - event_log_error (hashcat_ctx, "* Device #%u: This device's constant buffer size is too small.", device_id + 1); + backend_ctx->backend_device_from_opencl_platform[opencl_platforms_idx][opencl_platform_devices_idx] = backend_devices_idx; - device_param->skipped = true; - } - } + device_param->opencl_platform_vendor_id = opencl_platform_vendor_id; - // device_local_mem_size + device_param->opencl_device = opencl_platform_devices[opencl_platform_devices_idx]; - cl_ulong device_local_mem_size = 0; + //device_param->opencl_platform = opencl_platform; - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof (device_local_mem_size), &device_local_mem_size, NULL) == -1) - { - device_param->skipped = true; - continue; - } + device_param->is_cuda = false; + device_param->is_hip = false; + device_param->is_metal = false; + device_param->is_opencl = true; - if (device_local_mem_type == CL_LOCAL) - { - if (device_local_mem_size < 32768) - { - event_log_error (hashcat_ctx, "* Device #%u: This device's local mem size is too small.", device_id + 1); + // store opencl platform i - device_param->skipped = true; - } - } + device_param->opencl_platform_id = opencl_platforms_idx; - // workaround inc! - // allocating all reported local memory causes jit to fail with: SC failed. No reason given. 
- // if we limit ourself to 32k it seems to work + // check OpenCL version - if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) + device_param->use_opencl12 = false; + device_param->use_opencl20 = false; + device_param->use_opencl21 = false; + + int opencl_version_min = 0; + int opencl_version_maj = 0; + + if (sscanf (opencl_platform_version, "OpenCL %d.%d", &opencl_version_min, &opencl_version_maj) == 2) { - if (device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) + if ((opencl_version_min == 1) && (opencl_version_maj == 2)) { - if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD) - { - device_local_mem_size = MIN (device_local_mem_size, 32768); - } + device_param->use_opencl12 = true; + } + else if ((opencl_version_min == 2) && (opencl_version_maj == 0)) + { + device_param->use_opencl20 = true; + } + else if ((opencl_version_min == 2) && (opencl_version_maj == 1)) + { + device_param->use_opencl21 = true; } } - device_param->device_local_mem_size = device_local_mem_size; - - // handling known bugs on POCL - - // POCL < 1.9 doesn't like quotes in the include path, see: - // https://github.com/hashcat/hashcat/issues/2950 - // https://github.com/pocl/pocl/issues/962 + size_t param_value_size = 0; - // POCL < 1.5 and older LLVM versions are known to fail compiling kernels - // https://github.com/hashcat/hashcat/issues/2344 + // opencl_device_type - // we need to inform the user to update + cl_device_type opencl_device_type; - if (opencl_platform_vendor_id == VENDOR_ID_POCL) + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_TYPE, sizeof (opencl_device_type), &opencl_device_type, NULL) == -1) { - char *pocl_version_ptr = strstr (opencl_platform_version, "pocl "); - char *llvm_version_ptr = strstr (opencl_platform_version, "LLVM "); - - if ((pocl_version_ptr != NULL) && (llvm_version_ptr != NULL)) - { - bool pocl_skip = false; - - int pocl_maj = 0; - int pocl_min = 0; - - int pocl_bug_whitespace_on_path = 0; - int pocl_bug_kernel_compiling_failure = 0; - - const int res1 = sscanf (pocl_version_ptr, "pocl %d.%d", &pocl_maj, &pocl_min); - - if (res1 == 2) - { - const int pocl_version = (pocl_maj * 100) + pocl_min; - - if (pocl_version < 109) - { - if (strchr (folder_config->cpath_real, ' ') != NULL) - { - pocl_skip = true; - pocl_bug_whitespace_on_path = 1; - } + device_param->skipped = true; + continue; + } - if (pocl_version < 105) - { - pocl_skip = true; - pocl_bug_kernel_compiling_failure = 1; - } - } - } + opencl_device_type &= ~CL_DEVICE_TYPE_DEFAULT; - int llvm_maj = 0; - int llvm_min = 0; + device_param->opencl_device_type = opencl_device_type; - const int res2 = sscanf (llvm_version_ptr, "LLVM %d.%d", &llvm_maj, &llvm_min); + // device_name - if (res2 == 2) - { - const int llvm_version = (llvm_maj * 100) + llvm_min; + // try CL_DEVICE_BOARD_NAME_AMD first, if it fails fall back to CL_DEVICE_NAME + // since AMD ROCm does not identify itself at this stage we simply check for return code from clGetDeviceInfo() - if (llvm_version < 900) - { - pocl_skip = true; - } - } + #define CHECK_BOARD_NAME_AMD 1 - if (pocl_skip == true) - { - if (user_options->force == false) - { - event_log_error (hashcat_ctx, "* Device #%u: Outdated POCL OpenCL driver detected!", device_id + 1); + cl_int rc_board_name_amd = CL_INVALID_VALUE; - if (user_options->quiet == false) - { - if (pocl_bug_kernel_compiling_failure == 1) - { - event_log_warning (hashcat_ctx, "This OpenCL driver may fail kernel compilation or produce false negatives."); - } + if 
(CHECK_BOARD_NAME_AMD) + { + //backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - if (pocl_bug_whitespace_on_path == 1) - { - event_log_warning (hashcat_ctx, "Consider moving hashcat to a path with no spaces if you want to use this POCL version."); - } + OCL_PTR *ocl = (OCL_PTR *) backend_ctx->ocl; - event_log_warning (hashcat_ctx, "We recommend using a version of POCL >= 1.9"); - event_log_warning (hashcat_ctx, "You can use --force to override, but do not report related errors."); - event_log_warning (hashcat_ctx, NULL); - } + rc_board_name_amd = ocl->clGetDeviceInfo (device_param->opencl_device, CL_DEVICE_BOARD_NAME_AMD, 0, NULL, NULL); + } - device_param->skipped = true; - } - } + if (rc_board_name_amd == CL_SUCCESS) + { + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_BOARD_NAME_AMD, 0, NULL, ¶m_value_size) == -1) + { + device_param->skipped = true; + continue; } - } - char *opencl_device_version_lower = hcstrdup (opencl_device_version); + char *device_name = (char *) hcmalloc (param_value_size); - lowercase ((u8 *) opencl_device_version_lower, strlen (opencl_device_version_lower)); + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_BOARD_NAME_AMD, param_value_size, device_name, NULL) == -1) + { + device_param->skipped = true; + hcfree (device_name); + continue; + } - if ((strstr (opencl_device_version_lower, "beignet ")) - || (strstr (opencl_device_version_lower, " beignet")) - || (strstr (opencl_device_version_lower, "mesa ")) - || (strstr (opencl_device_version_lower, " mesa"))) + device_param->device_name = device_name; + } + else { - // BEIGNET: https://github.com/hashcat/hashcat/issues/2243 - // MESA: https://github.com/hashcat/hashcat/issues/2269 - - if (user_options->force == false) + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_NAME, 0, NULL, ¶m_value_size) == -1) { - event_log_error (hashcat_ctx, "* Device #%u: Unstable OpenCL driver detected!", device_id + 1); + device_param->skipped = true; + continue; + } - if (user_options->quiet == false) event_log_warning (hashcat_ctx, "This OpenCL driver may fail kernel compilation or produce false negatives."); - if (user_options->quiet == false) event_log_warning (hashcat_ctx, "You can use --force to override, but do not report related errors."); - if (user_options->quiet == false) event_log_warning (hashcat_ctx, NULL); + char *device_name = (char *) hcmalloc (param_value_size); + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_NAME, param_value_size, device_name, NULL) == -1) + { device_param->skipped = true; + hcfree (device_name); + continue; } + + device_param->device_name = device_name; } - hcfree (opencl_device_version_lower); + hc_string_trim_leading (device_param->device_name); - // Since some times we get reports from users about not working hashcat, dropping error messages like: - // CL_INVALID_COMMAND_QUEUE and CL_OUT_OF_RESOURCES - // Turns out that this is caused by Intel OpenCL runtime handling their GPU devices - // Disable such devices unless the user forces to use it - // This is successfully workaround with new threading model and new memory management - // Tested on Windows 10 - // OpenCL.Version.: OpenCL C 2.1 - // Driver.Version.: 23.20.16.4973 + hc_string_trim_trailing (device_param->device_name); - /* - #if !defined (__APPLE__) - if (opencl_device_type & CL_DEVICE_TYPE_GPU) - { - if ((device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK) || (device_param->opencl_device_vendor_id == 
VENDOR_ID_INTEL_BEIGNET)) - { - if (user_options->force == false) - { - if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: Intel's OpenCL runtime (GPU only) is currently broken.", device_id + 1); - if (user_options->quiet == false) event_log_warning (hashcat_ctx, " We are waiting for updated OpenCL drivers from Intel."); - if (user_options->quiet == false) event_log_warning (hashcat_ctx, " You can use --force to override, but do not report related errors."); - if (user_options->quiet == false) event_log_warning (hashcat_ctx, NULL); + // device_vendor - device_param->skipped = true; - } - } + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_VENDOR, 0, NULL, ¶m_value_size) == -1) + { + device_param->skipped = true; + continue; } - #endif // __APPLE__ - */ - // skipped + char *opencl_device_vendor = (char *) hcmalloc (param_value_size); - if ((backend_ctx->backend_devices_filter & (1ULL << device_id)) == 0) + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_VENDOR, param_value_size, opencl_device_vendor, NULL) == -1) { device_param->skipped = true; - } + hcfree (opencl_device_vendor); + continue; + } - if ((backend_ctx->opencl_device_types_filter & (opencl_device_type)) == 0) + device_param->opencl_device_vendor = opencl_device_vendor; + + cl_uint opencl_device_vendor_id = 0; + + if (strcmp (opencl_device_vendor, CL_VENDOR_AMD1) == 0) { - device_param->skipped = true; + opencl_device_vendor_id = VENDOR_ID_AMD; } - - #if defined (__APPLE__) - if (opencl_device_type & CL_DEVICE_TYPE_GPU) + else if (strcmp (opencl_device_vendor, CL_VENDOR_AMD2) == 0) { - //if (user_options->force == false) - if (device_param->skipped == false) - { - if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: Apple's OpenCL drivers (GPU) are known to be unreliable.", device_id + 1); - if (user_options->quiet == false) event_log_warning (hashcat_ctx, " You have been warned."); - //if (user_options->quiet == false) event_log_warning (hashcat_ctx, " There are many reports of false negatives and other issues."); - //if (user_options->quiet == false) event_log_warning (hashcat_ctx, " This is not a hashcat issue. Other projects report issues with these drivers."); - //if (user_options->quiet == false) event_log_warning (hashcat_ctx, " You can use --force to override, but do not report related errors. 
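// ---------------------------------------------------------------------------
// Editorial aside, not part of the patch: the string attributes above (board
// name, device name, vendor) all follow the same two-step clGetDeviceInfo ()
// pattern -- query the size first, then allocate and query the value. A
// bare-bones sketch against the stock OpenCL API; hashcat itself goes through
// its hc_clGetDeviceInfo () wrapper and hcmalloc ()/hcfree ().

#include <stdlib.h>
#include <CL/cl.h>

static char *query_device_string (cl_device_id device, cl_device_info param)
{
  size_t size = 0;

  if (clGetDeviceInfo (device, param, 0, NULL, &size) != CL_SUCCESS) return NULL;

  char *value = (char *) malloc (size);

  if (value == NULL) return NULL;

  if (clGetDeviceInfo (device, param, size, value, NULL) != CL_SUCCESS)
  {
    free (value);

    return NULL;
  }

  return value; // e.g. query_device_string (device, CL_DEVICE_VENDOR)
}
// ---------------------------------------------------------------------------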
You have been warned."); - if (user_options->quiet == false) event_log_warning (hashcat_ctx, NULL); - - //device_param->skipped = true; - } + opencl_device_vendor_id = VENDOR_ID_AMD; + } + else if (strcmp (opencl_device_vendor, CL_VENDOR_AMD_USE_INTEL) == 0) + { + opencl_device_vendor_id = VENDOR_ID_AMD_USE_INTEL; + } + else if (strcmp (opencl_device_vendor, CL_VENDOR_APPLE) == 0) + { + opencl_device_vendor_id = VENDOR_ID_APPLE; + } + else if (strcmp (opencl_device_vendor, CL_VENDOR_APPLE_USE_AMD) == 0) + { + opencl_device_vendor_id = VENDOR_ID_AMD; + } + else if (strcmp (opencl_device_vendor, CL_VENDOR_APPLE_USE_NV) == 0) + { + opencl_device_vendor_id = VENDOR_ID_NV; + } + else if (strcmp (opencl_device_vendor, CL_VENDOR_APPLE_USE_INTEL) == 0) + { + opencl_device_vendor_id = VENDOR_ID_INTEL_SDK; + } + else if (strcmp (opencl_device_vendor, CL_VENDOR_APPLE_USE_INTEL2) == 0) + { + opencl_device_vendor_id = VENDOR_ID_INTEL_SDK; + } + else if (strcmp (opencl_device_vendor, CL_VENDOR_INTEL_BEIGNET) == 0) + { + opencl_device_vendor_id = VENDOR_ID_INTEL_BEIGNET; + } + else if (strcmp (opencl_device_vendor, CL_VENDOR_INTEL_SDK) == 0) + { + opencl_device_vendor_id = VENDOR_ID_INTEL_SDK; + } + else if (strcmp (opencl_device_vendor, CL_VENDOR_MESA) == 0) + { + opencl_device_vendor_id = VENDOR_ID_MESA; + } + else if (strcmp (opencl_device_vendor, CL_VENDOR_NV) == 0) + { + opencl_device_vendor_id = VENDOR_ID_NV; + } + else if (strcmp (opencl_device_vendor, CL_VENDOR_POCL) == 0) + { + opencl_device_vendor_id = VENDOR_ID_POCL; + } + else + { + opencl_device_vendor_id = VENDOR_ID_GENERIC; } - #endif // __APPLE__ - // driver_version + device_param->opencl_device_vendor_id = opencl_device_vendor_id; - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DRIVER_VERSION, 0, NULL, ¶m_value_size) == -1) + // device_version + + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_VERSION, 0, NULL, ¶m_value_size) == -1) { device_param->skipped = true; continue; } - char *opencl_driver_version = (char *) hcmalloc (param_value_size); + char *opencl_device_version = (char *) hcmalloc (param_value_size); - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DRIVER_VERSION, param_value_size, opencl_driver_version, NULL) == -1) + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_VERSION, param_value_size, opencl_device_version, NULL) == -1) { device_param->skipped = true; - hcfree (opencl_driver_version); + hcfree (opencl_device_version); continue; } - device_param->opencl_driver_version = opencl_driver_version; + device_param->opencl_device_version = opencl_device_version; - // vendor specific + // opencl_device_c_version - if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU) + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_OPENCL_C_VERSION, 0, NULL, ¶m_value_size) == -1) { - #if defined (__APPLE__) - if (device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) - { - if (device_param->skipped == false) - { - need_iokit = true; - } - } - #endif - - #if defined (__linux__) - need_sysfs_cpu = true; - #endif + device_param->skipped = true; + continue; } - if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) + char *opencl_device_c_version = (char *) hcmalloc (param_value_size); + + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_OPENCL_C_VERSION, param_value_size, opencl_device_c_version, NULL) == -1) { - if ((device_param->opencl_platform_vendor_id == VENDOR_ID_AMD) && 
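// ---------------------------------------------------------------------------
// Editorial aside, not part of the patch: the strcmp () chain above maps the
// CL_DEVICE_VENDOR string onto hashcat's internal VENDOR_ID_* constants. The
// same mapping can be written as a lookup table; the vendor strings and the
// EX_VENDOR_ID_* constants below are illustrative placeholders, not an
// authoritative list.

#include <string.h>

typedef struct
{
  const char *vendor_name;
  unsigned    vendor_id;

} vendor_map_t;

#define EX_VENDOR_ID_AMD     1
#define EX_VENDOR_ID_NV      2
#define EX_VENDOR_ID_GENERIC 0xffff

static unsigned map_vendor_string (const char *vendor)
{
  static const vendor_map_t table[] =
  {
    { "Advanced Micro Devices, Inc.", EX_VENDOR_ID_AMD },
    { "NVIDIA Corporation",           EX_VENDOR_ID_NV  },
  };

  for (size_t i = 0; i < sizeof (table) / sizeof (table[0]); i++)
  {
    if (strcmp (vendor, table[i].vendor_name) == 0) return table[i].vendor_id;
  }

  return EX_VENDOR_ID_GENERIC; // unknown vendors fall back to a generic id
}
// ---------------------------------------------------------------------------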
(device_param->opencl_device_vendor_id == VENDOR_ID_AMD)) - { - need_adl = true; + device_param->skipped = true; + hcfree (opencl_device_c_version); + continue; + } - #if defined (__linux__) - need_sysfs_amdgpu = true; - #endif - } + device_param->opencl_device_c_version = opencl_device_c_version; - if ((device_param->opencl_platform_vendor_id == VENDOR_ID_NV) && (device_param->opencl_device_vendor_id == VENDOR_ID_NV)) - { - need_nvml = true; + // max_compute_units - #if defined (_WIN) || defined (__CYGWIN__) - need_nvapi = true; - #endif - } - } + cl_uint device_processors = 0; - if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU) + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof (device_processors), &device_processors, NULL) == -1) { - // they like this - - device_param->kernel_preferred_wgs_multiple = 1; + device_param->skipped = true; + continue; } - if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) - { - if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)) - { - // from https://www.khronos.org/registry/OpenCL/extensions/amd/cl_amd_device_attribute_query.txt - #define CL_DEVICE_WAVEFRONT_WIDTH_AMD 0x4043 + device_param->device_processors = device_processors; - // crazy, but apple does not support this query! - // the best alternative is "Preferred work group size multiple (kernel)", but requires to specify a kernel. - // so we will set kernel_preferred_wgs_multiple intentionally to 0 because otherwise it it set to 8 by default. - // we then assign the value kernel_preferred_wgs_multiple a small kernel like bzero after test if this was set to 0. + // device_host_unified_memory - device_param->kernel_preferred_wgs_multiple = 0; - } + cl_bool device_host_unified_memory = false; - if ((device_param->opencl_platform_vendor_id == VENDOR_ID_AMD) && (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)) - { - cl_uint device_wavefront_width_amd; + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof (device_host_unified_memory), &device_host_unified_memory, NULL) == -1) + { + device_param->skipped = true; + continue; + } - // from https://www.khronos.org/registry/OpenCL/extensions/amd/cl_amd_device_attribute_query.txt - #define CL_DEVICE_WAVEFRONT_WIDTH_AMD 0x4043 + device_param->device_host_unified_memory = (device_host_unified_memory == CL_TRUE) ? 
1 : 0; - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_WAVEFRONT_WIDTH_AMD, sizeof (device_wavefront_width_amd), &device_wavefront_width_amd, NULL) == -1) - { - device_param->skipped = true; - continue; - } + // device_global_mem - device_param->kernel_preferred_wgs_multiple = device_wavefront_width_amd; + cl_ulong device_global_mem = 0; - cl_device_topology_amd amdtopo; + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof (device_global_mem), &device_global_mem, NULL) == -1) + { + device_param->skipped = true; + continue; + } - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_TOPOLOGY_AMD, sizeof (amdtopo), &amdtopo, NULL) == -1) - { - device_param->skipped = true; - continue; - } + device_param->device_global_mem = device_global_mem; - device_param->pcie_domain = 0; // no attribute to query - device_param->pcie_bus = amdtopo.pcie.bus; - device_param->pcie_device = amdtopo.pcie.device; - device_param->pcie_function = amdtopo.pcie.function; - } + device_param->device_available_mem = 0; - if ((device_param->opencl_platform_vendor_id == VENDOR_ID_NV) && (device_param->opencl_device_vendor_id == VENDOR_ID_NV)) - { - cl_uint device_warp_size_nv; + // device_maxmem_alloc - // from deps/OpenCL-Headers/CL/cl_ext.h - #define CL_DEVICE_WARP_SIZE_NV 0x4003 + cl_ulong device_maxmem_alloc = 0; - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_WARP_SIZE_NV, sizeof (device_warp_size_nv), &device_warp_size_nv, NULL) == -1) - { - device_param->skipped = true; - continue; - } + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof (device_maxmem_alloc), &device_maxmem_alloc, NULL) == -1) + { + device_param->skipped = true; + continue; + } - device_param->kernel_preferred_wgs_multiple = device_warp_size_nv; + device_param->device_maxmem_alloc = device_maxmem_alloc; - cl_uint pci_bus_id_nv; // is cl_uint the right type for them?? 
- cl_uint pci_slot_id_nv; + if (device_param->device_host_unified_memory == 1) + { + // so, we actually have only half the memory because we need the same buffers on host side - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_PCI_BUS_ID_NV, sizeof (pci_bus_id_nv), &pci_bus_id_nv, NULL) == -1) - { - device_param->skipped = true; - continue; - } + device_param->device_maxmem_alloc /= 2; + } - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_PCI_SLOT_ID_NV, sizeof (pci_slot_id_nv), &pci_slot_id_nv, NULL) == -1) - { - device_param->skipped = true; - continue; - } + // note we'll limit to 2gb, otherwise this causes all kinds of weird errors because of possible integer overflows in opencl runtimes + // testwise disabling that + //device_param->device_maxmem_alloc = MIN (device_maxmem_alloc, 0x7fffffff); - device_param->pcie_domain = 0; // no attribute to query - device_param->pcie_bus = (u8) (pci_bus_id_nv); - device_param->pcie_device = (u8) (pci_slot_id_nv >> 3); - device_param->pcie_function = (u8) (pci_slot_id_nv & 7); + // max_work_group_size - int sm_minor = 0; - int sm_major = 0; + size_t device_maxworkgroup_size = 0; - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV, sizeof (sm_minor), &sm_minor, NULL) == -1) - { - device_param->skipped = true; - continue; - } + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof (device_maxworkgroup_size), &device_maxworkgroup_size, NULL) == -1) + { + device_param->skipped = true; + continue; + } - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, sizeof (sm_major), &sm_major, NULL) == -1) - { - device_param->skipped = true; - continue; - } + device_param->device_maxworkgroup_size = device_maxworkgroup_size; - device_param->sm_minor = sm_minor; - device_param->sm_major = sm_major; + // max_clock_frequency - cl_uint kernel_exec_timeout = 0; - - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV, sizeof (kernel_exec_timeout), &kernel_exec_timeout, NULL) == -1) - { - device_param->skipped = true; - continue; - } - - device_param->kernel_exec_timeout = kernel_exec_timeout; + cl_uint device_maxclock_frequency = 0; - // CPU burning loop damper - // Value is given as number between 0-100 - // By default 8% + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof (device_maxclock_frequency), &device_maxclock_frequency, NULL) == -1) + { + device_param->skipped = true; + continue; + } - device_param->spin_damp = (double) user_options->spin_damp / 100; + device_param->device_maxclock_frequency = device_maxclock_frequency; - if (user_options->stdout_flag == false) - { - // recommend CUDA + // device_endian_little - if ((backend_ctx->cuda == NULL) || (backend_ctx->nvrtc == NULL)) - { - if (user_options->backend_ignore_cuda == false) - { - if (backend_ctx->rc_cuda_init == -1) - { - event_log_warning (hashcat_ctx, "Failed to initialize NVIDIA CUDA library."); - event_log_warning (hashcat_ctx, NULL); - } - else - { - event_log_warning (hashcat_ctx, "Successfully initialized NVIDIA CUDA library."); - event_log_warning (hashcat_ctx, NULL); - } + cl_bool device_endian_little = CL_FALSE; - if (backend_ctx->rc_nvrtc_init == -1) - { - event_log_warning (hashcat_ctx, "Failed to initialize NVIDIA RTC library."); - event_log_warning (hashcat_ctx, NULL); - } - else - { 
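// ---------------------------------------------------------------------------
// Editorial aside, not part of the patch: on devices that report
// CL_DEVICE_HOST_UNIFIED_MEMORY, host-side copies of the buffers come out of
// the same physical pool, so the usable per-allocation limit is treated as
// half of what the runtime reports. A minimal sketch of that adjustment,
// using hypothetical names:

#include <stdbool.h>

typedef unsigned long long mem_size_t;

static mem_size_t effective_maxmem_alloc (mem_size_t reported_maxmem_alloc, bool host_unified_memory)
{
  // mirror buffers live on the host side as well, so only half is really usable
  return (host_unified_memory == true) ? reported_maxmem_alloc / 2 : reported_maxmem_alloc;
}
// ---------------------------------------------------------------------------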
- event_log_warning (hashcat_ctx, "Successfully initialized NVIDIA RTC library."); - event_log_warning (hashcat_ctx, NULL); - } + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_ENDIAN_LITTLE, sizeof (device_endian_little), &device_endian_little, NULL) == -1) + { + device_param->skipped = true; + continue; + } - event_log_warning (hashcat_ctx, "* Device #%u: CUDA SDK Toolkit not installed or incorrectly installed.", device_id + 1); - event_log_warning (hashcat_ctx, " CUDA SDK Toolkit required for proper device support and utilization."); - event_log_warning (hashcat_ctx, " Falling back to OpenCL runtime."); + if (device_endian_little == CL_FALSE) + { + event_log_error (hashcat_ctx, "* Device #%u: This device is not little-endian.", device_id + 1); - event_log_warning (hashcat_ctx, NULL); - } - } - } - } + device_param->skipped = true; } - // instruction set + // device_available - // fixed values works only for nvidia devices - // dynamical values for amd see time intensive section below + cl_bool device_available = CL_FALSE; - if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && (device_param->opencl_platform_vendor_id == VENDOR_ID_NV)) + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_AVAILABLE, sizeof (device_available), &device_available, NULL) == -1) { - const int sm = (device_param->sm_major * 10) + device_param->sm_minor; + device_param->skipped = true; + continue; + } - device_param->has_add = (sm >= 12) ? true : false; - device_param->has_addc = (sm >= 12) ? true : false; - device_param->has_sub = (sm >= 12) ? true : false; - device_param->has_subc = (sm >= 12) ? true : false; - device_param->has_bfe = (sm >= 20) ? true : false; - device_param->has_lop3 = (sm >= 50) ? true : false; - device_param->has_mov64 = (sm >= 10) ? true : false; - device_param->has_prmt = (sm >= 20) ? 
true : false; + if (device_available == CL_FALSE) + { + event_log_error (hashcat_ctx, "* Device #%u: This device is not available.", device_id + 1); + + device_param->skipped = true; } - // common driver check + // device_compiler_available - if (device_param->skipped == false) + cl_bool device_compiler_available = CL_FALSE; + + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_COMPILER_AVAILABLE, sizeof (device_compiler_available), &device_compiler_available, NULL) == -1) { - if ((user_options->force == false) && (user_options->backend_info == false)) - { - if (opencl_device_type & CL_DEVICE_TYPE_CPU) - { - if (device_param->opencl_platform_vendor_id == VENDOR_ID_INTEL_SDK) - { - bool intel_warn = false; + device_param->skipped = true; + continue; + } - // Intel OpenCL runtime 18 + if (device_compiler_available == CL_FALSE) + { + event_log_error (hashcat_ctx, "* Device #%u: No compiler is available for this device.", device_id + 1); - int opencl_driver1 = 0; - int opencl_driver2 = 0; - int opencl_driver3 = 0; - int opencl_driver4 = 0; + device_param->skipped = true; + } - const int res18 = sscanf (device_param->opencl_driver_version, "%d.%d.%d.%d", &opencl_driver1, &opencl_driver2, &opencl_driver3, &opencl_driver4); + // device_execution_capabilities - if (res18 == 4) - { - // so far all versions 18 are ok - } - else - { - // Intel OpenCL runtime 16 + cl_device_exec_capabilities device_execution_capabilities; - float opencl_version = 0; - int opencl_build = 0; + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_EXECUTION_CAPABILITIES, sizeof (device_execution_capabilities), &device_execution_capabilities, NULL) == -1) + { + device_param->skipped = true; + continue; + } - const int res16 = sscanf (device_param->opencl_device_version, "OpenCL %f (Build %d)", &opencl_version, &opencl_build); + if ((device_execution_capabilities & CL_EXEC_KERNEL) == 0) + { + event_log_error (hashcat_ctx, "* Device #%u: This device does not support executing kernels.", device_id + 1); - if (res16 == 2) - { - if (opencl_build < 25) intel_warn = true; - } - } + device_param->skipped = true; + } - if (intel_warn == true) - { - event_log_error (hashcat_ctx, "* Device #%u: Outdated or broken Intel OpenCL runtime '%s' detected!", device_id + 1, device_param->opencl_driver_version); + // device_extensions - event_log_warning (hashcat_ctx, "You are STRONGLY encouraged to use the officially supported runtime."); - event_log_warning (hashcat_ctx, "See hashcat.net for the officially supported Intel OpenCL runtime."); - event_log_warning (hashcat_ctx, "See also: https://hashcat.net/faq/wrongdriver"); - event_log_warning (hashcat_ctx, "You can use --force to override this, but do not report related errors."); - event_log_warning (hashcat_ctx, NULL); + size_t device_extensions_size; - device_param->skipped = true; - continue; - } - } - } - else if (opencl_device_type & CL_DEVICE_TYPE_GPU) - { - if (device_param->opencl_platform_vendor_id == VENDOR_ID_AMD) - { - bool amd_warn = true; + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_EXTENSIONS, 0, NULL, &device_extensions_size) == -1) + { + device_param->skipped = true; + continue; + } - #if defined (__linux__) - // AMDGPU-PRO Driver 16.40 and higher - if (strtoul (device_param->opencl_driver_version, NULL, 10) >= 2117) amd_warn = false; - // AMDGPU-PRO Driver 16.50 is known to be broken - if (strtoul (device_param->opencl_driver_version, NULL, 10) == 2236) amd_warn = true; - // AMDGPU-PRO Driver 
16.60 is known to be broken - if (strtoul (device_param->opencl_driver_version, NULL, 10) == 2264) amd_warn = true; - // AMDGPU-PRO Driver 17.10 is known to be broken - if (strtoul (device_param->opencl_driver_version, NULL, 10) == 2348) amd_warn = true; - // AMDGPU-PRO Driver 17.20 (2416) is fine, doesn't need check will match >= 2117 - #elif defined (_WIN) - // AMD Radeon Software 14.9 and higher, should be updated to 15.12 - if (strtoul (device_param->opencl_driver_version, NULL, 10) >= 1573) amd_warn = false; - #else - // we have no information about other os - if (amd_warn == true) amd_warn = false; - #endif + char *device_extensions = (char *) hcmalloc (device_extensions_size + 1); - if (amd_warn == true) - { - event_log_error (hashcat_ctx, "* Device #%u: Outdated or broken AMD driver '%s' detected!", device_id + 1, device_param->opencl_driver_version); + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_EXTENSIONS, device_extensions_size, device_extensions, NULL) == -1) + { + device_param->skipped = true; + hcfree (device_extensions); + continue; + } - event_log_warning (hashcat_ctx, "You are STRONGLY encouraged to use the officially supported driver."); - event_log_warning (hashcat_ctx, "See hashcat.net for officially supported AMD drivers."); - event_log_warning (hashcat_ctx, "See also: https://hashcat.net/faq/wrongdriver"); - event_log_warning (hashcat_ctx, "You can use --force to override this, but do not report related errors."); - event_log_warning (hashcat_ctx, NULL); + if (strstr (device_extensions, "base_atomics") == 0) + { + event_log_error (hashcat_ctx, "* Device #%u: This device does not support base atomics.", device_id + 1); - device_param->skipped = true; - continue; - } - } + device_param->skipped = true; + } - if (device_param->opencl_platform_vendor_id == VENDOR_ID_NV) - { - int nv_warn = true; + if (strstr (device_extensions, "byte_addressable_store") == 0) + { + event_log_error (hashcat_ctx, "* Device #%u: This device does not support byte-addressable store.", device_id + 1); - int version_maj = 0; - int version_min = 0; + device_param->skipped = true; + } - const int r = sscanf (device_param->opencl_driver_version, "%d.%d", &version_maj, &version_min); + hcfree (device_extensions); - if (r == 2) - { - // nvidia 441.x looks ok + // kernel_preferred_wgs_multiple - if (version_maj == 440) - { - if (version_min >= 64) - { - nv_warn = false; - } - } - else - { - // unknown version scheme, probably new driver version + // There is global query for this attribute on OpenCL that is not linked to a specific kernel, so we set it to a fixed value + // Later in the code, we add vendor specific extensions to query it - nv_warn = false; - } - } - else - { - // unknown version scheme, probably new driver version + device_param->kernel_preferred_wgs_multiple = 8; - nv_warn = false; - } + // device_local_mem_type - if (nv_warn == true) - { - event_log_warning (hashcat_ctx, "* Device #%u: Outdated or broken NVIDIA driver '%s' detected!", device_id + 1, device_param->opencl_driver_version); - event_log_warning (hashcat_ctx, NULL); + cl_device_local_mem_type device_local_mem_type; - event_log_warning (hashcat_ctx, "You are STRONGLY encouraged to use the officially supported driver."); - event_log_warning (hashcat_ctx, "See hashcat's homepage for officially supported NVIDIA drivers."); - event_log_warning (hashcat_ctx, "See also: https://hashcat.net/faq/wrongdriver"); - event_log_warning (hashcat_ctx, "You can use --force to override this, but do not report 
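// ---------------------------------------------------------------------------
// Editorial aside, not part of the patch: CL_DEVICE_EXTENSIONS is returned as
// one space-separated string, so the required-feature tests above are plain
// substring checks. A compact sketch of the same idea; the two substrings
// mirror the base_atomics / byte_addressable_store checks in the code above.

#include <stdbool.h>
#include <string.h>

static bool device_has_required_extensions (const char *device_extensions)
{
  if (strstr (device_extensions, "base_atomics")           == NULL) return false;
  if (strstr (device_extensions, "byte_addressable_store") == NULL) return false;

  return true;
}
// ---------------------------------------------------------------------------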
related errors."); - event_log_warning (hashcat_ctx, NULL); - - device_param->skipped = true; - continue; - } + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_LOCAL_MEM_TYPE, sizeof (device_local_mem_type), &device_local_mem_type, NULL) == -1) + { + device_param->skipped = true; + continue; + } - if (device_param->sm_major < 5) - { - if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: This hardware has outdated CUDA compute capability (%u.%u).", device_id + 1, device_param->sm_major, device_param->sm_minor); - if (user_options->quiet == false) event_log_warning (hashcat_ctx, " For modern OpenCL performance, upgrade to hardware that supports"); - if (user_options->quiet == false) event_log_warning (hashcat_ctx, " CUDA compute capability version 5.0 (Maxwell) or higher."); - } + device_param->device_local_mem_type = device_local_mem_type; - if (device_param->kernel_exec_timeout != 0) - { - if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: WARNING! Kernel exec timeout is not disabled.", device_id + 1); - if (user_options->quiet == false) event_log_warning (hashcat_ctx, " This may cause \"CL_OUT_OF_RESOURCES\" or related errors."); - if (user_options->quiet == false) event_log_warning (hashcat_ctx, " To disable the timeout, see: https://hashcat.net/q/timeoutpatch"); - } - } - } - } + // device_max_constant_buffer_size - /** - * activate device - */ + cl_ulong device_max_constant_buffer_size; - opencl_devices_active++; + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof (device_max_constant_buffer_size), &device_max_constant_buffer_size, NULL) == -1) + { + device_param->skipped = true; + continue; } - } - } - } - - backend_ctx->opencl_devices_cnt = opencl_devices_cnt; - backend_ctx->opencl_devices_active = opencl_devices_active; - // all devices combined go into backend_* variables + if (device_local_mem_type == CL_LOCAL) + { + if (device_max_constant_buffer_size < 65536) + { + event_log_error (hashcat_ctx, "* Device #%u: This device's constant buffer size is too small.", device_id + 1); - backend_ctx->backend_devices_cnt = cuda_devices_cnt + hip_devices_cnt + opencl_devices_cnt; - backend_ctx->backend_devices_active = cuda_devices_active + hip_devices_active + opencl_devices_active; + device_param->skipped = true; + } + } - // find duplicate devices + // device_local_mem_size - //if ((cuda_devices_cnt > 0) && (hip_devices_cnt > 0) && (opencl_devices_cnt > 0)) - //{ - // using force here enables both devices, which is the worst possible outcome - // many users force by default, so this is not a good idea + cl_ulong device_local_mem_size = 0; - //if (user_options->force == false) - //{ - backend_ctx_find_alias_devices (hashcat_ctx); - //{ - //} + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof (device_local_mem_size), &device_local_mem_size, NULL) == -1) + { + device_param->skipped = true; + continue; + } - if (backend_ctx->backend_devices_active == 0) - { - event_log_error (hashcat_ctx, "No devices found/left."); + if (device_local_mem_type == CL_LOCAL) + { + if (device_local_mem_size < 32768) + { + event_log_error (hashcat_ctx, "* Device #%u: This device's local mem size is too small.", device_id + 1); - return -1; - } + device_param->skipped = true; + } + } - // now we can calculate the number of parallel running hook threads based on - // the number cpu cores and the number of active compute 
devices - // unless overwritten by the user + // workaround inc! + // allocating all reported local memory causes jit to fail with: SC failed. No reason given. + // if we limit ourself to 32k it seems to work - if (user_options->hook_threads == HOOK_THREADS) - { - const u32 processor_count = hc_get_processor_count (); + if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) + { + if (device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) + { + if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD) + { + device_local_mem_size = MIN (device_local_mem_size, 32768); + } + } + } - const u32 processor_count_cu = CEILDIV (processor_count, backend_ctx->backend_devices_active); // should never reach 0 + device_param->device_local_mem_size = device_local_mem_size; - user_options->hook_threads = processor_count_cu; - } + // handling known bugs on POCL - // additional check to see if the user has chosen a device that is not within the range of available devices (i.e. larger than devices_cnt) + // POCL < 1.9 doesn't like quotes in the include path, see: + // https://github.com/hashcat/hashcat/issues/2950 + // https://github.com/pocl/pocl/issues/962 - if (backend_ctx->backend_devices_filter != (u64) -1) - { - const u64 backend_devices_cnt_mask = ~(((u64) -1 >> backend_ctx->backend_devices_cnt) << backend_ctx->backend_devices_cnt); + // POCL < 1.5 and older LLVM versions are known to fail compiling kernels + // https://github.com/hashcat/hashcat/issues/2344 - if (backend_ctx->backend_devices_filter > backend_devices_cnt_mask) - { - event_log_error (hashcat_ctx, "An invalid device was specified using the --backend-devices parameter."); - event_log_error (hashcat_ctx, "The specified device was higher than the number of available devices (%u).", backend_ctx->backend_devices_cnt); + // we need to inform the user to update - return -1; - } - } + if (opencl_platform_vendor_id == VENDOR_ID_POCL) + { + char *pocl_version_ptr = strstr (opencl_platform_version, "pocl "); + char *llvm_version_ptr = strstr (opencl_platform_version, "LLVM "); - // time or resource intensive operations which we do not run if the corresponding device was skipped by the user + if ((pocl_version_ptr != NULL) && (llvm_version_ptr != NULL)) + { + bool pocl_skip = false; - if (backend_ctx->cuda) - { - // instruction test for cuda devices was replaced with fixed values (see above) + int pocl_maj = 0; + int pocl_min = 0; - /* - CUcontext cuda_context; + int pocl_bug_whitespace_on_path = 0; + int pocl_bug_kernel_compiling_failure = 0; - if (hc_cuCtxCreate (hashcat_ctx, &cuda_context, CU_CTX_SCHED_BLOCKING_SYNC, device_param->cuda_device) == -1) return -1; + const int res1 = sscanf (pocl_version_ptr, "pocl %d.%d", &pocl_maj, &pocl_min); - if (hc_cuCtxSetCurrent (hashcat_ctx, cuda_context) == -1) return -1; + if (res1 == 2) + { + const int pocl_version = (pocl_maj * 100) + pocl_min; - #define RUN_INSTRUCTION_CHECKS() \ - device_param->has_add = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"add.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \ - device_param->has_addc = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"addc.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \ - device_param->has_sub = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"sub.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \ - device_param->has_subc = cuda_test_instruction (hashcat_ctx, 
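// ---------------------------------------------------------------------------
// Editorial aside, not part of the patch: the default hook-thread count above
// is just the host CPU count divided, rounding up, across the active backend
// devices. A tiny sketch of that ceiling division; the names and the
// CEILDIV_EX macro are hypothetical stand-ins for hashcat's own helpers.

typedef unsigned int uint32_hc_t;

#define CEILDIV_EX(a,b) (((a) + (b) - 1) / (b))

static uint32_hc_t default_hook_threads (uint32_hc_t processor_count, uint32_hc_t backend_devices_active)
{
  // backend_devices_active is non-zero here, otherwise device init bailed out earlier
  return CEILDIV_EX (processor_count, backend_devices_active);
}
// ---------------------------------------------------------------------------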
sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"subc.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \ - device_param->has_bfe = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"bfe.u32 %0, 0, 0, 0;\" : \"=r\"(r)); }"); \ - device_param->has_lop3 = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"lop3.b32 %0, 0, 0, 0, 0;\" : \"=r\"(r)); }"); \ - device_param->has_mov64 = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned long long r; unsigned int a; unsigned int b; asm volatile (\"mov.b64 %0, {%1, %2};\" : \"=l\"(r) : \"r\"(a), \"r\"(b)); }"); \ - device_param->has_prmt = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"prmt.b32 %0, 0, 0, 0;\" : \"=r\"(r)); }"); \ + if (pocl_version < 109) + { + if (strchr (folder_config->cpath_real, ' ') != NULL) + { + pocl_skip = true; + pocl_bug_whitespace_on_path = 1; + } - if (backend_devices_idx > 0) - { - hc_device_param_t *device_param_prev = &devices_param[backend_devices_idx - 1]; + if (pocl_version < 105) + { + pocl_skip = true; + pocl_bug_kernel_compiling_failure = 1; + } + } + } - if (is_same_device_type (device_param, device_param_prev) == true) - { - device_param->has_add = device_param_prev->has_add; - device_param->has_addc = device_param_prev->has_addc; - device_param->has_sub = device_param_prev->has_sub; - device_param->has_subc = device_param_prev->has_subc; - device_param->has_bfe = device_param_prev->has_bfe; - device_param->has_lop3 = device_param_prev->has_lop3; - device_param->has_mov64 = device_param_prev->has_mov64; - device_param->has_prmt = device_param_prev->has_prmt; - } - else - { - RUN_INSTRUCTION_CHECKS(); - } - } - else - { - RUN_INSTRUCTION_CHECKS(); - } + int llvm_maj = 0; + int llvm_min = 0; - #undef RUN_INSTRUCTION_CHECKS + const int res2 = sscanf (llvm_version_ptr, "LLVM %d.%d", &llvm_maj, &llvm_min); - if (hc_cuCtxDestroy (hashcat_ctx, cuda_context) == -1) return -1; + if (res2 == 2) + { + const int llvm_version = (llvm_maj * 100) + llvm_min; - */ - } + if (llvm_version < 900) + { + pocl_skip = true; + } + } - if (backend_ctx->hip) - { - // TODO HIP? - // Maybe all devices supported by hip have these instructions guaranteed? 
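// ---------------------------------------------------------------------------
// Editorial aside, not part of the patch: the POCL workaround above encodes
// "major.minor" as major * 100 + minor so the gates read as POCL < 1.05,
// POCL < 1.09 and LLVM < 9.00. A self-contained sketch of that parsing, with
// hypothetical names:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static bool pocl_platform_needs_skip (const char *platform_version, bool path_has_space)
{
  const char *pocl_ptr = strstr (platform_version, "pocl ");
  const char *llvm_ptr = strstr (platform_version, "LLVM ");

  if ((pocl_ptr == NULL) || (llvm_ptr == NULL)) return false;

  int pocl_maj = 0; int pocl_min = 0;
  int llvm_maj = 0; int llvm_min = 0;

  if (sscanf (pocl_ptr, "pocl %d.%d", &pocl_maj, &pocl_min) == 2)
  {
    const int pocl_version = (pocl_maj * 100) + pocl_min;

    if ((pocl_version < 109) && (path_has_space == true)) return true; // include-path quoting bug
    if  (pocl_version < 105)                              return true; // kernel compilation bug
  }

  if (sscanf (llvm_ptr, "LLVM %d.%d", &llvm_maj, &llvm_min) == 2)
  {
    if (((llvm_maj * 100) + llvm_min) < 900) return true; // LLVM back end too old
  }

  return false;
}
// ---------------------------------------------------------------------------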
+ if (pocl_skip == true) + { + if (user_options->force == false) + { + event_log_error (hashcat_ctx, "* Device #%u: Outdated POCL OpenCL driver detected!", device_id + 1); - for (int backend_devices_cnt = 0; backend_devices_cnt < backend_ctx->backend_devices_cnt; backend_devices_cnt++) - { - hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_cnt]; + if (user_options->quiet == false) + { + if (pocl_bug_kernel_compiling_failure == 1) + { + event_log_warning (hashcat_ctx, "This OpenCL driver may fail kernel compilation or produce false negatives."); + } - if (device_param->is_hip == false) continue; + if (pocl_bug_whitespace_on_path == 1) + { + event_log_warning (hashcat_ctx, "Consider moving hashcat to a path with no spaces if you want to use this POCL version."); + } - device_param->has_vadd = true; - device_param->has_vaddc = true; - device_param->has_vadd_co = true; - device_param->has_vaddc_co = true; - device_param->has_vsub = true; - device_param->has_vsubb = true; - device_param->has_vsub_co = true; - device_param->has_vsubb_co = true; - device_param->has_vadd3 = true; - device_param->has_vbfe = true; - device_param->has_vperm = true; - } - } + event_log_warning (hashcat_ctx, "We recommend using a version of POCL >= 1.9"); + event_log_warning (hashcat_ctx, "You can use --force to override, but do not report related errors."); + event_log_warning (hashcat_ctx, NULL); + } - if (backend_ctx->ocl) - { - for (int backend_devices_cnt = 0; backend_devices_cnt < backend_ctx->backend_devices_cnt; backend_devices_cnt++) - { - hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_cnt]; + device_param->skipped = true; + } + } + } + } - if (device_param->is_opencl == false) continue; + char *opencl_device_version_lower = hcstrdup (opencl_device_version); - if (user_options->backend_info == false) - { - // do not ignore in case -I because user expects a value also for skipped devices + lowercase ((u8 *) opencl_device_version_lower, strlen (opencl_device_version_lower)); - if (device_param->skipped == true) continue; - } - - /** - * create context for each device - */ - - cl_context context; + if ((strstr (opencl_device_version_lower, "beignet ")) + || (strstr (opencl_device_version_lower, " beignet")) + || (strstr (opencl_device_version_lower, "mesa ")) + || (strstr (opencl_device_version_lower, " mesa"))) + { + // BEIGNET: https://github.com/hashcat/hashcat/issues/2243 + // MESA: https://github.com/hashcat/hashcat/issues/2269 - /* - cl_context_properties properties[3]; + if (user_options->force == false) + { + event_log_error (hashcat_ctx, "* Device #%u: Unstable OpenCL driver detected!", device_id + 1); - properties[0] = CL_CONTEXT_PLATFORM; - properties[1] = (cl_context_properties) device_param->opencl_platform; - properties[2] = 0; + if (user_options->quiet == false) + { + event_log_warning (hashcat_ctx, "This OpenCL driver may fail kernel compilation or produce false negatives."); + event_log_warning (hashcat_ctx, "You can use --force to override, but do not report related errors."); + event_log_warning (hashcat_ctx, NULL); + } - CL_rc = hc_clCreateContext (hashcat_ctx, properties, 1, &device_param->opencl_device, NULL, NULL, &context); - */ + device_param->skipped = true; + } + } - if (hc_clCreateContext (hashcat_ctx, NULL, 1, &device_param->opencl_device, NULL, NULL, &context) == -1) - { - device_param->skipped = true; - continue; - } + hcfree (opencl_device_version_lower); - /** - * create command-queue - */ + // Since some times we get 
reports from users about not working hashcat, dropping error messages like: + // CL_INVALID_COMMAND_QUEUE and CL_OUT_OF_RESOURCES + // Turns out that this is caused by Intel OpenCL runtime handling their GPU devices + // Disable such devices unless the user forces to use it + // This is successfully workaround with new threading model and new memory management + // Tested on Windows 10 + // OpenCL.Version.: OpenCL C 2.1 + // Driver.Version.: 23.20.16.4973 - cl_command_queue command_queue; + /* + #if !defined (__APPLE__) + if (opencl_device_type & CL_DEVICE_TYPE_GPU) + { + if ((device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK) || (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_BEIGNET)) + { + if (user_options->force == false) + { + if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: Intel's OpenCL runtime (GPU only) is currently broken.", device_id + 1); + if (user_options->quiet == false) event_log_warning (hashcat_ctx, " We are waiting for updated OpenCL drivers from Intel."); + if (user_options->quiet == false) event_log_warning (hashcat_ctx, " You can use --force to override, but do not report related errors."); + if (user_options->quiet == false) event_log_warning (hashcat_ctx, NULL); - if (hc_clCreateCommandQueue (hashcat_ctx, context, device_param->opencl_device, 0, &command_queue) == -1) - { - device_param->skipped = true; - continue; - } + device_param->skipped = true; + } + } + } + #endif // __APPLE__ + */ - // instruction set + // skipped - if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && (device_param->opencl_platform_vendor_id == VENDOR_ID_AMD)) - { - #define RUN_INSTRUCTION_CHECKS() \ - device_param->has_vadd = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADD_U32 %0, vcc, 0, 0;\" : \"=v\"(r1)); }"); \ - device_param->has_vaddc = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADDC_U32 %0, vcc, 0, 0, vcc;\" : \"=v\"(r1)); }"); \ - device_param->has_vadd_co = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADD_CO_U32 %0, vcc, 0, 0;\" : \"=v\"(r1)); }"); \ - device_param->has_vaddc_co = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADDC_CO_U32 %0, vcc, 0, 0, vcc;\" : \"=v\"(r1)); }"); \ - device_param->has_vsub = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_SUB_U32 %0, vcc, 0, 0;\" : \"=v\"(r1)); }"); \ - device_param->has_vsubb = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_SUBB_U32 %0, vcc, 0, 0, vcc;\" : \"=v\"(r1)); }"); \ - device_param->has_vsub_co = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_SUB_CO_U32 %0, vcc, 0, 0;\" : \"=v\"(r1)); }"); \ - device_param->has_vsubb_co = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_SUBB_CO_U32 %0, vcc, 0, 0, vcc;\" : \"=v\"(r1)); }"); \ - device_param->has_vadd3 = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { 
uint r1; __asm__ __volatile__ (\"V_ADD3_U32 %0, 0, 0, 0;\" : \"=v\"(r1)); }"); \ - device_param->has_vbfe = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_BFE_U32 %0, 0, 0, 0;\" : \"=v\"(r1)); }"); \ - device_param->has_vperm = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_PERM_B32 %0, 0, 0, 0;\" : \"=v\"(r1)); }"); \ + if ((backend_ctx->backend_devices_filter & (1ULL << device_id)) == 0) + { + device_param->skipped = true; + } - if (backend_devices_idx > 0) + if ((backend_ctx->opencl_device_types_filter & (opencl_device_type)) == 0) { - hc_device_param_t *device_param_prev = &devices_param[backend_devices_idx - 1]; + device_param->skipped = true; + } - if (is_same_device_type (device_param, device_param_prev) == true) - { - device_param->has_vadd = device_param_prev->has_vadd; - device_param->has_vaddc = device_param_prev->has_vaddc; - device_param->has_vadd_co = device_param_prev->has_vadd_co; - device_param->has_vaddc_co = device_param_prev->has_vaddc_co; - device_param->has_vsub = device_param_prev->has_vsub; - device_param->has_vsubb = device_param_prev->has_vsubb; - device_param->has_vsub_co = device_param_prev->has_vsub_co; - device_param->has_vsubb_co = device_param_prev->has_vsubb_co; - device_param->has_vadd3 = device_param_prev->has_vadd3; - device_param->has_vbfe = device_param_prev->has_vbfe; - device_param->has_vperm = device_param_prev->has_vperm; - } - else + #if defined (__APPLE__) + if (opencl_device_type & CL_DEVICE_TYPE_GPU) + { + //if (user_options->force == false) + if (device_param->skipped == false) { - RUN_INSTRUCTION_CHECKS(); + if (user_options->quiet == false) + { + event_log_warning (hashcat_ctx, "* Device #%u: Apple's OpenCL drivers (GPU) are known to be unreliable.", device_id + 1); + event_log_warning (hashcat_ctx, " You have been warned."); + //event_log_warning (hashcat_ctx, " There are many reports of false negatives and other issues."); + //event_log_warning (hashcat_ctx, " This is not a hashcat issue. Other projects report issues with these drivers."); + //event_log_warning (hashcat_ctx, " You can use --force to override, but do not report related errors. 
You have been warned."); + event_log_warning (hashcat_ctx, NULL); + } + + //device_param->skipped = true; } } - else + #endif // __APPLE__ + + // driver_version + + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DRIVER_VERSION, 0, NULL, ¶m_value_size) == -1) { - RUN_INSTRUCTION_CHECKS(); + device_param->skipped = true; + continue; } - #undef RUN_INSTRUCTION_CHECKS - } + char *opencl_driver_version = (char *) hcmalloc (param_value_size); - if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && (device_param->opencl_platform_vendor_id == VENDOR_ID_NV)) - { - // replaced with fixed values see non time intensive section above + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DRIVER_VERSION, param_value_size, opencl_driver_version, NULL) == -1) + { + device_param->skipped = true; + hcfree (opencl_driver_version); + continue; + } - /* - #define RUN_INSTRUCTION_CHECKS() \ - device_param->has_add = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"add.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \ - device_param->has_addc = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"addc.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \ - device_param->has_sub = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"sub.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \ - device_param->has_subc = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"subc.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \ - device_param->has_bfe = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"bfe.u32 %0, 0, 0, 0;\" : \"=r\"(r)); }"); \ - device_param->has_lop3 = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"lop3.b32 %0, 0, 0, 0, 0;\" : \"=r\"(r)); }"); \ - device_param->has_mov64 = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { ulong r; uint a; uint b; asm volatile (\"mov.b64 %0, {%1, %2};\" : \"=l\"(r) : \"r\"(a), \"r\"(b)); }"); \ - device_param->has_prmt = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"prmt.b32 %0, 0, 0, 0;\" : \"=r\"(r)); }"); \ + device_param->opencl_driver_version = opencl_driver_version; - if (backend_devices_idx > 0) + // vendor specific + + if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU) { - hc_device_param_t *device_param_prev = &devices_param[backend_devices_idx - 1]; + #if defined (__APPLE__) + if (device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) + { + if (device_param->skipped == false) + { + need_iokit = true; + } + } + #endif - if (is_same_device_type (device_param, device_param_prev) == true) + #if defined (__linux__) + need_sysfs_cpu = true; + #endif + } + + if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) + { + if ((device_param->opencl_platform_vendor_id == VENDOR_ID_AMD) && (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)) { - device_param->has_add = device_param_prev->has_add; - device_param->has_addc = device_param_prev->has_addc; - device_param->has_sub = device_param_prev->has_sub; - device_param->has_subc = device_param_prev->has_subc; - device_param->has_bfe = 
device_param_prev->has_bfe; - device_param->has_lop3 = device_param_prev->has_lop3; - device_param->has_mov64 = device_param_prev->has_mov64; - device_param->has_prmt = device_param_prev->has_prmt; + need_adl = true; + + #if defined (__linux__) + need_sysfs_amdgpu = true; + #endif } - else + + if ((device_param->opencl_platform_vendor_id == VENDOR_ID_NV) && (device_param->opencl_device_vendor_id == VENDOR_ID_NV)) { - RUN_INSTRUCTION_CHECKS(); + need_nvml = true; + + #if defined (_WIN) || defined (__CYGWIN__) + need_nvapi = true; + #endif } } - else + + if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU) { - RUN_INSTRUCTION_CHECKS(); + // they like this + + device_param->kernel_preferred_wgs_multiple = 1; } - #undef RUN_INSTRUCTION_CHECKS - */ - } + if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) + { + if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)) + { + // from https://www.khronos.org/registry/OpenCL/extensions/amd/cl_amd_device_attribute_query.txt + #define CL_DEVICE_WAVEFRONT_WIDTH_AMD 0x4043 - // available device memory - // This test causes an GPU memory usage spike. - // In case there are multiple hashcat instances starting at the same time this will cause GPU out of memory errors which otherwise would not exist. - // We will simply not run it if that device was skipped by the user. + // crazy, but apple does not support this query! + // the best alternative is "Preferred work group size multiple (kernel)", but requires to specify a kernel. + // so we will set kernel_preferred_wgs_multiple intentionally to 0 because otherwise it it set to 8 by default. + // we then assign the value kernel_preferred_wgs_multiple a small kernel like bzero after test if this was set to 0. - #define MAX_ALLOC_CHECKS_CNT 8192 - #define MAX_ALLOC_CHECKS_SIZE (64 * 1024 * 1024) + device_param->kernel_preferred_wgs_multiple = 0; + } - device_param->device_available_mem = device_param->device_global_mem - MAX_ALLOC_CHECKS_SIZE; + if ((device_param->opencl_platform_vendor_id == VENDOR_ID_AMD) && (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)) + { + cl_uint device_wavefront_width_amd; - if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) - { - // OK, so the problem here is the following: - // There's just CL_DEVICE_GLOBAL_MEM_SIZE to ask OpenCL about the total memory on the device, - // but there's no way to ask for available memory on the device. - // In combination, most OpenCL runtimes implementation of clCreateBuffer() - // are doing so called lazy memory allocation on the device. 
- // Now, if the user has X11 (or a game or anything that takes a lot of GPU memory) - // running on the host we end up with an error type of this: - // clEnqueueNDRangeKernel(): CL_MEM_OBJECT_ALLOCATION_FAILURE - // The clEnqueueNDRangeKernel() is because of the lazy allocation - // The best way to workaround this problem is if we would be able to ask for available memory, - // The idea here is to try to evaluate available memory by allocating it till it errors + // from https://www.khronos.org/registry/OpenCL/extensions/amd/cl_amd_device_attribute_query.txt + #define CL_DEVICE_WAVEFRONT_WIDTH_AMD 0x4043 - cl_mem *tmp_device = (cl_mem *) hccalloc (MAX_ALLOC_CHECKS_CNT, sizeof (cl_mem)); + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_WAVEFRONT_WIDTH_AMD, sizeof (device_wavefront_width_amd), &device_wavefront_width_amd, NULL) == -1) + { + device_param->skipped = true; + continue; + } - u64 c; + device_param->kernel_preferred_wgs_multiple = device_wavefront_width_amd; - for (c = 0; c < MAX_ALLOC_CHECKS_CNT; c++) - { - if (((c + 1 + 1) * MAX_ALLOC_CHECKS_SIZE) >= device_param->device_global_mem) break; - - cl_int CL_err; + cl_device_topology_amd amdtopo; - OCL_PTR *ocl = (OCL_PTR *) backend_ctx->ocl; + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_TOPOLOGY_AMD, sizeof (amdtopo), &amdtopo, NULL) == -1) + { + device_param->skipped = true; + continue; + } - tmp_device[c] = ocl->clCreateBuffer (context, CL_MEM_READ_WRITE, MAX_ALLOC_CHECKS_SIZE, NULL, &CL_err); + device_param->pcie_domain = 0; // no attribute to query + device_param->pcie_bus = amdtopo.pcie.bus; + device_param->pcie_device = amdtopo.pcie.device; + device_param->pcie_function = amdtopo.pcie.function; + } - if (CL_err != CL_SUCCESS) + if ((device_param->opencl_platform_vendor_id == VENDOR_ID_NV) && (device_param->opencl_device_vendor_id == VENDOR_ID_NV)) { - c--; - - break; - } + cl_uint device_warp_size_nv; - // transfer only a few byte should be enough to force the runtime to actually allocate the memory + // from deps/OpenCL-Headers/CL/cl_ext.h + #define CL_DEVICE_WARP_SIZE_NV 0x4003 - u8 tmp_host[8]; + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_WARP_SIZE_NV, sizeof (device_warp_size_nv), &device_warp_size_nv, NULL) == -1) + { + device_param->skipped = true; + continue; + } - if (ocl->clEnqueueReadBuffer (command_queue, tmp_device[c], CL_TRUE, 0, sizeof (tmp_host), tmp_host, 0, NULL, NULL) != CL_SUCCESS) break; + device_param->kernel_preferred_wgs_multiple = device_warp_size_nv; - if (ocl->clEnqueueWriteBuffer (command_queue, tmp_device[c], CL_TRUE, 0, sizeof (tmp_host), tmp_host, 0, NULL, NULL) != CL_SUCCESS) break; + cl_uint pci_bus_id_nv; // is cl_uint the right type for them?? 
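// ---------------------------------------------------------------------------
// Editorial aside, not part of the patch: OpenCL only exposes the total memory
// (CL_DEVICE_GLOBAL_MEM_SIZE), not the free memory, and clCreateBuffer () is
// typically lazy, so the probe above keeps allocating and touching 64 MiB
// chunks until an allocation or transfer fails. A condensed sketch of that
// idea against the stock OpenCL API; hashcat routes these calls through its
// ocl function table and hc_* wrappers.

#include <CL/cl.h>

#define PROBE_CHUNK_SIZE (64ULL * 1024 * 1024)
#define PROBE_CHUNK_CNT  8192

static cl_ulong probe_available_mem (cl_context context, cl_command_queue queue, cl_ulong global_mem)
{
  static cl_mem chunks[PROBE_CHUNK_CNT];

  cl_ulong c;

  for (c = 0; c < PROBE_CHUNK_CNT; c++)
  {
    if (((c + 2) * PROBE_CHUNK_SIZE) >= global_mem) break; // keep a safety margin

    cl_int err = CL_SUCCESS;

    chunks[c] = clCreateBuffer (context, CL_MEM_READ_WRITE, PROBE_CHUNK_SIZE, NULL, &err);

    if (err != CL_SUCCESS) break;

    // touch a few bytes so the runtime actually backs the buffer with device memory

    unsigned char tmp[8] = { 0 };

    if (clEnqueueWriteBuffer (queue, chunks[c], CL_TRUE, 0, sizeof (tmp), tmp, 0, NULL, NULL) != CL_SUCCESS)
    {
      clReleaseMemObject (chunks[c]);

      break;
    }
  }

  const cl_ulong available = (c > 0) ? c * PROBE_CHUNK_SIZE : PROBE_CHUNK_SIZE;

  while (c > 0) clReleaseMemObject (chunks[--c]); // release the probe allocations again

  return available;
}
// ---------------------------------------------------------------------------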
+ cl_uint pci_slot_id_nv; - if (ocl->clEnqueueReadBuffer (command_queue, tmp_device[c], CL_TRUE, MAX_ALLOC_CHECKS_SIZE - sizeof (tmp_host), sizeof (tmp_host), tmp_host, 0, NULL, NULL) != CL_SUCCESS) break; + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_PCI_BUS_ID_NV, sizeof (pci_bus_id_nv), &pci_bus_id_nv, NULL) == -1) + { + device_param->skipped = true; + continue; + } - if (ocl->clEnqueueWriteBuffer (command_queue, tmp_device[c], CL_TRUE, MAX_ALLOC_CHECKS_SIZE - sizeof (tmp_host), sizeof (tmp_host), tmp_host, 0, NULL, NULL) != CL_SUCCESS) break; - } + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_PCI_SLOT_ID_NV, sizeof (pci_slot_id_nv), &pci_slot_id_nv, NULL) == -1) + { + device_param->skipped = true; + continue; + } - device_param->device_available_mem = MAX_ALLOC_CHECKS_SIZE; + device_param->pcie_domain = 0; // no attribute to query + device_param->pcie_bus = (u8) (pci_bus_id_nv); + device_param->pcie_device = (u8) (pci_slot_id_nv >> 3); + device_param->pcie_function = (u8) (pci_slot_id_nv & 7); - if (c > 0) - { - device_param->device_available_mem *= c; - } + int sm_minor = 0; + int sm_major = 0; - // clean up + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV, sizeof (sm_minor), &sm_minor, NULL) == -1) + { + device_param->skipped = true; + continue; + } - for (c = 0; c < MAX_ALLOC_CHECKS_CNT; c++) - { - if (((c + 1 + 1) * MAX_ALLOC_CHECKS_SIZE) >= device_param->device_global_mem) break; + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, sizeof (sm_major), &sm_major, NULL) == -1) + { + device_param->skipped = true; + continue; + } - if (tmp_device[c] != NULL) - { - if (hc_clReleaseMemObject (hashcat_ctx, tmp_device[c]) == -1) return -1; - } - } + device_param->sm_minor = sm_minor; + device_param->sm_major = sm_major; - hcfree (tmp_device); - } + cl_uint kernel_exec_timeout = 0; - hc_clReleaseCommandQueue (hashcat_ctx, command_queue); + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV, sizeof (kernel_exec_timeout), &kernel_exec_timeout, NULL) == -1) + { + device_param->skipped = true; + continue; + } - hc_clReleaseContext (hashcat_ctx, context); + device_param->kernel_exec_timeout = kernel_exec_timeout; - if (device_param->device_host_unified_memory == 1) - { - // so, we actually have only half the memory because we need the same buffers on host side + // CPU burning loop damper + // Value is given as number between 0-100 + // By default 8% - device_param->device_available_mem /= 2; - } - } - } + device_param->spin_damp = (double) user_options->spin_damp / 100; - backend_ctx->target_msec = TARGET_MSEC_PROFILE[user_options->workload_profile - 1]; + if (user_options->stdout_flag == false) + { + // recommend CUDA - backend_ctx->need_adl = need_adl; - backend_ctx->need_nvml = need_nvml; - backend_ctx->need_nvapi = need_nvapi; - backend_ctx->need_sysfs_amdgpu = need_sysfs_amdgpu; - backend_ctx->need_sysfs_cpu = need_sysfs_cpu; - backend_ctx->need_iokit = need_iokit; + if ((backend_ctx->cuda == NULL) || (backend_ctx->nvrtc == NULL)) + { + if (user_options->backend_ignore_cuda == false) + { + if (backend_ctx->rc_cuda_init == -1) + { + event_log_warning (hashcat_ctx, "Failed to initialize NVIDIA CUDA library."); + event_log_warning (hashcat_ctx, NULL); + } + else + { + event_log_warning (hashcat_ctx, "Successfully initialized NVIDIA CUDA library."); + event_log_warning 
(hashcat_ctx, NULL); + } - backend_ctx->comptime = comptime; + if (backend_ctx->rc_nvrtc_init == -1) + { + event_log_warning (hashcat_ctx, "Failed to initialize NVIDIA RTC library."); + event_log_warning (hashcat_ctx, NULL); + } + else + { + event_log_warning (hashcat_ctx, "Successfully initialized NVIDIA RTC library."); + event_log_warning (hashcat_ctx, NULL); + } - return 0; -} + event_log_warning (hashcat_ctx, "* Device #%u: CUDA SDK Toolkit not installed or incorrectly installed.", device_id + 1); + event_log_warning (hashcat_ctx, " CUDA SDK Toolkit required for proper device support and utilization."); + event_log_warning (hashcat_ctx, " Falling back to OpenCL runtime."); -void backend_ctx_devices_destroy (hashcat_ctx_t *hashcat_ctx) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + event_log_warning (hashcat_ctx, NULL); + } + } + } + } + } - if (backend_ctx->enabled == false) return; + // instruction set - for (u32 opencl_platforms_idx = 0; opencl_platforms_idx < backend_ctx->opencl_platforms_cnt; opencl_platforms_idx++) - { - hcfree (backend_ctx->opencl_platforms_devices[opencl_platforms_idx]); - hcfree (backend_ctx->opencl_platforms_name[opencl_platforms_idx]); - hcfree (backend_ctx->opencl_platforms_vendor[opencl_platforms_idx]); - hcfree (backend_ctx->opencl_platforms_version[opencl_platforms_idx]); - } + // fixed values works only for nvidia devices + // dynamical values for amd see time intensive section below - for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++) - { - hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx]; + if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && (device_param->opencl_platform_vendor_id == VENDOR_ID_NV)) + { + const int sm = (device_param->sm_major * 10) + device_param->sm_minor; - hcfree (device_param->device_name); + device_param->has_add = (sm >= 12) ? true : false; + device_param->has_addc = (sm >= 12) ? true : false; + device_param->has_sub = (sm >= 12) ? true : false; + device_param->has_subc = (sm >= 12) ? true : false; + device_param->has_bfe = (sm >= 20) ? true : false; + device_param->has_lop3 = (sm >= 50) ? true : false; + device_param->has_mov64 = (sm >= 10) ? true : false; + device_param->has_prmt = (sm >= 20) ? 
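// ---------------------------------------------------------------------------
// Editorial aside, not part of the patch: for NVIDIA OpenCL devices the
// available PTX instructions are derived from the compute capability rather
// than probed with test kernels. A minimal sketch of that derivation, using
// illustrative names; the thresholds mirror the assignments above.

#include <stdbool.h>

typedef struct
{
  bool has_bfe;   // bit field extract, sm_20 (Fermi) and newer
  bool has_lop3;  // 3-input logic op,  sm_50 (Maxwell) and newer
  bool has_prmt;  // byte permute,      sm_20 (Fermi) and newer

} nv_isa_caps_t;

static nv_isa_caps_t nv_isa_caps_from_cc (const int sm_major, const int sm_minor)
{
  const int sm = (sm_major * 10) + sm_minor;

  nv_isa_caps_t caps;

  caps.has_bfe  = (sm >= 20);
  caps.has_lop3 = (sm >= 50);
  caps.has_prmt = (sm >= 20);

  return caps;
}
// ---------------------------------------------------------------------------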
true : false; + } - if (device_param->is_opencl == true) - { - hcfree (device_param->opencl_driver_version); - hcfree (device_param->opencl_device_version); - hcfree (device_param->opencl_device_c_version); - hcfree (device_param->opencl_device_vendor); - } - } + // common driver check - backend_ctx->backend_devices_cnt = 0; - backend_ctx->backend_devices_active = 0; - backend_ctx->cuda_devices_cnt = 0; - backend_ctx->cuda_devices_active = 0; - backend_ctx->hip_devices_cnt = 0; - backend_ctx->hip_devices_active = 0; - backend_ctx->opencl_devices_cnt = 0; - backend_ctx->opencl_devices_active = 0; + if (device_param->skipped == false) + { + if ((user_options->force == false) && (user_options->backend_info == false)) + { + if (opencl_device_type & CL_DEVICE_TYPE_CPU) + { + if (device_param->opencl_platform_vendor_id == VENDOR_ID_INTEL_SDK) + { + bool intel_warn = false; - backend_ctx->need_adl = false; - backend_ctx->need_nvml = false; - backend_ctx->need_nvapi = false; - backend_ctx->need_sysfs_amdgpu = false; - backend_ctx->need_sysfs_cpu = false; - backend_ctx->need_iokit = false; -} + // Intel OpenCL runtime 18 -void backend_ctx_devices_sync_tuning (hashcat_ctx_t *hashcat_ctx) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - hashconfig_t *hashconfig = hashcat_ctx->hashconfig; + int opencl_driver1 = 0; + int opencl_driver2 = 0; + int opencl_driver3 = 0; + int opencl_driver4 = 0; - if (backend_ctx->enabled == false) return; + const int res18 = sscanf (device_param->opencl_driver_version, "%d.%d.%d.%d", &opencl_driver1, &opencl_driver2, &opencl_driver3, &opencl_driver4); - for (int backend_devices_cnt_src = 0; backend_devices_cnt_src < backend_ctx->backend_devices_cnt; backend_devices_cnt_src++) - { - hc_device_param_t *device_param_src = &backend_ctx->devices_param[backend_devices_cnt_src]; + if (res18 == 4) + { + // so far all versions 18 are ok + } + else + { + // Intel OpenCL runtime 16 - if (device_param_src->skipped == true) continue; - if (device_param_src->skipped_warning == true) continue; + float opencl_version = 0; + int opencl_build = 0; - for (int backend_devices_cnt_dst = backend_devices_cnt_src + 1; backend_devices_cnt_dst < backend_ctx->backend_devices_cnt; backend_devices_cnt_dst++) - { - hc_device_param_t *device_param_dst = &backend_ctx->devices_param[backend_devices_cnt_dst]; + const int res16 = sscanf (device_param->opencl_device_version, "OpenCL %f (Build %d)", &opencl_version, &opencl_build); - if (device_param_dst->skipped == true) continue; - if (device_param_dst->skipped_warning == true) continue; + if (res16 == 2) + { + if (opencl_build < 25) intel_warn = true; + } + } - if (is_same_device_type (device_param_src, device_param_dst) == false) continue; + if (intel_warn == true) + { + event_log_error (hashcat_ctx, "* Device #%u: Outdated or broken Intel OpenCL runtime '%s' detected!", device_id + 1, device_param->opencl_driver_version); - device_param_dst->kernel_accel = device_param_src->kernel_accel; - device_param_dst->kernel_loops = device_param_src->kernel_loops; - device_param_dst->kernel_threads = device_param_src->kernel_threads; - - const u32 hardware_power = ((hashconfig->opts_type & OPTS_TYPE_MP_MULTI_DISABLE) ? 
1 : device_param_dst->device_processors) * device_param_dst->kernel_threads; - - device_param_dst->hardware_power = hardware_power; + event_log_warning (hashcat_ctx, "You are STRONGLY encouraged to use the officially supported runtime."); + event_log_warning (hashcat_ctx, "See hashcat.net for the officially supported Intel OpenCL runtime."); + event_log_warning (hashcat_ctx, "See also: https://hashcat.net/faq/wrongdriver"); + event_log_warning (hashcat_ctx, "You can use --force to override this, but do not report related errors."); + event_log_warning (hashcat_ctx, NULL); - const u32 kernel_power = device_param_dst->hardware_power * device_param_dst->kernel_accel; + device_param->skipped = true; + continue; + } + } + } + else if (opencl_device_type & CL_DEVICE_TYPE_GPU) + { + if (device_param->opencl_platform_vendor_id == VENDOR_ID_AMD) + { + bool amd_warn = true; - device_param_dst->kernel_power = kernel_power; - } - } -} + #if defined (__linux__) + // AMDGPU-PRO Driver 16.40 and higher + if (strtoul (device_param->opencl_driver_version, NULL, 10) >= 2117) amd_warn = false; + // AMDGPU-PRO Driver 16.50 is known to be broken + if (strtoul (device_param->opencl_driver_version, NULL, 10) == 2236) amd_warn = true; + // AMDGPU-PRO Driver 16.60 is known to be broken + if (strtoul (device_param->opencl_driver_version, NULL, 10) == 2264) amd_warn = true; + // AMDGPU-PRO Driver 17.10 is known to be broken + if (strtoul (device_param->opencl_driver_version, NULL, 10) == 2348) amd_warn = true; + // AMDGPU-PRO Driver 17.20 (2416) is fine, doesn't need check will match >= 2117 + #elif defined (_WIN) + // AMD Radeon Software 14.9 and higher, should be updated to 15.12 + if (strtoul (device_param->opencl_driver_version, NULL, 10) >= 1573) amd_warn = false; + #else + // we have no information about other os + if (amd_warn == true) amd_warn = false; + #endif -void backend_ctx_devices_update_power (hashcat_ctx_t *hashcat_ctx) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - status_ctx_t *status_ctx = hashcat_ctx->status_ctx; - user_options_extra_t *user_options_extra = hashcat_ctx->user_options_extra; - user_options_t *user_options = hashcat_ctx->user_options; + if (amd_warn == true) + { + event_log_error (hashcat_ctx, "* Device #%u: Outdated or broken AMD driver '%s' detected!", device_id + 1, device_param->opencl_driver_version); - if (backend_ctx->enabled == false) return; + event_log_warning (hashcat_ctx, "You are STRONGLY encouraged to use the officially supported driver."); + event_log_warning (hashcat_ctx, "See hashcat.net for officially supported AMD drivers."); + event_log_warning (hashcat_ctx, "See also: https://hashcat.net/faq/wrongdriver"); + event_log_warning (hashcat_ctx, "You can use --force to override this, but do not report related errors."); + event_log_warning (hashcat_ctx, NULL); - u32 kernel_power_all = 0; + device_param->skipped = true; + continue; + } + } - for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++) - { - hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx]; + if (device_param->opencl_platform_vendor_id == VENDOR_ID_NV) + { + int nv_warn = true; - if (device_param->skipped == true) continue; - if (device_param->skipped_warning == true) continue; + int version_maj = 0; + int version_min = 0; - kernel_power_all += device_param->kernel_power; - } + const int r = sscanf (device_param->opencl_driver_version, "%d.%d", &version_maj, &version_min); - backend_ctx->kernel_power_all 
= kernel_power_all; + if (r == 2) + { + // nvidia 441.x looks ok - /* - * Inform user about possible slow speeds - */ + if (version_maj == 440) + { + if (version_min >= 64) + { + nv_warn = false; + } + } + else + { + // unknown version scheme, probably new driver version - if ((user_options_extra->wordlist_mode == WL_MODE_FILE) || (user_options_extra->wordlist_mode == WL_MODE_MASK)) - { - if (status_ctx->words_base < kernel_power_all) - { - if (user_options->quiet == false) - { - clear_prompt (hashcat_ctx); + nv_warn = false; + } + } + else + { + // unknown version scheme, probably new driver version - event_log_advice (hashcat_ctx, "The wordlist or mask that you are using is too small."); - event_log_advice (hashcat_ctx, "This means that hashcat cannot use the full parallel power of your device(s)."); - event_log_advice (hashcat_ctx, "Unless you supply more work, your cracking speed will drop."); - event_log_advice (hashcat_ctx, "For tips on supplying more work, see: https://hashcat.net/faq/morework"); - event_log_advice (hashcat_ctx, NULL); - } - } - } -} + nv_warn = false; + } -void backend_ctx_devices_kernel_loops (hashcat_ctx_t *hashcat_ctx) -{ - combinator_ctx_t *combinator_ctx = hashcat_ctx->combinator_ctx; - hashconfig_t *hashconfig = hashcat_ctx->hashconfig; - hashes_t *hashes = hashcat_ctx->hashes; - mask_ctx_t *mask_ctx = hashcat_ctx->mask_ctx; - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - straight_ctx_t *straight_ctx = hashcat_ctx->straight_ctx; - user_options_t *user_options = hashcat_ctx->user_options; - user_options_extra_t *user_options_extra = hashcat_ctx->user_options_extra; + if (nv_warn == true) + { + event_log_warning (hashcat_ctx, "* Device #%u: Outdated or broken NVIDIA driver '%s' detected!", device_id + 1, device_param->opencl_driver_version); + event_log_warning (hashcat_ctx, NULL); - if (backend_ctx->enabled == false) return; + event_log_warning (hashcat_ctx, "You are STRONGLY encouraged to use the officially supported driver."); + event_log_warning (hashcat_ctx, "See hashcat's homepage for officially supported NVIDIA drivers."); + event_log_warning (hashcat_ctx, "See also: https://hashcat.net/faq/wrongdriver"); + event_log_warning (hashcat_ctx, "You can use --force to override this, but do not report related errors."); + event_log_warning (hashcat_ctx, NULL); - for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++) - { - hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx]; + device_param->skipped = true; + continue; + } - if (device_param->skipped == true) continue; - if (device_param->skipped_warning == true) continue; + if (device_param->sm_major < 5) + { + if (user_options->quiet == false) + { + event_log_warning (hashcat_ctx, "* Device #%u: This hardware has outdated CUDA compute capability (%u.%u).", device_id + 1, device_param->sm_major, device_param->sm_minor); + event_log_warning (hashcat_ctx, " For modern OpenCL performance, upgrade to hardware that supports"); + event_log_warning (hashcat_ctx, " CUDA compute capability version 5.0 (Maxwell) or higher."); + } + } - device_param->kernel_loops_min = device_param->kernel_loops_min_sav; - device_param->kernel_loops_max = device_param->kernel_loops_max_sav; + if (device_param->kernel_exec_timeout != 0) + { + if (user_options->quiet == false) + { + event_log_warning (hashcat_ctx, "* Device #%u: WARNING! 
Kernel exec timeout is not disabled.", device_id + 1); + event_log_warning (hashcat_ctx, " This may cause \"CL_OUT_OF_RESOURCES\" or related errors."); + event_log_warning (hashcat_ctx, " To disable the timeout, see: https://hashcat.net/q/timeoutpatch"); + } + } + } + } + } - if (device_param->kernel_loops_min < device_param->kernel_loops_max) - { - u32 innerloop_cnt = 0; + /** + * activate device + */ - if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) - { - if (user_options->slow_candidates == true) - { - innerloop_cnt = 1; - } - else - { - if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT) innerloop_cnt = MIN (KERNEL_RULES, (u32) straight_ctx->kernel_rules_cnt); - else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI) innerloop_cnt = MIN (KERNEL_COMBS, (u32) combinator_ctx->combs_cnt); - else if (user_options_extra->attack_kern == ATTACK_KERN_BF) innerloop_cnt = MIN (KERNEL_BFS, (u32) mask_ctx->bfs_cnt); + opencl_devices_active++; } } - else - { - innerloop_cnt = hashes->salts_buf[0].salt_iter; - } - - if ((innerloop_cnt >= device_param->kernel_loops_min) && - (innerloop_cnt <= device_param->kernel_loops_max)) - { - device_param->kernel_loops_max = innerloop_cnt; - } } } -} -static int get_cuda_kernel_wgs (hashcat_ctx_t *hashcat_ctx, CUfunction function, u32 *result) -{ - int max_threads_per_block; + backend_ctx->opencl_devices_cnt = opencl_devices_cnt; + backend_ctx->opencl_devices_active = opencl_devices_active; - if (hc_cuFuncGetAttribute (hashcat_ctx, &max_threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, function) == -1) return -1; + // all devices combined go into backend_* variables - *result = (u32) max_threads_per_block; + backend_ctx->backend_devices_cnt = cuda_devices_cnt + hip_devices_cnt + metal_devices_cnt + opencl_devices_cnt; + backend_ctx->backend_devices_active = cuda_devices_active + hip_devices_active + metal_devices_active + opencl_devices_active; - return 0; -} + // find duplicate devices -static int get_cuda_kernel_local_mem_size (hashcat_ctx_t *hashcat_ctx, CUfunction function, u64 *result) -{ - int shared_size_bytes; + //if ((cuda_devices_cnt > 0) && (hip_devices_cnt > 0) && (opencl_devices_cnt > 0)) + //{ + // using force here enables both devices, which is the worst possible outcome + // many users force by default, so this is not a good idea - if (hc_cuFuncGetAttribute (hashcat_ctx, &shared_size_bytes, CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, function) == -1) return -1; + //if (user_options->force == false) + //{ + backend_ctx_find_alias_devices (hashcat_ctx); + //{ + //} - *result = (u64) shared_size_bytes; + if (backend_ctx->backend_devices_active == 0) + { + event_log_error (hashcat_ctx, "No devices found/left."); - return 0; -} + return -1; + } -static int get_hip_kernel_wgs (hashcat_ctx_t *hashcat_ctx, hipFunction_t function, u32 *result) -{ - int max_threads_per_block; + // now we can calculate the number of parallel running hook threads based on + // the number cpu cores and the number of active compute devices + // unless overwritten by the user - if (hc_hipFuncGetAttribute (hashcat_ctx, &max_threads_per_block, HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, function) == -1) return -1; + if (user_options->hook_threads == HOOK_THREADS) + { + const u32 processor_count = hc_get_processor_count (); - *result = (u32) max_threads_per_block; + const u32 processor_count_cu = CEILDIV (processor_count, backend_ctx->backend_devices_active); // should never reach 0 - return 0; -} + user_options->hook_threads = processor_count_cu; + } 
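For reference, a minimal standalone sketch (not part of this patch) of the integer math used in the hunk above: the hook-thread count is the CPU core count divided, rounding up, across the active backend devices, and the --backend-devices filter is validated against a mask with one bit per existing device. CEILDIV mirrors hashcat's ceiling-division macro; the core/device counts and filter values below are made-up inputs.

/*
 * Standalone sketch, not hashcat code. CEILDIV mirrors hashcat's macro of the
 * same name; the mask expression matches the hunk above. All input values are
 * made up for illustration.
 */

#include <stdio.h>
#include <stdint.h>

#define CEILDIV(a,b) (((a) + (b) - 1) / (b))

int main (void)
{
  // hook threads: spread the CPU cores evenly over the active compute devices

  const uint32_t processor_count        = 16;
  const uint32_t backend_devices_active = 3;

  const uint32_t hook_threads = CEILDIV (processor_count, backend_devices_active); // 6

  // --backend-devices check: build a mask with the lowest backend_devices_cnt
  // bits set; any filter bit above that range refers to a non-existent device

  const uint32_t backend_devices_cnt = 3;

  const uint64_t backend_devices_cnt_mask = ~(((uint64_t) -1 >> backend_devices_cnt) << backend_devices_cnt); // 0b111

  const uint64_t filter_ok  = 0x5; // devices #1 and #3
  const uint64_t filter_bad = 0x9; // device #4 does not exist

  printf ("hook_threads = %u\n", hook_threads);
  printf ("filter 0x%llx valid: %d\n", (unsigned long long) filter_ok,  filter_ok  <= backend_devices_cnt_mask);
  printf ("filter 0x%llx valid: %d\n", (unsigned long long) filter_bad, filter_bad <= backend_devices_cnt_mask);

  return 0;
}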
-static int get_hip_kernel_local_mem_size (hashcat_ctx_t *hashcat_ctx, hipFunction_t function, u64 *result) -{ - int shared_size_bytes; + // additional check to see if the user has chosen a device that is not within the range of available devices (i.e. larger than devices_cnt) - if (hc_hipFuncGetAttribute (hashcat_ctx, &shared_size_bytes, HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, function) == -1) return -1; + if (backend_ctx->backend_devices_filter != (u64) -1) + { + const u64 backend_devices_cnt_mask = ~(((u64) -1 >> backend_ctx->backend_devices_cnt) << backend_ctx->backend_devices_cnt); - *result = (u64) shared_size_bytes; + if (backend_ctx->backend_devices_filter > backend_devices_cnt_mask) + { + event_log_error (hashcat_ctx, "An invalid device was specified using the --backend-devices parameter."); + event_log_error (hashcat_ctx, "The specified device was higher than the number of available devices (%u).", backend_ctx->backend_devices_cnt); - return 0; -} + return -1; + } + } -static int get_opencl_kernel_wgs (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_kernel kernel, u32 *result) -{ - size_t work_group_size = 0; - - if (hc_clGetKernelWorkGroupInfo (hashcat_ctx, kernel, device_param->opencl_device, CL_KERNEL_WORK_GROUP_SIZE, sizeof (work_group_size), &work_group_size, NULL) == -1) return -1; - - u32 kernel_threads = (u32) work_group_size; - - size_t compile_work_group_size[3] = { 0, 0, 0 }; - - if (hc_clGetKernelWorkGroupInfo (hashcat_ctx, kernel, device_param->opencl_device, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, sizeof (compile_work_group_size), &compile_work_group_size, NULL) == -1) return -1; - - const size_t cwgs_total = compile_work_group_size[0] * compile_work_group_size[1] * compile_work_group_size[2]; + // time or resource intensive operations which we do not run if the corresponding device was skipped by the user - if (cwgs_total > 0) + if (backend_ctx->cuda) { - kernel_threads = MIN (kernel_threads, (u32) cwgs_total); - } - - *result = kernel_threads; - - return 0; -} - -static int get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_kernel kernel, u32 *result) -{ - size_t preferred_work_group_size_multiple = 0; - - if (hc_clGetKernelWorkGroupInfo (hashcat_ctx, kernel, device_param->opencl_device, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof (preferred_work_group_size_multiple), &preferred_work_group_size_multiple, NULL) == -1) return -1; - - *result = (u32) preferred_work_group_size_multiple; - - return 0; -} - -static int get_opencl_kernel_local_mem_size (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_kernel kernel, u64 *result) -{ - cl_ulong local_mem_size = 0; + // instruction test for cuda devices was replaced with fixed values (see above) - if (hc_clGetKernelWorkGroupInfo (hashcat_ctx, kernel, device_param->opencl_device, CL_KERNEL_LOCAL_MEM_SIZE, sizeof (local_mem_size), &local_mem_size, NULL) == -1) return -1; + /* + CUcontext cuda_context; - *result = local_mem_size; + if (hc_cuCtxCreate (hashcat_ctx, &cuda_context, CU_CTX_SCHED_BLOCKING_SYNC, device_param->cuda_device) == -1) return -1; - return 0; -} + if (hc_cuCtxSetCurrent (hashcat_ctx, cuda_context) == -1) return -1; -static int get_opencl_kernel_dynamic_local_mem_size (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_kernel kernel, u64 *result) -{ - cl_ulong dynamic_local_mem_size = 0; + #define RUN_INSTRUCTION_CHECKS() \ + device_param->has_add = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, 
"__global__ void test () { unsigned int r; asm volatile (\"add.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \ + device_param->has_addc = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"addc.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \ + device_param->has_sub = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"sub.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \ + device_param->has_subc = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"subc.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \ + device_param->has_bfe = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"bfe.u32 %0, 0, 0, 0;\" : \"=r\"(r)); }"); \ + device_param->has_lop3 = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"lop3.b32 %0, 0, 0, 0, 0;\" : \"=r\"(r)); }"); \ + device_param->has_mov64 = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned long long r; unsigned int a; unsigned int b; asm volatile (\"mov.b64 %0, {%1, %2};\" : \"=l\"(r) : \"r\"(a), \"r\"(b)); }"); \ + device_param->has_prmt = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"prmt.b32 %0, 0, 0, 0;\" : \"=r\"(r)); }"); \ - if (hc_clGetKernelWorkGroupInfo (hashcat_ctx, kernel, device_param->opencl_device, CL_KERNEL_LOCAL_MEM_SIZE, sizeof (dynamic_local_mem_size), &dynamic_local_mem_size, NULL) == -1) return -1; + if (backend_devices_idx > 0) + { + hc_device_param_t *device_param_prev = &devices_param[backend_devices_idx - 1]; - // unknown how to query this information in OpenCL - // we therefore reset to zero - // the above call to hc_clGetKernelWorkGroupInfo() is just to avoid compiler warnings + if (is_same_device_type (device_param, device_param_prev) == true) + { + device_param->has_add = device_param_prev->has_add; + device_param->has_addc = device_param_prev->has_addc; + device_param->has_sub = device_param_prev->has_sub; + device_param->has_subc = device_param_prev->has_subc; + device_param->has_bfe = device_param_prev->has_bfe; + device_param->has_lop3 = device_param_prev->has_lop3; + device_param->has_mov64 = device_param_prev->has_mov64; + device_param->has_prmt = device_param_prev->has_prmt; + } + else + { + RUN_INSTRUCTION_CHECKS(); + } + } + else + { + RUN_INSTRUCTION_CHECKS(); + } - dynamic_local_mem_size = 0; + #undef RUN_INSTRUCTION_CHECKS - *result = dynamic_local_mem_size; + if (hc_cuCtxDestroy (hashcat_ctx, cuda_context) == -1) return -1; - return 0; -} + */ + } -static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const char *kernel_name, char *source_file, char *cached_file, const char *build_options_buf, const bool cache_disable, cl_program *opencl_program, CUmodule *cuda_module, hipModule_t *hip_module) -{ - const hashconfig_t *hashconfig = hashcat_ctx->hashconfig; - const folder_config_t *folder_config = hashcat_ctx->folder_config; - const user_options_t *user_options = hashcat_ctx->user_options; + if (backend_ctx->hip) + { + // TODO HIP? + // Maybe all devices supported by hip have these instructions guaranteed? 
- bool cached = true; + for (int backend_devices_cnt = 0; backend_devices_cnt < backend_ctx->backend_devices_cnt; backend_devices_cnt++) + { + hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_cnt]; - if (cache_disable == true) - { - cached = false; - } + if (device_param->is_hip == false) continue; - if (hc_path_read (cached_file) == false) - { - cached = false; + device_param->has_vadd = true; + device_param->has_vaddc = true; + device_param->has_vadd_co = true; + device_param->has_vaddc_co = true; + device_param->has_vsub = true; + device_param->has_vsubb = true; + device_param->has_vsub_co = true; + device_param->has_vsubb_co = true; + device_param->has_vadd3 = true; + device_param->has_vbfe = true; + device_param->has_vperm = true; + } } - if (hc_path_is_empty (cached_file) == true) + #if defined (__APPLE__) + if (backend_ctx->mtl) { - cached = false; - } + for (int backend_devices_cnt = 0; backend_devices_cnt < backend_ctx->backend_devices_cnt; backend_devices_cnt++) + { + hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_cnt]; - /** - * kernel compile or load - */ + if (device_param->is_metal == false) continue; - size_t kernel_lengths_buf = 0; + if (user_options->backend_info == false) + { + // do not ignore in case -I because user expects a value also for skipped devices - size_t *kernel_lengths = &kernel_lengths_buf; + if (device_param->skipped == true) continue; + } - char *kernel_sources_buf = NULL; + /** + * create command-queue + */ - char **kernel_sources = &kernel_sources_buf; + mtl_command_queue command_queue; - if (cached == false) - { - #if defined (DEBUG) - if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s not found in cache. Please be patient...", device_param->device_id + 1, filename_from_filepath (cached_file)); - #endif + if (hc_mtlCreateCommandQueue (hashcat_ctx, device_param->metal_device, &command_queue) == -1) + { + device_param->skipped = true; + continue; + } - if (read_kernel_binary (hashcat_ctx, source_file, kernel_lengths, kernel_sources) == false) return false; + // available device memory + // This test causes an GPU memory usage spike. + // In case there are multiple hashcat instances starting at the same time this will cause GPU out of memory errors which otherwise would not exist. + // We will simply not run it if that device was skipped by the user. - if (device_param->is_cuda == true) - { - nvrtcProgram program; + #define MAX_ALLOC_CHECKS_CNT 8192 + #define MAX_ALLOC_CHECKS_SIZE (64 * 1024 * 1024) - if (hc_nvrtcCreateProgram (hashcat_ctx, &program, kernel_sources[0], kernel_name, 0, NULL, NULL) == -1) return false; + device_param->device_available_mem = device_param->device_global_mem - MAX_ALLOC_CHECKS_SIZE; - char **nvrtc_options = (char **) hccalloc (5 + strlen (build_options_buf) + 1, sizeof (char *)); // ... 
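The instruction-set handling in the hunks above replaces per-device PTX test compilations with fixed capability tables: NVIDIA devices get their flags derived from the compute capability, and HIP devices are simply assumed to support the full vector-ALU set. A minimal standalone sketch of the NVIDIA table follows; the thresholds are copied from the hunk above, while the struct and function names are illustrative only.

/*
 * Standalone sketch, not hashcat code. Thresholds match the fixed values the
 * patch assigns for NVIDIA devices (sm = sm_major * 10 + sm_minor).
 */

#include <stdbool.h>
#include <stdio.h>

typedef struct
{
  bool has_add, has_addc, has_sub, has_subc;
  bool has_bfe, has_lop3, has_mov64, has_prmt;

} nv_isa_caps_t;

static nv_isa_caps_t nv_isa_caps_from_sm (const int sm_major, const int sm_minor)
{
  const int sm = (sm_major * 10) + sm_minor;

  nv_isa_caps_t caps;

  caps.has_add   = (sm >= 12);
  caps.has_addc  = (sm >= 12);
  caps.has_sub   = (sm >= 12);
  caps.has_subc  = (sm >= 12);
  caps.has_bfe   = (sm >= 20);
  caps.has_lop3  = (sm >= 50);
  caps.has_mov64 = (sm >= 10);
  caps.has_prmt  = (sm >= 20);

  return caps;
}

int main (void)
{
  const nv_isa_caps_t maxwell = nv_isa_caps_from_sm (5, 0);

  printf ("sm_50: lop3=%d prmt=%d\n", maxwell.has_lop3, maxwell.has_prmt);

  return 0;
}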
+ if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) + { + // following the same logic as for OpenCL, explained later - nvrtc_options[0] = "--restrict"; - nvrtc_options[1] = "--device-as-default-execution-space"; - nvrtc_options[2] = "--gpu-architecture"; + mtl_mem *tmp_device = (mtl_mem *) hccalloc (MAX_ALLOC_CHECKS_CNT, sizeof (mtl_mem)); - hc_asprintf (&nvrtc_options[3], "compute_%d%d", device_param->sm_major, device_param->sm_minor); + u64 c; - // untested on windows, but it should work - #if defined (_WIN) || defined (__CYGWIN__) || defined (__MSYS__) - hc_asprintf (&nvrtc_options[4], "-D INCLUDE_PATH=%s", "OpenCL"); - #else - hc_asprintf (&nvrtc_options[4], "-D INCLUDE_PATH=%s", folder_config->cpath_real); - #endif + for (c = 0; c < MAX_ALLOC_CHECKS_CNT; c++) + { + if (((c + 1 + 1) * MAX_ALLOC_CHECKS_SIZE) >= device_param->device_global_mem) break; - char *nvrtc_options_string = hcstrdup (build_options_buf); + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, MAX_ALLOC_CHECKS_SIZE, NULL, &tmp_device[c]) == -1) + { + c--; - const int num_options = 5 + nvrtc_make_options_array_from_string (nvrtc_options_string, nvrtc_options + 5); + break; + } - const int rc_nvrtcCompileProgram = hc_nvrtcCompileProgram (hashcat_ctx, program, num_options, (const char * const *) nvrtc_options); + // transfer only a few byte should be enough to force the runtime to actually allocate the memory - hcfree (nvrtc_options_string); - hcfree (nvrtc_options); + u8 tmp_host[8] = { 1, 2, 3, 4, 5, 6, 7, 8 }; - size_t build_log_size = 0; + if (hc_mtlMemcpyHtoD (hashcat_ctx, command_queue, tmp_device[c], 0, tmp_host, sizeof (tmp_host)) == -1) break; + if (hc_mtlMemcpyDtoH (hashcat_ctx, command_queue, tmp_host, tmp_device[c], 0, sizeof (tmp_host)) == -1) break; - hc_nvrtcGetProgramLogSize (hashcat_ctx, program, &build_log_size); + if (hc_mtlMemcpyHtoD (hashcat_ctx, command_queue, tmp_device[c], MAX_ALLOC_CHECKS_SIZE - sizeof (tmp_host), tmp_host, sizeof (tmp_host)) == -1) break; + if (hc_mtlMemcpyDtoH (hashcat_ctx, command_queue, tmp_host, tmp_device[c], MAX_ALLOC_CHECKS_SIZE - sizeof (tmp_host), sizeof (tmp_host)) == -1) break; + } - #if defined (DEBUG) - if ((build_log_size > 1) || (rc_nvrtcCompileProgram == -1)) - #else - if (rc_nvrtcCompileProgram == -1) - #endif - { - char *build_log = (char *) hcmalloc (build_log_size + 1); + device_param->device_available_mem = MAX_ALLOC_CHECKS_SIZE; - if (hc_nvrtcGetProgramLog (hashcat_ctx, program, build_log) == -1) + if (c > 0) { - hcfree (build_log); - - return false; + device_param->device_available_mem *= c; } - build_log[build_log_size] = 0; + // clean up - puts (build_log); + for (c = 0; c < MAX_ALLOC_CHECKS_CNT; c++) + { + if (((c + 1 + 1) * MAX_ALLOC_CHECKS_SIZE) >= device_param->device_global_mem) break; - hcfree (build_log); + if (tmp_device[c] != NULL) + { + if (hc_mtlReleaseMemObject (hashcat_ctx, tmp_device[c]) == -1) return -1; + } + } + + hcfree (tmp_device); } - if (rc_nvrtcCompileProgram == -1) + hc_mtlReleaseCommandQueue (hashcat_ctx, command_queue); + + if (device_param->device_host_unified_memory == 1) { - event_log_error (hashcat_ctx, "* Device #%u: Kernel %s build failed.", device_param->device_id + 1, source_file); + // so, we actually have only half the memory because we need the same buffers on host side - return false; + device_param->device_available_mem /= 2; } + } + } + #endif // __APPLE__ - size_t binary_size = 0; - - if (hc_nvrtcGetPTXSize (hashcat_ctx, program, &binary_size) == -1) return false; + if (backend_ctx->ocl) + { 
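Both the Metal hunk above and the OpenCL hunk below estimate usable device memory the same way: the runtimes only report total memory and allocate lazily, so hashcat allocates 64 MiB chunks until an allocation fails and treats the sum as the available amount. Below is a minimal standalone sketch of that pattern; vram_alloc()/vram_free() are hypothetical stand-ins for the real hc_mtlCreateBuffer()/clCreateBuffer() calls so the sketch runs on its own, and the memory sizes are made up.

/*
 * Standalone sketch, not hashcat code. vram_alloc()/vram_free() are
 * hypothetical stand-ins that enforce a simulated budget; in the patch the
 * equivalent calls are hc_mtlCreateBuffer()/clCreateBuffer() plus small
 * host<->device copies to force the lazy allocation to actually happen.
 */

#include <stdio.h>
#include <stdint.h>

#define MAX_ALLOC_CHECKS_CNT  8192
#define MAX_ALLOC_CHECKS_SIZE (64ULL * 1024 * 1024)

static uint64_t simulated_free_mem = 3ULL * 1024 * 1024 * 1024; // pretend 3 GiB are still free

static int vram_alloc (const uint64_t size) // returns -1 on failure, like the hc_* wrappers
{
  if (size > simulated_free_mem) return -1;

  simulated_free_mem -= size;

  return 0;
}

static void vram_free (const uint64_t size)
{
  simulated_free_mem += size;
}

int main (void)
{
  const uint64_t device_global_mem = 8ULL * 1024 * 1024 * 1024; // reported total, 8 GiB

  uint64_t c;

  for (c = 0; c < MAX_ALLOC_CHECKS_CNT; c++)
  {
    // never try to grab the very last chunk of the reported total

    if (((c + 1 + 1) * MAX_ALLOC_CHECKS_SIZE) >= device_global_mem) break;

    if (vram_alloc (MAX_ALLOC_CHECKS_SIZE) == -1) break;
  }

  const uint64_t device_available_mem = MAX_ALLOC_CHECKS_SIZE * ((c > 0) ? c : 1);

  // release the probe allocations again

  for (uint64_t i = 0; i < c; i++) vram_free (MAX_ALLOC_CHECKS_SIZE);

  printf ("estimated available memory: %llu MiB\n", (unsigned long long) (device_available_mem / (1024 * 1024)));

  return 0;
}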
+ for (int backend_devices_cnt = 0; backend_devices_cnt < backend_ctx->backend_devices_cnt; backend_devices_cnt++) + { + hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_cnt]; - char *binary = (char *) hcmalloc (binary_size); + if (device_param->is_opencl == false) continue; - if (hc_nvrtcGetPTX (hashcat_ctx, program, binary) == -1) return false; + if (user_options->backend_info == false) + { + // do not ignore in case -I because user expects a value also for skipped devices - if (hc_nvrtcDestroyProgram (hashcat_ctx, &program) == -1) return false; + if (device_param->skipped == true) continue; + } - #define LOG_SIZE 8192 + /** + * create context for each device + */ - char *mod_info_log = (char *) hcmalloc (LOG_SIZE + 1); - char *mod_error_log = (char *) hcmalloc (LOG_SIZE + 1); + cl_context context; - int mod_cnt = 6; - - CUjit_option mod_opts[7]; - void *mod_vals[7]; - - mod_opts[0] = CU_JIT_TARGET_FROM_CUCONTEXT; - mod_vals[0] = (void *) 0; + /* + cl_context_properties properties[3]; - mod_opts[1] = CU_JIT_LOG_VERBOSE; - mod_vals[1] = (void *) 1; + properties[0] = CL_CONTEXT_PLATFORM; + properties[1] = (cl_context_properties) device_param->opencl_platform; + properties[2] = 0; - mod_opts[2] = CU_JIT_INFO_LOG_BUFFER; - mod_vals[2] = (void *) mod_info_log; + CL_rc = hc_clCreateContext (hashcat_ctx, properties, 1, &device_param->opencl_device, NULL, NULL, &context); + */ - mod_opts[3] = CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES; - mod_vals[3] = (void *) LOG_SIZE; + if (hc_clCreateContext (hashcat_ctx, NULL, 1, &device_param->opencl_device, NULL, NULL, &context) == -1) + { + device_param->skipped = true; + continue; + } - mod_opts[4] = CU_JIT_ERROR_LOG_BUFFER; - mod_vals[4] = (void *) mod_error_log; + /** + * create command-queue + */ - mod_opts[5] = CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES; - mod_vals[5] = (void *) LOG_SIZE; + cl_command_queue command_queue; - if (hashconfig->opti_type & OPTI_TYPE_REGISTER_LIMIT) + if (hc_clCreateCommandQueue (hashcat_ctx, context, device_param->opencl_device, 0, &command_queue) == -1) { - mod_opts[6] = CU_JIT_MAX_REGISTERS; - mod_vals[6] = (void *) 128; - - mod_cnt++; + device_param->skipped = true; + continue; } - #if defined (WITH_CUBIN) + // instruction set - char *jit_info_log = (char *) hcmalloc (LOG_SIZE + 1); - char *jit_error_log = (char *) hcmalloc (LOG_SIZE + 1); + if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && (device_param->opencl_platform_vendor_id == VENDOR_ID_AMD)) + { + #define RUN_INSTRUCTION_CHECKS() \ + device_param->has_vadd = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADD_U32 %0, vcc, 0, 0;\" : \"=v\"(r1)); }"); \ + device_param->has_vaddc = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADDC_U32 %0, vcc, 0, 0, vcc;\" : \"=v\"(r1)); }"); \ + device_param->has_vadd_co = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADD_CO_U32 %0, vcc, 0, 0;\" : \"=v\"(r1)); }"); \ + device_param->has_vaddc_co = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADDC_CO_U32 %0, vcc, 0, 0, vcc;\" : \"=v\"(r1)); }"); \ + device_param->has_vsub = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ 
(\"V_SUB_U32 %0, vcc, 0, 0;\" : \"=v\"(r1)); }"); \ + device_param->has_vsubb = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_SUBB_U32 %0, vcc, 0, 0, vcc;\" : \"=v\"(r1)); }"); \ + device_param->has_vsub_co = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_SUB_CO_U32 %0, vcc, 0, 0;\" : \"=v\"(r1)); }"); \ + device_param->has_vsubb_co = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_SUBB_CO_U32 %0, vcc, 0, 0, vcc;\" : \"=v\"(r1)); }"); \ + device_param->has_vadd3 = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADD3_U32 %0, 0, 0, 0;\" : \"=v\"(r1)); }"); \ + device_param->has_vbfe = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_BFE_U32 %0, 0, 0, 0;\" : \"=v\"(r1)); }"); \ + device_param->has_vperm = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_PERM_B32 %0, 0, 0, 0;\" : \"=v\"(r1)); }"); \ - int jit_cnt = 6; + if (backend_devices_idx > 0) + { + hc_device_param_t *device_param_prev = &devices_param[backend_devices_idx - 1]; - CUjit_option jit_opts[7]; - void *jit_vals[7]; + if (is_same_device_type (device_param, device_param_prev) == true) + { + device_param->has_vadd = device_param_prev->has_vadd; + device_param->has_vaddc = device_param_prev->has_vaddc; + device_param->has_vadd_co = device_param_prev->has_vadd_co; + device_param->has_vaddc_co = device_param_prev->has_vaddc_co; + device_param->has_vsub = device_param_prev->has_vsub; + device_param->has_vsubb = device_param_prev->has_vsubb; + device_param->has_vsub_co = device_param_prev->has_vsub_co; + device_param->has_vsubb_co = device_param_prev->has_vsubb_co; + device_param->has_vadd3 = device_param_prev->has_vadd3; + device_param->has_vbfe = device_param_prev->has_vbfe; + device_param->has_vperm = device_param_prev->has_vperm; + } + else + { + RUN_INSTRUCTION_CHECKS(); + } + } + else + { + RUN_INSTRUCTION_CHECKS(); + } - jit_opts[0] = CU_JIT_TARGET_FROM_CUCONTEXT; - jit_vals[0] = (void *) 0; + #undef RUN_INSTRUCTION_CHECKS + } - jit_opts[1] = CU_JIT_LOG_VERBOSE; - jit_vals[1] = (void *) 1; + if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && (device_param->opencl_platform_vendor_id == VENDOR_ID_NV)) + { + // replaced with fixed values see non time intensive section above - jit_opts[2] = CU_JIT_INFO_LOG_BUFFER; - jit_vals[2] = (void *) jit_info_log; + /* + #define RUN_INSTRUCTION_CHECKS() \ + device_param->has_add = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"add.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \ + device_param->has_addc = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"addc.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \ + device_param->has_sub = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"sub.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \ + device_param->has_subc = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; 
asm volatile (\"subc.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \ + device_param->has_bfe = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"bfe.u32 %0, 0, 0, 0;\" : \"=r\"(r)); }"); \ + device_param->has_lop3 = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"lop3.b32 %0, 0, 0, 0, 0;\" : \"=r\"(r)); }"); \ + device_param->has_mov64 = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { ulong r; uint a; uint b; asm volatile (\"mov.b64 %0, {%1, %2};\" : \"=l\"(r) : \"r\"(a), \"r\"(b)); }"); \ + device_param->has_prmt = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"prmt.b32 %0, 0, 0, 0;\" : \"=r\"(r)); }"); \ - jit_opts[3] = CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES; - jit_vals[3] = (void *) LOG_SIZE; + if (backend_devices_idx > 0) + { + hc_device_param_t *device_param_prev = &devices_param[backend_devices_idx - 1]; - jit_opts[4] = CU_JIT_ERROR_LOG_BUFFER; - jit_vals[4] = (void *) jit_error_log; + if (is_same_device_type (device_param, device_param_prev) == true) + { + device_param->has_add = device_param_prev->has_add; + device_param->has_addc = device_param_prev->has_addc; + device_param->has_sub = device_param_prev->has_sub; + device_param->has_subc = device_param_prev->has_subc; + device_param->has_bfe = device_param_prev->has_bfe; + device_param->has_lop3 = device_param_prev->has_lop3; + device_param->has_mov64 = device_param_prev->has_mov64; + device_param->has_prmt = device_param_prev->has_prmt; + } + else + { + RUN_INSTRUCTION_CHECKS(); + } + } + else + { + RUN_INSTRUCTION_CHECKS(); + } - jit_opts[5] = CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES; - jit_vals[5] = (void *) LOG_SIZE; + #undef RUN_INSTRUCTION_CHECKS + */ + } - if (hashconfig->opti_type & OPTI_TYPE_REGISTER_LIMIT) - { - jit_opts[6] = CU_JIT_MAX_REGISTERS; - jit_vals[6] = (void *) 128; + // available device memory + // This test causes an GPU memory usage spike. + // In case there are multiple hashcat instances starting at the same time this will cause GPU out of memory errors which otherwise would not exist. + // We will simply not run it if that device was skipped by the user. - jit_cnt++; - } + #define MAX_ALLOC_CHECKS_CNT 8192 + #define MAX_ALLOC_CHECKS_SIZE (64 * 1024 * 1024) - CUlinkState state; + device_param->device_available_mem = device_param->device_global_mem - MAX_ALLOC_CHECKS_SIZE; - if (hc_cuLinkCreate (hashcat_ctx, jit_cnt, jit_opts, jit_vals, &state) == -1) + if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) { - event_log_error (hashcat_ctx, "* Device #%u: Kernel %s link failed. Error Log:", device_param->device_id + 1, source_file); - event_log_error (hashcat_ctx, "%s", jit_error_log); - event_log_error (hashcat_ctx, NULL); + // OK, so the problem here is the following: + // There's just CL_DEVICE_GLOBAL_MEM_SIZE to ask OpenCL about the total memory on the device, + // but there's no way to ask for available memory on the device. + // In combination, most OpenCL runtimes implementation of clCreateBuffer() + // are doing so called lazy memory allocation on the device. 
+ // Now, if the user has X11 (or a game or anything that takes a lot of GPU memory) + // running on the host we end up with an error type of this: + // clEnqueueNDRangeKernel(): CL_MEM_OBJECT_ALLOCATION_FAILURE + // The clEnqueueNDRangeKernel() is because of the lazy allocation + // The best way to workaround this problem is if we would be able to ask for available memory, + // The idea here is to try to evaluate available memory by allocating it till it errors - return false; - } + cl_mem *tmp_device = (cl_mem *) hccalloc (MAX_ALLOC_CHECKS_CNT, sizeof (cl_mem)); - if (hc_cuLinkAddData (hashcat_ctx, state, CU_JIT_INPUT_PTX, binary, binary_size, kernel_name, 0, NULL, NULL) == -1) - { - event_log_error (hashcat_ctx, "* Device #%u: Kernel %s link failed. Error Log:", device_param->device_id + 1, source_file); - event_log_error (hashcat_ctx, "%s", jit_error_log); - event_log_error (hashcat_ctx, NULL); + u64 c; - return false; - } + for (c = 0; c < MAX_ALLOC_CHECKS_CNT; c++) + { + if (((c + 1 + 1) * MAX_ALLOC_CHECKS_SIZE) >= device_param->device_global_mem) break; - void *cubin = NULL; + cl_int CL_err; - size_t cubin_size = 0; + OCL_PTR *ocl = (OCL_PTR *) backend_ctx->ocl; - if (hc_cuLinkComplete (hashcat_ctx, state, &cubin, &cubin_size) == -1) - { - event_log_error (hashcat_ctx, "* Device #%u: Kernel %s link failed. Error Log:", device_param->device_id + 1, source_file); - event_log_error (hashcat_ctx, "%s", jit_error_log); - event_log_error (hashcat_ctx, NULL); + tmp_device[c] = ocl->clCreateBuffer (context, CL_MEM_READ_WRITE, MAX_ALLOC_CHECKS_SIZE, NULL, &CL_err); - return false; - } + if (CL_err != CL_SUCCESS) + { + c--; - #if defined (DEBUG) - event_log_info (hashcat_ctx, "* Device #%u: Kernel %s link successful. Info Log:", device_param->device_id + 1, source_file); - event_log_info (hashcat_ctx, "%s", jit_info_log); - event_log_info (hashcat_ctx, NULL); - #endif + break; + } - if (hc_cuModuleLoadDataEx (hashcat_ctx, cuda_module, cubin, mod_cnt, mod_opts, mod_vals) == -1) - { - event_log_error (hashcat_ctx, "* Device #%u: Kernel %s load failed. Error Log:", device_param->device_id + 1, source_file); - event_log_error (hashcat_ctx, "%s", mod_error_log); - event_log_error (hashcat_ctx, NULL); + // transfer only a few byte should be enough to force the runtime to actually allocate the memory - return false; - } + u8 tmp_host[8]; - #if defined (DEBUG) - event_log_info (hashcat_ctx, "* Device #%u: Kernel %s load successful. 
Info Log:", device_param->device_id + 1, source_file); - event_log_info (hashcat_ctx, "%s", mod_info_log); - event_log_info (hashcat_ctx, NULL); - #endif + if (ocl->clEnqueueReadBuffer (command_queue, tmp_device[c], CL_TRUE, 0, sizeof (tmp_host), tmp_host, 0, NULL, NULL) != CL_SUCCESS) break; + if (ocl->clEnqueueWriteBuffer (command_queue, tmp_device[c], CL_TRUE, 0, sizeof (tmp_host), tmp_host, 0, NULL, NULL) != CL_SUCCESS) break; - if (cache_disable == false) - { - if (write_kernel_binary (hashcat_ctx, cached_file, cubin, cubin_size) == false) return false; - } + if (ocl->clEnqueueReadBuffer (command_queue, tmp_device[c], CL_TRUE, MAX_ALLOC_CHECKS_SIZE - sizeof (tmp_host), sizeof (tmp_host), tmp_host, 0, NULL, NULL) != CL_SUCCESS) break; + if (ocl->clEnqueueWriteBuffer (command_queue, tmp_device[c], CL_TRUE, MAX_ALLOC_CHECKS_SIZE - sizeof (tmp_host), sizeof (tmp_host), tmp_host, 0, NULL, NULL) != CL_SUCCESS) break; + } - if (hc_cuLinkDestroy (hashcat_ctx, state) == -1) return false; + device_param->device_available_mem = MAX_ALLOC_CHECKS_SIZE; - hcfree (jit_info_log); - hcfree (jit_error_log); + if (c > 0) + { + device_param->device_available_mem *= c; + } - #else + // clean up - if (hc_cuModuleLoadDataEx (hashcat_ctx, cuda_module, binary, mod_cnt, mod_opts, mod_vals) == -1) - { - event_log_error (hashcat_ctx, "* Device #%u: Kernel %s load failed. Error Log:", device_param->device_id + 1, source_file); - event_log_error (hashcat_ctx, "%s", mod_error_log); - event_log_error (hashcat_ctx, NULL); + for (c = 0; c < MAX_ALLOC_CHECKS_CNT; c++) + { + if (((c + 1 + 1) * MAX_ALLOC_CHECKS_SIZE) >= device_param->device_global_mem) break; - return false; + if (tmp_device[c] != NULL) + { + if (hc_clReleaseMemObject (hashcat_ctx, tmp_device[c]) == -1) return -1; + } + } + + hcfree (tmp_device); } - #if defined (DEBUG) - event_log_info (hashcat_ctx, "* Device #%u: Kernel %s load successful. Info Log:", device_param->device_id + 1, source_file); - event_log_info (hashcat_ctx, "%s", mod_info_log); - event_log_info (hashcat_ctx, NULL); - #endif + hc_clReleaseCommandQueue (hashcat_ctx, command_queue); - if (cache_disable == false) + hc_clReleaseContext (hashcat_ctx, context); + + if (device_param->device_host_unified_memory == 1) { - if (write_kernel_binary (hashcat_ctx, cached_file, binary, binary_size) == false) return false; + // so, we actually have only half the memory because we need the same buffers on host side + + device_param->device_available_mem /= 2; } + } + } - #endif + backend_ctx->target_msec = TARGET_MSEC_PROFILE[user_options->workload_profile - 1]; - hcfree (mod_info_log); - hcfree (mod_error_log); + backend_ctx->need_adl = need_adl; + backend_ctx->need_nvml = need_nvml; + backend_ctx->need_nvapi = need_nvapi; + backend_ctx->need_sysfs_amdgpu = need_sysfs_amdgpu; + backend_ctx->need_sysfs_cpu = need_sysfs_cpu; + backend_ctx->need_iokit = need_iokit; - hcfree (binary); - } + backend_ctx->comptime = comptime; - if (device_param->is_hip == true) - { - hiprtcProgram program; + return 0; +} - if (hc_hiprtcCreateProgram (hashcat_ctx, &program, kernel_sources[0], kernel_name, 0, NULL, NULL) == -1) return false; +void backend_ctx_devices_destroy (hashcat_ctx_t *hashcat_ctx) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - char **hiprtc_options = (char **) hccalloc (6 + strlen (build_options_buf) + 1, sizeof (char *)); // ... 
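The backend_ctx_devices_sync_tuning() hunk here copies the auto-tuned kernel_accel/kernel_loops/kernel_threads values between identical devices and then recomputes the two power figures derived from them: hardware_power is processors times threads (just threads when OPTS_TYPE_MP_MULTI_DISABLE is set), and kernel_power multiplies that by kernel_accel; the calculation itself appears a little further down. A minimal standalone sketch of that arithmetic, with example numbers only:

/*
 * Standalone sketch, not hashcat code. The formula matches the sync-tuning
 * hunk; device_processors, kernel_threads and kernel_accel are example values.
 */

#include <stdbool.h>
#include <stdio.h>
#include <stdint.h>

static uint32_t hardware_power (const bool mp_multi_disable, const uint32_t device_processors, const uint32_t kernel_threads)
{
  return (mp_multi_disable ? 1 : device_processors) * kernel_threads;
}

int main (void)
{
  const uint32_t device_processors = 68;  // e.g. number of compute units
  const uint32_t kernel_threads    = 256;
  const uint32_t kernel_accel      = 32;

  const uint32_t hw_power     = hardware_power (false, device_processors, kernel_threads);
  const uint32_t kernel_power = hw_power * kernel_accel;

  printf ("hardware_power = %u, kernel_power = %u\n", hw_power, kernel_power);

  return 0;
}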
+ if (backend_ctx->enabled == false) return; - //hiprtc_options[0] = "--restrict"; - //hiprtc_options[1] = "--device-as-default-execution-space"; - //hiprtc_options[2] = "--gpu-architecture"; + for (u32 opencl_platforms_idx = 0; opencl_platforms_idx < backend_ctx->opencl_platforms_cnt; opencl_platforms_idx++) + { + hcfree (backend_ctx->opencl_platforms_devices[opencl_platforms_idx]); + hcfree (backend_ctx->opencl_platforms_name[opencl_platforms_idx]); + hcfree (backend_ctx->opencl_platforms_vendor[opencl_platforms_idx]); + hcfree (backend_ctx->opencl_platforms_version[opencl_platforms_idx]); + } - hc_asprintf (&hiprtc_options[0], "--gpu-max-threads-per-block=%d", (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : device_param->kernel_threads_max); + for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++) + { + hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx]; - /* 4.3 linux - hiprtc_options[1] = "-I"; - hiprtc_options[2] = "/opt/rocm/hip/bin/include"; - hiprtc_options[3] = "-I"; - hiprtc_options[4] = "/opt/rocm/include"; - hiprtc_options[5] = "-I"; - */ + hcfree (device_param->device_name); - hiprtc_options[1] = "-nocudainc"; - hiprtc_options[2] = "-nocudalib"; - hiprtc_options[3] = ""; - hiprtc_options[4] = ""; + if (device_param->is_opencl == true) + { + hcfree (device_param->opencl_driver_version); + hcfree (device_param->opencl_device_version); + hcfree (device_param->opencl_device_c_version); + hcfree (device_param->opencl_device_vendor); + } + } - // untested but it should work - #if defined (_WIN) || defined (__CYGWIN__) || defined (__MSYS__) - hc_asprintf (&hiprtc_options[5], "-D INCLUDE_PATH=%s", "OpenCL"); - #else - hc_asprintf (&hiprtc_options[5], "-D INCLUDE_PATH=%s", folder_config->cpath_real); - #endif + backend_ctx->backend_devices_cnt = 0; + backend_ctx->backend_devices_active = 0; + backend_ctx->cuda_devices_cnt = 0; + backend_ctx->cuda_devices_active = 0; + backend_ctx->hip_devices_cnt = 0; + backend_ctx->hip_devices_active = 0; + backend_ctx->metal_devices_cnt = 0; + backend_ctx->metal_devices_active = 0; + backend_ctx->opencl_devices_cnt = 0; + backend_ctx->opencl_devices_active = 0; - char *hiprtc_options_string = hcstrdup (build_options_buf); + backend_ctx->need_adl = false; + backend_ctx->need_nvml = false; + backend_ctx->need_nvapi = false; + backend_ctx->need_sysfs_amdgpu = false; + backend_ctx->need_sysfs_cpu = false; + backend_ctx->need_iokit = false; +} - const int num_options = 6 + hiprtc_make_options_array_from_string (hiprtc_options_string, hiprtc_options + 6); +void backend_ctx_devices_sync_tuning (hashcat_ctx_t *hashcat_ctx) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + hashconfig_t *hashconfig = hashcat_ctx->hashconfig; - const int rc_hiprtcCompileProgram = hc_hiprtcCompileProgram (hashcat_ctx, program, num_options, (const char * const *) hiprtc_options); + if (backend_ctx->enabled == false) return; - hcfree (hiprtc_options_string); - hcfree (hiprtc_options); + for (int backend_devices_cnt_src = 0; backend_devices_cnt_src < backend_ctx->backend_devices_cnt; backend_devices_cnt_src++) + { + hc_device_param_t *device_param_src = &backend_ctx->devices_param[backend_devices_cnt_src]; - size_t build_log_size = 0; + if (device_param_src->skipped == true) continue; + if (device_param_src->skipped_warning == true) continue; - hc_hiprtcGetProgramLogSize (hashcat_ctx, program, &build_log_size); + for (int backend_devices_cnt_dst = 
backend_devices_cnt_src + 1; backend_devices_cnt_dst < backend_ctx->backend_devices_cnt; backend_devices_cnt_dst++) + { + hc_device_param_t *device_param_dst = &backend_ctx->devices_param[backend_devices_cnt_dst]; - #if defined (DEBUG) - if ((build_log_size > 1) || (rc_hiprtcCompileProgram == -1)) - #else - if (rc_hiprtcCompileProgram == -1) - #endif - { - char *build_log = (char *) hcmalloc (build_log_size + 1); + if (device_param_dst->skipped == true) continue; + if (device_param_dst->skipped_warning == true) continue; - if (hc_hiprtcGetProgramLog (hashcat_ctx, program, build_log) == -1) - { - hcfree (build_log); + if (is_same_device_type (device_param_src, device_param_dst) == false) continue; - return false; - } + device_param_dst->kernel_accel = device_param_src->kernel_accel; + device_param_dst->kernel_loops = device_param_src->kernel_loops; + device_param_dst->kernel_threads = device_param_src->kernel_threads; - build_log[build_log_size] = 0; + const u32 hardware_power = ((hashconfig->opts_type & OPTS_TYPE_MP_MULTI_DISABLE) ? 1 : device_param_dst->device_processors) * device_param_dst->kernel_threads; - puts (build_log); + device_param_dst->hardware_power = hardware_power; - hcfree (build_log); - } + const u32 kernel_power = device_param_dst->hardware_power * device_param_dst->kernel_accel; - if (rc_hiprtcCompileProgram == -1) - { - event_log_error (hashcat_ctx, "* Device #%u: Kernel %s build failed.", device_param->device_id + 1, source_file); + device_param_dst->kernel_power = kernel_power; + } + } +} - return false; - } +void backend_ctx_devices_update_power (hashcat_ctx_t *hashcat_ctx) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + status_ctx_t *status_ctx = hashcat_ctx->status_ctx; + user_options_extra_t *user_options_extra = hashcat_ctx->user_options_extra; + user_options_t *user_options = hashcat_ctx->user_options; - size_t binary_size = 0; + if (backend_ctx->enabled == false) return; - if (hc_hiprtcGetCodeSize (hashcat_ctx, program, &binary_size) == -1) return false; + u32 kernel_power_all = 0; - char *binary = (char *) hcmalloc (binary_size); + for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++) + { + hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx]; - if (hc_hiprtcGetCode (hashcat_ctx, program, binary) == -1) return false; + if (device_param->skipped == true) continue; + if (device_param->skipped_warning == true) continue; - if (hc_hiprtcDestroyProgram (hashcat_ctx, &program) == -1) return false; + kernel_power_all += device_param->kernel_power; + } - #define LOG_SIZE 8192 + backend_ctx->kernel_power_all = kernel_power_all; - char *mod_info_log = (char *) hcmalloc (LOG_SIZE + 1); - char *mod_error_log = (char *) hcmalloc (LOG_SIZE + 1); + /* + * Inform user about possible slow speeds + */ - int mod_cnt = 6; + if ((user_options_extra->wordlist_mode == WL_MODE_FILE) || (user_options_extra->wordlist_mode == WL_MODE_MASK)) + { + if (status_ctx->words_base < kernel_power_all) + { + if (user_options->quiet == false) + { + clear_prompt (hashcat_ctx); - hipJitOption mod_opts[6]; - void *mod_vals[6]; + event_log_advice (hashcat_ctx, "The wordlist or mask that you are using is too small."); + event_log_advice (hashcat_ctx, "This means that hashcat cannot use the full parallel power of your device(s)."); + event_log_advice (hashcat_ctx, "Unless you supply more work, your cracking speed will drop."); + event_log_advice (hashcat_ctx, "For tips on supplying more work, see: 
https://hashcat.net/faq/morework"); + event_log_advice (hashcat_ctx, NULL); + } + } + } +} - mod_opts[0] = hipJitOptionTargetFromContext; - mod_vals[0] = (void *) 0; +void backend_ctx_devices_kernel_loops (hashcat_ctx_t *hashcat_ctx) +{ + combinator_ctx_t *combinator_ctx = hashcat_ctx->combinator_ctx; + hashconfig_t *hashconfig = hashcat_ctx->hashconfig; + hashes_t *hashes = hashcat_ctx->hashes; + mask_ctx_t *mask_ctx = hashcat_ctx->mask_ctx; + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + straight_ctx_t *straight_ctx = hashcat_ctx->straight_ctx; + user_options_t *user_options = hashcat_ctx->user_options; + user_options_extra_t *user_options_extra = hashcat_ctx->user_options_extra; - mod_opts[1] = hipJitOptionLogVerbose; - mod_vals[1] = (void *) 1; + if (backend_ctx->enabled == false) return; - mod_opts[2] = hipJitOptionInfoLogBuffer; - mod_vals[2] = (void *) mod_info_log; + for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++) + { + hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx]; - mod_opts[3] = hipJitOptionInfoLogBufferSizeBytes; - mod_vals[3] = (void *) LOG_SIZE; + if (device_param->skipped == true) continue; + if (device_param->skipped_warning == true) continue; - mod_opts[4] = hipJitOptionErrorLogBuffer; - mod_vals[4] = (void *) mod_error_log; + device_param->kernel_loops_min = device_param->kernel_loops_min_sav; + device_param->kernel_loops_max = device_param->kernel_loops_max_sav; - mod_opts[5] = hipJitOptionErrorLogBufferSizeBytes; - mod_vals[5] = (void *) LOG_SIZE; + if (device_param->kernel_loops_min < device_param->kernel_loops_max) + { + u32 innerloop_cnt = 0; - if (hc_hipModuleLoadDataEx (hashcat_ctx, hip_module, binary, mod_cnt, mod_opts, mod_vals) == -1) + if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) { - event_log_error (hashcat_ctx, "* Device #%u: Kernel %s load failed. Error Log:", device_param->device_id + 1, source_file); - event_log_error (hashcat_ctx, "%s", mod_error_log); - event_log_error (hashcat_ctx, NULL); - - return false; + if (user_options->slow_candidates == true) + { + innerloop_cnt = 1; + } + else + { + if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT) innerloop_cnt = MIN (KERNEL_RULES, (u32) straight_ctx->kernel_rules_cnt); + else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI) innerloop_cnt = MIN (KERNEL_COMBS, (u32) combinator_ctx->combs_cnt); + else if (user_options_extra->attack_kern == ATTACK_KERN_BF) innerloop_cnt = MIN (KERNEL_BFS, (u32) mask_ctx->bfs_cnt); + } } - - #if defined (DEBUG) - event_log_info (hashcat_ctx, "* Device #%u: Kernel %s load successful. 
Info Log:", device_param->device_id + 1, source_file); - event_log_info (hashcat_ctx, "%s", mod_info_log); - event_log_info (hashcat_ctx, NULL); - #endif - - if (cache_disable == false) + else { - if (write_kernel_binary (hashcat_ctx, cached_file, binary, binary_size) == false) return false; + innerloop_cnt = hashes->salts_buf[0].salt_iter; } - hcfree (mod_info_log); - hcfree (mod_error_log); - - hcfree (binary); + if ((innerloop_cnt >= device_param->kernel_loops_min) && + (innerloop_cnt <= device_param->kernel_loops_max)) + { + device_param->kernel_loops_max = innerloop_cnt; + } } + } +} - if (device_param->is_opencl == true) - { - size_t build_log_size = 0; - - int CL_rc; +static int get_cuda_kernel_wgs (hashcat_ctx_t *hashcat_ctx, CUfunction function, u32 *result) +{ + int max_threads_per_block; - cl_program p1 = NULL; + if (hc_cuFuncGetAttribute (hashcat_ctx, &max_threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, function) == -1) return -1; - // workaround opencl issue with Apple Silicon + *result = (u32) max_threads_per_block; - if (strncmp (device_param->device_name, "Apple M", 7) == 0) - { - if (hc_clCreateProgramWithSource (hashcat_ctx, device_param->opencl_context, 1, (const char **) kernel_sources, NULL, opencl_program) == -1) return false; + return 0; +} - CL_rc = hc_clBuildProgram (hashcat_ctx, *opencl_program, 1, &device_param->opencl_device, build_options_buf, NULL, NULL); +static int get_cuda_kernel_local_mem_size (hashcat_ctx_t *hashcat_ctx, CUfunction function, u64 *result) +{ + int shared_size_bytes; - hc_clGetProgramBuildInfo (hashcat_ctx, *opencl_program, device_param->opencl_device, CL_PROGRAM_BUILD_LOG, 0, NULL, &build_log_size); - } - else - { - if (hc_clCreateProgramWithSource (hashcat_ctx, device_param->opencl_context, 1, (const char **) kernel_sources, NULL, &p1) == -1) return false; + if (hc_cuFuncGetAttribute (hashcat_ctx, &shared_size_bytes, CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, function) == -1) return -1; - CL_rc = hc_clCompileProgram (hashcat_ctx, p1, 1, &device_param->opencl_device, build_options_buf, 0, NULL, NULL, NULL, NULL); + *result = (u64) shared_size_bytes; - hc_clGetProgramBuildInfo (hashcat_ctx, p1, device_param->opencl_device, CL_PROGRAM_BUILD_LOG, 0, NULL, &build_log_size); - } + return 0; +} - #if defined (DEBUG) - if ((build_log_size > 1) || (CL_rc == -1)) - #else - if (CL_rc == -1) - #endif - { - char *build_log = (char *) hcmalloc (build_log_size + 1); +static int get_hip_kernel_wgs (hashcat_ctx_t *hashcat_ctx, hipFunction_t function, u32 *result) +{ + int max_threads_per_block; - int rc_clGetProgramBuildInfo; + if (hc_hipFuncGetAttribute (hashcat_ctx, &max_threads_per_block, HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, function) == -1) return -1; - if (strncmp (device_param->device_name, "Apple M", 7) == 0) - { - rc_clGetProgramBuildInfo = hc_clGetProgramBuildInfo (hashcat_ctx, *opencl_program, device_param->opencl_device, CL_PROGRAM_BUILD_LOG, build_log_size, build_log, NULL); - } - else - { - rc_clGetProgramBuildInfo = hc_clGetProgramBuildInfo (hashcat_ctx, p1, device_param->opencl_device, CL_PROGRAM_BUILD_LOG, build_log_size, build_log, NULL); - } + *result = (u32) max_threads_per_block; - if (rc_clGetProgramBuildInfo == -1) - { - hcfree (build_log); + return 0; +} - return false; - } +static int get_hip_kernel_local_mem_size (hashcat_ctx_t *hashcat_ctx, hipFunction_t function, u64 *result) +{ + int shared_size_bytes; - build_log[build_log_size] = 0; + if (hc_hipFuncGetAttribute (hashcat_ctx, &shared_size_bytes, 
HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, function) == -1) return -1; - puts (build_log); + *result = (u64) shared_size_bytes; - hcfree (build_log); - } + return 0; +} - if (CL_rc == -1) return false; +static int get_opencl_kernel_wgs (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_kernel kernel, u32 *result) +{ + size_t work_group_size = 0; - // workaround opencl issue with Apple Silicon + if (hc_clGetKernelWorkGroupInfo (hashcat_ctx, kernel, device_param->opencl_device, CL_KERNEL_WORK_GROUP_SIZE, sizeof (work_group_size), &work_group_size, NULL) == -1) return -1; - if (strncmp (device_param->device_name, "Apple M", 7) != 0) - { - cl_program t2[1]; + u32 kernel_threads = (u32) work_group_size; - t2[0] = p1; + size_t compile_work_group_size[3] = { 0, 0, 0 }; - cl_program fin; + if (hc_clGetKernelWorkGroupInfo (hashcat_ctx, kernel, device_param->opencl_device, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, sizeof (compile_work_group_size), &compile_work_group_size, NULL) == -1) return -1; - if (hc_clLinkProgram (hashcat_ctx, device_param->opencl_context, 1, &device_param->opencl_device, NULL, 1, t2, NULL, NULL, &fin) == -1) return false; + const size_t cwgs_total = compile_work_group_size[0] * compile_work_group_size[1] * compile_work_group_size[2]; - // it seems errors caused by clLinkProgram() do not go into CL_PROGRAM_BUILD - // I couldn't find any information on the web explaining how else to retrieve the error messages from the linker + if (cwgs_total > 0) + { + kernel_threads = MIN (kernel_threads, (u32) cwgs_total); + } - *opencl_program = fin; + *result = kernel_threads; - hc_clReleaseProgram (hashcat_ctx, p1); - } + return 0; +} - if (cache_disable == false) - { - size_t binary_size; +static int get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_kernel kernel, u32 *result) +{ + size_t preferred_work_group_size_multiple = 0; - if (hc_clGetProgramInfo (hashcat_ctx, *opencl_program, CL_PROGRAM_BINARY_SIZES, sizeof (size_t), &binary_size, NULL) == -1) return false; + if (hc_clGetKernelWorkGroupInfo (hashcat_ctx, kernel, device_param->opencl_device, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof (preferred_work_group_size_multiple), &preferred_work_group_size_multiple, NULL) == -1) return -1; - char *binary = (char *) hcmalloc (binary_size); + *result = (u32) preferred_work_group_size_multiple; - if (hc_clGetProgramInfo (hashcat_ctx, *opencl_program, CL_PROGRAM_BINARIES, sizeof (char *), &binary, NULL) == -1) return false; + return 0; +} - if (write_kernel_binary (hashcat_ctx, cached_file, binary, binary_size) == false) return false; +static int get_opencl_kernel_local_mem_size (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_kernel kernel, u64 *result) +{ + cl_ulong local_mem_size = 0; - hcfree (binary); - } - } - } - else - { - if (read_kernel_binary (hashcat_ctx, cached_file, kernel_lengths, kernel_sources) == false) return false; + if (hc_clGetKernelWorkGroupInfo (hashcat_ctx, kernel, device_param->opencl_device, CL_KERNEL_LOCAL_MEM_SIZE, sizeof (local_mem_size), &local_mem_size, NULL) == -1) return -1; - if (device_param->is_cuda == true) - { - #define LOG_SIZE 8192 + *result = local_mem_size; - char *mod_info_log = (char *) hcmalloc (LOG_SIZE + 1); - char *mod_error_log = (char *) hcmalloc (LOG_SIZE + 1); + return 0; +} - int mod_cnt = 6; +static int get_opencl_kernel_dynamic_local_mem_size (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_kernel kernel, u64 *result) +{ + cl_ulong 
dynamic_local_mem_size = 0;
-  CUjit_option mod_opts[7];
-  void *mod_vals[7];
+  if (hc_clGetKernelWorkGroupInfo (hashcat_ctx, kernel, device_param->opencl_device, CL_KERNEL_LOCAL_MEM_SIZE, sizeof (dynamic_local_mem_size), &dynamic_local_mem_size, NULL) == -1) return -1;
-  mod_opts[0] = CU_JIT_TARGET_FROM_CUCONTEXT;
-  mod_vals[0] = (void *) 0;
+  // unknown how to query this information in OpenCL
+  // we therefore reset to zero
+  // the above call to hc_clGetKernelWorkGroupInfo() is just to avoid compiler warnings
-  mod_opts[1] = CU_JIT_LOG_VERBOSE;
-  mod_vals[1] = (void *) 1;
+  dynamic_local_mem_size = 0;
-  mod_opts[2] = CU_JIT_INFO_LOG_BUFFER;
-  mod_vals[2] = (void *) mod_info_log;
+  *result = dynamic_local_mem_size;
-  mod_opts[3] = CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES;
-  mod_vals[3] = (void *) LOG_SIZE;
+  return 0;
+}
-  mod_opts[4] = CU_JIT_ERROR_LOG_BUFFER;
-  mod_vals[4] = (void *) mod_error_log;
+#if defined (__APPLE__)
+static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const char *kernel_name, char *source_file, char *cached_file, const char *build_options_buf, const bool cache_disable, cl_program *opencl_program, CUmodule *cuda_module, hipModule_t *hip_module, mtl_library *metal_library)
+#else
+static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const char *kernel_name, char *source_file, char *cached_file, const char *build_options_buf, const bool cache_disable, cl_program *opencl_program, CUmodule *cuda_module, hipModule_t *hip_module, MAYBE_UNUSED void *metal_library)
+#endif
+{
+  const hashconfig_t *hashconfig = hashcat_ctx->hashconfig;
+  const user_options_t *user_options = hashcat_ctx->user_options;
-  mod_opts[5] = CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES;
-  mod_vals[5] = (void *) LOG_SIZE;
+  #if !defined (_WIN) && !defined (__CYGWIN__) && !defined (__MSYS__)
+  const folder_config_t *folder_config = hashcat_ctx->folder_config;
+  #endif
-  if (hashconfig->opti_type & OPTI_TYPE_REGISTER_LIMIT)
-  {
-    mod_opts[6] = CU_JIT_MAX_REGISTERS;
-    mod_vals[6] = (void *) 128;
+  bool cached = true;
-    mod_cnt++;
-  }
+  if (cache_disable == true)
+  {
+    cached = false;
+  }
-  if (hc_cuModuleLoadDataEx (hashcat_ctx, cuda_module, kernel_sources[0], mod_cnt, mod_opts, mod_vals) == -1)
-  {
-    event_log_error (hashcat_ctx, "* Device #%u: Kernel %s load failed. Error Log:", device_param->device_id + 1, source_file);
-    event_log_error (hashcat_ctx, "%s", mod_error_log);
-    event_log_error (hashcat_ctx, NULL);
+  if (hc_path_read (cached_file) == false)
+  {
+    cached = false;
+  }
-    return false;
-  }
+  if (hc_path_is_empty (cached_file) == true)
+  {
+    cached = false;
+  }
-  #if defined (DEBUG)
-  event_log_info (hashcat_ctx, "* Device #%u: Kernel %s load successful. 
Info Log:", device_param->device_id + 1, source_file); - event_log_info (hashcat_ctx, "%s", mod_info_log); - event_log_info (hashcat_ctx, NULL); - #endif + /** + * kernel compile or load + */ - hcfree (mod_info_log); - hcfree (mod_error_log); - } + size_t kernel_lengths_buf = 0; - if (device_param->is_hip == true) - { - #define LOG_SIZE 8192 + size_t *kernel_lengths = &kernel_lengths_buf; - char *mod_info_log = (char *) hcmalloc (LOG_SIZE + 1); - char *mod_error_log = (char *) hcmalloc (LOG_SIZE + 1); + char *kernel_sources_buf = NULL; - int mod_cnt = 6; + char **kernel_sources = &kernel_sources_buf; - hipJitOption mod_opts[6]; - void *mod_vals[6]; + if (cached == false) + { + #if defined (DEBUG) + if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s not found in cache. Please be patient...", device_param->device_id + 1, filename_from_filepath (cached_file)); + #endif - mod_opts[0] = hipJitOptionTargetFromContext; - mod_vals[0] = (void *) 0; + if (read_kernel_binary (hashcat_ctx, source_file, kernel_lengths, kernel_sources) == false) return false; - mod_opts[1] = hipJitOptionLogVerbose; - mod_vals[1] = (void *) 1; + if (device_param->is_cuda == true) + { + nvrtcProgram program; - mod_opts[2] = hipJitOptionInfoLogBuffer; - mod_vals[2] = (void *) mod_info_log; + if (hc_nvrtcCreateProgram (hashcat_ctx, &program, kernel_sources[0], kernel_name, 0, NULL, NULL) == -1) return false; - mod_opts[3] = hipJitOptionInfoLogBufferSizeBytes; - mod_vals[3] = (void *) LOG_SIZE; + char **nvrtc_options = (char **) hccalloc (5 + strlen (build_options_buf) + 1, sizeof (char *)); // ... - mod_opts[4] = hipJitOptionErrorLogBuffer; - mod_vals[4] = (void *) mod_error_log; + nvrtc_options[0] = "--restrict"; + nvrtc_options[1] = "--device-as-default-execution-space"; + nvrtc_options[2] = "--gpu-architecture"; - mod_opts[5] = hipJitOptionErrorLogBufferSizeBytes; - mod_vals[5] = (void *) LOG_SIZE; + hc_asprintf (&nvrtc_options[3], "compute_%d%d", device_param->sm_major, device_param->sm_minor); - if (hc_hipModuleLoadDataEx (hashcat_ctx, hip_module, kernel_sources[0], mod_cnt, mod_opts, mod_vals) == -1) - { - event_log_error (hashcat_ctx, "* Device #%u: Kernel %s load failed. Error Log:", device_param->device_id + 1, source_file); - event_log_error (hashcat_ctx, "%s", mod_error_log); - event_log_error (hashcat_ctx, NULL); - - return false; - } - - #if defined (DEBUG) - event_log_info (hashcat_ctx, "* Device #%u: Kernel %s load successful. 
Info Log:", device_param->device_id + 1, source_file); - event_log_info (hashcat_ctx, "%s", mod_info_log); - event_log_info (hashcat_ctx, NULL); + // untested on windows, but it should work + #if defined (_WIN) || defined (__CYGWIN__) || defined (__MSYS__) + hc_asprintf (&nvrtc_options[4], "-D INCLUDE_PATH=%s", "OpenCL"); + #else + hc_asprintf (&nvrtc_options[4], "-D INCLUDE_PATH=%s", folder_config->cpath_real); #endif - hcfree (mod_info_log); - hcfree (mod_error_log); - } + char *nvrtc_options_string = hcstrdup (build_options_buf); - if (device_param->is_opencl == true) - { - if (hc_clCreateProgramWithBinary (hashcat_ctx, device_param->opencl_context, 1, &device_param->opencl_device, kernel_lengths, (const unsigned char **) kernel_sources, NULL, opencl_program) == -1) return false; + const int num_options = 5 + nvrtc_make_options_array_from_string (nvrtc_options_string, nvrtc_options + 5); - if (hc_clBuildProgram (hashcat_ctx, *opencl_program, 1, &device_param->opencl_device, build_options_buf, NULL, NULL) == -1) return false; - } - } + const int rc_nvrtcCompileProgram = hc_nvrtcCompileProgram (hashcat_ctx, program, num_options, (const char * const *) nvrtc_options); - hcfree (kernel_sources[0]); + hcfree (nvrtc_options_string); + hcfree (nvrtc_options); - return true; -} + size_t build_log_size = 0; -int backend_session_begin (hashcat_ctx_t *hashcat_ctx) -{ - const bitmap_ctx_t *bitmap_ctx = hashcat_ctx->bitmap_ctx; - const folder_config_t *folder_config = hashcat_ctx->folder_config; - const hashconfig_t *hashconfig = hashcat_ctx->hashconfig; - const hashes_t *hashes = hashcat_ctx->hashes; - const module_ctx_t *module_ctx = hashcat_ctx->module_ctx; - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - const straight_ctx_t *straight_ctx = hashcat_ctx->straight_ctx; - const user_options_extra_t *user_options_extra = hashcat_ctx->user_options_extra; - const user_options_t *user_options = hashcat_ctx->user_options; + hc_nvrtcGetProgramLogSize (hashcat_ctx, program, &build_log_size); - if (backend_ctx->enabled == false) return 0; + #if defined (DEBUG) + if ((build_log_size > 1) || (rc_nvrtcCompileProgram == -1)) + #else + if (rc_nvrtcCompileProgram == -1) + #endif + { + char *build_log = (char *) hcmalloc (build_log_size + 1); - u64 size_total_host_all = 0; + if (hc_nvrtcGetProgramLog (hashcat_ctx, program, build_log) == -1) + { + hcfree (build_log); - u32 hardware_power_all = 0; + return false; + } - int backend_memory_hit_warnings = 0; - int backend_runtime_skip_warnings = 0; - int backend_kernel_build_warnings = 0; - int backend_kernel_create_warnings = 0; - int backend_kernel_accel_warnings = 0; - int backend_extra_size_warning = 0; + build_log[build_log_size] = 0; - backend_ctx->memory_hit_warning = false; - backend_ctx->runtime_skip_warning = false; - backend_ctx->kernel_build_warning = false; - backend_ctx->kernel_create_warning = false; - backend_ctx->kernel_accel_warnings = false; - backend_ctx->extra_size_warning = false; - backend_ctx->mixed_warnings = false; + puts (build_log); - for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++) - { - /** - * host buffer - */ + hcfree (build_log); + } - hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx]; + if (rc_nvrtcCompileProgram == -1) + { + event_log_error (hashcat_ctx, "* Device #%u: Kernel %s build failed.", device_param->device_id + 1, source_file); - if (device_param->skipped == true) continue; + return false; + } - EVENT_DATA 
(EVENT_BACKEND_DEVICE_INIT_PRE, &backend_devices_idx, sizeof (int)); + size_t binary_size = 0; - const int device_id = device_param->device_id; + if (hc_nvrtcGetPTXSize (hashcat_ctx, program, &binary_size) == -1) return false; - /** - * module depending checks - */ + char *binary = (char *) hcmalloc (binary_size); - device_param->skipped_warning = false; + if (hc_nvrtcGetPTX (hashcat_ctx, program, binary) == -1) return false; - if (module_ctx->module_unstable_warning != MODULE_DEFAULT) - { - const bool unstable_warning = module_ctx->module_unstable_warning (hashconfig, user_options, user_options_extra, device_param); + if (hc_nvrtcDestroyProgram (hashcat_ctx, &program) == -1) return false; - if ((unstable_warning == true) && (user_options->force == false)) - { - event_log_warning (hashcat_ctx, "* Device #%u: Skipping (hash-mode %u)", device_id + 1, hashconfig->hash_mode); - event_log_warning (hashcat_ctx, " This is due to a known CUDA/HIP/OpenCL runtime/driver issue (not a hashcat issue)"); - event_log_warning (hashcat_ctx, " You can use --force to override, but do not report related errors."); + #define LOG_SIZE 8192 - backend_runtime_skip_warnings++; + char *mod_info_log = (char *) hcmalloc (LOG_SIZE + 1); + char *mod_error_log = (char *) hcmalloc (LOG_SIZE + 1); - device_param->skipped_warning = true; - continue; - } - } + int mod_cnt = 6; - /** - * tuning db - */ + CUjit_option mod_opts[7]; + void *mod_vals[7]; - if (module_ctx->module_extra_tuningdb_block != MODULE_DEFAULT) - { - const char *extra_tuningdb_block = module_ctx->module_extra_tuningdb_block (hashconfig, user_options, user_options_extra); + mod_opts[0] = CU_JIT_TARGET_FROM_CUCONTEXT; + mod_vals[0] = (void *) 0; - char *lines_buf = hcstrdup (extra_tuningdb_block); + mod_opts[1] = CU_JIT_LOG_VERBOSE; + mod_vals[1] = (void *) 1; - char *saveptr = NULL; + mod_opts[2] = CU_JIT_INFO_LOG_BUFFER; + mod_vals[2] = (void *) mod_info_log; - char *next = strtok_r (lines_buf, "\n", &saveptr); + mod_opts[3] = CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES; + mod_vals[3] = (void *) LOG_SIZE; - int line_num = 0; + mod_opts[4] = CU_JIT_ERROR_LOG_BUFFER; + mod_vals[4] = (void *) mod_error_log; - do + mod_opts[5] = CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES; + mod_vals[5] = (void *) LOG_SIZE; + + if (hashconfig->opti_type & OPTI_TYPE_REGISTER_LIMIT) { - line_num++; + mod_opts[6] = CU_JIT_MAX_REGISTERS; + mod_vals[6] = (void *) 128; - const size_t line_len = strlen (next); + mod_cnt++; + } - if (line_len == 0) continue; + #if defined (WITH_CUBIN) - if (next[0] == '#') continue; + char *jit_info_log = (char *) hcmalloc (LOG_SIZE + 1); + char *jit_error_log = (char *) hcmalloc (LOG_SIZE + 1); - tuning_db_process_line (hashcat_ctx, next, line_num); + int jit_cnt = 6; - } while ((next = strtok_r ((char *) NULL, "\n", &saveptr)) != NULL); + CUjit_option jit_opts[7]; + void *jit_vals[7]; - hcfree (lines_buf); + jit_opts[0] = CU_JIT_TARGET_FROM_CUCONTEXT; + jit_vals[0] = (void *) 0; - // todo: print loaded 'cnt' message + jit_opts[1] = CU_JIT_LOG_VERBOSE; + jit_vals[1] = (void *) 1; - // sort the database + jit_opts[2] = CU_JIT_INFO_LOG_BUFFER; + jit_vals[2] = (void *) jit_info_log; - tuning_db_t *tuning_db = hashcat_ctx->tuning_db; + jit_opts[3] = CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES; + jit_vals[3] = (void *) LOG_SIZE; - qsort (tuning_db->alias_buf, tuning_db->alias_cnt, sizeof (tuning_db_alias_t), sort_by_tuning_db_alias); - qsort (tuning_db->entry_buf, tuning_db->entry_cnt, sizeof (tuning_db_entry_t), sort_by_tuning_db_entry); - } + jit_opts[4] = CU_JIT_ERROR_LOG_BUFFER; 
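[Editor's note] The WITH_CUBIN branch being set up here takes the PTX emitted by NVRTC, JIT-links it into a device-specific cubin, loads that cubin as the CUDA module, and writes it to the kernel cache. As a rough orientation only, the underlying CUDA driver-API sequence looks like the sketch below; it uses the raw cu* entry points rather than hashcat's hc_cu* wrappers, drops the info/error log buffers, and is a minimal illustration of the flow, not the patch's exact implementation.

// Sketch: JIT-link NVRTC-generated PTX into a cubin and load it as a module.
// Raw CUDA driver API; hashcat routes these calls through its hc_cu* wrappers.
#include <cuda.h>

static int ptx_to_module (CUmodule *module, const char *ptx, size_t ptx_size)
{
  CUlinkState state;

  if (cuLinkCreate (0, NULL, NULL, &state) != CUDA_SUCCESS) return -1;

  // Hand the PTX to the linker; it is compiled for the current context's device.
  if (cuLinkAddData (state, CU_JIT_INPUT_PTX, (void *) ptx, ptx_size,
                     "kernel.ptx", 0, NULL, NULL) != CUDA_SUCCESS) return -1;

  void  *cubin      = NULL;
  size_t cubin_size = 0;

  // The cubin buffer is owned by the link state, so use it before cuLinkDestroy().
  if (cuLinkComplete (state, &cubin, &cubin_size) != CUDA_SUCCESS) return -1;

  if (cuModuleLoadData (module, cubin) != CUDA_SUCCESS) return -1;

  // This is also the point where the cubin would be written to the kernel cache.
  return (cuLinkDestroy (state) == CUDA_SUCCESS) ? 0 : -1;
}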
+ jit_vals[4] = (void *) jit_error_log; - // vector_width + jit_opts[5] = CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES; + jit_vals[5] = (void *) LOG_SIZE; - int vector_width = 0; + if (hashconfig->opti_type & OPTI_TYPE_REGISTER_LIMIT) + { + jit_opts[6] = CU_JIT_MAX_REGISTERS; + jit_vals[6] = (void *) 128; - if (user_options->backend_vector_width_chgd == false) - { - // tuning db + jit_cnt++; + } - tuning_db_entry_t *tuningdb_entry; + CUlinkState state; - if (user_options->slow_candidates == true) - { - tuningdb_entry = tuning_db_search (hashcat_ctx, device_param->device_name, device_param->opencl_device_type, 0, hashconfig->hash_mode); - } - else + if (hc_cuLinkCreate (hashcat_ctx, jit_cnt, jit_opts, jit_vals, &state) == -1) { - tuningdb_entry = tuning_db_search (hashcat_ctx, device_param->device_name, device_param->opencl_device_type, user_options->attack_mode, hashconfig->hash_mode); + event_log_error (hashcat_ctx, "* Device #%u: Kernel %s link failed. Error Log:", device_param->device_id + 1, source_file); + event_log_error (hashcat_ctx, "%s", jit_error_log); + event_log_error (hashcat_ctx, NULL); + + return false; } - if (tuningdb_entry == NULL || tuningdb_entry->vector_width == -1) + if (hc_cuLinkAddData (hashcat_ctx, state, CU_JIT_INPUT_PTX, binary, binary_size, kernel_name, 0, NULL, NULL) == -1) { - if (hashconfig->opti_type & OPTI_TYPE_USES_BITS_64) - { - if (device_param->is_cuda == true) - { - // cuda does not support this query + event_log_error (hashcat_ctx, "* Device #%u: Kernel %s link failed. Error Log:", device_param->device_id + 1, source_file); + event_log_error (hashcat_ctx, "%s", jit_error_log); + event_log_error (hashcat_ctx, NULL); - vector_width = 1; - } + return false; + } - if (device_param->is_hip == true) - { - // hip does not support this query + void *cubin = NULL; - vector_width = 1; - } + size_t cubin_size = 0; - if (device_param->is_opencl == true) - { - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, sizeof (vector_width), &vector_width, NULL) == -1) - { - device_param->skipped = true; - continue; - } - } - } - else - { - if (device_param->is_cuda == true) - { - // cuda does not support this query + if (hc_cuLinkComplete (hashcat_ctx, state, &cubin, &cubin_size) == -1) + { + event_log_error (hashcat_ctx, "* Device #%u: Kernel %s link failed. Error Log:", device_param->device_id + 1, source_file); + event_log_error (hashcat_ctx, "%s", jit_error_log); + event_log_error (hashcat_ctx, NULL); - vector_width = 1; - } + return false; + } - if (device_param->is_hip == true) - { - // hip does not support this query + #if defined (DEBUG) + event_log_info (hashcat_ctx, "* Device #%u: Kernel %s link successful. Info Log:", device_param->device_id + 1, source_file); + event_log_info (hashcat_ctx, "%s", jit_info_log); + event_log_info (hashcat_ctx, NULL); + #endif - vector_width = 1; - } + if (hc_cuModuleLoadDataEx (hashcat_ctx, cuda_module, cubin, mod_cnt, mod_opts, mod_vals) == -1) + { + event_log_error (hashcat_ctx, "* Device #%u: Kernel %s load failed. 
Error Log:", device_param->device_id + 1, source_file); + event_log_error (hashcat_ctx, "%s", mod_error_log); + event_log_error (hashcat_ctx, NULL); - if (device_param->is_opencl == true) - { - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, sizeof (vector_width), &vector_width, NULL) == -1) - { - device_param->skipped = true; - continue; - } - } - } + return false; } - else + + #if defined (DEBUG) + event_log_info (hashcat_ctx, "* Device #%u: Kernel %s load successful. Info Log:", device_param->device_id + 1, source_file); + event_log_info (hashcat_ctx, "%s", mod_info_log); + event_log_info (hashcat_ctx, NULL); + #endif + + if (cache_disable == false) { - vector_width = (cl_uint) tuningdb_entry->vector_width; + if (write_kernel_binary (hashcat_ctx, cached_file, cubin, cubin_size) == false) return false; } - } - else - { - vector_width = user_options->backend_vector_width; - } - // We can't have SIMD in kernels where we have an unknown final password length - // It also turns out that pure kernels (that have a higher register pressure) - // actually run faster on scalar GPU (like 1080) without SIMD + if (hc_cuLinkDestroy (hashcat_ctx, state) == -1) return false; - if ((hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL) == 0) - { - if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) + hcfree (jit_info_log); + hcfree (jit_error_log); + + #else + + if (hc_cuModuleLoadDataEx (hashcat_ctx, cuda_module, binary, mod_cnt, mod_opts, mod_vals) == -1) { - vector_width = 1; + event_log_error (hashcat_ctx, "* Device #%u: Kernel %s load failed. Error Log:", device_param->device_id + 1, source_file); + event_log_error (hashcat_ctx, "%s", mod_error_log); + event_log_error (hashcat_ctx, NULL); + + return false; + } + + #if defined (DEBUG) + event_log_info (hashcat_ctx, "* Device #%u: Kernel %s load successful. Info Log:", device_param->device_id + 1, source_file); + event_log_info (hashcat_ctx, "%s", mod_info_log); + event_log_info (hashcat_ctx, NULL); + #endif + + if (cache_disable == false) + { + if (write_kernel_binary (hashcat_ctx, cached_file, binary, binary_size) == false) return false; } + + #endif + + hcfree (mod_info_log); + hcfree (mod_error_log); + + hcfree (binary); } - if (user_options->attack_mode == ATTACK_MODE_ASSOCIATION) + if (device_param->is_hip == true) { - // not working in this mode because the GID does not align with password candidate count - // and if it cracks, it will crack the same hash twice, running into segfaults + hiprtcProgram program; - vector_width = 1; - } + if (hc_hiprtcCreateProgram (hashcat_ctx, &program, kernel_sources[0], kernel_name, 0, NULL, NULL) == -1) return false; - if (vector_width > 16) vector_width = 16; + char **hiprtc_options = (char **) hccalloc (6 + strlen (build_options_buf) + 1, sizeof (char *)); // ... - device_param->vector_width = vector_width; + //hiprtc_options[0] = "--restrict"; + //hiprtc_options[1] = "--device-as-default-execution-space"; + //hiprtc_options[2] = "--gpu-architecture"; - /** - * kernel accel and loops tuning db adjustment - */ + hc_asprintf (&hiprtc_options[0], "--gpu-max-threads-per-block=%d", (user_options->kernel_threads_chgd == true) ? 
user_options->kernel_threads : device_param->kernel_threads_max); - device_param->kernel_accel_min = hashconfig->kernel_accel_min; - device_param->kernel_accel_max = hashconfig->kernel_accel_max; - device_param->kernel_loops_min = hashconfig->kernel_loops_min; - device_param->kernel_loops_max = hashconfig->kernel_loops_max; - device_param->kernel_threads_min = hashconfig->kernel_threads_min; - device_param->kernel_threads_max = hashconfig->kernel_threads_max; + /* 4.3 linux + hiprtc_options[1] = "-I"; + hiprtc_options[2] = "/opt/rocm/hip/bin/include"; + hiprtc_options[3] = "-I"; + hiprtc_options[4] = "/opt/rocm/include"; + hiprtc_options[5] = "-I"; + */ - tuning_db_entry_t *tuningdb_entry = NULL; + hiprtc_options[1] = "-nocudainc"; + hiprtc_options[2] = "-nocudalib"; + hiprtc_options[3] = ""; + hiprtc_options[4] = ""; - if (user_options->slow_candidates == true) - { - tuningdb_entry = tuning_db_search (hashcat_ctx, device_param->device_name, device_param->opencl_device_type, 0, hashconfig->hash_mode); - } - else - { - tuningdb_entry = tuning_db_search (hashcat_ctx, device_param->device_name, device_param->opencl_device_type, user_options->attack_mode, hashconfig->hash_mode); - } + // untested but it should work + #if defined (_WIN) || defined (__CYGWIN__) || defined (__MSYS__) + hc_asprintf (&hiprtc_options[5], "-D INCLUDE_PATH=%s", "OpenCL"); + #else + hc_asprintf (&hiprtc_options[5], "-D INCLUDE_PATH=%s", folder_config->cpath_real); + #endif - // user commandline option override tuning db - // but both have to stay inside the boundaries of the module + char *hiprtc_options_string = hcstrdup (build_options_buf); - if (user_options->kernel_accel_chgd == true) - { - const u32 _kernel_accel = user_options->kernel_accel; + const int num_options = 6 + hiprtc_make_options_array_from_string (hiprtc_options_string, hiprtc_options + 6); - if ((_kernel_accel >= device_param->kernel_accel_min) && (_kernel_accel <= device_param->kernel_accel_max)) - { - device_param->kernel_accel_min = _kernel_accel; - device_param->kernel_accel_max = _kernel_accel; - } - } - else - { - if (tuningdb_entry != NULL) - { - const u32 _kernel_accel = tuningdb_entry->kernel_accel; + const int rc_hiprtcCompileProgram = hc_hiprtcCompileProgram (hashcat_ctx, program, num_options, (const char * const *) hiprtc_options); - if (_kernel_accel == (u32) -1) // native, makes sense if OPTS_TYPE_MP_MULTI_DISABLE is used - { - if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) - { - if (module_ctx->module_extra_tuningdb_block != MODULE_DEFAULT) - { - event_log_warning (hashcat_ctx, "ATTENTION! 
This hash-mode requires manual tuning to achieve full performance."); - event_log_warning (hashcat_ctx, "The loss of performance can be greater than 100%% without manual tuning."); - event_log_warning (hashcat_ctx, NULL); - event_log_warning (hashcat_ctx, "This warning message disappears after a definition for the installed"); - event_log_warning (hashcat_ctx, "compute-device in this computer has been added to either list:"); - event_log_warning (hashcat_ctx, "- src/modules/module_%05d.c", hashconfig->hash_mode); - event_log_warning (hashcat_ctx, "- hashcat.hctune"); - event_log_warning (hashcat_ctx, NULL); - event_log_warning (hashcat_ctx, "For instructions on tuning, see src/modules/module_%05d.c", hashconfig->hash_mode); - event_log_warning (hashcat_ctx, "Also, consider sending a PR to Hashcat Master so that other users can benefit from your work."); - event_log_warning (hashcat_ctx, NULL); - } - } + hcfree (hiprtc_options_string); + hcfree (hiprtc_options); - device_param->kernel_accel_min = device_param->device_processors; - device_param->kernel_accel_max = device_param->device_processors; - } - else - { - if (_kernel_accel) - { - if ((_kernel_accel >= device_param->kernel_accel_min) && (_kernel_accel <= device_param->kernel_accel_max)) - { - device_param->kernel_accel_min = _kernel_accel; - device_param->kernel_accel_max = _kernel_accel; - } - } - } - } - } + size_t build_log_size = 0; - if (user_options->kernel_loops_chgd == true) - { - const u32 _kernel_loops = user_options->kernel_loops; + hc_hiprtcGetProgramLogSize (hashcat_ctx, program, &build_log_size); - if ((_kernel_loops >= device_param->kernel_loops_min) && (_kernel_loops <= device_param->kernel_loops_max)) - { - device_param->kernel_loops_min = _kernel_loops; - device_param->kernel_loops_max = _kernel_loops; - } - } - else - { - if (tuningdb_entry != NULL) + #if defined (DEBUG) + if ((build_log_size > 1) || (rc_hiprtcCompileProgram == -1)) + #else + if (rc_hiprtcCompileProgram == -1) + #endif { - u32 _kernel_loops = tuningdb_entry->kernel_loops; + char *build_log = (char *) hcmalloc (build_log_size + 1); - if (_kernel_loops) + if (hc_hiprtcGetProgramLog (hashcat_ctx, program, build_log) == -1) { - if (user_options->workload_profile == 1) - { - _kernel_loops = (_kernel_loops > 8) ? _kernel_loops / 8 : 1; - } - else if (user_options->workload_profile == 2) - { - _kernel_loops = (_kernel_loops > 4) ? 
_kernel_loops / 4 : 1; - } + hcfree (build_log); - if ((_kernel_loops >= device_param->kernel_loops_min) && (_kernel_loops <= device_param->kernel_loops_max)) - { - device_param->kernel_loops_min = _kernel_loops; - device_param->kernel_loops_max = _kernel_loops; - } + return false; } - } - } - // there's no thread column in tuning db, stick to commandline if defined + build_log[build_log_size] = 0; - if (user_options->kernel_threads_chgd == true) - { - const u32 _kernel_threads = user_options->kernel_threads; + puts (build_log); - if ((_kernel_threads >= device_param->kernel_threads_min) && (_kernel_threads <= device_param->kernel_threads_max)) - { - device_param->kernel_threads_min = _kernel_threads; - device_param->kernel_threads_max = _kernel_threads; + hcfree (build_log); } - } - - if (user_options->slow_candidates == true) - { - } - else - { - // we have some absolute limits for fast hashes (because of limit constant memory), make sure not to overstep - if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + if (rc_hiprtcCompileProgram == -1) { - if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT) - { - device_param->kernel_loops_min = MIN (device_param->kernel_loops_min, KERNEL_RULES); - device_param->kernel_loops_max = MIN (device_param->kernel_loops_max, KERNEL_RULES); - } - else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI) - { - device_param->kernel_loops_min = MIN (device_param->kernel_loops_min, KERNEL_COMBS); - device_param->kernel_loops_max = MIN (device_param->kernel_loops_max, KERNEL_COMBS); - } - else if (user_options_extra->attack_kern == ATTACK_KERN_BF) - { - device_param->kernel_loops_min = MIN (device_param->kernel_loops_min, KERNEL_BFS); - device_param->kernel_loops_max = MIN (device_param->kernel_loops_max, KERNEL_BFS); - } + event_log_error (hashcat_ctx, "* Device #%u: Kernel %s build failed.", device_param->device_id + 1, source_file); + + return false; } - } - device_param->kernel_loops_min_sav = device_param->kernel_loops_min; - device_param->kernel_loops_max_sav = device_param->kernel_loops_max; + size_t binary_size = 0; - /** - * device properties - */ + if (hc_hiprtcGetCodeSize (hashcat_ctx, program, &binary_size) == -1) return false; - const u32 device_processors = device_param->device_processors; + char *binary = (char *) hcmalloc (binary_size); - /** - * device threads - */ + if (hc_hiprtcGetCode (hashcat_ctx, program, binary) == -1) return false; - if (hashconfig->opts_type & OPTS_TYPE_MAXIMUM_THREADS) - { - // default for all, because the else branch is doing the same (nothing), but is actually used as a way to - // disable the default native thread configuration for HIP - // this can have negative performance if not tested on multiple different gpu architectures - } - else if (hashconfig->opts_type & OPTS_TYPE_NATIVE_THREADS) - { - u32 native_threads = 0; + if (hc_hiprtcDestroyProgram (hashcat_ctx, &program) == -1) return false; - if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU) - { - native_threads = 1; - } - else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) + #define LOG_SIZE 8192 + + char *mod_info_log = (char *) hcmalloc (LOG_SIZE + 1); + char *mod_error_log = (char *) hcmalloc (LOG_SIZE + 1); + + int mod_cnt = 6; + + hipJitOption mod_opts[6]; + void *mod_vals[6]; + + mod_opts[0] = hipJitOptionTargetFromContext; + mod_vals[0] = (void *) 0; + + mod_opts[1] = hipJitOptionLogVerbose; + mod_vals[1] = (void *) 1; + + mod_opts[2] = hipJitOptionInfoLogBuffer; + mod_vals[2] = (void *) mod_info_log; + + 
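[Editor's note] On the HIP side the cache-miss path mirrors the CUDA one: hipRTC compiles the kernel source to a code object, the code object is written to the on-disk kernel cache, and hipModuleLoadDataEx() (whose JIT options are being prepared here) brings it into the context. A minimal sketch of that hipRTC round trip, written against the plain hiprtc*/hip* API instead of the hc_hip* wrappers and with compile-log handling and error cleanup left out:

// Sketch: runtime-compile HIP kernel source and load the resulting code object.
#include <hip/hip_runtime.h>
#include <hip/hiprtc.h>
#include <stdlib.h>

static int hip_source_to_module (hipModule_t *module, const char *src, const char *name)
{
  hiprtcProgram prog;

  if (hiprtcCreateProgram (&prog, src, name, 0, NULL, NULL) != HIPRTC_SUCCESS) return -1;

  // Same options the patch passes to keep CUDA headers/libs out of the compile.
  const char *opts[] = { "-nocudainc", "-nocudalib" };

  if (hiprtcCompileProgram (prog, 2, opts) != HIPRTC_SUCCESS) return -1;

  size_t code_size = 0;

  if (hiprtcGetCodeSize (prog, &code_size) != HIPRTC_SUCCESS) return -1;

  char *code = (char *) malloc (code_size);

  if (hiprtcGetCode (prog, code) != HIPRTC_SUCCESS) return -1;

  hiprtcDestroyProgram (&prog);

  // The code object is what hashcat caches on disk between runs.
  if (hipModuleLoadData (module, code) != hipSuccess) return -1;

  free (code);

  return 0;
}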
mod_opts[3] = hipJitOptionInfoLogBufferSizeBytes; + mod_vals[3] = (void *) LOG_SIZE; + + mod_opts[4] = hipJitOptionErrorLogBuffer; + mod_vals[4] = (void *) mod_error_log; + + mod_opts[5] = hipJitOptionErrorLogBufferSizeBytes; + mod_vals[5] = (void *) LOG_SIZE; + + if (hc_hipModuleLoadDataEx (hashcat_ctx, hip_module, binary, mod_cnt, mod_opts, mod_vals) == -1) { - native_threads = device_param->kernel_preferred_wgs_multiple; + event_log_error (hashcat_ctx, "* Device #%u: Kernel %s load failed. Error Log:", device_param->device_id + 1, source_file); + event_log_error (hashcat_ctx, "%s", mod_error_log); + event_log_error (hashcat_ctx, NULL); + + return false; } - else + + #if defined (DEBUG) + event_log_info (hashcat_ctx, "* Device #%u: Kernel %s load successful. Info Log:", device_param->device_id + 1, source_file); + event_log_info (hashcat_ctx, "%s", mod_info_log); + event_log_info (hashcat_ctx, NULL); + #endif + + if (cache_disable == false) { - // abort? + if (write_kernel_binary (hashcat_ctx, cached_file, binary, binary_size) == false) return false; } - if ((native_threads >= device_param->kernel_threads_min) && (native_threads <= device_param->kernel_threads_max)) + hcfree (mod_info_log); + hcfree (mod_error_log); + + hcfree (binary); + } + + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + mtl_library metal_lib = NULL; + + if (hc_mtlCreateLibraryWithSource (hashcat_ctx, device_param->metal_device, kernel_sources[0], build_options_buf, folder_config->cpath_real, &metal_lib) == -1) return false; + + *metal_library = metal_lib; + + #if defined (DEBUG) + event_log_info (hashcat_ctx, "* Device #%u: Kernel %s load successful.", device_param->device_id + 1, source_file); + event_log_info (hashcat_ctx, NULL); + #endif + } + #endif // __APPLE__ + + if (device_param->is_opencl == true) + { + size_t build_log_size = 0; + + int CL_rc; + + cl_program p1 = NULL; + + // workaround opencl issue with Apple Silicon + + if (strncmp (device_param->device_name, "Apple M", 7) == 0) { - device_param->kernel_threads_min = native_threads; - device_param->kernel_threads_max = native_threads; + if (hc_clCreateProgramWithSource (hashcat_ctx, device_param->opencl_context, 1, (const char **) kernel_sources, NULL, opencl_program) == -1) return false; + + CL_rc = hc_clBuildProgram (hashcat_ctx, *opencl_program, 1, &device_param->opencl_device, build_options_buf, NULL, NULL); + + hc_clGetProgramBuildInfo (hashcat_ctx, *opencl_program, device_param->opencl_device, CL_PROGRAM_BUILD_LOG, 0, NULL, &build_log_size); } else { - // abort? 
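[Editor's note] Just above, the new Metal branch compiles the kernel with hc_mtlCreateLibraryWithSource() when no cached library exists; the OpenCL branch that follows keeps the existing Apple Silicon workaround, building the program in a single clBuildProgram() step on "Apple M*" devices instead of the separate clCompileProgram()/clLinkProgram() route used elsewhere. A condensed sketch of the two OpenCL paths, written against the plain OpenCL API rather than the hc_cl* wrappers and with build-log handling omitted, under the assumption of a single target device:

// Sketch: one-step build on Apple Silicon, compile + link everywhere else.
#define CL_TARGET_OPENCL_VERSION 120
#include <string.h>
#ifdef __APPLE__
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif

static cl_program build_for_device (cl_context ctx, cl_device_id dev,
                                    const char *dev_name, const char *src,
                                    const char *options)
{
  cl_int err;

  cl_program prog = clCreateProgramWithSource (ctx, 1, &src, NULL, &err);

  if (err != CL_SUCCESS) return NULL;

  if (strncmp (dev_name, "Apple M", 7) == 0)
  {
    // Apple Silicon workaround: build directly, skip the separate link step.
    if (clBuildProgram (prog, 1, &dev, options, NULL, NULL) != CL_SUCCESS) return NULL;

    return prog;
  }

  // Other devices: compile, then link the single object into the final program.
  if (clCompileProgram (prog, 1, &dev, options, 0, NULL, NULL, NULL, NULL) != CL_SUCCESS) return NULL;

  cl_program linked = clLinkProgram (ctx, 1, &dev, NULL, 1, &prog, NULL, NULL, &err);

  clReleaseProgram (prog);

  return (err == CL_SUCCESS) ? linked : NULL;
}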
+ if (hc_clCreateProgramWithSource (hashcat_ctx, device_param->opencl_context, 1, (const char **) kernel_sources, NULL, &p1) == -1) return false; + + CL_rc = hc_clCompileProgram (hashcat_ctx, p1, 1, &device_param->opencl_device, build_options_buf, 0, NULL, NULL, NULL, NULL); + + hc_clGetProgramBuildInfo (hashcat_ctx, p1, device_param->opencl_device, CL_PROGRAM_BUILD_LOG, 0, NULL, &build_log_size); } - } - else - { - if (device_param->is_hip == true) + + #if defined (DEBUG) + if ((build_log_size > 1) || (CL_rc == -1)) + #else + if (CL_rc == -1) + #endif { - const u32 native_threads = device_param->kernel_preferred_wgs_multiple; + char *build_log = (char *) hcmalloc (build_log_size + 1); - if ((native_threads >= device_param->kernel_threads_min) && (native_threads <= device_param->kernel_threads_max)) + int rc_clGetProgramBuildInfo; + + if (strncmp (device_param->device_name, "Apple M", 7) == 0) { - device_param->kernel_threads_min = native_threads; - device_param->kernel_threads_max = native_threads; + rc_clGetProgramBuildInfo = hc_clGetProgramBuildInfo (hashcat_ctx, *opencl_program, device_param->opencl_device, CL_PROGRAM_BUILD_LOG, build_log_size, build_log, NULL); } else { - // abort? + rc_clGetProgramBuildInfo = hc_clGetProgramBuildInfo (hashcat_ctx, p1, device_param->opencl_device, CL_PROGRAM_BUILD_LOG, build_log_size, build_log, NULL); } - } - } - // this seems to work always + if (rc_clGetProgramBuildInfo == -1) + { + hcfree (build_log); - if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU) - { - u32 native_threads = 1; + return false; + } - if ((native_threads >= device_param->kernel_threads_min) && (native_threads <= device_param->kernel_threads_max)) - { - device_param->kernel_threads_min = native_threads; - device_param->kernel_threads_max = native_threads; - } - } + build_log[build_log_size] = 0; - /** - * create context for each device - */ + puts (build_log); - if (device_param->is_cuda == true) - { - if (hc_cuCtxCreate (hashcat_ctx, &device_param->cuda_context, CU_CTX_SCHED_BLOCKING_SYNC, device_param->cuda_device) == -1) - { - device_param->skipped = true; - continue; + hcfree (build_log); } - if (hc_cuCtxPushCurrent (hashcat_ctx, device_param->cuda_context) == -1) - { - device_param->skipped = true; - continue; - } - } + if (CL_rc == -1) return false; - if (device_param->is_hip == true) - { - if (hc_hipCtxCreate (hashcat_ctx, &device_param->hip_context, hipDeviceScheduleBlockingSync, device_param->hip_device) == -1) - { - device_param->skipped = true; - continue; - } + // workaround opencl issue with Apple Silicon - if (hc_hipCtxPushCurrent (hashcat_ctx, device_param->hip_context) == -1) + if (strncmp (device_param->device_name, "Apple M", 7) != 0) { - device_param->skipped = true; - continue; - } - } + cl_program t2[1]; - if (device_param->is_opencl == true) - { - /* - cl_context_properties properties[3]; + t2[0] = p1; - properties[0] = CL_CONTEXT_PLATFORM; - properties[1] = (cl_context_properties) device_param->opencl_platform; - properties[2] = 0; + cl_program fin; - CL_rc = hc_clCreateContext (hashcat_ctx, properties, 1, &device_param->opencl_device, NULL, NULL, &device_param->opencl_context); - */ + if (hc_clLinkProgram (hashcat_ctx, device_param->opencl_context, 1, &device_param->opencl_device, NULL, 1, t2, NULL, NULL, &fin) == -1) return false; - if (hc_clCreateContext (hashcat_ctx, NULL, 1, &device_param->opencl_device, NULL, NULL, &device_param->opencl_context) == -1) - { - device_param->skipped = true; - continue; - } + // it seems errors caused by 
clLinkProgram() do not go into CL_PROGRAM_BUILD + // I couldn't find any information on the web explaining how else to retrieve the error messages from the linker - /** - * create command-queue - */ + *opencl_program = fin; - // not supported with NV - // device_param->opencl_command_queue = hc_clCreateCommandQueueWithProperties (hashcat_ctx, device_param->opencl_device, NULL); + hc_clReleaseProgram (hashcat_ctx, p1); + } - if (hc_clCreateCommandQueue (hashcat_ctx, device_param->opencl_context, device_param->opencl_device, CL_QUEUE_PROFILING_ENABLE, &device_param->opencl_command_queue) == -1) + if (cache_disable == false) { - device_param->skipped = true; - continue; - } - } + size_t binary_size; - /** - * create stream for CUDA devices - */ + if (hc_clGetProgramInfo (hashcat_ctx, *opencl_program, CL_PROGRAM_BINARY_SIZES, sizeof (size_t), &binary_size, NULL) == -1) return false; - if (device_param->is_cuda == true) - { - if (hc_cuStreamCreate (hashcat_ctx, &device_param->cuda_stream, CU_STREAM_DEFAULT) == -1) - { - device_param->skipped = true; - continue; - } - } + char *binary = (char *) hcmalloc (binary_size); - /** - * create stream for HIP devices - */ + if (hc_clGetProgramInfo (hashcat_ctx, *opencl_program, CL_PROGRAM_BINARIES, sizeof (char *), &binary, NULL) == -1) return false; - if (device_param->is_hip == true) - { - if (hc_hipStreamCreate (hashcat_ctx, &device_param->hip_stream, hipStreamDefault) == -1) - { - device_param->skipped = true; - continue; + if (write_kernel_binary (hashcat_ctx, cached_file, binary, binary_size) == false) return false; + + hcfree (binary); } } - - /** - * create events for CUDA devices - */ + } + else + { + if (read_kernel_binary (hashcat_ctx, cached_file, kernel_lengths, kernel_sources) == false) return false; if (device_param->is_cuda == true) { - if (hc_cuEventCreate (hashcat_ctx, &device_param->cuda_event1, CU_EVENT_BLOCKING_SYNC) == -1) - { - device_param->skipped = true; - continue; - } + #define LOG_SIZE 8192 - if (hc_cuEventCreate (hashcat_ctx, &device_param->cuda_event2, CU_EVENT_BLOCKING_SYNC) == -1) + char *mod_info_log = (char *) hcmalloc (LOG_SIZE + 1); + char *mod_error_log = (char *) hcmalloc (LOG_SIZE + 1); + + int mod_cnt = 6; + + CUjit_option mod_opts[7]; + void *mod_vals[7]; + + mod_opts[0] = CU_JIT_TARGET_FROM_CUCONTEXT; + mod_vals[0] = (void *) 0; + + mod_opts[1] = CU_JIT_LOG_VERBOSE; + mod_vals[1] = (void *) 1; + + mod_opts[2] = CU_JIT_INFO_LOG_BUFFER; + mod_vals[2] = (void *) mod_info_log; + + mod_opts[3] = CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES; + mod_vals[3] = (void *) LOG_SIZE; + + mod_opts[4] = CU_JIT_ERROR_LOG_BUFFER; + mod_vals[4] = (void *) mod_error_log; + + mod_opts[5] = CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES; + mod_vals[5] = (void *) LOG_SIZE; + + if (hashconfig->opti_type & OPTI_TYPE_REGISTER_LIMIT) { - device_param->skipped = true; - continue; + mod_opts[6] = CU_JIT_MAX_REGISTERS; + mod_vals[6] = (void *) 128; + + mod_cnt++; } - if (hc_cuEventCreate (hashcat_ctx, &device_param->cuda_event3, CU_EVENT_DISABLE_TIMING) == -1) + if (hc_cuModuleLoadDataEx (hashcat_ctx, cuda_module, kernel_sources[0], mod_cnt, mod_opts, mod_vals) == -1) { - device_param->skipped = true; - continue; + event_log_error (hashcat_ctx, "* Device #%u: Kernel %s load failed. 
Error Log:", device_param->device_id + 1, source_file); + event_log_error (hashcat_ctx, "%s", mod_error_log); + event_log_error (hashcat_ctx, NULL); + + return false; } - } - /** - * create events for HIP devices - */ + #if defined (DEBUG) + event_log_info (hashcat_ctx, "* Device #%u: Kernel %s load successful. Info Log:", device_param->device_id + 1, source_file); + event_log_info (hashcat_ctx, "%s", mod_info_log); + event_log_info (hashcat_ctx, NULL); + #endif + + hcfree (mod_info_log); + hcfree (mod_error_log); + } if (device_param->is_hip == true) { - if (hc_hipEventCreate (hashcat_ctx, &device_param->hip_event1, hipEventBlockingSync) == -1) - { - device_param->skipped = true; - continue; - } + #define LOG_SIZE 8192 - if (hc_hipEventCreate (hashcat_ctx, &device_param->hip_event2, hipEventBlockingSync) == -1) - { - device_param->skipped = true; - continue; - } + char *mod_info_log = (char *) hcmalloc (LOG_SIZE + 1); + char *mod_error_log = (char *) hcmalloc (LOG_SIZE + 1); - if (hc_hipEventCreate (hashcat_ctx, &device_param->hip_event3, hipEventDisableTiming) == -1) - { - device_param->skipped = true; - continue; - } - } + int mod_cnt = 6; - /** - * create input buffers on device : calculate size of fixed memory buffers - */ + hipJitOption mod_opts[6]; + void *mod_vals[6]; - u64 size_root_css = SP_PW_MAX * sizeof (cs_t); - u64 size_markov_css = SP_PW_MAX * CHARSIZ * sizeof (cs_t); + mod_opts[0] = hipJitOptionTargetFromContext; + mod_vals[0] = (void *) 0; - device_param->size_root_css = size_root_css; - device_param->size_markov_css = size_markov_css; + mod_opts[1] = hipJitOptionLogVerbose; + mod_vals[1] = (void *) 1; - u64 size_results = sizeof (u32); + mod_opts[2] = hipJitOptionInfoLogBuffer; + mod_vals[2] = (void *) mod_info_log; - device_param->size_results = size_results; + mod_opts[3] = hipJitOptionInfoLogBufferSizeBytes; + mod_vals[3] = (void *) LOG_SIZE; - u64 size_rules = (u64) straight_ctx->kernel_rules_cnt * sizeof (kernel_rule_t); - u64 size_rules_c = (u64) KERNEL_RULES * sizeof (kernel_rule_t); + mod_opts[4] = hipJitOptionErrorLogBuffer; + mod_vals[4] = (void *) mod_error_log; - device_param->size_rules = size_rules; - device_param->size_rules_c = size_rules_c; + mod_opts[5] = hipJitOptionErrorLogBufferSizeBytes; + mod_vals[5] = (void *) LOG_SIZE; - u64 size_plains = (u64) hashes->digests_cnt * sizeof (plain_t); - u64 size_salts = (u64) hashes->salts_cnt * sizeof (salt_t); - u64 size_esalts = (u64) hashes->digests_cnt * hashconfig->esalt_size; - u64 size_shown = (u64) hashes->digests_cnt * sizeof (u32); - u64 size_digests = (u64) hashes->digests_cnt * (u64) hashconfig->dgst_size; + if (hc_hipModuleLoadDataEx (hashcat_ctx, hip_module, kernel_sources[0], mod_cnt, mod_opts, mod_vals) == -1) + { + event_log_error (hashcat_ctx, "* Device #%u: Kernel %s load failed. Error Log:", device_param->device_id + 1, source_file); + event_log_error (hashcat_ctx, "%s", mod_error_log); + event_log_error (hashcat_ctx, NULL); - device_param->size_plains = size_plains; - device_param->size_digests = size_digests; - device_param->size_shown = size_shown; - device_param->size_salts = size_salts; - device_param->size_esalts = size_esalts; + return false; + } - u64 size_combs = KERNEL_COMBS * sizeof (pw_t); - u64 size_bfs = KERNEL_BFS * sizeof (bf_t); - u64 size_tm = 32 * sizeof (bs_word_t); - u64 size_kernel_params = 1 * sizeof (kernel_param_t); + #if defined (DEBUG) + event_log_info (hashcat_ctx, "* Device #%u: Kernel %s load successful. 
Info Log:", device_param->device_id + 1, source_file); + event_log_info (hashcat_ctx, "%s", mod_info_log); + event_log_info (hashcat_ctx, NULL); + #endif - device_param->size_bfs = size_bfs; - device_param->size_combs = size_combs; - device_param->size_tm = size_tm; - device_param->size_kernel_params = size_kernel_params; + hcfree (mod_info_log); + hcfree (mod_error_log); + } - u64 size_st_digests = 1 * hashconfig->dgst_size; - u64 size_st_salts = 1 * sizeof (salt_t); - u64 size_st_esalts = 1 * hashconfig->esalt_size; + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + mtl_library metal_lib = NULL; - device_param->size_st_digests = size_st_digests; - device_param->size_st_salts = size_st_salts; - device_param->size_st_esalts = size_st_esalts; + if (hc_mtlCreateLibraryWithFile (hashcat_ctx, device_param->metal_device, cached_file, &metal_lib) == -1) return false; - // extra buffer + *metal_library = metal_lib; - u64 size_extra_buffer = 4; + #if defined (DEBUG) + event_log_info (hashcat_ctx, "* Device #%u: Kernel %s load successful.", device_param->device_id + 1, source_file); + event_log_info (hashcat_ctx, NULL); + #endif + } + #endif - if (module_ctx->module_extra_buffer_size != MODULE_DEFAULT) + if (device_param->is_opencl == true) { - const u64 extra_buffer_size = module_ctx->module_extra_buffer_size (hashconfig, user_options, user_options_extra, hashes, device_param); + if (hc_clCreateProgramWithBinary (hashcat_ctx, device_param->opencl_context, 1, &device_param->opencl_device, kernel_lengths, (const unsigned char **) kernel_sources, NULL, opencl_program) == -1) return false; - if (extra_buffer_size == (u64) -1) - { - event_log_error (hashcat_ctx, "Invalid extra buffer size."); + if (hc_clBuildProgram (hashcat_ctx, *opencl_program, 1, &device_param->opencl_device, build_options_buf, NULL, NULL) == -1) return false; + } + } - backend_extra_size_warning++; + hcfree (kernel_sources[0]); - device_param->skipped_warning = true; - continue; - } + return true; +} - device_param->extra_buffer_size = extra_buffer_size; +int backend_session_begin (hashcat_ctx_t *hashcat_ctx) +{ + const bitmap_ctx_t *bitmap_ctx = hashcat_ctx->bitmap_ctx; + const folder_config_t *folder_config = hashcat_ctx->folder_config; + const hashconfig_t *hashconfig = hashcat_ctx->hashconfig; + const hashes_t *hashes = hashcat_ctx->hashes; + const module_ctx_t *module_ctx = hashcat_ctx->module_ctx; + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + const straight_ctx_t *straight_ctx = hashcat_ctx->straight_ctx; + const user_options_extra_t *user_options_extra = hashcat_ctx->user_options_extra; + const user_options_t *user_options = hashcat_ctx->user_options; - // for the size we actually allocate we need to cheat a bit in order to make it more easy for plugin developer. - // - // we will divide this size by 4 to workaround opencl limitation. - // this collides with a theoretical scenario (like -n1 -T1) where there's only one workitem, - // because inside the kernel the target buffer is selected by workitem_id / 4. - // but the maximum size of the buffer would be only 1/4 of what is needed -> overflow. - // - // to workaround this we make sure that there's always a full buffer in each of the 4 allocated buffers available. + if (backend_ctx->enabled == false) return 0; - const u64 kernel_power_max = ((hashconfig->opts_type & OPTS_TYPE_MP_MULTI_DISABLE) ? 
1 : device_param->device_processors) * device_param->kernel_threads_max * device_param->kernel_accel_max; + u64 size_total_host_all = 0; - const u64 extra_buffer_size_one = extra_buffer_size / kernel_power_max; + u32 hardware_power_all = 0; - size_extra_buffer = extra_buffer_size + (extra_buffer_size_one * 4); - } + int backend_memory_hit_warnings = 0; + int backend_runtime_skip_warnings = 0; + int backend_kernel_build_warnings = 0; + int backend_kernel_create_warnings = 0; + int backend_kernel_accel_warnings = 0; + int backend_extra_size_warning = 0; - // kern type + backend_ctx->memory_hit_warning = false; + backend_ctx->runtime_skip_warning = false; + backend_ctx->kernel_build_warning = false; + backend_ctx->kernel_create_warning = false; + backend_ctx->kernel_accel_warnings = false; + backend_ctx->extra_size_warning = false; + backend_ctx->mixed_warnings = false; - u32 kern_type = hashconfig->kern_type; + for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++) + { + /** + * host buffer + */ - if (module_ctx->module_kern_type_dynamic != MODULE_DEFAULT) - { - if (user_options->benchmark == true) - { - } - else - { - void *digests_buf = hashes->digests_buf; - salt_t *salts_buf = hashes->salts_buf; - void *esalts_buf = hashes->esalts_buf; - void *hook_salts_buf = hashes->hook_salts_buf; - hashinfo_t **hash_info = hashes->hash_info; + hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx]; - hashinfo_t *hash_info_ptr = NULL; + if (device_param->skipped == true) continue; - if (hash_info) hash_info_ptr = hash_info[0]; + EVENT_DATA (EVENT_BACKEND_DEVICE_INIT_PRE, &backend_devices_idx, sizeof (int)); - kern_type = (u32) module_ctx->module_kern_type_dynamic (hashconfig, digests_buf, salts_buf, esalts_buf, hook_salts_buf, hash_info_ptr); - } - } + const int device_id = device_param->device_id; - // built options + /** + * module depending checks + */ - const size_t build_options_sz = 4096; + device_param->skipped_warning = false; - char *build_options_buf = (char *) hcmalloc (build_options_sz); + if (module_ctx->module_unstable_warning != MODULE_DEFAULT) + { + const bool unstable_warning = module_ctx->module_unstable_warning (hashconfig, user_options, user_options_extra, device_param); - int build_options_len = 0; + if ((unstable_warning == true) && (user_options->force == false)) + { + char runtime_name[7]; - if ((device_param->is_cuda == true) || (device_param->is_hip == true)) - { - // using a path with a space will break nvrtc_make_options_array_from_string() - // we add it to options array in a clean way later + memset (runtime_name, 0, sizeof (runtime_name)); - build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D KERNEL_STATIC "); - } - else - { - // tested on windows, linux, apple intel, apple silicon - // when is builded with cygwin and msys, cpath_real doesn't work - #if defined (_WIN) || defined (__CYGWIN__) || defined (__MSYS__) - build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D KERNEL_STATIC -D INCLUDE_PATH=\"%s\" ", "OpenCL"); - #else - build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D KERNEL_STATIC -D INCLUDE_PATH=\"%s\" ", folder_config->cpath_real); - #endif + if (device_param->is_cuda == true) memcpy (runtime_name, "CUDA", 4); + if (device_param->is_hip == true) memcpy (runtime_name, "HIP", 3); + #if defined 
(__APPLE__) + if (device_param->is_metal == true) memcpy (runtime_name, "Metal", 5); + #endif + if (device_param->is_opencl == true) memcpy (runtime_name, "OpenCL", 6); - #if defined (__APPLE__) - if (is_apple_silicon() == true) - { - build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D IS_APPLE_SILICON "); - } - #endif - } + event_log_warning (hashcat_ctx, "* Device #%u: Skipping (hash-mode %u)", device_id + 1, hashconfig->hash_mode); + event_log_warning (hashcat_ctx, " This is due to a known %s runtime and/or device driver issue (not a hashcat issue)", runtime_name); + event_log_warning (hashcat_ctx, " You can use --force to override, but do not report related errors."); + event_log_warning (hashcat_ctx, NULL); - /* currently disabled, hangs NEO drivers since 20.09. - was required for NEO driver 20.08 to workaround the same issue! - we go with the latest version + backend_runtime_skip_warnings++; - if (device_param->is_opencl == true) - { - if (device_param->use_opencl12 == true) - { - build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-cl-std=CL1.2 "); - } - else if (device_param->use_opencl20 == true) - { - build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-cl-std=CL2.0 "); - } - else if (device_param->use_opencl21 == true) - { - build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-cl-std=CL2.1 "); + device_param->skipped_warning = true; + continue; } } - */ - // we don't have sm_* on vendors not NV but it doesn't matter + /** + * tuning db + */ - #if defined (DEBUG) - build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D LOCAL_MEM_TYPE=%d -D VENDOR_ID=%u -D CUDA_ARCH=%u -D HAS_ADD=%u -D HAS_ADDC=%u -D HAS_SUB=%u -D HAS_SUBC=%u -D HAS_VADD=%u -D HAS_VADDC=%u -D HAS_VADD_CO=%u -D HAS_VADDC_CO=%u -D HAS_VSUB=%u -D HAS_VSUBB=%u -D HAS_VSUB_CO=%u -D HAS_VSUBB_CO=%u -D HAS_VPERM=%u -D HAS_VADD3=%u -D HAS_VBFE=%u -D HAS_BFE=%u -D HAS_LOP3=%u -D HAS_MOV64=%u -D HAS_PRMT=%u -D VECT_SIZE=%d -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D ATTACK_EXEC=%u -D ATTACK_KERN=%u -D ATTACK_MODE=%u ", device_param->device_local_mem_type, device_param->opencl_platform_vendor_id, (device_param->sm_major * 100) + (device_param->sm_minor * 10), device_param->has_add, device_param->has_addc, device_param->has_sub, device_param->has_subc, device_param->has_vadd, device_param->has_vaddc, device_param->has_vadd_co, device_param->has_vaddc_co, device_param->has_vsub, device_param->has_vsubb, device_param->has_vsub_co, device_param->has_vsubb_co, device_param->has_vperm, device_param->has_vadd3, device_param->has_vbfe, device_param->has_bfe, device_param->has_lop3, device_param->has_mov64, device_param->has_prmt, device_param->vector_width, (u32) device_param->opencl_device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type, hashconfig->attack_exec, user_options_extra->attack_kern, user_options->attack_mode); - #else - build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D LOCAL_MEM_TYPE=%d -D VENDOR_ID=%u -D CUDA_ARCH=%u -D HAS_ADD=%u -D HAS_ADDC=%u -D HAS_SUB=%u -D HAS_SUBC=%u -D HAS_VADD=%u -D HAS_VADDC=%u -D HAS_VADD_CO=%u -D 
HAS_VADDC_CO=%u -D HAS_VSUB=%u -D HAS_VSUBB=%u -D HAS_VSUB_CO=%u -D HAS_VSUBB_CO=%u -D HAS_VPERM=%u -D HAS_VADD3=%u -D HAS_VBFE=%u -D HAS_BFE=%u -D HAS_LOP3=%u -D HAS_MOV64=%u -D HAS_PRMT=%u -D VECT_SIZE=%d -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D ATTACK_EXEC=%u -D ATTACK_KERN=%u -D ATTACK_MODE=%u -w ", device_param->device_local_mem_type, device_param->opencl_platform_vendor_id, (device_param->sm_major * 100) + (device_param->sm_minor * 10), device_param->has_add, device_param->has_addc, device_param->has_sub, device_param->has_subc, device_param->has_vadd, device_param->has_vaddc, device_param->has_vadd_co, device_param->has_vaddc_co, device_param->has_vsub, device_param->has_vsubb, device_param->has_vsub_co, device_param->has_vsubb_co, device_param->has_vperm, device_param->has_vadd3, device_param->has_vbfe, device_param->has_bfe, device_param->has_lop3, device_param->has_mov64, device_param->has_prmt, device_param->vector_width, (u32) device_param->opencl_device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type, hashconfig->attack_exec, user_options_extra->attack_kern, user_options->attack_mode); - #endif + if (module_ctx->module_extra_tuningdb_block != MODULE_DEFAULT) + { + const char *extra_tuningdb_block = module_ctx->module_extra_tuningdb_block (hashconfig, user_options, user_options_extra); - build_options_buf[build_options_len] = 0; + char *lines_buf = hcstrdup (extra_tuningdb_block); - /* - if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU) - { - if (device_param->opencl_platform_vendor_id == VENDOR_ID_INTEL_SDK) - { - strncat (build_options_buf, " -cl-opt-disable", 16); - } - } - */ + char *saveptr = NULL; - #if defined (DEBUG) - if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: build_options '%s'", device_id + 1, build_options_buf); - #endif + char *next = strtok_r (lines_buf, "\n", &saveptr); - /** - * device_name_chksum_amp_mp - */ + int line_num = 0; - char device_name_chksum_amp_mp[HCBUFSIZ_TINY] = { 0 }; + do + { + line_num++; - const size_t dnclen_amp_mp = snprintf (device_name_chksum_amp_mp, HCBUFSIZ_TINY, "%d-%d-%d-%d-%u-%s-%s-%s-%u", - backend_ctx->comptime, - backend_ctx->cuda_driver_version, - backend_ctx->hip_runtimeVersion, - device_param->is_opencl, - device_param->opencl_platform_vendor_id, - device_param->device_name, - device_param->opencl_device_version, - device_param->opencl_driver_version, - (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : device_param->kernel_threads_max); + const size_t line_len = strlen (next); - md5_ctx_t md5_ctx; + if (line_len == 0) continue; - md5_init (&md5_ctx); - md5_update (&md5_ctx, (u32 *) device_name_chksum_amp_mp, dnclen_amp_mp); - md5_final (&md5_ctx); + if (next[0] == '#') continue; - snprintf (device_name_chksum_amp_mp, HCBUFSIZ_TINY, "%08x", md5_ctx.h[0]); + tuning_db_process_line (hashcat_ctx, next, line_num); - /** - * kernel cache - */ + } while ((next = strtok_r ((char *) NULL, "\n", &saveptr)) != NULL); - bool cache_disable = false; + hcfree (lines_buf); - // Seems to be completely broken on Apple + (Intel?) 
CPU - // To reproduce set cache_disable to false and run benchmark -b + // todo: print loaded 'cnt' message - if (device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) - { - if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU) - { - cache_disable = true; - } - } + // sort the database - if (module_ctx->module_jit_cache_disable != MODULE_DEFAULT) - { - cache_disable = module_ctx->module_jit_cache_disable (hashconfig, user_options, user_options_extra, hashes, device_param); + tuning_db_t *tuning_db = hashcat_ctx->tuning_db; + + qsort (tuning_db->alias_buf, tuning_db->alias_cnt, sizeof (tuning_db_alias_t), sort_by_tuning_db_alias); + qsort (tuning_db->entry_buf, tuning_db->entry_cnt, sizeof (tuning_db_entry_t), sort_by_tuning_db_entry); } - #if defined (DEBUG) - // https://github.com/hashcat/hashcat/issues/2750 - cache_disable = true; - #endif + // vector_width - /** - * shared kernel with no hashconfig dependencies - */ + int vector_width = 0; + if (user_options->backend_vector_width_chgd == false) { - /** - * kernel shared source filename - */ - - char source_file[256] = { 0 }; + // tuning db - generate_source_kernel_shared_filename (folder_config->shared_dir, source_file); + tuning_db_entry_t *tuningdb_entry; - if (hc_path_read (source_file) == false) + if (user_options->slow_candidates == true) { - event_log_error (hashcat_ctx, "%s: %s", source_file, strerror (errno)); - - return -1; + tuningdb_entry = tuning_db_search (hashcat_ctx, device_param->device_name, device_param->opencl_device_type, 0, hashconfig->hash_mode); } - - /** - * kernel shared cached filename - */ - - char cached_file[256] = { 0 }; - - generate_cached_kernel_shared_filename (folder_config->cache_dir, device_name_chksum_amp_mp, cached_file); - - const bool rc_load_kernel = load_kernel (hashcat_ctx, device_param, "shared_kernel", source_file, cached_file, build_options_buf, cache_disable, &device_param->opencl_program_shared, &device_param->cuda_module_shared, &device_param->hip_module_shared); - - if (rc_load_kernel == false) + else { - event_log_error (hashcat_ctx, "* Device #%u: Kernel %s build failed.", device_param->device_id + 1, source_file); - - return -1; + tuningdb_entry = tuning_db_search (hashcat_ctx, device_param->device_name, device_param->opencl_device_type, user_options->attack_mode, hashconfig->hash_mode); } - if (device_param->is_cuda == true) + if (tuningdb_entry == NULL || tuningdb_entry->vector_width == -1) { - // GPU memset - - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_memset, device_param->cuda_module_shared, "gpu_memset") == -1) + if (hashconfig->opti_type & OPTI_TYPE_USES_BITS_64) { - event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_memset"); - - backend_kernel_create_warnings++; - - device_param->skipped_warning = true; - continue; - } - - if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_memset, &device_param->kernel_wgs_memset) == -1) return -1; - - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_memset, &device_param->kernel_local_mem_size_memset) == -1) return -1; + if (device_param->is_cuda == true) + { + // cuda does not support this query - device_param->kernel_dynamic_local_mem_size_memset = device_param->device_local_mem_size - device_param->kernel_local_mem_size_memset; + vector_width = 1; + } - device_param->kernel_preferred_wgs_multiple_memset = device_param->cuda_warp_size; + if (device_param->is_hip == true) + { + // hip does not support 
this query - // GPU bzero + vector_width = 1; + } - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_bzero, device_param->cuda_module_shared, "gpu_bzero") == -1) - { - event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_bzero"); + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + // Metal does not support this query - backend_kernel_create_warnings++; + vector_width = 1; + } + #endif - device_param->skipped_warning = true; - continue; + if (device_param->is_opencl == true) + { + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, sizeof (vector_width), &vector_width, NULL) == -1) + { + device_param->skipped = true; + continue; + } + } } + else + { + if (device_param->is_cuda == true) + { + // cuda does not support this query - if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_bzero, &device_param->kernel_wgs_bzero) == -1) return -1; - - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_bzero, &device_param->kernel_local_mem_size_bzero) == -1) return -1; - - device_param->kernel_dynamic_local_mem_size_bzero = device_param->device_local_mem_size - device_param->kernel_local_mem_size_bzero; + vector_width = 1; + } - device_param->kernel_preferred_wgs_multiple_bzero = device_param->cuda_warp_size; + if (device_param->is_hip == true) + { + // hip does not support this query - // GPU autotune init + vector_width = 1; + } - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_atinit, device_param->cuda_module_shared, "gpu_atinit") == -1) - { - event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_atinit"); + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + // Metal does not support this query - backend_kernel_create_warnings++; + vector_width = 1; + } + #endif - device_param->skipped_warning = true; - continue; + if (device_param->is_opencl == true) + { + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, sizeof (vector_width), &vector_width, NULL) == -1) + { + device_param->skipped = true; + continue; + } + } } + } + else + { + vector_width = (cl_uint) tuningdb_entry->vector_width; + } + } + else + { + vector_width = user_options->backend_vector_width; + } - if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_atinit, &device_param->kernel_wgs_atinit) == -1) return -1; - - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_atinit, &device_param->kernel_local_mem_size_atinit) == -1) return -1; + // We can't have SIMD in kernels where we have an unknown final password length + // It also turns out that pure kernels (that have a higher register pressure) + // actually run faster on scalar GPU (like 1080) without SIMD - device_param->kernel_dynamic_local_mem_size_atinit = device_param->device_local_mem_size - device_param->kernel_local_mem_size_atinit; + if ((hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL) == 0) + { + if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) + { + vector_width = 1; + } + } - device_param->kernel_preferred_wgs_multiple_atinit = device_param->cuda_warp_size; + if (user_options->attack_mode == ATTACK_MODE_ASSOCIATION) + { + // not working in this mode because the GID does not align with password candidate count + // and if it cracks, it will crack the same hash twice, running into segfaults - // CL_rc = 
hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_atinit, 0, sizeof (cl_mem), device_param->kernel_params_atinit[0]); if (CL_rc == -1) return -1; - // CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_atinit, 1, sizeof (cl_ulong), device_param->kernel_params_atinit[1]); if (CL_rc == -1) return -1; + vector_width = 1; + } - // GPU decompress + if (vector_width > 16) vector_width = 16; - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_decompress, device_param->cuda_module_shared, "gpu_decompress") == -1) - { - event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_decompress"); + device_param->vector_width = vector_width; - backend_kernel_create_warnings++; + /** + * kernel accel and loops tuning db adjustment + */ - device_param->skipped_warning = true; - continue; - } + device_param->kernel_accel_min = hashconfig->kernel_accel_min; + device_param->kernel_accel_max = hashconfig->kernel_accel_max; + device_param->kernel_loops_min = hashconfig->kernel_loops_min; + device_param->kernel_loops_max = hashconfig->kernel_loops_max; + device_param->kernel_threads_min = hashconfig->kernel_threads_min; + device_param->kernel_threads_max = hashconfig->kernel_threads_max; - if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_decompress, &device_param->kernel_wgs_decompress) == -1) return -1; + tuning_db_entry_t *tuningdb_entry = NULL; - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_decompress, &device_param->kernel_local_mem_size_decompress) == -1) return -1; + if (user_options->slow_candidates == true) + { + tuningdb_entry = tuning_db_search (hashcat_ctx, device_param->device_name, device_param->opencl_device_type, 0, hashconfig->hash_mode); + } + else + { + tuningdb_entry = tuning_db_search (hashcat_ctx, device_param->device_name, device_param->opencl_device_type, user_options->attack_mode, hashconfig->hash_mode); + } - device_param->kernel_dynamic_local_mem_size_decompress = device_param->device_local_mem_size - device_param->kernel_local_mem_size_decompress; + // user commandline option override tuning db + // but both have to stay inside the boundaries of the module - device_param->kernel_preferred_wgs_multiple_decompress = device_param->cuda_warp_size; + if (user_options->kernel_accel_chgd == true) + { + const u32 _kernel_accel = user_options->kernel_accel; - // GPU utf8 to utf16le conversion + if ((_kernel_accel >= device_param->kernel_accel_min) && (_kernel_accel <= device_param->kernel_accel_max)) + { + device_param->kernel_accel_min = _kernel_accel; + device_param->kernel_accel_max = _kernel_accel; + } + } + else + { + if (tuningdb_entry != NULL) + { + const u32 _kernel_accel = tuningdb_entry->kernel_accel; - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_utf8toutf16le, device_param->cuda_module_shared, "gpu_utf8_to_utf16") == -1) + if (_kernel_accel == (u32) -1) // native, makes sense if OPTS_TYPE_MP_MULTI_DISABLE is used { - event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_utf8_to_utf16"); - - backend_kernel_create_warnings++; + if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) + { + if (module_ctx->module_extra_tuningdb_block != MODULE_DEFAULT) + { + event_log_warning (hashcat_ctx, "ATTENTION! 
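Editor's note on the vector-width logic added above: an explicit --backend-vector-width wins; otherwise the tuning database is consulted; otherwise the native width is queried from OpenCL (CUDA, HIP and Metal have no such query and fall back to 1). The result is then forced to 1 for pure kernels on GPUs, forced to 1 in association mode, and capped at 16. A compressed sketch of that decision order, with stand-in inputs instead of the real device queries and without the 32/64-bit distinction:

#include <stdio.h>

#define VECT_MAX 16

/* simplified inputs; in hashcat these come from user_options, the tuning db
   and clGetDeviceInfo (CL_DEVICE_NATIVE_VECTOR_WIDTH_*) */
typedef struct
{
  int user_width;        // 0 = not set on the command line
  int tuningdb_width;    // -1 = no entry / "native"
  int native_width;      // what the runtime reports (1 for CUDA/HIP/Metal)
  int is_gpu;
  int optimized_kernel;  // OPTI_TYPE_OPTIMIZED_KERNEL set?
  int association_mode;  // attack mode 9?
} width_in_t;

static int select_vector_width (const width_in_t *in)
{
  int w;

  if      (in->user_width)            w = in->user_width;
  else if (in->tuningdb_width != -1)  w = in->tuningdb_width;
  else                                w = in->native_width;

  // pure kernels have higher register pressure and run better scalar on GPUs
  if (in->is_gpu && !in->optimized_kernel) w = 1;

  // association attack: the GID must map 1:1 to password candidates
  if (in->association_mode) w = 1;

  if (w > VECT_MAX) w = VECT_MAX;

  return w;
}

int main (void)
{
  width_in_t in = { 0, -1, 4, 1, 0, 0 };

  printf ("vector width = %d\n", select_vector_width (&in)); // -> 1 (pure kernel on GPU)

  return 0;
}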
This hash-mode requires manual tuning to achieve full performance."); + event_log_warning (hashcat_ctx, "The loss of performance can be greater than 100%% without manual tuning."); + event_log_warning (hashcat_ctx, NULL); + event_log_warning (hashcat_ctx, "This warning message disappears after a definition for the installed"); + event_log_warning (hashcat_ctx, "compute-device in this computer has been added to either list:"); + event_log_warning (hashcat_ctx, "- src/modules/module_%05d.c", hashconfig->hash_mode); + event_log_warning (hashcat_ctx, "- hashcat.hctune"); + event_log_warning (hashcat_ctx, NULL); + event_log_warning (hashcat_ctx, "For instructions on tuning, see src/modules/module_%05d.c", hashconfig->hash_mode); + event_log_warning (hashcat_ctx, "Also, consider sending a PR to Hashcat Master so that other users can benefit from your work."); + event_log_warning (hashcat_ctx, NULL); + } + } - device_param->skipped_warning = true; - continue; + device_param->kernel_accel_min = device_param->device_processors; + device_param->kernel_accel_max = device_param->device_processors; } + else + { + if (_kernel_accel) + { + if ((_kernel_accel >= device_param->kernel_accel_min) && (_kernel_accel <= device_param->kernel_accel_max)) + { + device_param->kernel_accel_min = _kernel_accel; + device_param->kernel_accel_max = _kernel_accel; + } + } + } + } + } - if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_utf8toutf16le, &device_param->kernel_wgs_utf8toutf16le) == -1) return -1; - - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_utf8toutf16le, &device_param->kernel_local_mem_size_utf8toutf16le) == -1) return -1; - - device_param->kernel_dynamic_local_mem_size_utf8toutf16le = device_param->device_local_mem_size - device_param->kernel_local_mem_size_utf8toutf16le; + if (user_options->kernel_loops_chgd == true) + { + const u32 _kernel_loops = user_options->kernel_loops; - device_param->kernel_preferred_wgs_multiple_utf8toutf16le = device_param->cuda_warp_size; + if ((_kernel_loops >= device_param->kernel_loops_min) && (_kernel_loops <= device_param->kernel_loops_max)) + { + device_param->kernel_loops_min = _kernel_loops; + device_param->kernel_loops_max = _kernel_loops; } - - if (device_param->is_hip == true) + } + else + { + if (tuningdb_entry != NULL) { - // GPU memset + u32 _kernel_loops = tuningdb_entry->kernel_loops; - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_memset, device_param->hip_module_shared, "gpu_memset") == -1) + if (_kernel_loops) { - event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_memset"); - - backend_kernel_create_warnings++; + if (user_options->workload_profile == 1) + { + _kernel_loops = (_kernel_loops > 8) ? _kernel_loops / 8 : 1; + } + else if (user_options->workload_profile == 2) + { + _kernel_loops = (_kernel_loops > 4) ? 
_kernel_loops / 4 : 1; + } - device_param->skipped_warning = true; - continue; + if ((_kernel_loops >= device_param->kernel_loops_min) && (_kernel_loops <= device_param->kernel_loops_max)) + { + device_param->kernel_loops_min = _kernel_loops; + device_param->kernel_loops_max = _kernel_loops; + } } + } + } - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_memset, &device_param->kernel_wgs_memset) == -1) return -1; - - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_memset, &device_param->kernel_local_mem_size_memset) == -1) return -1; + // there's no thread column in tuning db, stick to commandline if defined - device_param->kernel_dynamic_local_mem_size_memset = device_param->device_local_mem_size - device_param->kernel_local_mem_size_memset; + if (user_options->kernel_threads_chgd == true) + { + const u32 _kernel_threads = user_options->kernel_threads; - device_param->kernel_preferred_wgs_multiple_memset = device_param->hip_warp_size; + if ((_kernel_threads >= device_param->kernel_threads_min) && (_kernel_threads <= device_param->kernel_threads_max)) + { + device_param->kernel_threads_min = _kernel_threads; + device_param->kernel_threads_max = _kernel_threads; + } + } - // GPU bzero + if (user_options->slow_candidates == true) + { + } + else + { + // we have some absolute limits for fast hashes (because of limit constant memory), make sure not to overstep - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_bzero, device_param->hip_module_shared, "gpu_bzero") == -1) + if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + { + if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT) { - event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_bzero"); - - backend_kernel_create_warnings++; - - device_param->skipped_warning = true; - continue; + device_param->kernel_loops_min = MIN (device_param->kernel_loops_min, KERNEL_RULES); + device_param->kernel_loops_max = MIN (device_param->kernel_loops_max, KERNEL_RULES); } - - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_bzero, &device_param->kernel_wgs_bzero) == -1) return -1; - - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_bzero, &device_param->kernel_local_mem_size_bzero) == -1) return -1; - - device_param->kernel_dynamic_local_mem_size_bzero = device_param->device_local_mem_size - device_param->kernel_local_mem_size_bzero; - - device_param->kernel_preferred_wgs_multiple_bzero = device_param->hip_warp_size; - - // GPU autotune init - - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_atinit, device_param->hip_module_shared, "gpu_atinit") == -1) + else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI) { - event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_atinit"); - - backend_kernel_create_warnings++; - - device_param->skipped_warning = true; - continue; + device_param->kernel_loops_min = MIN (device_param->kernel_loops_min, KERNEL_COMBS); + device_param->kernel_loops_max = MIN (device_param->kernel_loops_max, KERNEL_COMBS); } - - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_atinit, &device_param->kernel_wgs_atinit) == -1) return -1; - - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_atinit, &device_param->kernel_local_mem_size_atinit) == -1) return -1; - - device_param->kernel_dynamic_local_mem_size_atinit = device_param->device_local_mem_size - 
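Editor's note: the tuning-db handling above follows one rule throughout. A command-line or database value is only honoured if it falls inside the [min, max] window the module declared, and a kernel_loops value taken from the database is additionally divided by 8 or 4 (never below 1) for workload profiles 1 and 2 before that check. A small hypothetical helper pair showing the same clamping idea (the names are mine, not hashcat's):

#include <stdio.h>

typedef unsigned int u32;

/* pin the window to a requested value only when it lies inside the module's bounds */
static void pin_if_in_bounds (u32 requested, u32 *min, u32 *max)
{
  if ((requested >= *min) && (requested <= *max))
  {
    *min = requested;
    *max = requested;
  }
}

/* scale a tuning-db kernel_loops value down for reduced workload profiles */
static u32 scale_loops_for_profile (u32 loops, int workload_profile)
{
  if (workload_profile == 1) return (loops > 8) ? loops / 8 : 1;
  if (workload_profile == 2) return (loops > 4) ? loops / 4 : 1;

  return loops;
}

int main (void)
{
  u32 loops_min = 1, loops_max = 1024;

  u32 db_loops = scale_loops_for_profile (1000, 2);    // -> 250

  pin_if_in_bounds (db_loops, &loops_min, &loops_max); // window becomes [250, 250]

  printf ("kernel_loops window: [%u, %u]\n", loops_min, loops_max);

  return 0;
}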
device_param->kernel_local_mem_size_atinit; - - device_param->kernel_preferred_wgs_multiple_atinit = device_param->hip_warp_size; - - // CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_atinit, 0, sizeof (cl_mem), device_param->kernel_params_atinit[0]); if (CL_rc == -1) return -1; - // CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_atinit, 1, sizeof (cl_ulong), device_param->kernel_params_atinit[1]); if (CL_rc == -1) return -1; - - // GPU decompress - - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_decompress, device_param->hip_module_shared, "gpu_decompress") == -1) + else if (user_options_extra->attack_kern == ATTACK_KERN_BF) { - event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_decompress"); - - backend_kernel_create_warnings++; - - device_param->skipped_warning = true; - continue; + device_param->kernel_loops_min = MIN (device_param->kernel_loops_min, KERNEL_BFS); + device_param->kernel_loops_max = MIN (device_param->kernel_loops_max, KERNEL_BFS); } + } + } - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_decompress, &device_param->kernel_wgs_decompress) == -1) return -1; - - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_decompress, &device_param->kernel_local_mem_size_decompress) == -1) return -1; - - device_param->kernel_dynamic_local_mem_size_decompress = device_param->device_local_mem_size - device_param->kernel_local_mem_size_decompress; - - device_param->kernel_preferred_wgs_multiple_decompress = device_param->hip_warp_size; + device_param->kernel_loops_min_sav = device_param->kernel_loops_min; + device_param->kernel_loops_max_sav = device_param->kernel_loops_max; - // GPU utf8 to utf16le conversion + /** + * device properties + */ - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_utf8toutf16le, device_param->hip_module_shared, "gpu_utf8_to_utf16") == -1) - { - event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_utf8_to_utf16"); + const u32 device_processors = device_param->device_processors; - backend_kernel_create_warnings++; + /** + * device threads + */ - device_param->skipped_warning = true; - continue; - } - - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_utf8toutf16le, &device_param->kernel_wgs_utf8toutf16le) == -1) return -1; - - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_utf8toutf16le, &device_param->kernel_local_mem_size_utf8toutf16le) == -1) return -1; - - device_param->kernel_dynamic_local_mem_size_utf8toutf16le = device_param->device_local_mem_size - device_param->kernel_local_mem_size_utf8toutf16le; + if (hashconfig->opts_type & OPTS_TYPE_MAXIMUM_THREADS) + { + // default for all, because the else branch is doing the same (nothing), but is actually used as a way to + // disable the default native thread configuration for HIP + // this can have negative performance if not tested on multiple different gpu architectures + } + else if (hashconfig->opts_type & OPTS_TYPE_NATIVE_THREADS) + { + u32 native_threads = 0; - device_param->kernel_preferred_wgs_multiple_utf8toutf16le = device_param->hip_warp_size; + if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU) + { + native_threads = 1; + } + else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) + { + native_threads = device_param->kernel_preferred_wgs_multiple; + } + else + { + // abort? 
} - if (device_param->is_opencl == true) + if ((native_threads >= device_param->kernel_threads_min) && (native_threads <= device_param->kernel_threads_max)) { - // GPU memset + device_param->kernel_threads_min = native_threads; + device_param->kernel_threads_max = native_threads; + } + else + { + // abort? + } + } + else + { + if (device_param->is_hip == true) + { + const u32 native_threads = device_param->kernel_preferred_wgs_multiple; - if (hc_clCreateKernel (hashcat_ctx, device_param->opencl_program_shared, "gpu_memset", &device_param->opencl_kernel_memset) == -1) + if ((native_threads >= device_param->kernel_threads_min) && (native_threads <= device_param->kernel_threads_max)) { - event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_memset"); - - backend_kernel_create_warnings++; - - device_param->skipped_warning = true; - continue; + device_param->kernel_threads_min = native_threads; + device_param->kernel_threads_max = native_threads; } - - if (get_opencl_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_memset, &device_param->kernel_wgs_memset) == -1) return -1; - - if (get_opencl_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_memset, &device_param->kernel_local_mem_size_memset) == -1) return -1; - - if (get_opencl_kernel_dynamic_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_memset, &device_param->kernel_dynamic_local_mem_size_memset) == -1) return -1; - - if (get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_memset, &device_param->kernel_preferred_wgs_multiple_memset) == -1) return -1; - - // GPU bzero - - if (hc_clCreateKernel (hashcat_ctx, device_param->opencl_program_shared, "gpu_bzero", &device_param->opencl_kernel_bzero) == -1) + else { - event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_bzero"); - - backend_kernel_create_warnings++; - - device_param->skipped_warning = true; - continue; + // abort? 
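Editor's note: for hash modes flagged OPTS_TYPE_NATIVE_THREADS, the code above pins the thread count to 1 on CPUs and to the device's preferred work-group-size multiple (the warp/wavefront width) on GPUs, and only if that value lies inside the module's declared thread window; HIP devices get the same preferred-multiple treatment in the default branch. A condensed, self-contained sketch of that selection (the struct is a stand-in, not hc_device_param_t):

#include <stdio.h>

typedef unsigned int u32;

typedef struct
{
  int is_cpu;
  int is_gpu;
  u32 preferred_wgs_multiple; // warp size (NV), wavefront size (AMD), ...
  u32 threads_min;
  u32 threads_max;
} dev_t;

/* pick a "native" thread count and pin the window to it when it fits */
static void apply_native_threads (dev_t *d)
{
  u32 native = 0;

  if      (d->is_cpu) native = 1;
  else if (d->is_gpu) native = d->preferred_wgs_multiple;
  else return; // unknown device type: leave the window untouched

  if ((native >= d->threads_min) && (native <= d->threads_max))
  {
    d->threads_min = native;
    d->threads_max = native;
  }
}

int main (void)
{
  dev_t gpu = { 0, 1, 32, 1, 1024 };

  apply_native_threads (&gpu);

  printf ("threads: [%u, %u]\n", gpu.threads_min, gpu.threads_max); // [32, 32]

  return 0;
}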
} + } + } - if (get_opencl_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_bzero, &device_param->kernel_wgs_bzero) == -1) return -1; - - if (get_opencl_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_bzero, &device_param->kernel_local_mem_size_bzero) == -1) return -1; - - if (get_opencl_kernel_dynamic_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_bzero, &device_param->kernel_dynamic_local_mem_size_bzero) == -1) return -1; - - if (get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_bzero, &device_param->kernel_preferred_wgs_multiple_bzero) == -1) return -1; - - // apple hack, but perhaps also an alternative for other vendors - - if (device_param->kernel_preferred_wgs_multiple == 0) device_param->kernel_preferred_wgs_multiple = device_param->kernel_preferred_wgs_multiple_bzero; - - // GPU autotune init - - if (hc_clCreateKernel (hashcat_ctx, device_param->opencl_program_shared, "gpu_atinit", &device_param->opencl_kernel_atinit) == -1) - { - event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_atinit"); + // this seems to work always - backend_kernel_create_warnings++; + if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU) + { + u32 native_threads = 1; - device_param->skipped_warning = true; - continue; - } + if ((native_threads >= device_param->kernel_threads_min) && (native_threads <= device_param->kernel_threads_max)) + { + device_param->kernel_threads_min = native_threads; + device_param->kernel_threads_max = native_threads; + } + } - if (get_opencl_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_atinit, &device_param->kernel_wgs_atinit) == -1) return -1; + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + // set some limits with Metal - if (get_opencl_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_atinit, &device_param->kernel_local_mem_size_atinit) == -1) return -1; + device_param->kernel_threads_max = 128; + device_param->kernel_loops_max = 1024; // autotune go over ... 
+ } + #endif - if (get_opencl_kernel_dynamic_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_atinit, &device_param->kernel_dynamic_local_mem_size_atinit) == -1) return -1; + /** + * create context for each device + */ - if (get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_atinit, &device_param->kernel_preferred_wgs_multiple_atinit) == -1) return -1; + if (device_param->is_cuda == true) + { + if (hc_cuCtxCreate (hashcat_ctx, &device_param->cuda_context, CU_CTX_SCHED_BLOCKING_SYNC, device_param->cuda_device) == -1) + { + device_param->skipped = true; + continue; + } - // GPU decompress + if (hc_cuCtxPushCurrent (hashcat_ctx, device_param->cuda_context) == -1) + { + device_param->skipped = true; + continue; + } + } - if (hc_clCreateKernel (hashcat_ctx, device_param->opencl_program_shared, "gpu_decompress", &device_param->opencl_kernel_decompress) == -1) - { - event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_decompress"); + if (device_param->is_hip == true) + { + if (hc_hipCtxCreate (hashcat_ctx, &device_param->hip_context, hipDeviceScheduleBlockingSync, device_param->hip_device) == -1) + { + device_param->skipped = true; + continue; + } - backend_kernel_create_warnings++; + if (hc_hipCtxPushCurrent (hashcat_ctx, device_param->hip_context) == -1) + { + device_param->skipped = true; + continue; + } + } - device_param->skipped_warning = true; - continue; - } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + /** + * create command-queue + */ - if (get_opencl_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_decompress, &device_param->kernel_wgs_decompress) == -1) return -1; + if (hc_mtlCreateCommandQueue (hashcat_ctx, device_param->metal_device, &device_param->metal_command_queue) == -1) + { + device_param->skipped = true; + continue; + } + } + #endif - if (get_opencl_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_decompress, &device_param->kernel_local_mem_size_decompress) == -1) return -1; + if (device_param->is_opencl == true) + { + /* + cl_context_properties properties[3]; - if (get_opencl_kernel_dynamic_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_decompress, &device_param->kernel_dynamic_local_mem_size_decompress) == -1) return -1; + properties[0] = CL_CONTEXT_PLATFORM; + properties[1] = (cl_context_properties) device_param->opencl_platform; + properties[2] = 0; - if (get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_decompress, &device_param->kernel_preferred_wgs_multiple_decompress) == -1) return -1; + CL_rc = hc_clCreateContext (hashcat_ctx, properties, 1, &device_param->opencl_device, NULL, NULL, &device_param->opencl_context); + */ - // GPU utf8 to utf16le conversion + if (hc_clCreateContext (hashcat_ctx, NULL, 1, &device_param->opencl_device, NULL, NULL, &device_param->opencl_context) == -1) + { + device_param->skipped = true; + continue; + } - if (hc_clCreateKernel (hashcat_ctx, device_param->opencl_program_shared, "gpu_utf8_to_utf16", &device_param->opencl_kernel_utf8toutf16le) == -1) - { - event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_utf8_to_utf16"); + /** + * create command-queue + */ - backend_kernel_create_warnings++; + // not supported with NV + // device_param->opencl_command_queue = hc_clCreateCommandQueueWithProperties (hashcat_ctx, 
device_param->opencl_device, NULL); - device_param->skipped_warning = true; - continue; - } + if (hc_clCreateCommandQueue (hashcat_ctx, device_param->opencl_context, device_param->opencl_device, CL_QUEUE_PROFILING_ENABLE, &device_param->opencl_command_queue) == -1) + { + device_param->skipped = true; + continue; + } + } - if (get_opencl_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_utf8toutf16le, &device_param->kernel_wgs_utf8toutf16le) == -1) return -1; + /** + * create stream for CUDA devices + */ - if (get_opencl_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_utf8toutf16le, &device_param->kernel_local_mem_size_utf8toutf16le) == -1) return -1; + if (device_param->is_cuda == true) + { + if (hc_cuStreamCreate (hashcat_ctx, &device_param->cuda_stream, CU_STREAM_DEFAULT) == -1) + { + device_param->skipped = true; + continue; + } + } - if (get_opencl_kernel_dynamic_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_utf8toutf16le, &device_param->kernel_dynamic_local_mem_size_utf8toutf16le) == -1) return -1; + /** + * create stream for HIP devices + */ - if (get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_utf8toutf16le, &device_param->kernel_preferred_wgs_multiple_utf8toutf16le) == -1) return -1; + if (device_param->is_hip == true) + { + if (hc_hipStreamCreate (hashcat_ctx, &device_param->hip_stream, hipStreamDefault) == -1) + { + device_param->skipped = true; + continue; } } /** - * main kernel + * create events for CUDA devices */ + if (device_param->is_cuda == true) { - char *build_options_module_buf = (char *) hcmalloc (build_options_sz); - - int build_options_module_len = 0; + if (hc_cuEventCreate (hashcat_ctx, &device_param->cuda_event1, CU_EVENT_BLOCKING_SYNC) == -1) + { + device_param->skipped = true; + continue; + } - build_options_module_len += snprintf (build_options_module_buf + build_options_module_len, build_options_sz - build_options_module_len, "%s ", build_options_buf); + if (hc_cuEventCreate (hashcat_ctx, &device_param->cuda_event2, CU_EVENT_BLOCKING_SYNC) == -1) + { + device_param->skipped = true; + continue; + } - if (module_ctx->module_jit_build_options != MODULE_DEFAULT) + if (hc_cuEventCreate (hashcat_ctx, &device_param->cuda_event3, CU_EVENT_DISABLE_TIMING) == -1) { - char *jit_build_options = module_ctx->module_jit_build_options (hashconfig, user_options, user_options_extra, hashes, device_param); + device_param->skipped = true; + continue; + } + } - if (jit_build_options != NULL) - { - build_options_module_len += snprintf (build_options_module_buf + build_options_module_len, build_options_sz - build_options_module_len, "%s", jit_build_options); + /** + * create events for HIP devices + */ - // this is a bit ugly - // would be better to have the module return the value as value - - u32 fixed_local_size = 0; - - if (sscanf (jit_build_options, "-D FIXED_LOCAL_SIZE=%u", &fixed_local_size) == 1) - { - device_param->kernel_threads_min = fixed_local_size; - device_param->kernel_threads_max = fixed_local_size; - } - else - { - // kernels specific minimum needs to be set so that self-test wont fail - - if (sscanf (jit_build_options, "-D FIXED_LOCAL_SIZE_COMP=%u", &fixed_local_size) == 1) - { - device_param->kernel_threads_min = fixed_local_size; - // device_param->kernel_threads_max = fixed_local_size; - } - } - } + if (device_param->is_hip == true) + { + if (hc_hipEventCreate (hashcat_ctx, &device_param->hip_event1, hipEventBlockingSync) == -1) + { + 
device_param->skipped = true; + continue; } - build_options_module_buf[build_options_module_len] = 0; - - #if defined (DEBUG) - if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: build_options_module '%s'", device_id + 1, build_options_module_buf); - #endif - - /** - * device_name_chksum - */ + if (hc_hipEventCreate (hashcat_ctx, &device_param->hip_event2, hipEventBlockingSync) == -1) + { + device_param->skipped = true; + continue; + } - char device_name_chksum[HCBUFSIZ_TINY] = { 0 }; + if (hc_hipEventCreate (hashcat_ctx, &device_param->hip_event3, hipEventDisableTiming) == -1) + { + device_param->skipped = true; + continue; + } + } - // The kernel source can depend on some JiT compiler macros which themself depend on the attack_modes. - // ATM this is relevant only for ATTACK_MODE_ASSOCIATION which slightly modifies ATTACK_MODE_STRAIGHT kernels. + /** + * create input buffers on device : calculate size of fixed memory buffers + */ - const u32 extra_value = (user_options->attack_mode == ATTACK_MODE_ASSOCIATION) ? ATTACK_MODE_ASSOCIATION : ATTACK_MODE_NONE; + u64 size_root_css = SP_PW_MAX * sizeof (cs_t); + u64 size_markov_css = SP_PW_MAX * CHARSIZ * sizeof (cs_t); - const size_t dnclen = snprintf (device_name_chksum, HCBUFSIZ_TINY, "%d-%d-%d-%d-%u-%s-%s-%s-%d-%u-%u-%u-%s", - backend_ctx->comptime, - backend_ctx->cuda_driver_version, - backend_ctx->hip_runtimeVersion, - device_param->is_opencl, - device_param->opencl_platform_vendor_id, - device_param->device_name, - device_param->opencl_device_version, - device_param->opencl_driver_version, - device_param->vector_width, - hashconfig->kern_type, - extra_value, - (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : device_param->kernel_threads_max, - build_options_module_buf); + device_param->size_root_css = size_root_css; + device_param->size_markov_css = size_markov_css; - md5_ctx_t md5_ctx; + u64 size_results = sizeof (u32); - md5_init (&md5_ctx); - md5_update (&md5_ctx, (u32 *) device_name_chksum, dnclen); - md5_final (&md5_ctx); + device_param->size_results = size_results; - snprintf (device_name_chksum, HCBUFSIZ_TINY, "%08x", md5_ctx.h[0]); + u64 size_rules = (u64) straight_ctx->kernel_rules_cnt * sizeof (kernel_rule_t); + u64 size_rules_c = (u64) KERNEL_RULES * sizeof (kernel_rule_t); - /** - * kernel source filename - */ + device_param->size_rules = size_rules; + device_param->size_rules_c = size_rules_c; - char source_file[256] = { 0 }; + u64 size_plains = (u64) hashes->digests_cnt * sizeof (plain_t); + u64 size_salts = (u64) hashes->salts_cnt * sizeof (salt_t); + u64 size_esalts = (u64) hashes->digests_cnt * hashconfig->esalt_size; + u64 size_shown = (u64) hashes->digests_cnt * sizeof (u32); + u64 size_digests = (u64) hashes->digests_cnt * (u64) hashconfig->dgst_size; - generate_source_kernel_filename (user_options->slow_candidates, hashconfig->attack_exec, user_options_extra->attack_kern, kern_type, hashconfig->opti_type, folder_config->shared_dir, source_file); + device_param->size_plains = size_plains; + device_param->size_digests = size_digests; + device_param->size_shown = size_shown; + device_param->size_salts = size_salts; + device_param->size_esalts = size_esalts; - if (hc_path_read (source_file) == false) - { - event_log_error (hashcat_ctx, "%s: %s", source_file, strerror (errno)); + u64 size_combs = KERNEL_COMBS * sizeof (pw_t); + u64 size_bfs = KERNEL_BFS * sizeof (bf_t); + u64 size_tm = 32 * sizeof (bs_word_t); + u64 size_kernel_params = 1 * sizeof (kernel_param_t); 
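Editor's note: the buffer sizing above is plain arithmetic over counts supplied by the hash configuration and the loaded hashlist: per-digest structures scale with digests_cnt, per-salt structures with salts_cnt, rule buffers with the number of loaded rules, and the fixed scratch buffers (combs, bfs, tm, kernel params) with compile-time kernel limits. A self-contained sketch of the same calculations with made-up counts; the element sizes below are placeholders, not hashcat's real plain_t/salt_t/kernel_rule_t sizes:

#include <stdio.h>
#include <stdint.h>

typedef uint64_t u64;

/* placeholder element sizes standing in for plain_t, salt_t, kernel_rule_t ... */
#define SZ_PLAIN          sizeof (u64)
#define SZ_SALT           64
#define SZ_RULE           128
#define KERNEL_RULES_MAX  256   // on-device rule window (KERNEL_RULES in hashcat)

int main (void)
{
  u64 digests_cnt      = 100000;
  u64 salts_cnt        = 1;
  u64 kernel_rules_cnt = 64000;
  u64 dgst_size        = 16;    // e.g. a 128-bit digest
  u64 esalt_size       = 0;     // only non-zero for modes carrying extra salt data

  u64 size_plains  = digests_cnt * SZ_PLAIN;
  u64 size_digests = digests_cnt * dgst_size;
  u64 size_shown   = digests_cnt * sizeof (uint32_t);
  u64 size_salts   = salts_cnt   * SZ_SALT;
  u64 size_esalts  = digests_cnt * esalt_size;

  u64 size_rules   = kernel_rules_cnt * SZ_RULE;  // full host-side rule set
  u64 size_rules_c = KERNEL_RULES_MAX * SZ_RULE;  // constant on-device window

  printf ("plains: %llu, digests: %llu, shown: %llu, salts: %llu, esalts: %llu\n",
          (unsigned long long) size_plains,  (unsigned long long) size_digests,
          (unsigned long long) size_shown,   (unsigned long long) size_salts,
          (unsigned long long) size_esalts);

  printf ("rules: %llu bytes (device window: %llu)\n",
          (unsigned long long) size_rules, (unsigned long long) size_rules_c);

  return 0;
}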
- return -1; - } + device_param->size_bfs = size_bfs; + device_param->size_combs = size_combs; + device_param->size_tm = size_tm; + device_param->size_kernel_params = size_kernel_params; - /** - * kernel cached filename - */ + u64 size_st_digests = 1 * hashconfig->dgst_size; + u64 size_st_salts = 1 * sizeof (salt_t); + u64 size_st_esalts = 1 * hashconfig->esalt_size; - char cached_file[256] = { 0 }; + device_param->size_st_digests = size_st_digests; + device_param->size_st_salts = size_st_salts; + device_param->size_st_esalts = size_st_esalts; - generate_cached_kernel_filename (user_options->slow_candidates, hashconfig->attack_exec, user_options_extra->attack_kern, kern_type, hashconfig->opti_type, folder_config->cache_dir, device_name_chksum, cached_file); + // extra buffer - /** - * load kernel - */ + u64 size_extra_buffer = 4; - const bool rc_load_kernel = load_kernel (hashcat_ctx, device_param, "main_kernel", source_file, cached_file, build_options_module_buf, cache_disable, &device_param->opencl_program, &device_param->cuda_module, &device_param->hip_module); + if (module_ctx->module_extra_buffer_size != MODULE_DEFAULT) + { + const u64 extra_buffer_size = module_ctx->module_extra_buffer_size (hashconfig, user_options, user_options_extra, hashes, device_param); - if (rc_load_kernel == false) + if (extra_buffer_size == (u64) -1) { - event_log_error (hashcat_ctx, "* Device #%u: Kernel %s build failed.", device_param->device_id + 1, source_file); + event_log_error (hashcat_ctx, "Invalid extra buffer size."); - backend_kernel_build_warnings++; + backend_extra_size_warning++; device_param->skipped_warning = true; continue; } - hcfree (build_options_module_buf); - } + device_param->extra_buffer_size = extra_buffer_size; - /** - * word generator kernel - */ + // for the size we actually allocate we need to cheat a bit in order to make it more easy for plugin developer. + // + // we will divide this size by 4 to workaround opencl limitation. + // this collides with a theoretical scenario (like -n1 -T1) where there's only one workitem, + // because inside the kernel the target buffer is selected by workitem_id / 4. + // but the maximum size of the buffer would be only 1/4 of what is needed -> overflow. + // + // to workaround this we make sure that there's always a full buffer in each of the 4 allocated buffers available. - if (user_options->slow_candidates == true) - { + const u64 kernel_power_max = ((hashconfig->opts_type & OPTS_TYPE_MP_MULTI_DISABLE) ? 
1 : device_param->device_processors) * device_param->kernel_threads_max * device_param->kernel_accel_max; + + const u64 extra_buffer_size_one = extra_buffer_size / kernel_power_max; + + size_extra_buffer = extra_buffer_size + (extra_buffer_size_one * 4); } - else + + // kern type + + u32 kern_type = hashconfig->kern_type; + + if (module_ctx->module_kern_type_dynamic != MODULE_DEFAULT) { - if ((user_options->attack_mode != ATTACK_MODE_STRAIGHT) && (user_options->attack_mode != ATTACK_MODE_ASSOCIATION)) + if (user_options->benchmark == true) { - /** - * kernel mp source filename - */ + } + else + { + void *digests_buf = hashes->digests_buf; + salt_t *salts_buf = hashes->salts_buf; + void *esalts_buf = hashes->esalts_buf; + void *hook_salts_buf = hashes->hook_salts_buf; + hashinfo_t **hash_info = hashes->hash_info; - char source_file[256] = { 0 }; + hashinfo_t *hash_info_ptr = NULL; - generate_source_kernel_mp_filename (hashconfig->opti_type, hashconfig->opts_type, folder_config->shared_dir, source_file); + if (hash_info) hash_info_ptr = hash_info[0]; - if (hc_path_read (source_file) == false) - { - event_log_error (hashcat_ctx, "%s: %s", source_file, strerror (errno)); + kern_type = (u32) module_ctx->module_kern_type_dynamic (hashconfig, digests_buf, salts_buf, esalts_buf, hook_salts_buf, hash_info_ptr); + } + } - return -1; - } + // built options - /** - * kernel mp cached filename - */ + const size_t build_options_sz = 4096; - char cached_file[256] = { 0 }; + char *build_options_buf = (char *) hcmalloc (build_options_sz); - generate_cached_kernel_mp_filename (hashconfig->opti_type, hashconfig->opts_type, folder_config->cache_dir, device_name_chksum_amp_mp, cached_file); + int build_options_len = 0; - const bool rc_load_kernel = load_kernel (hashcat_ctx, device_param, "mp_kernel", source_file, cached_file, build_options_buf, cache_disable, &device_param->opencl_program_mp, &device_param->cuda_module_mp, &device_param->hip_module_mp); + if ((device_param->is_cuda == true) || (device_param->is_hip == true)) + { + // using a path with a space will break nvrtc_make_options_array_from_string() + // we add it to options array in a clean way later - if (rc_load_kernel == false) - { - event_log_error (hashcat_ctx, "* Device #%u: Kernel %s build failed.", device_param->device_id + 1, source_file); + build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D KERNEL_STATIC "); + } + else + { + // when is builded with cygwin and msys, cpath_real doesn't work + #if defined (_WIN) || defined (__CYGWIN__) || defined (__MSYS__) + build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D KERNEL_STATIC -D INCLUDE_PATH=%s ", "OpenCL"); + #else + build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D KERNEL_STATIC -D INCLUDE_PATH=\"%s\" ", folder_config->cpath_real); + #endif - return -1; - } + #if defined (__APPLE__) + if (is_apple_silicon() == true) + { + build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D IS_APPLE_SILICON "); } + #endif } - /** - * amplifier kernel - */ + /* currently disabled, hangs NEO drivers since 20.09. + was required for NEO driver 20.08 to workaround the same issue! 
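Editor's note restating the comment block above: module_extra_buffer_size() returns the total scratch space a slow plugin needs, but the allocation is later split across four separate buffers and a work-item selects its slice by workitem_id / 4, so with very small launch sizes one quarter of the total could be smaller than what a single work-item needs. Padding the total by four times the per-work-item share guarantees each of the four buffers is large enough on its own. Under those assumptions the arithmetic is just:

#include <stdio.h>
#include <stdint.h>

typedef uint64_t u64;

/* pad the module-requested extra buffer so that each of the 4 sub-buffers
   can hold at least one full per-work-item slice */
static u64 pad_extra_buffer (u64 extra_buffer_size, u64 kernel_power_max)
{
  const u64 extra_buffer_size_one = extra_buffer_size / kernel_power_max;

  return extra_buffer_size + (extra_buffer_size_one * 4);
}

int main (void)
{
  // kernel_power_max = processors * threads_max * accel_max
  // (or 1 * threads_max * accel_max when OPTS_TYPE_MP_MULTI_DISABLE is set)
  const u64 kernel_power_max  = 64 * 256 * 4;                 // made-up launch size
  const u64 extra_buffer_size = kernel_power_max * 1024;      // e.g. 1 KiB per work-item

  printf ("padded size: %llu bytes\n",
          (unsigned long long) pad_extra_buffer (extra_buffer_size, kernel_power_max));

  return 0;
}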
+ we go with the latest version - if (user_options->slow_candidates == true) - { - } - else + if (device_param->is_opencl == true) { - if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + if (device_param->use_opencl12 == true) { - + build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-cl-std=CL1.2 "); } - else + else if (device_param->use_opencl20 == true) { - /** - * kernel amp source filename - */ + build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-cl-std=CL2.0 "); + } + else if (device_param->use_opencl21 == true) + { + build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-cl-std=CL2.1 "); + } + } + */ - char source_file[256] = { 0 }; + // we don't have sm_* on vendors not NV but it doesn't matter - generate_source_kernel_amp_filename (user_options_extra->attack_kern, folder_config->shared_dir, source_file); + #if defined (DEBUG) + build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D LOCAL_MEM_TYPE=%d -D VENDOR_ID=%u -D CUDA_ARCH=%u -D HAS_ADD=%u -D HAS_ADDC=%u -D HAS_SUB=%u -D HAS_SUBC=%u -D HAS_VADD=%u -D HAS_VADDC=%u -D HAS_VADD_CO=%u -D HAS_VADDC_CO=%u -D HAS_VSUB=%u -D HAS_VSUBB=%u -D HAS_VSUB_CO=%u -D HAS_VSUBB_CO=%u -D HAS_VPERM=%u -D HAS_VADD3=%u -D HAS_VBFE=%u -D HAS_BFE=%u -D HAS_LOP3=%u -D HAS_MOV64=%u -D HAS_PRMT=%u -D VECT_SIZE=%d -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D ATTACK_EXEC=%u -D ATTACK_KERN=%u -D ATTACK_MODE=%u ", device_param->device_local_mem_type, device_param->opencl_platform_vendor_id, (device_param->sm_major * 100) + (device_param->sm_minor * 10), device_param->has_add, device_param->has_addc, device_param->has_sub, device_param->has_subc, device_param->has_vadd, device_param->has_vaddc, device_param->has_vadd_co, device_param->has_vaddc_co, device_param->has_vsub, device_param->has_vsubb, device_param->has_vsub_co, device_param->has_vsubb_co, device_param->has_vperm, device_param->has_vadd3, device_param->has_vbfe, device_param->has_bfe, device_param->has_lop3, device_param->has_mov64, device_param->has_prmt, device_param->vector_width, (u32) device_param->opencl_device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type, hashconfig->attack_exec, user_options_extra->attack_kern, user_options->attack_mode); + #else + build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D LOCAL_MEM_TYPE=%d -D VENDOR_ID=%u -D CUDA_ARCH=%u -D HAS_ADD=%u -D HAS_ADDC=%u -D HAS_SUB=%u -D HAS_SUBC=%u -D HAS_VADD=%u -D HAS_VADDC=%u -D HAS_VADD_CO=%u -D HAS_VADDC_CO=%u -D HAS_VSUB=%u -D HAS_VSUBB=%u -D HAS_VSUB_CO=%u -D HAS_VSUBB_CO=%u -D HAS_VPERM=%u -D HAS_VADD3=%u -D HAS_VBFE=%u -D HAS_BFE=%u -D HAS_LOP3=%u -D HAS_MOV64=%u -D HAS_PRMT=%u -D VECT_SIZE=%d -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D ATTACK_EXEC=%u -D ATTACK_KERN=%u -D ATTACK_MODE=%u -w ", device_param->device_local_mem_type, device_param->opencl_platform_vendor_id, (device_param->sm_major * 100) + (device_param->sm_minor * 10), device_param->has_add, device_param->has_addc, device_param->has_sub, device_param->has_subc, device_param->has_vadd, device_param->has_vaddc, device_param->has_vadd_co, 
device_param->has_vaddc_co, device_param->has_vsub, device_param->has_vsubb, device_param->has_vsub_co, device_param->has_vsubb_co, device_param->has_vperm, device_param->has_vadd3, device_param->has_vbfe, device_param->has_bfe, device_param->has_lop3, device_param->has_mov64, device_param->has_prmt, device_param->vector_width, (u32) device_param->opencl_device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type, hashconfig->attack_exec, user_options_extra->attack_kern, user_options->attack_mode); + #endif - if (hc_path_read (source_file) == false) - { - event_log_error (hashcat_ctx, "%s: %s", source_file, strerror (errno)); + build_options_buf[build_options_len] = 0; - return -1; - } + /* + if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU) + { + if (device_param->opencl_platform_vendor_id == VENDOR_ID_INTEL_SDK) + { + strncat (build_options_buf, " -cl-opt-disable", 16); + } + } + */ - /** - * kernel amp cached filename - */ + #if defined (DEBUG) + if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: build_options '%s'", device_id + 1, build_options_buf); + #endif - char cached_file[256] = { 0 }; + /** + * device_name_chksum_amp_mp + */ - generate_cached_kernel_amp_filename (user_options_extra->attack_kern, folder_config->cache_dir, device_name_chksum_amp_mp, cached_file); + char device_name_chksum_amp_mp[HCBUFSIZ_TINY] = { 0 }; - const bool rc_load_kernel = load_kernel (hashcat_ctx, device_param, "amp_kernel", source_file, cached_file, build_options_buf, cache_disable, &device_param->opencl_program_amp, &device_param->cuda_module_amp, &device_param->hip_module_amp); + const size_t dnclen_amp_mp = snprintf (device_name_chksum_amp_mp, HCBUFSIZ_TINY, "%d-%d-%d-%u-%d-%u-%s-%s-%s-%u", + backend_ctx->comptime, + backend_ctx->cuda_driver_version, + backend_ctx->hip_runtimeVersion, + backend_ctx->metal_runtimeVersion, + device_param->is_opencl, + device_param->opencl_platform_vendor_id, + device_param->device_name, + device_param->opencl_device_version, + device_param->opencl_driver_version, + (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : device_param->kernel_threads_max); - if (rc_load_kernel == false) - { - event_log_error (hashcat_ctx, "* Device #%u: Kernel %s build failed.", device_param->device_id + 1, source_file); + md5_ctx_t md5_ctx; - return -1; - } + md5_init (&md5_ctx); + md5_update (&md5_ctx, (u32 *) device_name_chksum_amp_mp, dnclen_amp_mp); + md5_final (&md5_ctx); - hcfree (build_options_buf); - } - } + snprintf (device_name_chksum_amp_mp, HCBUFSIZ_TINY, "%08x", md5_ctx.h[0]); /** - * no more need for the compiler. cuda doesn't offer this function. - * from opencl specs: - * Calls to clBuildProgram, clCompileProgram or clLinkProgram after clUnloadPlatformCompiler will reload the compiler, if necessary, to build the appropriate program executable. + * kernel cache */ - // Disabled after user reporting weird errors like CL_OUT_OF_HOST_MEMORY after calling - /* - if (device_param->is_opencl == true) - { - cl_platform_id platform_id = backend_ctx->opencl_platforms[device_param->opencl_platform_id]; - if (hc_clUnloadPlatformCompiler (hashcat_ctx, platform_id) == -1) return -1; - } - */ + bool cache_disable = false; - // some algorithm collide too fast, make that impossible + // Seems to be completely broken on Apple + (Intel?) 
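Editor's note: device_name_chksum_amp_mp above is only a cache key. The relevant build inputs (compile time, CUDA/HIP/Metal runtime versions, whether the device is OpenCL, the platform vendor id, device name, device and driver versions, and the effective thread count) are concatenated into one string, hashed with MD5, and the first 32 bits are kept as a hex token that goes into the cached kernel filename, so any change in those inputs forces a JIT rebuild. A rough sketch of the idea; the values are made up, and a 32-bit FNV-1a stands in for MD5 since hashcat's md5_ctx_t is not reproduced here:

#include <stdio.h>
#include <stdint.h>
#include <string.h>

/* stand-in 32-bit FNV-1a; hashcat itself keeps the first word of an MD5 digest */
static uint32_t fnv1a32 (const char *s, size_t len)
{
  uint32_t h = 0x811c9dc5u;

  for (size_t i = 0; i < len; i++)
  {
    h ^= (uint8_t) s[i];
    h *= 0x01000193u;
  }

  return h;
}

int main (void)
{
  char key[512];

  // same shape as the checksum string in the hunk above (all values are made up)
  const int len = snprintf (key, sizeof (key), "%d-%d-%d-%u-%d-%u-%s-%s-%s-%u",
    1700000000,          // comptime
    12020,               // cuda_driver_version
    50422803,            // hip_runtimeVersion
    300u,                // metal_runtimeVersion
    1,                   // is_opencl
    0x1027u,             // opencl_platform_vendor_id
    "Apple M1 Pro",      // device_name
    "OpenCL 1.2",        // opencl_device_version
    "1.2 (Jan 1 2024)",  // opencl_driver_version
    256u);               // effective kernel threads

  printf ("%08x\n", fnv1a32 (key, (size_t) len));

  return 0;
}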
CPU + // To reproduce set cache_disable to false and run benchmark -b - if (user_options->benchmark == true) + if (device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) { - ((u32 *) hashes->digests_buf)[0] = -1U; - ((u32 *) hashes->digests_buf)[1] = -1U; - ((u32 *) hashes->digests_buf)[2] = -1U; - ((u32 *) hashes->digests_buf)[3] = -1U; + if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU) + { + cache_disable = true; + } + } + + if (module_ctx->module_jit_cache_disable != MODULE_DEFAULT) + { + cache_disable = module_ctx->module_jit_cache_disable (hashconfig, user_options, user_options_extra, hashes, device_param); } + #if defined (DEBUG) + // https://github.com/hashcat/hashcat/issues/2750 + cache_disable = true; + #endif + /** - * global buffers + * shared kernel with no hashconfig dependencies */ - const u64 size_total_fixed - = bitmap_ctx->bitmap_size - + bitmap_ctx->bitmap_size - + bitmap_ctx->bitmap_size - + bitmap_ctx->bitmap_size - + bitmap_ctx->bitmap_size - + bitmap_ctx->bitmap_size - + bitmap_ctx->bitmap_size - + bitmap_ctx->bitmap_size - + size_plains - + size_digests - + size_shown - + size_salts - + size_results - + size_extra_buffer - + size_st_digests - + size_st_salts - + size_st_esalts - + size_esalts - + size_markov_css - + size_root_css - + size_rules - + size_rules_c - + size_tm - + size_kernel_params; - - if (size_total_fixed > device_param->device_available_mem) { - event_log_error (hashcat_ctx, "* Device #%u: Not enough allocatable device memory for this hashlist/ruleset.", device_id + 1); + /** + * kernel shared source filename + */ - backend_memory_hit_warnings++; + char source_file[256] = { 0 }; - device_param->skipped_warning = true; - continue; - } + generate_source_kernel_shared_filename (folder_config->shared_dir, source_file); - if (device_param->is_cuda == true) - { - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_bitmap_s1_a, bitmap_ctx->bitmap_size) == -1) return -1; - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_bitmap_s1_b, bitmap_ctx->bitmap_size) == -1) return -1; - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_bitmap_s1_c, bitmap_ctx->bitmap_size) == -1) return -1; - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_bitmap_s1_d, bitmap_ctx->bitmap_size) == -1) return -1; - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_bitmap_s2_a, bitmap_ctx->bitmap_size) == -1) return -1; - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_bitmap_s2_b, bitmap_ctx->bitmap_size) == -1) return -1; - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_bitmap_s2_c, bitmap_ctx->bitmap_size) == -1) return -1; - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_bitmap_s2_d, bitmap_ctx->bitmap_size) == -1) return -1; - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_plain_bufs, size_plains) == -1) return -1; - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_digests_buf, size_digests) == -1) return -1; - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_digests_shown, size_shown) == -1) return -1; - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_salt_bufs, size_salts) == -1) return -1; - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_result, size_results) == -1) return -1; - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_extra0_buf, size_extra_buffer / 4) == -1) return -1; - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_extra1_buf, size_extra_buffer / 4) == -1) return -1; - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_extra2_buf, size_extra_buffer / 4) == -1) return 
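Editor's note: before any allocation, the code above adds up every fixed-size buffer (eight bitmap segments plus the digest, salt, result, scratch and self-test buffers) and rejects the device when the total exceeds the allocatable memory measured earlier, which is what produces the "Not enough allocatable device memory" warning. The check itself is just a guarded sum, along these lines (buffer sizes below are made up):

#include <stdio.h>
#include <stdint.h>

typedef uint64_t u64;

/* returns 0 when the fixed buffers fit into the device budget, -1 otherwise */
static int check_memory_budget (const u64 *sizes, int n, u64 available)
{
  u64 total = 0;

  for (int i = 0; i < n; i++) total += sizes[i];

  if (total > available)
  {
    fprintf (stderr, "not enough allocatable device memory: need %llu, have %llu\n",
             (unsigned long long) total, (unsigned long long) available);

    return -1;
  }

  return 0;
}

int main (void)
{
  // the bitmap segment is allocated eight times; the remaining entries are made-up sizes
  const u64 bitmap = 1u << 26;

  const u64 sizes[] = { 8 * bitmap, 16u << 20, 4u << 20, 1u << 20, 4096 };

  return check_memory_budget (sizes, (int) (sizeof (sizes) / sizeof (sizes[0])),
                              2048ull << 20) ? 1 : 0;
}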
-1; - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_extra3_buf, size_extra_buffer / 4) == -1) return -1; - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_st_digests_buf, size_st_digests) == -1) return -1; - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_st_salts_buf, size_st_salts) == -1) return -1; - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_kernel_param, size_kernel_params) == -1) return -1; + if (hc_path_read (source_file) == false) + { + event_log_error (hashcat_ctx, "%s: %s", source_file, strerror (errno)); - if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_bitmap_s1_a, bitmap_ctx->bitmap_s1_a, bitmap_ctx->bitmap_size, device_param->cuda_stream) == -1) return -1; - if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_bitmap_s1_b, bitmap_ctx->bitmap_s1_b, bitmap_ctx->bitmap_size, device_param->cuda_stream) == -1) return -1; - if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_bitmap_s1_c, bitmap_ctx->bitmap_s1_c, bitmap_ctx->bitmap_size, device_param->cuda_stream) == -1) return -1; - if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_bitmap_s1_d, bitmap_ctx->bitmap_s1_d, bitmap_ctx->bitmap_size, device_param->cuda_stream) == -1) return -1; - if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_bitmap_s2_a, bitmap_ctx->bitmap_s2_a, bitmap_ctx->bitmap_size, device_param->cuda_stream) == -1) return -1; - if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_bitmap_s2_b, bitmap_ctx->bitmap_s2_b, bitmap_ctx->bitmap_size, device_param->cuda_stream) == -1) return -1; - if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_bitmap_s2_c, bitmap_ctx->bitmap_s2_c, bitmap_ctx->bitmap_size, device_param->cuda_stream) == -1) return -1; - if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_bitmap_s2_d, bitmap_ctx->bitmap_s2_d, bitmap_ctx->bitmap_size, device_param->cuda_stream) == -1) return -1; - if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_digests_buf, hashes->digests_buf, size_digests, device_param->cuda_stream) == -1) return -1; - if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_salt_bufs, hashes->salts_buf, size_salts, device_param->cuda_stream) == -1) return -1; + return -1; + } /** - * special buffers + * kernel shared cached filename */ - if (user_options->slow_candidates == true) + char cached_file[256] = { 0 }; + + generate_cached_kernel_shared_filename (folder_config->cache_dir, device_name_chksum_amp_mp, cached_file, device_param->is_metal); + + #if defined (__APPLE__) + const bool rc_load_kernel = load_kernel (hashcat_ctx, device_param, "shared_kernel", source_file, cached_file, build_options_buf, cache_disable, &device_param->opencl_program_shared, &device_param->cuda_module_shared, &device_param->hip_module_shared, &device_param->metal_library_shared); + #else + const bool rc_load_kernel = load_kernel (hashcat_ctx, device_param, "shared_kernel", source_file, cached_file, build_options_buf, cache_disable, &device_param->opencl_program_shared, &device_param->cuda_module_shared, &device_param->hip_module_shared, NULL); + #endif + + if (rc_load_kernel == false) { - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_rules_c, size_rules_c) == -1) return -1; + event_log_error (hashcat_ctx, "* Device #%u: Kernel %s build failed.", device_param->device_id + 1, source_file); + + return -1; } - else + + if (device_param->is_cuda == true) { - if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT) - { - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_rules, 
size_rules) == -1) return -1; + // GPU memset - if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) - { - size_t dummy = 0; + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_memset, device_param->cuda_module_shared, "gpu_memset") == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_memset"); - if (hc_cuModuleGetGlobal (hashcat_ctx, &device_param->cuda_d_rules_c, &dummy, device_param->cuda_module, "generic_constant") == -1) return -1; - } - else - { - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_rules_c, size_rules_c) == -1) return -1; - } + backend_kernel_create_warnings++; - if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_rules, straight_ctx->kernel_rules_buf, size_rules, device_param->cuda_stream) == -1) return -1; - } - else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI) - { - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_combs, size_combs) == -1) return -1; - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_combs_c, size_combs) == -1) return -1; - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_root_css_buf, size_root_css) == -1) return -1; - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_markov_css_buf, size_markov_css) == -1) return -1; + device_param->skipped_warning = true; + continue; } - else if (user_options_extra->attack_kern == ATTACK_KERN_BF) - { - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_bfs, size_bfs) == -1) return -1; - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_root_css_buf, size_root_css) == -1) return -1; - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_markov_css_buf, size_markov_css) == -1) return -1; - - if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) - { - size_t dummy = 0; - if (hc_cuModuleGetGlobal (hashcat_ctx, &device_param->cuda_d_bfs_c, &dummy, device_param->cuda_module, "generic_constant") == -1) return -1; + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_memset, &device_param->kernel_wgs_memset) == -1) return -1; - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_tm_c, size_tm) == -1) return -1; - } - else - { - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_bfs_c, size_bfs) == -1) return -1; - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_tm_c, size_tm) == -1) return -1; - } - } - } + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_memset, &device_param->kernel_local_mem_size_memset) == -1) return -1; - if (size_esalts) - { - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_esalt_bufs, size_esalts) == -1) return -1; + device_param->kernel_dynamic_local_mem_size_memset = device_param->device_local_mem_size - device_param->kernel_local_mem_size_memset; - if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_esalt_bufs, hashes->esalts_buf, size_esalts, device_param->cuda_stream) == -1) return -1; - } + device_param->kernel_preferred_wgs_multiple_memset = device_param->cuda_warp_size; - if (hashconfig->st_hash != NULL) - { - if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_st_digests_buf, hashes->st_digests_buf, size_st_digests, device_param->cuda_stream) == -1) return -1; - if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_st_salts_buf, hashes->st_salts_buf, size_st_salts, device_param->cuda_stream) == -1) return -1; + // GPU bzero - if (size_esalts) + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_bzero, device_param->cuda_module_shared, 
"gpu_bzero") == -1) { - if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_st_esalts_buf, size_st_esalts) == -1) return -1; + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_bzero"); - if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_st_esalts_buf, hashes->st_esalts_buf, size_st_esalts, device_param->cuda_stream) == -1) return -1; + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; } - } - } - if (device_param->is_hip == true) - { - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bitmap_s1_a, bitmap_ctx->bitmap_size) == -1) return -1; - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bitmap_s1_b, bitmap_ctx->bitmap_size) == -1) return -1; - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bitmap_s1_c, bitmap_ctx->bitmap_size) == -1) return -1; - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bitmap_s1_d, bitmap_ctx->bitmap_size) == -1) return -1; - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bitmap_s2_a, bitmap_ctx->bitmap_size) == -1) return -1; - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bitmap_s2_b, bitmap_ctx->bitmap_size) == -1) return -1; - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bitmap_s2_c, bitmap_ctx->bitmap_size) == -1) return -1; - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bitmap_s2_d, bitmap_ctx->bitmap_size) == -1) return -1; - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_plain_bufs, size_plains) == -1) return -1; - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_digests_buf, size_digests) == -1) return -1; - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_digests_shown, size_shown) == -1) return -1; - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_salt_bufs, size_salts) == -1) return -1; - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_result, size_results) == -1) return -1; - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_extra0_buf, size_extra_buffer / 4) == -1) return -1; - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_extra1_buf, size_extra_buffer / 4) == -1) return -1; - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_extra2_buf, size_extra_buffer / 4) == -1) return -1; - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_extra3_buf, size_extra_buffer / 4) == -1) return -1; - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_st_digests_buf, size_st_digests) == -1) return -1; - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_st_salts_buf, size_st_salts) == -1) return -1; - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_kernel_param, size_kernel_params) == -1) return -1; - - if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_bitmap_s1_a, bitmap_ctx->bitmap_s1_a, bitmap_ctx->bitmap_size, device_param->hip_stream) == -1) return -1; - if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_bitmap_s1_b, bitmap_ctx->bitmap_s1_b, bitmap_ctx->bitmap_size, device_param->hip_stream) == -1) return -1; - if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_bitmap_s1_c, bitmap_ctx->bitmap_s1_c, bitmap_ctx->bitmap_size, device_param->hip_stream) == -1) return -1; - if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_bitmap_s1_d, bitmap_ctx->bitmap_s1_d, bitmap_ctx->bitmap_size, device_param->hip_stream) == -1) return -1; - if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_bitmap_s2_a, bitmap_ctx->bitmap_s2_a, bitmap_ctx->bitmap_size, device_param->hip_stream) == -1) return -1; - if 
(hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_bitmap_s2_b, bitmap_ctx->bitmap_s2_b, bitmap_ctx->bitmap_size, device_param->hip_stream) == -1) return -1; - if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_bitmap_s2_c, bitmap_ctx->bitmap_s2_c, bitmap_ctx->bitmap_size, device_param->hip_stream) == -1) return -1; - if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_bitmap_s2_d, bitmap_ctx->bitmap_s2_d, bitmap_ctx->bitmap_size, device_param->hip_stream) == -1) return -1; - if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_digests_buf, hashes->digests_buf, size_digests, device_param->hip_stream) == -1) return -1; - if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_salt_bufs, hashes->salts_buf, size_salts, device_param->hip_stream) == -1) return -1; + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_bzero, &device_param->kernel_wgs_bzero) == -1) return -1; - /** - * special buffers - */ + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_bzero, &device_param->kernel_local_mem_size_bzero) == -1) return -1; - if (user_options->slow_candidates == true) - { - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_rules_c, size_rules_c) == -1) return -1; - } - else - { - if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT) - { - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_rules, size_rules) == -1) return -1; + device_param->kernel_dynamic_local_mem_size_bzero = device_param->device_local_mem_size - device_param->kernel_local_mem_size_bzero; - if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) - { - size_t dummy = 0; + device_param->kernel_preferred_wgs_multiple_bzero = device_param->cuda_warp_size; - if (hc_hipModuleGetGlobal (hashcat_ctx, &device_param->hip_d_rules_c, &dummy, device_param->hip_module, "generic_constant") == -1) return -1; - } - else - { - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_rules_c, size_rules_c) == -1) return -1; - } + // GPU autotune init - if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_rules, straight_ctx->kernel_rules_buf, size_rules, device_param->hip_stream) == -1) return -1; - } - else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI) + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_atinit, device_param->cuda_module_shared, "gpu_atinit") == -1) { - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_combs, size_combs) == -1) return -1; - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_combs_c, size_combs) == -1) return -1; - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_root_css_buf, size_root_css) == -1) return -1; - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_markov_css_buf, size_markov_css) == -1) return -1; + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_atinit"); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; } - else if (user_options_extra->attack_kern == ATTACK_KERN_BF) - { - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bfs, size_bfs) == -1) return -1; - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_root_css_buf, size_root_css) == -1) return -1; - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_markov_css_buf, size_markov_css) == -1) return -1; - if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) - { - size_t dummy = 0; + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_atinit, &device_param->kernel_wgs_atinit) == 
-1) return -1; - if (hc_hipModuleGetGlobal (hashcat_ctx, &device_param->hip_d_bfs_c, &dummy, device_param->hip_module, "generic_constant") == -1) return -1; + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_atinit, &device_param->kernel_local_mem_size_atinit) == -1) return -1; - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_tm_c, size_tm) == -1) return -1; - } - else - { - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bfs_c, size_bfs) == -1) return -1; - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_tm_c, size_tm) == -1) return -1; - } + device_param->kernel_dynamic_local_mem_size_atinit = device_param->device_local_mem_size - device_param->kernel_local_mem_size_atinit; + + device_param->kernel_preferred_wgs_multiple_atinit = device_param->cuda_warp_size; + + // CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_atinit, 0, sizeof (cl_mem), device_param->kernel_params_atinit[0]); if (CL_rc == -1) return -1; + // CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_atinit, 1, sizeof (cl_ulong), device_param->kernel_params_atinit[1]); if (CL_rc == -1) return -1; + + // GPU decompress + + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_decompress, device_param->cuda_module_shared, "gpu_decompress") == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_decompress"); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; } - } - if (size_esalts) - { - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_esalt_bufs, size_esalts) == -1) return -1; + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_decompress, &device_param->kernel_wgs_decompress) == -1) return -1; - if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_esalt_bufs, hashes->esalts_buf, size_esalts, device_param->hip_stream) == -1) return -1; - } + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_decompress, &device_param->kernel_local_mem_size_decompress) == -1) return -1; - if (hashconfig->st_hash != NULL) - { - if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_st_digests_buf, hashes->st_digests_buf, size_st_digests, device_param->hip_stream) == -1) return -1; - if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_st_salts_buf, hashes->st_salts_buf, size_st_salts, device_param->hip_stream) == -1) return -1; + device_param->kernel_dynamic_local_mem_size_decompress = device_param->device_local_mem_size - device_param->kernel_local_mem_size_decompress; - if (size_esalts) + device_param->kernel_preferred_wgs_multiple_decompress = device_param->cuda_warp_size; + + // GPU utf8 to utf16le conversion + + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_utf8toutf16le, device_param->cuda_module_shared, "gpu_utf8_to_utf16") == -1) { - if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_st_esalts_buf, size_st_esalts) == -1) return -1; + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_utf8_to_utf16"); - if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_st_esalts_buf, hashes->st_esalts_buf, size_st_esalts, device_param->hip_stream) == -1) return -1; + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; } - } - } - if (device_param->is_opencl == true) - { - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, 
bitmap_ctx->bitmap_size, NULL, &device_param->opencl_d_bitmap_s1_a) == -1) return -1; - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, bitmap_ctx->bitmap_size, NULL, &device_param->opencl_d_bitmap_s1_b) == -1) return -1; - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, bitmap_ctx->bitmap_size, NULL, &device_param->opencl_d_bitmap_s1_c) == -1) return -1; - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, bitmap_ctx->bitmap_size, NULL, &device_param->opencl_d_bitmap_s1_d) == -1) return -1; - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, bitmap_ctx->bitmap_size, NULL, &device_param->opencl_d_bitmap_s2_a) == -1) return -1; - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, bitmap_ctx->bitmap_size, NULL, &device_param->opencl_d_bitmap_s2_b) == -1) return -1; - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, bitmap_ctx->bitmap_size, NULL, &device_param->opencl_d_bitmap_s2_c) == -1) return -1; - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, bitmap_ctx->bitmap_size, NULL, &device_param->opencl_d_bitmap_s2_d) == -1) return -1; - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE, size_plains, NULL, &device_param->opencl_d_plain_bufs) == -1) return -1; - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_digests, NULL, &device_param->opencl_d_digests_buf) == -1) return -1; - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE, size_shown, NULL, &device_param->opencl_d_digests_shown) == -1) return -1; - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_salts, NULL, &device_param->opencl_d_salt_bufs) == -1) return -1; - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE, size_results, NULL, &device_param->opencl_d_result) == -1) return -1; - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE, size_extra_buffer / 4, NULL, &device_param->opencl_d_extra0_buf) == -1) return -1; - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE, size_extra_buffer / 4, NULL, &device_param->opencl_d_extra1_buf) == -1) return -1; - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE, size_extra_buffer / 4, NULL, &device_param->opencl_d_extra2_buf) == -1) return -1; - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE, size_extra_buffer / 4, NULL, &device_param->opencl_d_extra3_buf) == -1) return -1; - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_st_digests, NULL, &device_param->opencl_d_st_digests_buf) == -1) return -1; - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_st_salts, NULL, &device_param->opencl_d_st_salts_buf) == -1) return -1; - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_kernel_params, NULL, &device_param->opencl_d_kernel_param) == -1) return -1; + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_utf8toutf16le, &device_param->kernel_wgs_utf8toutf16le) == -1) return -1; - if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s1_a, CL_FALSE, 0, 
bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s1_a, 0, NULL, NULL) == -1) return -1; - if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s1_b, CL_FALSE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s1_b, 0, NULL, NULL) == -1) return -1; - if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s1_c, CL_FALSE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s1_c, 0, NULL, NULL) == -1) return -1; - if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s1_d, CL_FALSE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s1_d, 0, NULL, NULL) == -1) return -1; - if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s2_a, CL_FALSE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s2_a, 0, NULL, NULL) == -1) return -1; - if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s2_b, CL_FALSE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s2_b, 0, NULL, NULL) == -1) return -1; - if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s2_c, CL_FALSE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s2_c, 0, NULL, NULL) == -1) return -1; - if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s2_d, CL_FALSE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s2_d, 0, NULL, NULL) == -1) return -1; - if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_digests_buf, CL_FALSE, 0, size_digests, hashes->digests_buf, 0, NULL, NULL) == -1) return -1; - if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_salt_bufs, CL_FALSE, 0, size_salts, hashes->salts_buf, 0, NULL, NULL) == -1) return -1; + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_utf8toutf16le, &device_param->kernel_local_mem_size_utf8toutf16le) == -1) return -1; - /** - * special buffers - */ + device_param->kernel_dynamic_local_mem_size_utf8toutf16le = device_param->device_local_mem_size - device_param->kernel_local_mem_size_utf8toutf16le; - if (user_options->slow_candidates == true) - { - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_rules_c, NULL, &device_param->opencl_d_rules_c) == -1) return -1; + device_param->kernel_preferred_wgs_multiple_utf8toutf16le = device_param->cuda_warp_size; } - else + + if (device_param->is_hip == true) { - if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT) - { - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_rules, NULL, &device_param->opencl_d_rules) == -1) return -1; - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_rules_c, NULL, &device_param->opencl_d_rules_c) == -1) return -1; + // GPU memset - if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_rules, CL_FALSE, 0, size_rules, straight_ctx->kernel_rules_buf, 0, NULL, NULL) == -1) return -1; - } - else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI) + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_memset, device_param->hip_module_shared, "gpu_memset") == -1) { - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_combs, NULL, 
&device_param->opencl_d_combs) == -1) return -1; - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_combs, NULL, &device_param->opencl_d_combs_c) == -1) return -1; - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_root_css, NULL, &device_param->opencl_d_root_css_buf) == -1) return -1; - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_markov_css, NULL, &device_param->opencl_d_markov_css_buf) == -1) return -1; + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_memset"); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; } - else if (user_options_extra->attack_kern == ATTACK_KERN_BF) + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_memset, &device_param->kernel_wgs_memset) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_memset, &device_param->kernel_local_mem_size_memset) == -1) return -1; + + device_param->kernel_dynamic_local_mem_size_memset = device_param->device_local_mem_size - device_param->kernel_local_mem_size_memset; + + device_param->kernel_preferred_wgs_multiple_memset = device_param->hip_warp_size; + + // GPU bzero + + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_bzero, device_param->hip_module_shared, "gpu_bzero") == -1) { - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_bfs, NULL, &device_param->opencl_d_bfs) == -1) return -1; - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_bfs, NULL, &device_param->opencl_d_bfs_c) == -1) return -1; - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_tm, NULL, &device_param->opencl_d_tm_c) == -1) return -1; - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_root_css, NULL, &device_param->opencl_d_root_css_buf) == -1) return -1; - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_markov_css, NULL, &device_param->opencl_d_markov_css_buf) == -1) return -1; + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_bzero"); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; } - } - if (size_esalts) - { - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_esalts, NULL, &device_param->opencl_d_esalt_bufs) == -1) return -1; + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_bzero, &device_param->kernel_wgs_bzero) == -1) return -1; - if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_esalt_bufs, CL_FALSE, 0, size_esalts, hashes->esalts_buf, 0, NULL, NULL) == -1) return -1; - } + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_bzero, &device_param->kernel_local_mem_size_bzero) == -1) return -1; - if (hashconfig->st_hash != NULL) - { - if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_st_digests_buf, CL_FALSE, 0, size_st_digests, hashes->st_digests_buf, 0, NULL, NULL) == -1) return -1; - if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_st_salts_buf, CL_FALSE, 0, size_st_salts, hashes->st_salts_buf, 0, NULL, NULL) == -1) return 
-1; + device_param->kernel_dynamic_local_mem_size_bzero = device_param->device_local_mem_size - device_param->kernel_local_mem_size_bzero; - if (size_esalts) + device_param->kernel_preferred_wgs_multiple_bzero = device_param->hip_warp_size; + + // GPU autotune init + + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_atinit, device_param->hip_module_shared, "gpu_atinit") == -1) { - if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_st_esalts, NULL, &device_param->opencl_d_st_esalts_buf) == -1) return -1; + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_atinit"); - if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_st_esalts_buf, CL_FALSE, 0, size_st_esalts, hashes->st_esalts_buf, 0, NULL, NULL) == -1) return -1; + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; } - } - if (hc_clFlush (hashcat_ctx, device_param->opencl_command_queue) == -1) return -1; - } + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_atinit, &device_param->kernel_wgs_atinit) == -1) return -1; - /** - * kernel args - */ + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_atinit, &device_param->kernel_local_mem_size_atinit) == -1) return -1; - device_param->kernel_param.bitmap_mask = bitmap_ctx->bitmap_mask; - device_param->kernel_param.bitmap_shift1 = bitmap_ctx->bitmap_shift1; - device_param->kernel_param.bitmap_shift2 = bitmap_ctx->bitmap_shift2; - device_param->kernel_param.salt_pos_host = 0; - device_param->kernel_param.loop_pos = 0; - device_param->kernel_param.loop_cnt = 0; - device_param->kernel_param.il_cnt = 0; - device_param->kernel_param.digests_cnt = 0; - device_param->kernel_param.digests_offset_host = 0; + device_param->kernel_dynamic_local_mem_size_atinit = device_param->device_local_mem_size - device_param->kernel_local_mem_size_atinit; + + device_param->kernel_preferred_wgs_multiple_atinit = device_param->hip_warp_size; + + // CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_atinit, 0, sizeof (cl_mem), device_param->kernel_params_atinit[0]); if (CL_rc == -1) return -1; + // CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_atinit, 1, sizeof (cl_ulong), device_param->kernel_params_atinit[1]); if (CL_rc == -1) return -1; + + // GPU decompress + + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_decompress, device_param->hip_module_shared, "gpu_decompress") == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_decompress"); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_decompress, &device_param->kernel_wgs_decompress) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_decompress, &device_param->kernel_local_mem_size_decompress) == -1) return -1; + + device_param->kernel_dynamic_local_mem_size_decompress = device_param->device_local_mem_size - device_param->kernel_local_mem_size_decompress; + + device_param->kernel_preferred_wgs_multiple_decompress = device_param->hip_warp_size; + + // GPU utf8 to utf16le conversion + + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_utf8toutf16le, device_param->hip_module_shared, "gpu_utf8_to_utf16") == -1) + { + 
event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_utf8_to_utf16"); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_utf8toutf16le, &device_param->kernel_wgs_utf8toutf16le) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_utf8toutf16le, &device_param->kernel_local_mem_size_utf8toutf16le) == -1) return -1; + + device_param->kernel_dynamic_local_mem_size_utf8toutf16le = device_param->device_local_mem_size - device_param->kernel_local_mem_size_utf8toutf16le; + + device_param->kernel_preferred_wgs_multiple_utf8toutf16le = device_param->hip_warp_size; + } + + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + // GPU memset + + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library_shared, "gpu_memset", &device_param->metal_function_memset, &device_param->metal_pipeline_memset) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_memset"); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline_memset, &device_param->kernel_wgs_memset) == -1) return -1; + + device_param->kernel_local_mem_size_memset = 0; + device_param->kernel_dynamic_local_mem_size_memset = 0; + device_param->kernel_preferred_wgs_multiple_memset = device_param->metal_warp_size; + + // GPU bzero + + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library_shared, "gpu_bzero", &device_param->metal_function_bzero, &device_param->metal_pipeline_bzero) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_bzero"); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline_bzero, &device_param->kernel_wgs_bzero) == -1) return -1; + + device_param->kernel_local_mem_size_bzero = 0; + device_param->kernel_dynamic_local_mem_size_bzero = 0; + device_param->kernel_preferred_wgs_multiple_bzero = device_param->metal_warp_size; + + // GPU autotune init + + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library_shared, "gpu_atinit", &device_param->metal_function_atinit, &device_param->metal_pipeline_atinit) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_atinit"); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline_atinit, &device_param->kernel_wgs_atinit) == -1) return -1; + + device_param->kernel_local_mem_size_atinit = 0; + device_param->kernel_dynamic_local_mem_size_atinit = 0; + device_param->kernel_preferred_wgs_multiple_atinit = device_param->metal_warp_size; + + // GPU decompress + + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library_shared, "gpu_decompress", &device_param->metal_function_decompress, &device_param->metal_pipeline_decompress) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_decompress"); + + 
backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline_decompress, &device_param->kernel_wgs_decompress) == -1) return -1; + + device_param->kernel_local_mem_size_decompress = 0; + device_param->kernel_dynamic_local_mem_size_decompress = 0; + device_param->kernel_preferred_wgs_multiple_decompress = device_param->metal_warp_size; + + // GPU utf8 to utf16le conversion + + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library_shared, "gpu_utf8_to_utf16", &device_param->metal_function_utf8toutf16le, &device_param->metal_pipeline_utf8toutf16le) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_utf8_to_utf16"); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline_utf8toutf16le, &device_param->kernel_wgs_utf8toutf16le) == -1) return -1; + + device_param->kernel_local_mem_size_utf8toutf16le = 0; + device_param->kernel_dynamic_local_mem_size_utf8toutf16le = 0; + device_param->kernel_preferred_wgs_multiple_utf8toutf16le = device_param->metal_warp_size; + } + #endif + + if (device_param->is_opencl == true) + { + // GPU memset + + if (hc_clCreateKernel (hashcat_ctx, device_param->opencl_program_shared, "gpu_memset", &device_param->opencl_kernel_memset) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_memset"); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (get_opencl_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_memset, &device_param->kernel_wgs_memset) == -1) return -1; + + if (get_opencl_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_memset, &device_param->kernel_local_mem_size_memset) == -1) return -1; + + if (get_opencl_kernel_dynamic_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_memset, &device_param->kernel_dynamic_local_mem_size_memset) == -1) return -1; + + if (get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_memset, &device_param->kernel_preferred_wgs_multiple_memset) == -1) return -1; + + // GPU bzero + + if (hc_clCreateKernel (hashcat_ctx, device_param->opencl_program_shared, "gpu_bzero", &device_param->opencl_kernel_bzero) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_bzero"); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (get_opencl_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_bzero, &device_param->kernel_wgs_bzero) == -1) return -1; + + if (get_opencl_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_bzero, &device_param->kernel_local_mem_size_bzero) == -1) return -1; + + if (get_opencl_kernel_dynamic_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_bzero, &device_param->kernel_dynamic_local_mem_size_bzero) == -1) return -1; + + if (get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_bzero, &device_param->kernel_preferred_wgs_multiple_bzero) == -1) return -1; + + // apple hack, but perhaps also an alternative for other vendors + + if 
(device_param->kernel_preferred_wgs_multiple == 0) device_param->kernel_preferred_wgs_multiple = device_param->kernel_preferred_wgs_multiple_bzero; + + // GPU autotune init + + if (hc_clCreateKernel (hashcat_ctx, device_param->opencl_program_shared, "gpu_atinit", &device_param->opencl_kernel_atinit) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_atinit"); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (get_opencl_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_atinit, &device_param->kernel_wgs_atinit) == -1) return -1; + + if (get_opencl_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_atinit, &device_param->kernel_local_mem_size_atinit) == -1) return -1; + + if (get_opencl_kernel_dynamic_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_atinit, &device_param->kernel_dynamic_local_mem_size_atinit) == -1) return -1; + + if (get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_atinit, &device_param->kernel_preferred_wgs_multiple_atinit) == -1) return -1; + + // GPU decompress + + if (hc_clCreateKernel (hashcat_ctx, device_param->opencl_program_shared, "gpu_decompress", &device_param->opencl_kernel_decompress) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_decompress"); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (get_opencl_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_decompress, &device_param->kernel_wgs_decompress) == -1) return -1; + + if (get_opencl_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_decompress, &device_param->kernel_local_mem_size_decompress) == -1) return -1; + + if (get_opencl_kernel_dynamic_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_decompress, &device_param->kernel_dynamic_local_mem_size_decompress) == -1) return -1; + + if (get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_decompress, &device_param->kernel_preferred_wgs_multiple_decompress) == -1) return -1; + + // GPU utf8 to utf16le conversion + + if (hc_clCreateKernel (hashcat_ctx, device_param->opencl_program_shared, "gpu_utf8_to_utf16", &device_param->opencl_kernel_utf8toutf16le) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_utf8_to_utf16"); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (get_opencl_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_utf8toutf16le, &device_param->kernel_wgs_utf8toutf16le) == -1) return -1; + + if (get_opencl_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_utf8toutf16le, &device_param->kernel_local_mem_size_utf8toutf16le) == -1) return -1; + + if (get_opencl_kernel_dynamic_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_utf8toutf16le, &device_param->kernel_dynamic_local_mem_size_utf8toutf16le) == -1) return -1; + + if (get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_utf8toutf16le, &device_param->kernel_preferred_wgs_multiple_utf8toutf16le) == -1) return -1; + } + } + + /** + * main kernel + */ + + { + char *build_options_module_buf = (char *) 
hcmalloc (build_options_sz); + + int build_options_module_len = 0; + + build_options_module_len += snprintf (build_options_module_buf + build_options_module_len, build_options_sz - build_options_module_len, "%s ", build_options_buf); + + if (module_ctx->module_jit_build_options != MODULE_DEFAULT) + { + char *jit_build_options = module_ctx->module_jit_build_options (hashconfig, user_options, user_options_extra, hashes, device_param); + + if (jit_build_options != NULL) + { + build_options_module_len += snprintf (build_options_module_buf + build_options_module_len, build_options_sz - build_options_module_len, "%s", jit_build_options); + + // this is a bit ugly + // it would be better to have the module return the value directly instead of embedding it in a string + + u32 fixed_local_size = 0; + + if (sscanf (jit_build_options, "-D FIXED_LOCAL_SIZE=%u", &fixed_local_size) == 1) + { + device_param->kernel_threads_min = fixed_local_size; + device_param->kernel_threads_max = fixed_local_size; + } + else + { + // the kernel-specific minimum needs to be set so that the self-test won't fail + + if (sscanf (jit_build_options, "-D FIXED_LOCAL_SIZE_COMP=%u", &fixed_local_size) == 1) + { + device_param->kernel_threads_min = fixed_local_size; + // device_param->kernel_threads_max = fixed_local_size; + } + } + } + } + + build_options_module_buf[build_options_module_len] = 0; + + #if defined (DEBUG) + if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: build_options_module '%s'", device_id + 1, build_options_module_buf); + #endif + + /** + * device_name_chksum + */ + + char device_name_chksum[HCBUFSIZ_TINY] = { 0 }; + + // The kernel source can depend on some JIT compiler macros which themselves depend on the attack mode. + // ATM this is relevant only for ATTACK_MODE_ASSOCIATION, which slightly modifies ATTACK_MODE_STRAIGHT kernels. + + const u32 extra_value = (user_options->attack_mode == ATTACK_MODE_ASSOCIATION) ? ATTACK_MODE_ASSOCIATION : ATTACK_MODE_NONE; + + const size_t dnclen = snprintf (device_name_chksum, HCBUFSIZ_TINY, "%d-%d-%d-%u-%d-%u-%s-%s-%s-%d-%u-%u-%u-%s", + backend_ctx->comptime, + backend_ctx->cuda_driver_version, + backend_ctx->hip_runtimeVersion, + backend_ctx->metal_runtimeVersion, + device_param->is_opencl, + device_param->opencl_platform_vendor_id, + device_param->device_name, + device_param->opencl_device_version, + device_param->opencl_driver_version, + device_param->vector_width, + hashconfig->kern_type, + extra_value, + (user_options->kernel_threads_chgd == true) ? 
user_options->kernel_threads : device_param->kernel_threads_max, + build_options_module_buf); + + memset (&md5_ctx, 0, sizeof (md5_ctx_t)); + md5_init (&md5_ctx); + md5_update (&md5_ctx, (u32 *) device_name_chksum, dnclen); + md5_final (&md5_ctx); + + snprintf (device_name_chksum, HCBUFSIZ_TINY, "%08x", md5_ctx.h[0]); + + /** + * kernel source filename + */ + + char source_file[256] = { 0 }; + + generate_source_kernel_filename (user_options->slow_candidates, hashconfig->attack_exec, user_options_extra->attack_kern, kern_type, hashconfig->opti_type, folder_config->shared_dir, source_file); + + if (hc_path_read (source_file) == false) + { + event_log_error (hashcat_ctx, "%s: %s", source_file, strerror (errno)); + + return -1; + } + + /** + * kernel cached filename + */ + + char cached_file[256] = { 0 }; + + generate_cached_kernel_filename (user_options->slow_candidates, hashconfig->attack_exec, user_options_extra->attack_kern, kern_type, hashconfig->opti_type, folder_config->cache_dir, device_name_chksum, cached_file, device_param->is_metal); + + /** + * load kernel + */ + + #if defined (__APPLE__) + const bool rc_load_kernel = load_kernel (hashcat_ctx, device_param, "main_kernel", source_file, cached_file, build_options_module_buf, cache_disable, &device_param->opencl_program, &device_param->cuda_module, &device_param->hip_module, &device_param->metal_library); + #else + const bool rc_load_kernel = load_kernel (hashcat_ctx, device_param, "main_kernel", source_file, cached_file, build_options_module_buf, cache_disable, &device_param->opencl_program, &device_param->cuda_module, &device_param->hip_module, NULL); + #endif + + if (rc_load_kernel == false) + { + event_log_error (hashcat_ctx, "* Device #%u: Kernel %s build failed.", device_param->device_id + 1, source_file); + + backend_kernel_build_warnings++; + + device_param->skipped_warning = true; + continue; + } + + hcfree (build_options_module_buf); + } + + /** + * word generator kernel + */ + + if (user_options->slow_candidates == true) + { + } + else + { + if ((user_options->attack_mode != ATTACK_MODE_STRAIGHT) && (user_options->attack_mode != ATTACK_MODE_ASSOCIATION)) + { + /** + * kernel mp source filename + */ + + char source_file[256] = { 0 }; + + generate_source_kernel_mp_filename (hashconfig->opti_type, hashconfig->opts_type, folder_config->shared_dir, source_file); + + if (hc_path_read (source_file) == false) + { + event_log_error (hashcat_ctx, "%s: %s", source_file, strerror (errno)); + + return -1; + } + + /** + * kernel mp cached filename + */ + + char cached_file[256] = { 0 }; + + generate_cached_kernel_mp_filename (hashconfig->opti_type, hashconfig->opts_type, folder_config->cache_dir, device_name_chksum_amp_mp, cached_file, device_param->is_metal); + + #if defined (__APPLE__) + const bool rc_load_kernel = load_kernel (hashcat_ctx, device_param, "mp_kernel", source_file, cached_file, build_options_buf, cache_disable, &device_param->opencl_program_mp, &device_param->cuda_module_mp, &device_param->hip_module_mp, &device_param->metal_library_mp); + #else + const bool rc_load_kernel = load_kernel (hashcat_ctx, device_param, "mp_kernel", source_file, cached_file, build_options_buf, cache_disable, &device_param->opencl_program_mp, &device_param->cuda_module_mp, &device_param->hip_module_mp, NULL); + #endif + + if (rc_load_kernel == false) + { + event_log_error (hashcat_ctx, "* Device #%u: Kernel %s build failed.", device_param->device_id + 1, source_file); + + return -1; + } + } + } + + /** + * amplifier kernel + */ + + if 
(user_options->slow_candidates == true) + { + } + else + { + if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + { + + } + else + { + /** + * kernel amp source filename + */ + + char source_file[256] = { 0 }; + + generate_source_kernel_amp_filename (user_options_extra->attack_kern, folder_config->shared_dir, source_file); + + if (hc_path_read (source_file) == false) + { + event_log_error (hashcat_ctx, "%s: %s", source_file, strerror (errno)); + + return -1; + } + + /** + * kernel amp cached filename + */ + + char cached_file[256] = { 0 }; + + generate_cached_kernel_amp_filename (user_options_extra->attack_kern, folder_config->cache_dir, device_name_chksum_amp_mp, cached_file, device_param->is_metal); + + #if defined (__APPLE__) + const bool rc_load_kernel = load_kernel (hashcat_ctx, device_param, "amp_kernel", source_file, cached_file, build_options_buf, cache_disable, &device_param->opencl_program_amp, &device_param->cuda_module_amp, &device_param->hip_module_amp, &device_param->metal_library_amp); + #else + const bool rc_load_kernel = load_kernel (hashcat_ctx, device_param, "amp_kernel", source_file, cached_file, build_options_buf, cache_disable, &device_param->opencl_program_amp, &device_param->cuda_module_amp, &device_param->hip_module_amp, NULL); + #endif + + if (rc_load_kernel == false) + { + event_log_error (hashcat_ctx, "* Device #%u: Kernel %s build failed.", device_param->device_id + 1, source_file); + + return -1; + } + + hcfree (build_options_buf); + } + } + + /** + * No more need for the compiler. CUDA doesn't offer this function. + * From the OpenCL specs: + * Calls to clBuildProgram, clCompileProgram or clLinkProgram after clUnloadPlatformCompiler will reload the compiler, if necessary, to build the appropriate program executable. 
+ */ + // Disabled after users reported weird errors like CL_OUT_OF_HOST_MEMORY after calling clUnloadPlatformCompiler + /* + if (device_param->is_opencl == true) + { + cl_platform_id platform_id = backend_ctx->opencl_platforms[device_param->opencl_platform_id]; + + if (hc_clUnloadPlatformCompiler (hashcat_ctx, platform_id) == -1) return -1; + } + */ + + // some algorithms collide too fast, so make that impossible + + if (user_options->benchmark == true) + { + ((u32 *) hashes->digests_buf)[0] = -1U; + ((u32 *) hashes->digests_buf)[1] = -1U; + ((u32 *) hashes->digests_buf)[2] = -1U; + ((u32 *) hashes->digests_buf)[3] = -1U; + } + + /** + * global buffers + */ + + const u64 size_total_fixed + = bitmap_ctx->bitmap_size + + bitmap_ctx->bitmap_size + + bitmap_ctx->bitmap_size + + bitmap_ctx->bitmap_size + + bitmap_ctx->bitmap_size + + bitmap_ctx->bitmap_size + + bitmap_ctx->bitmap_size + + bitmap_ctx->bitmap_size + + size_plains + + size_digests + + size_shown + + size_salts + + size_results + + size_extra_buffer + + size_st_digests + + size_st_salts + + size_st_esalts + + size_esalts + + size_markov_css + + size_root_css + + size_rules + + size_rules_c + + size_tm + + size_kernel_params; + + if (size_total_fixed > device_param->device_available_mem) + { + event_log_error (hashcat_ctx, "* Device #%u: Not enough allocatable device memory for this hashlist/ruleset.", device_id + 1); + + backend_memory_hit_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (device_param->is_cuda == true) + { + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_bitmap_s1_a, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_bitmap_s1_b, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_bitmap_s1_c, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_bitmap_s1_d, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_bitmap_s2_a, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_bitmap_s2_b, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_bitmap_s2_c, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_bitmap_s2_d, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_plain_bufs, size_plains) == -1) return -1; + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_digests_buf, size_digests) == -1) return -1; + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_digests_shown, size_shown) == -1) return -1; + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_salt_bufs, size_salts) == -1) return -1; + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_result, size_results) == -1) return -1; + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_extra0_buf, size_extra_buffer / 4) == -1) return -1; + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_extra1_buf, size_extra_buffer / 4) == -1) return -1; + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_extra2_buf, size_extra_buffer / 4) == -1) return -1; + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_extra3_buf, size_extra_buffer / 4) == -1) return -1; + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_st_digests_buf, size_st_digests) == -1) return -1; + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_st_salts_buf, size_st_salts) == -1) return -1; + if 
(hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_kernel_param, size_kernel_params) == -1) return -1; + + if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_bitmap_s1_a, bitmap_ctx->bitmap_s1_a, bitmap_ctx->bitmap_size, device_param->cuda_stream) == -1) return -1; + if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_bitmap_s1_b, bitmap_ctx->bitmap_s1_b, bitmap_ctx->bitmap_size, device_param->cuda_stream) == -1) return -1; + if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_bitmap_s1_c, bitmap_ctx->bitmap_s1_c, bitmap_ctx->bitmap_size, device_param->cuda_stream) == -1) return -1; + if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_bitmap_s1_d, bitmap_ctx->bitmap_s1_d, bitmap_ctx->bitmap_size, device_param->cuda_stream) == -1) return -1; + if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_bitmap_s2_a, bitmap_ctx->bitmap_s2_a, bitmap_ctx->bitmap_size, device_param->cuda_stream) == -1) return -1; + if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_bitmap_s2_b, bitmap_ctx->bitmap_s2_b, bitmap_ctx->bitmap_size, device_param->cuda_stream) == -1) return -1; + if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_bitmap_s2_c, bitmap_ctx->bitmap_s2_c, bitmap_ctx->bitmap_size, device_param->cuda_stream) == -1) return -1; + if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_bitmap_s2_d, bitmap_ctx->bitmap_s2_d, bitmap_ctx->bitmap_size, device_param->cuda_stream) == -1) return -1; + if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_digests_buf, hashes->digests_buf, size_digests, device_param->cuda_stream) == -1) return -1; + if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_salt_bufs, hashes->salts_buf, size_salts, device_param->cuda_stream) == -1) return -1; + + /** + * special buffers + */ + + if (user_options->slow_candidates == true) + { + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_rules_c, size_rules_c) == -1) return -1; + } + else + { + if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT) + { + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_rules, size_rules) == -1) return -1; + + if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + { + size_t dummy = 0; + + if (hc_cuModuleGetGlobal (hashcat_ctx, &device_param->cuda_d_rules_c, &dummy, device_param->cuda_module, "generic_constant") == -1) return -1; + } + else + { + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_rules_c, size_rules_c) == -1) return -1; + } + + if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_rules, straight_ctx->kernel_rules_buf, size_rules, device_param->cuda_stream) == -1) return -1; + } + else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI) + { + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_combs, size_combs) == -1) return -1; + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_combs_c, size_combs) == -1) return -1; + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_root_css_buf, size_root_css) == -1) return -1; + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_markov_css_buf, size_markov_css) == -1) return -1; + } + else if (user_options_extra->attack_kern == ATTACK_KERN_BF) + { + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_bfs, size_bfs) == -1) return -1; + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_root_css_buf, size_root_css) == -1) return -1; + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_markov_css_buf, size_markov_css) == -1) return -1; + + if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + { + size_t dummy 
= 0; + + if (hc_cuModuleGetGlobal (hashcat_ctx, &device_param->cuda_d_bfs_c, &dummy, device_param->cuda_module, "generic_constant") == -1) return -1; + + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_tm_c, size_tm) == -1) return -1; + } + else + { + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_bfs_c, size_bfs) == -1) return -1; + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_tm_c, size_tm) == -1) return -1; + } + } + } + + if (size_esalts) + { + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_esalt_bufs, size_esalts) == -1) return -1; + + if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_esalt_bufs, hashes->esalts_buf, size_esalts, device_param->cuda_stream) == -1) return -1; + } + + if (hashconfig->st_hash != NULL) + { + if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_st_digests_buf, hashes->st_digests_buf, size_st_digests, device_param->cuda_stream) == -1) return -1; + if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_st_salts_buf, hashes->st_salts_buf, size_st_salts, device_param->cuda_stream) == -1) return -1; + + if (size_esalts) + { + if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_st_esalts_buf, size_st_esalts) == -1) return -1; + + if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_st_esalts_buf, hashes->st_esalts_buf, size_st_esalts, device_param->cuda_stream) == -1) return -1; + } + } + } + + if (device_param->is_hip == true) + { + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bitmap_s1_a, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bitmap_s1_b, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bitmap_s1_c, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bitmap_s1_d, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bitmap_s2_a, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bitmap_s2_b, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bitmap_s2_c, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bitmap_s2_d, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_plain_bufs, size_plains) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_digests_buf, size_digests) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_digests_shown, size_shown) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_salt_bufs, size_salts) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_result, size_results) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_extra0_buf, size_extra_buffer / 4) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_extra1_buf, size_extra_buffer / 4) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_extra2_buf, size_extra_buffer / 4) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_extra3_buf, size_extra_buffer / 4) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_st_digests_buf, size_st_digests) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_st_salts_buf, size_st_salts) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_kernel_param, 
size_kernel_params) == -1) return -1; + + if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_bitmap_s1_a, bitmap_ctx->bitmap_s1_a, bitmap_ctx->bitmap_size, device_param->hip_stream) == -1) return -1; + if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_bitmap_s1_b, bitmap_ctx->bitmap_s1_b, bitmap_ctx->bitmap_size, device_param->hip_stream) == -1) return -1; + if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_bitmap_s1_c, bitmap_ctx->bitmap_s1_c, bitmap_ctx->bitmap_size, device_param->hip_stream) == -1) return -1; + if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_bitmap_s1_d, bitmap_ctx->bitmap_s1_d, bitmap_ctx->bitmap_size, device_param->hip_stream) == -1) return -1; + if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_bitmap_s2_a, bitmap_ctx->bitmap_s2_a, bitmap_ctx->bitmap_size, device_param->hip_stream) == -1) return -1; + if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_bitmap_s2_b, bitmap_ctx->bitmap_s2_b, bitmap_ctx->bitmap_size, device_param->hip_stream) == -1) return -1; + if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_bitmap_s2_c, bitmap_ctx->bitmap_s2_c, bitmap_ctx->bitmap_size, device_param->hip_stream) == -1) return -1; + if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_bitmap_s2_d, bitmap_ctx->bitmap_s2_d, bitmap_ctx->bitmap_size, device_param->hip_stream) == -1) return -1; + if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_digests_buf, hashes->digests_buf, size_digests, device_param->hip_stream) == -1) return -1; + if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_salt_bufs, hashes->salts_buf, size_salts, device_param->hip_stream) == -1) return -1; + + /** + * special buffers + */ + + if (user_options->slow_candidates == true) + { + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_rules_c, size_rules_c) == -1) return -1; + } + else + { + if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT) + { + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_rules, size_rules) == -1) return -1; + + if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + { + size_t dummy = 0; + + if (hc_hipModuleGetGlobal (hashcat_ctx, &device_param->hip_d_rules_c, &dummy, device_param->hip_module, "generic_constant") == -1) return -1; + } + else + { + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_rules_c, size_rules_c) == -1) return -1; + } + + if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_rules, straight_ctx->kernel_rules_buf, size_rules, device_param->hip_stream) == -1) return -1; + } + else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI) + { + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_combs, size_combs) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_combs_c, size_combs) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_root_css_buf, size_root_css) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_markov_css_buf, size_markov_css) == -1) return -1; + } + else if (user_options_extra->attack_kern == ATTACK_KERN_BF) + { + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bfs, size_bfs) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_root_css_buf, size_root_css) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_markov_css_buf, size_markov_css) == -1) return -1; + + if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + { + size_t dummy = 0; + + if (hc_hipModuleGetGlobal (hashcat_ctx, &device_param->hip_d_bfs_c, 
&dummy, device_param->hip_module, "generic_constant") == -1) return -1; + + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_tm_c, size_tm) == -1) return -1; + } + else + { + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bfs_c, size_bfs) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_tm_c, size_tm) == -1) return -1; + } + } + } + + if (size_esalts) + { + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_esalt_bufs, size_esalts) == -1) return -1; + + if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_esalt_bufs, hashes->esalts_buf, size_esalts, device_param->hip_stream) == -1) return -1; + } + + if (hashconfig->st_hash != NULL) + { + if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_st_digests_buf, hashes->st_digests_buf, size_st_digests, device_param->hip_stream) == -1) return -1; + if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_st_salts_buf, hashes->st_salts_buf, size_st_salts, device_param->hip_stream) == -1) return -1; + + if (size_esalts) + { + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_st_esalts_buf, size_st_esalts) == -1) return -1; + + if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_st_esalts_buf, hashes->st_esalts_buf, size_st_esalts, device_param->hip_stream) == -1) return -1; + } + } + } + + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + // gpu only + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, bitmap_ctx->bitmap_size, NULL, &device_param->metal_d_bitmap_s1_a) == -1) return -1; + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, bitmap_ctx->bitmap_size, NULL, &device_param->metal_d_bitmap_s1_b) == -1) return -1; + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, bitmap_ctx->bitmap_size, NULL, &device_param->metal_d_bitmap_s1_c) == -1) return -1; + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, bitmap_ctx->bitmap_size, NULL, &device_param->metal_d_bitmap_s1_d) == -1) return -1; + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, bitmap_ctx->bitmap_size, NULL, &device_param->metal_d_bitmap_s2_a) == -1) return -1; + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, bitmap_ctx->bitmap_size, NULL, &device_param->metal_d_bitmap_s2_b) == -1) return -1; + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, bitmap_ctx->bitmap_size, NULL, &device_param->metal_d_bitmap_s2_c) == -1) return -1; + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, bitmap_ctx->bitmap_size, NULL, &device_param->metal_d_bitmap_s2_d) == -1) return -1; + + // shared + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_plains, NULL, &device_param->metal_d_plain_bufs) == -1) return -1; + + // gpu only + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_digests, NULL, &device_param->metal_d_digests_buf) == -1) return -1; + + // shared + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_shown, NULL, &device_param->metal_d_digests_shown) == -1) return -1; + + // gpu only + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_salts, NULL, &device_param->metal_d_salt_bufs) == -1) return -1; + + // shared + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_results, NULL, &device_param->metal_d_result) == -1) return -1; + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_extra_buffer / 4, NULL, &device_param->metal_d_extra0_buf) == -1) return -1; + if 
(hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_extra_buffer / 4, NULL, &device_param->metal_d_extra1_buf) == -1) return -1; + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_extra_buffer / 4, NULL, &device_param->metal_d_extra2_buf) == -1) return -1; + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_extra_buffer / 4, NULL, &device_param->metal_d_extra3_buf) == -1) return -1; + + // gpu only + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_st_digests, NULL, &device_param->metal_d_st_digests_buf) == -1) return -1; + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_st_salts, NULL, &device_param->metal_d_st_salts_buf) == -1) return -1; + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_kernel_params, NULL, &device_param->metal_d_kernel_param) == -1) return -1; + + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_bitmap_s1_a, 0, bitmap_ctx->bitmap_s1_a, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_bitmap_s1_b, 0, bitmap_ctx->bitmap_s1_b, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_bitmap_s1_c, 0, bitmap_ctx->bitmap_s1_c, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_bitmap_s1_d, 0, bitmap_ctx->bitmap_s1_d, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_bitmap_s2_a, 0, bitmap_ctx->bitmap_s2_a, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_bitmap_s2_b, 0, bitmap_ctx->bitmap_s2_b, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_bitmap_s2_c, 0, bitmap_ctx->bitmap_s2_c, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_bitmap_s2_d, 0, bitmap_ctx->bitmap_s2_d, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_digests_buf, 0, hashes->digests_buf, size_digests) == -1) return -1; + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_salt_bufs, 0, hashes->salts_buf, size_salts) == -1) return -1; + + /** + * special buffers + */ + + if (user_options->slow_candidates == true) + { + // gpu only + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_rules_c, NULL, &device_param->metal_d_rules_c) == -1) return -1; + } + else + { + if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT) + { + // gpu only + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_rules, NULL, &device_param->metal_d_rules) == -1) return -1; + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_rules_c, NULL, &device_param->metal_d_rules_c) == -1) return -1; + + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_rules, 0, straight_ctx->kernel_rules_buf, size_rules) == -1) return -1; + } + else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI) + { + // gpu only + if (hc_mtlCreateBuffer (hashcat_ctx, 
device_param->metal_device, size_combs, NULL, &device_param->metal_d_combs) == -1) return -1; + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_combs, NULL, &device_param->metal_d_combs_c) == -1) return -1; + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_root_css, NULL, &device_param->metal_d_root_css_buf) == -1) return -1; + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_markov_css, NULL, &device_param->metal_d_markov_css_buf) == -1) return -1; + } + else if (user_options_extra->attack_kern == ATTACK_KERN_BF) + { + // gpu only + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_bfs, NULL, &device_param->metal_d_bfs) == -1) return -1; + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_bfs, NULL, &device_param->metal_d_bfs_c) == -1) return -1; + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_tm, NULL, &device_param->metal_d_tm_c) == -1) return -1; + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_root_css, NULL, &device_param->metal_d_root_css_buf) == -1) return -1; + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_markov_css, NULL, &device_param->metal_d_markov_css_buf) == -1) return -1; + } + } + + if (size_esalts) + { + // gpu only + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_esalts, NULL, &device_param->metal_d_esalt_bufs) == -1) return -1; + + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_esalt_bufs, 0, hashes->esalts_buf, size_esalts) == -1) return -1; + } + + if (hashconfig->st_hash != NULL) + { + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_st_digests_buf, 0, hashes->st_digests_buf, size_st_digests) == -1) return -1; + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_st_salts_buf, 0, hashes->st_salts_buf, size_st_salts) == -1) return -1; + + if (size_esalts) + { + // gpu only + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_st_esalts, NULL, &device_param->metal_d_st_esalts_buf) == -1) return -1; + + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_st_esalts_buf, 0, hashes->st_esalts_buf, size_st_esalts) == -1) return -1; + } + } + } + #endif // __APPLE__ + + if (device_param->is_opencl == true) + { + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, bitmap_ctx->bitmap_size, NULL, &device_param->opencl_d_bitmap_s1_a) == -1) return -1; + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, bitmap_ctx->bitmap_size, NULL, &device_param->opencl_d_bitmap_s1_b) == -1) return -1; + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, bitmap_ctx->bitmap_size, NULL, &device_param->opencl_d_bitmap_s1_c) == -1) return -1; + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, bitmap_ctx->bitmap_size, NULL, &device_param->opencl_d_bitmap_s1_d) == -1) return -1; + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, bitmap_ctx->bitmap_size, NULL, &device_param->opencl_d_bitmap_s2_a) == -1) return -1; + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, bitmap_ctx->bitmap_size, NULL, &device_param->opencl_d_bitmap_s2_b) == -1) return -1; + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, 
CL_MEM_READ_ONLY, bitmap_ctx->bitmap_size, NULL, &device_param->opencl_d_bitmap_s2_c) == -1) return -1; + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, bitmap_ctx->bitmap_size, NULL, &device_param->opencl_d_bitmap_s2_d) == -1) return -1; + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE, size_plains, NULL, &device_param->opencl_d_plain_bufs) == -1) return -1; + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_digests, NULL, &device_param->opencl_d_digests_buf) == -1) return -1; + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE, size_shown, NULL, &device_param->opencl_d_digests_shown) == -1) return -1; + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_salts, NULL, &device_param->opencl_d_salt_bufs) == -1) return -1; + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE, size_results, NULL, &device_param->opencl_d_result) == -1) return -1; + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE, size_extra_buffer / 4, NULL, &device_param->opencl_d_extra0_buf) == -1) return -1; + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE, size_extra_buffer / 4, NULL, &device_param->opencl_d_extra1_buf) == -1) return -1; + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE, size_extra_buffer / 4, NULL, &device_param->opencl_d_extra2_buf) == -1) return -1; + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE, size_extra_buffer / 4, NULL, &device_param->opencl_d_extra3_buf) == -1) return -1; + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_st_digests, NULL, &device_param->opencl_d_st_digests_buf) == -1) return -1; + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_st_salts, NULL, &device_param->opencl_d_st_salts_buf) == -1) return -1; + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_kernel_params, NULL, &device_param->opencl_d_kernel_param) == -1) return -1; + + if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s1_a, CL_FALSE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s1_a, 0, NULL, NULL) == -1) return -1; + if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s1_b, CL_FALSE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s1_b, 0, NULL, NULL) == -1) return -1; + if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s1_c, CL_FALSE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s1_c, 0, NULL, NULL) == -1) return -1; + if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s1_d, CL_FALSE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s1_d, 0, NULL, NULL) == -1) return -1; + if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s2_a, CL_FALSE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s2_a, 0, NULL, NULL) == -1) return -1; + if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s2_b, CL_FALSE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s2_b, 0, NULL, NULL) == -1) return -1; + if (hc_clEnqueueWriteBuffer 
(hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s2_c, CL_FALSE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s2_c, 0, NULL, NULL) == -1) return -1; + if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s2_d, CL_FALSE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s2_d, 0, NULL, NULL) == -1) return -1; + if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_digests_buf, CL_FALSE, 0, size_digests, hashes->digests_buf, 0, NULL, NULL) == -1) return -1; + if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_salt_bufs, CL_FALSE, 0, size_salts, hashes->salts_buf, 0, NULL, NULL) == -1) return -1; + + /** + * special buffers + */ + + if (user_options->slow_candidates == true) + { + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_rules_c, NULL, &device_param->opencl_d_rules_c) == -1) return -1; + } + else + { + if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT) + { + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_rules, NULL, &device_param->opencl_d_rules) == -1) return -1; + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_rules_c, NULL, &device_param->opencl_d_rules_c) == -1) return -1; + + if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_rules, CL_FALSE, 0, size_rules, straight_ctx->kernel_rules_buf, 0, NULL, NULL) == -1) return -1; + } + else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI) + { + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_combs, NULL, &device_param->opencl_d_combs) == -1) return -1; + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_combs, NULL, &device_param->opencl_d_combs_c) == -1) return -1; + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_root_css, NULL, &device_param->opencl_d_root_css_buf) == -1) return -1; + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_markov_css, NULL, &device_param->opencl_d_markov_css_buf) == -1) return -1; + } + else if (user_options_extra->attack_kern == ATTACK_KERN_BF) + { + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_bfs, NULL, &device_param->opencl_d_bfs) == -1) return -1; + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_bfs, NULL, &device_param->opencl_d_bfs_c) == -1) return -1; + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_tm, NULL, &device_param->opencl_d_tm_c) == -1) return -1; + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_root_css, NULL, &device_param->opencl_d_root_css_buf) == -1) return -1; + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_markov_css, NULL, &device_param->opencl_d_markov_css_buf) == -1) return -1; + } + } + + if (size_esalts) + { + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_esalts, NULL, &device_param->opencl_d_esalt_bufs) == -1) return -1; + + if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_esalt_bufs, CL_FALSE, 0, size_esalts, hashes->esalts_buf, 0, NULL, NULL) == -1) 
return -1; + } + + if (hashconfig->st_hash != NULL) + { + if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_st_digests_buf, CL_FALSE, 0, size_st_digests, hashes->st_digests_buf, 0, NULL, NULL) == -1) return -1; + if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_st_salts_buf, CL_FALSE, 0, size_st_salts, hashes->st_salts_buf, 0, NULL, NULL) == -1) return -1; + + if (size_esalts) + { + if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_st_esalts, NULL, &device_param->opencl_d_st_esalts_buf) == -1) return -1; + + if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_st_esalts_buf, CL_FALSE, 0, size_st_esalts, hashes->st_esalts_buf, 0, NULL, NULL) == -1) return -1; + } + } + + if (hc_clFlush (hashcat_ctx, device_param->opencl_command_queue) == -1) return -1; + } + + /** + * kernel args + */ + + device_param->kernel_param.bitmap_mask = bitmap_ctx->bitmap_mask; + device_param->kernel_param.bitmap_shift1 = bitmap_ctx->bitmap_shift1; + device_param->kernel_param.bitmap_shift2 = bitmap_ctx->bitmap_shift2; + device_param->kernel_param.salt_pos_host = 0; + device_param->kernel_param.loop_pos = 0; + device_param->kernel_param.loop_cnt = 0; + device_param->kernel_param.il_cnt = 0; + device_param->kernel_param.digests_cnt = 0; + device_param->kernel_param.digests_offset_host = 0; device_param->kernel_param.combs_mode = 0; device_param->kernel_param.salt_repeat = 0; device_param->kernel_param.combs_mode = 0; @@ -9147,357 +10676,1283 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) device_param->kernel_param.pws_pos = 0; device_param->kernel_param.gid_max = 0; - if (device_param->is_cuda == true) - { - device_param->kernel_params[ 0] = NULL; // &device_param->cuda_d_pws_buf; - device_param->kernel_params[ 1] = &device_param->cuda_d_rules_c; - device_param->kernel_params[ 2] = &device_param->cuda_d_combs_c; - device_param->kernel_params[ 3] = &device_param->cuda_d_bfs_c; - device_param->kernel_params[ 4] = NULL; // &device_param->cuda_d_tmps; - device_param->kernel_params[ 5] = NULL; // &device_param->cuda_d_hooks; - device_param->kernel_params[ 6] = &device_param->cuda_d_bitmap_s1_a; - device_param->kernel_params[ 7] = &device_param->cuda_d_bitmap_s1_b; - device_param->kernel_params[ 8] = &device_param->cuda_d_bitmap_s1_c; - device_param->kernel_params[ 9] = &device_param->cuda_d_bitmap_s1_d; - device_param->kernel_params[10] = &device_param->cuda_d_bitmap_s2_a; - device_param->kernel_params[11] = &device_param->cuda_d_bitmap_s2_b; - device_param->kernel_params[12] = &device_param->cuda_d_bitmap_s2_c; - device_param->kernel_params[13] = &device_param->cuda_d_bitmap_s2_d; - device_param->kernel_params[14] = &device_param->cuda_d_plain_bufs; - device_param->kernel_params[15] = &device_param->cuda_d_digests_buf; - device_param->kernel_params[16] = &device_param->cuda_d_digests_shown; - device_param->kernel_params[17] = &device_param->cuda_d_salt_bufs; - device_param->kernel_params[18] = &device_param->cuda_d_esalt_bufs; - device_param->kernel_params[19] = &device_param->cuda_d_result; - device_param->kernel_params[20] = &device_param->cuda_d_extra0_buf; - device_param->kernel_params[21] = &device_param->cuda_d_extra1_buf; - device_param->kernel_params[22] = &device_param->cuda_d_extra2_buf; - device_param->kernel_params[23] = &device_param->cuda_d_extra3_buf; - device_param->kernel_params[24] = 
&device_param->cuda_d_kernel_param; - } + if (device_param->is_cuda == true) + { + device_param->kernel_params[ 0] = NULL; // &device_param->cuda_d_pws_buf; + device_param->kernel_params[ 1] = &device_param->cuda_d_rules_c; + device_param->kernel_params[ 2] = &device_param->cuda_d_combs_c; + device_param->kernel_params[ 3] = &device_param->cuda_d_bfs_c; + device_param->kernel_params[ 4] = NULL; // &device_param->cuda_d_tmps; + device_param->kernel_params[ 5] = NULL; // &device_param->cuda_d_hooks; + device_param->kernel_params[ 6] = &device_param->cuda_d_bitmap_s1_a; + device_param->kernel_params[ 7] = &device_param->cuda_d_bitmap_s1_b; + device_param->kernel_params[ 8] = &device_param->cuda_d_bitmap_s1_c; + device_param->kernel_params[ 9] = &device_param->cuda_d_bitmap_s1_d; + device_param->kernel_params[10] = &device_param->cuda_d_bitmap_s2_a; + device_param->kernel_params[11] = &device_param->cuda_d_bitmap_s2_b; + device_param->kernel_params[12] = &device_param->cuda_d_bitmap_s2_c; + device_param->kernel_params[13] = &device_param->cuda_d_bitmap_s2_d; + device_param->kernel_params[14] = &device_param->cuda_d_plain_bufs; + device_param->kernel_params[15] = &device_param->cuda_d_digests_buf; + device_param->kernel_params[16] = &device_param->cuda_d_digests_shown; + device_param->kernel_params[17] = &device_param->cuda_d_salt_bufs; + device_param->kernel_params[18] = &device_param->cuda_d_esalt_bufs; + device_param->kernel_params[19] = &device_param->cuda_d_result; + device_param->kernel_params[20] = &device_param->cuda_d_extra0_buf; + device_param->kernel_params[21] = &device_param->cuda_d_extra1_buf; + device_param->kernel_params[22] = &device_param->cuda_d_extra2_buf; + device_param->kernel_params[23] = &device_param->cuda_d_extra3_buf; + device_param->kernel_params[24] = &device_param->cuda_d_kernel_param; + } + + if (device_param->is_hip == true) + { + device_param->kernel_params[ 0] = NULL; // &device_param->hip_d_pws_buf; + device_param->kernel_params[ 1] = &device_param->hip_d_rules_c; + device_param->kernel_params[ 2] = &device_param->hip_d_combs_c; + device_param->kernel_params[ 3] = &device_param->hip_d_bfs_c; + device_param->kernel_params[ 4] = NULL; // &device_param->hip_d_tmps; + device_param->kernel_params[ 5] = NULL; // &device_param->hip_d_hooks; + device_param->kernel_params[ 6] = &device_param->hip_d_bitmap_s1_a; + device_param->kernel_params[ 7] = &device_param->hip_d_bitmap_s1_b; + device_param->kernel_params[ 8] = &device_param->hip_d_bitmap_s1_c; + device_param->kernel_params[ 9] = &device_param->hip_d_bitmap_s1_d; + device_param->kernel_params[10] = &device_param->hip_d_bitmap_s2_a; + device_param->kernel_params[11] = &device_param->hip_d_bitmap_s2_b; + device_param->kernel_params[12] = &device_param->hip_d_bitmap_s2_c; + device_param->kernel_params[13] = &device_param->hip_d_bitmap_s2_d; + device_param->kernel_params[14] = &device_param->hip_d_plain_bufs; + device_param->kernel_params[15] = &device_param->hip_d_digests_buf; + device_param->kernel_params[16] = &device_param->hip_d_digests_shown; + device_param->kernel_params[17] = &device_param->hip_d_salt_bufs; + device_param->kernel_params[18] = &device_param->hip_d_esalt_bufs; + device_param->kernel_params[19] = &device_param->hip_d_result; + device_param->kernel_params[20] = &device_param->hip_d_extra0_buf; + device_param->kernel_params[21] = &device_param->hip_d_extra1_buf; + device_param->kernel_params[22] = &device_param->hip_d_extra2_buf; + device_param->kernel_params[23] = &device_param->hip_d_extra3_buf; + 
device_param->kernel_params[24] = &device_param->hip_d_kernel_param; + } + + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + device_param->kernel_params[ 0] = NULL; // device_param->metal_d_pws_buf; + device_param->kernel_params[ 1] = device_param->metal_d_rules_c; + device_param->kernel_params[ 2] = device_param->metal_d_combs_c; + device_param->kernel_params[ 3] = device_param->metal_d_bfs_c; + device_param->kernel_params[ 4] = NULL; // device_param->metal_d_tmps; + device_param->kernel_params[ 5] = NULL; // device_param->metal_d_hooks; + device_param->kernel_params[ 6] = device_param->metal_d_bitmap_s1_a; + device_param->kernel_params[ 7] = device_param->metal_d_bitmap_s1_b; + device_param->kernel_params[ 8] = device_param->metal_d_bitmap_s1_c; + device_param->kernel_params[ 9] = device_param->metal_d_bitmap_s1_d; + device_param->kernel_params[10] = device_param->metal_d_bitmap_s2_a; + device_param->kernel_params[11] = device_param->metal_d_bitmap_s2_b; + device_param->kernel_params[12] = device_param->metal_d_bitmap_s2_c; + device_param->kernel_params[13] = device_param->metal_d_bitmap_s2_d; + device_param->kernel_params[14] = device_param->metal_d_plain_bufs; + device_param->kernel_params[15] = device_param->metal_d_digests_buf; + device_param->kernel_params[16] = device_param->metal_d_digests_shown; + device_param->kernel_params[17] = device_param->metal_d_salt_bufs; + device_param->kernel_params[18] = device_param->metal_d_esalt_bufs; + device_param->kernel_params[19] = device_param->metal_d_result; + device_param->kernel_params[20] = device_param->metal_d_extra0_buf; + device_param->kernel_params[21] = device_param->metal_d_extra1_buf; + device_param->kernel_params[22] = device_param->metal_d_extra2_buf; + device_param->kernel_params[23] = device_param->metal_d_extra3_buf; + device_param->kernel_params[24] = device_param->metal_d_kernel_param; + } + #endif // __APPLE__ + + if (device_param->is_opencl == true) + { + device_param->kernel_params[ 0] = NULL; // &device_param->opencl_d_pws_buf; + device_param->kernel_params[ 1] = &device_param->opencl_d_rules_c; + device_param->kernel_params[ 2] = &device_param->opencl_d_combs_c; + device_param->kernel_params[ 3] = &device_param->opencl_d_bfs_c; + device_param->kernel_params[ 4] = NULL; // &device_param->opencl_d_tmps; + device_param->kernel_params[ 5] = NULL; // &device_param->opencl_d_hooks; + device_param->kernel_params[ 6] = &device_param->opencl_d_bitmap_s1_a; + device_param->kernel_params[ 7] = &device_param->opencl_d_bitmap_s1_b; + device_param->kernel_params[ 8] = &device_param->opencl_d_bitmap_s1_c; + device_param->kernel_params[ 9] = &device_param->opencl_d_bitmap_s1_d; + device_param->kernel_params[10] = &device_param->opencl_d_bitmap_s2_a; + device_param->kernel_params[11] = &device_param->opencl_d_bitmap_s2_b; + device_param->kernel_params[12] = &device_param->opencl_d_bitmap_s2_c; + device_param->kernel_params[13] = &device_param->opencl_d_bitmap_s2_d; + device_param->kernel_params[14] = &device_param->opencl_d_plain_bufs; + device_param->kernel_params[15] = &device_param->opencl_d_digests_buf; + device_param->kernel_params[16] = &device_param->opencl_d_digests_shown; + device_param->kernel_params[17] = &device_param->opencl_d_salt_bufs; + device_param->kernel_params[18] = &device_param->opencl_d_esalt_bufs; + device_param->kernel_params[19] = &device_param->opencl_d_result; + device_param->kernel_params[20] = &device_param->opencl_d_extra0_buf; + device_param->kernel_params[21] = 
&device_param->opencl_d_extra1_buf; + device_param->kernel_params[22] = &device_param->opencl_d_extra2_buf; + device_param->kernel_params[23] = &device_param->opencl_d_extra3_buf; + device_param->kernel_params[24] = &device_param->opencl_d_kernel_param; + } + + if (user_options->slow_candidates == true) + { + } + else + { + device_param->kernel_params_mp_buf64[3] = 0; + device_param->kernel_params_mp_buf32[4] = 0; + device_param->kernel_params_mp_buf32[5] = 0; + device_param->kernel_params_mp_buf32[6] = 0; + device_param->kernel_params_mp_buf32[7] = 0; + device_param->kernel_params_mp_buf64[8] = 0; + + if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL) + { + if (device_param->is_cuda == true) + { + device_param->kernel_params_mp[0] = &device_param->cuda_d_combs; + } + + if (device_param->is_hip == true) + { + device_param->kernel_params_mp[0] = &device_param->hip_d_combs; + } + + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + device_param->kernel_params_mp[0] = device_param->metal_d_combs; + } + #endif + + if (device_param->is_opencl == true) + { + device_param->kernel_params_mp[0] = &device_param->opencl_d_combs; + } + } + else + { + if (user_options->attack_mode == ATTACK_MODE_HYBRID1) + { + if (device_param->is_cuda == true) + { + device_param->kernel_params_mp[0] = &device_param->cuda_d_combs; + } + + if (device_param->is_hip == true) + { + device_param->kernel_params_mp[0] = &device_param->hip_d_combs; + } + + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + device_param->kernel_params_mp[0] = device_param->metal_d_combs; + } + #endif + + if (device_param->is_opencl == true) + { + device_param->kernel_params_mp[0] = &device_param->opencl_d_combs; + } + } + else + { + device_param->kernel_params_mp[0] = NULL; // (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + // ? 
&device_param->opencl_d_pws_buf + // : &device_param->opencl_d_pws_amp_buf; + } + } + + if (device_param->is_cuda == true) + { + device_param->kernel_params_mp[1] = &device_param->cuda_d_root_css_buf; + device_param->kernel_params_mp[2] = &device_param->cuda_d_markov_css_buf; + } + + if (device_param->is_hip == true) + { + device_param->kernel_params_mp[1] = &device_param->hip_d_root_css_buf; + device_param->kernel_params_mp[2] = &device_param->hip_d_markov_css_buf; + } + + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + device_param->kernel_params_mp[1] = device_param->metal_d_root_css_buf; + device_param->kernel_params_mp[2] = device_param->metal_d_markov_css_buf; + } + #endif + + if (device_param->is_opencl == true) + { + device_param->kernel_params_mp[1] = &device_param->opencl_d_root_css_buf; + device_param->kernel_params_mp[2] = &device_param->opencl_d_markov_css_buf; + } + + device_param->kernel_params_mp[3] = &device_param->kernel_params_mp_buf64[3]; + device_param->kernel_params_mp[4] = &device_param->kernel_params_mp_buf32[4]; + device_param->kernel_params_mp[5] = &device_param->kernel_params_mp_buf32[5]; + device_param->kernel_params_mp[6] = &device_param->kernel_params_mp_buf32[6]; + device_param->kernel_params_mp[7] = &device_param->kernel_params_mp_buf32[7]; + device_param->kernel_params_mp[8] = &device_param->kernel_params_mp_buf64[8]; + + device_param->kernel_params_mp_l_buf64[3] = 0; + device_param->kernel_params_mp_l_buf32[4] = 0; + device_param->kernel_params_mp_l_buf32[5] = 0; + device_param->kernel_params_mp_l_buf32[6] = 0; + device_param->kernel_params_mp_l_buf32[7] = 0; + device_param->kernel_params_mp_l_buf32[8] = 0; + device_param->kernel_params_mp_l_buf64[9] = 0; + + device_param->kernel_params_mp_l[0] = NULL; // (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + // ? 
&device_param->opencl_d_pws_buf + // : &device_param->opencl_d_pws_amp_buf; + + if (device_param->is_cuda == true) + { + device_param->kernel_params_mp_l[1] = &device_param->cuda_d_root_css_buf; + device_param->kernel_params_mp_l[2] = &device_param->cuda_d_markov_css_buf; + } + + if (device_param->is_hip == true) + { + device_param->kernel_params_mp_l[1] = &device_param->hip_d_root_css_buf; + device_param->kernel_params_mp_l[2] = &device_param->hip_d_markov_css_buf; + } + + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + device_param->kernel_params_mp_l[1] = device_param->metal_d_root_css_buf; + device_param->kernel_params_mp_l[2] = device_param->metal_d_markov_css_buf; + } + #endif + + if (device_param->is_opencl == true) + { + device_param->kernel_params_mp_l[1] = &device_param->opencl_d_root_css_buf; + device_param->kernel_params_mp_l[2] = &device_param->opencl_d_markov_css_buf; + } + + device_param->kernel_params_mp_l[3] = &device_param->kernel_params_mp_l_buf64[3]; + device_param->kernel_params_mp_l[4] = &device_param->kernel_params_mp_l_buf32[4]; + device_param->kernel_params_mp_l[5] = &device_param->kernel_params_mp_l_buf32[5]; + device_param->kernel_params_mp_l[6] = &device_param->kernel_params_mp_l_buf32[6]; + device_param->kernel_params_mp_l[7] = &device_param->kernel_params_mp_l_buf32[7]; + device_param->kernel_params_mp_l[8] = &device_param->kernel_params_mp_l_buf32[8]; + device_param->kernel_params_mp_l[9] = &device_param->kernel_params_mp_l_buf64[9]; + + device_param->kernel_params_mp_r_buf64[3] = 0; + device_param->kernel_params_mp_r_buf32[4] = 0; + device_param->kernel_params_mp_r_buf32[5] = 0; + device_param->kernel_params_mp_r_buf32[6] = 0; + device_param->kernel_params_mp_r_buf32[7] = 0; + device_param->kernel_params_mp_r_buf64[8] = 0; + + if (device_param->is_cuda == true) + { + device_param->kernel_params_mp_r[0] = &device_param->cuda_d_bfs; + device_param->kernel_params_mp_r[1] = &device_param->cuda_d_root_css_buf; + device_param->kernel_params_mp_r[2] = &device_param->cuda_d_markov_css_buf; + } + + if (device_param->is_hip == true) + { + device_param->kernel_params_mp_r[0] = &device_param->hip_d_bfs; + device_param->kernel_params_mp_r[1] = &device_param->hip_d_root_css_buf; + device_param->kernel_params_mp_r[2] = &device_param->hip_d_markov_css_buf; + } + + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + device_param->kernel_params_mp_r[0] = device_param->metal_d_bfs; + device_param->kernel_params_mp_r[1] = device_param->metal_d_root_css_buf; + device_param->kernel_params_mp_r[2] = device_param->metal_d_markov_css_buf; + } + #endif + + if (device_param->is_opencl == true) + { + device_param->kernel_params_mp_r[0] = &device_param->opencl_d_bfs; + device_param->kernel_params_mp_r[1] = &device_param->opencl_d_root_css_buf; + device_param->kernel_params_mp_r[2] = &device_param->opencl_d_markov_css_buf; + } + + device_param->kernel_params_mp_r[3] = &device_param->kernel_params_mp_r_buf64[3]; + device_param->kernel_params_mp_r[4] = &device_param->kernel_params_mp_r_buf32[4]; + device_param->kernel_params_mp_r[5] = &device_param->kernel_params_mp_r_buf32[5]; + device_param->kernel_params_mp_r[6] = &device_param->kernel_params_mp_r_buf32[6]; + device_param->kernel_params_mp_r[7] = &device_param->kernel_params_mp_r_buf32[7]; + device_param->kernel_params_mp_r[8] = &device_param->kernel_params_mp_r_buf64[8]; + + device_param->kernel_params_amp_buf32[5] = 0; // combs_mode + device_param->kernel_params_amp_buf64[6] = 0; // gid_max + + if 
(device_param->is_cuda == true) + { + device_param->kernel_params_amp[0] = NULL; // &device_param->cuda_d_pws_buf; + device_param->kernel_params_amp[1] = NULL; // &device_param->cuda_d_pws_amp_buf; + device_param->kernel_params_amp[2] = &device_param->cuda_d_rules_c; + device_param->kernel_params_amp[3] = &device_param->cuda_d_combs_c; + device_param->kernel_params_amp[4] = &device_param->cuda_d_bfs_c; + } + + if (device_param->is_hip == true) + { + device_param->kernel_params_amp[0] = NULL; // &device_param->hip_d_pws_buf; + device_param->kernel_params_amp[1] = NULL; // &device_param->hip_d_pws_amp_buf; + device_param->kernel_params_amp[2] = &device_param->hip_d_rules_c; + device_param->kernel_params_amp[3] = &device_param->hip_d_combs_c; + device_param->kernel_params_amp[4] = &device_param->hip_d_bfs_c; + } + + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + device_param->kernel_params_amp[0] = NULL; // device_param->metal_d_pws_buf; + device_param->kernel_params_amp[1] = NULL; // device_param->metal_d_pws_amp_buf; + device_param->kernel_params_amp[2] = device_param->metal_d_rules_c; + device_param->kernel_params_amp[3] = device_param->metal_d_combs_c; + device_param->kernel_params_amp[4] = device_param->metal_d_bfs_c; + } + #endif + + if (device_param->is_opencl == true) + { + device_param->kernel_params_amp[0] = NULL; // &device_param->opencl_d_pws_buf; + device_param->kernel_params_amp[1] = NULL; // &device_param->opencl_d_pws_amp_buf; + device_param->kernel_params_amp[2] = &device_param->opencl_d_rules_c; + device_param->kernel_params_amp[3] = &device_param->opencl_d_combs_c; + device_param->kernel_params_amp[4] = &device_param->opencl_d_bfs_c; + } + + device_param->kernel_params_amp[5] = &device_param->kernel_params_amp_buf32[5]; + device_param->kernel_params_amp[6] = &device_param->kernel_params_amp_buf64[6]; + + if (device_param->is_cuda == true) + { + device_param->kernel_params_tm[0] = &device_param->cuda_d_bfs_c; + device_param->kernel_params_tm[1] = &device_param->cuda_d_tm_c; + } + + if (device_param->is_hip == true) + { + device_param->kernel_params_tm[0] = &device_param->hip_d_bfs_c; + device_param->kernel_params_tm[1] = &device_param->hip_d_tm_c; + } + + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + device_param->kernel_params_tm[0] = device_param->metal_d_bfs_c; + device_param->kernel_params_tm[1] = device_param->metal_d_tm_c; + } + #endif + + if (device_param->is_opencl == true) + { + device_param->kernel_params_tm[0] = &device_param->opencl_d_bfs_c; + device_param->kernel_params_tm[1] = &device_param->opencl_d_tm_c; + } + } + + device_param->kernel_params_memset_buf32[1] = 0; // value + device_param->kernel_params_memset_buf64[2] = 0; // gid_max + + device_param->kernel_params_memset[0] = NULL; + device_param->kernel_params_memset[1] = &device_param->kernel_params_memset_buf32[1]; + device_param->kernel_params_memset[2] = &device_param->kernel_params_memset_buf64[2]; + + device_param->kernel_params_bzero_buf64[1] = 0; // gid_max + + device_param->kernel_params_bzero[0] = NULL; + device_param->kernel_params_bzero[1] = &device_param->kernel_params_bzero_buf64[1]; + + device_param->kernel_params_atinit_buf64[1] = 0; // gid_max + + device_param->kernel_params_atinit[0] = NULL; + device_param->kernel_params_atinit[1] = &device_param->kernel_params_atinit_buf64[1]; + + device_param->kernel_params_utf8toutf16le_buf64[1] = 0; // gid_max + + device_param->kernel_params_utf8toutf16le[0] = NULL; + 
device_param->kernel_params_utf8toutf16le[1] = &device_param->kernel_params_utf8toutf16le_buf64[1]; + + device_param->kernel_params_decompress_buf64[3] = 0; // gid_max + + if (device_param->is_cuda == true) + { + device_param->kernel_params_decompress[0] = NULL; // &device_param->cuda_d_pws_idx; + device_param->kernel_params_decompress[1] = NULL; // &device_param->cuda_d_pws_comp_buf; + device_param->kernel_params_decompress[2] = NULL; // (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + // ? &device_param->cuda_d_pws_buf + // : &device_param->cuda_d_pws_amp_buf; + } + + if (device_param->is_hip == true) + { + device_param->kernel_params_decompress[0] = NULL; // &device_param->hip_d_pws_idx; + device_param->kernel_params_decompress[1] = NULL; // &device_param->hip_d_pws_comp_buf; + device_param->kernel_params_decompress[2] = NULL; // (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + // ? &device_param->hip_d_pws_buf + // : &device_param->hip_d_pws_amp_buf; + } + + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + device_param->kernel_params_decompress[0] = NULL; // device_param->metal_d_pws_idx; + device_param->kernel_params_decompress[1] = NULL; // device_param->metal_d_pws_comp_buf; + device_param->kernel_params_decompress[2] = NULL; // (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + // ? device_param->metal_d_pws_buf + // : device_param->metal_d_pws_amp_buf; + } + #endif + + if (device_param->is_opencl == true) + { + device_param->kernel_params_decompress[0] = NULL; // &device_param->opencl_d_pws_idx; + device_param->kernel_params_decompress[1] = NULL; // &device_param->opencl_d_pws_comp_buf; + device_param->kernel_params_decompress[2] = NULL; // (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + // ? 
&device_param->opencl_d_pws_buf + // : &device_param->opencl_d_pws_amp_buf; + } + + device_param->kernel_params_decompress[3] = &device_param->kernel_params_decompress_buf64[3]; + + /** + * kernel name + */ + + if (device_param->is_cuda == true) + { + char kernel_name[64] = { 0 }; + + if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + { + if (hashconfig->opti_type & OPTI_TYPE_SINGLE_HASH) + { + if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL) + { + // kernel1 + + snprintf (kernel_name, sizeof (kernel_name), "m%05u_s%02d", kern_type, 4); + + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function1, device_param->cuda_module, kernel_name) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function1, &device_param->kernel_wgs1) == -1) return -1; + + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function1, &device_param->kernel_local_mem_size1) == -1) return -1; + + device_param->kernel_dynamic_local_mem_size1 = device_param->device_local_mem_size - device_param->kernel_local_mem_size1; + + device_param->kernel_preferred_wgs_multiple1 = device_param->cuda_warp_size; + + // kernel2 + + snprintf (kernel_name, sizeof (kernel_name), "m%05u_s%02d", kern_type, 8); + + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function2, device_param->cuda_module, kernel_name) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function2, &device_param->kernel_wgs2) == -1) return -1; + + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function2, &device_param->kernel_local_mem_size2) == -1) return -1; + + device_param->kernel_dynamic_local_mem_size2 = device_param->device_local_mem_size - device_param->kernel_local_mem_size2; + + device_param->kernel_preferred_wgs_multiple2 = device_param->cuda_warp_size; + + // kernel3 + + snprintf (kernel_name, sizeof (kernel_name), "m%05u_s%02d", kern_type, 16); + + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function3, device_param->cuda_module, kernel_name) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function3, &device_param->kernel_wgs3) == -1) return -1; + + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function3, &device_param->kernel_local_mem_size3) == -1) return -1; + + device_param->kernel_dynamic_local_mem_size3 = device_param->device_local_mem_size - device_param->kernel_local_mem_size3; + + device_param->kernel_preferred_wgs_multiple3 = device_param->cuda_warp_size; + } + else + { + snprintf (kernel_name, sizeof (kernel_name), "m%05u_sxx", kern_type); + + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function4, device_param->cuda_module, kernel_name) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); + + backend_kernel_create_warnings++; + + 
device_param->skipped_warning = true; + continue; + } + + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function4, &device_param->kernel_wgs4) == -1) return -1; + + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function4, &device_param->kernel_local_mem_size4) == -1) return -1; + + device_param->kernel_dynamic_local_mem_size4 = device_param->device_local_mem_size - device_param->kernel_local_mem_size4; + + device_param->kernel_preferred_wgs_multiple4 = device_param->cuda_warp_size; + } + } + else + { + if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL) + { + // kernel1 + + snprintf (kernel_name, sizeof (kernel_name), "m%05u_m%02d", kern_type, 4); + + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function1, device_param->cuda_module, kernel_name) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function1, &device_param->kernel_wgs1) == -1) return -1; + + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function1, &device_param->kernel_local_mem_size1) == -1) return -1; + + device_param->kernel_dynamic_local_mem_size1 = device_param->device_local_mem_size - device_param->kernel_local_mem_size1; + + device_param->kernel_preferred_wgs_multiple1 = device_param->cuda_warp_size; + + // kernel2 + + snprintf (kernel_name, sizeof (kernel_name), "m%05u_m%02d", kern_type, 8); + + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function2, device_param->cuda_module, kernel_name) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function2, &device_param->kernel_wgs2) == -1) return -1; + + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function2, &device_param->kernel_local_mem_size2) == -1) return -1; + + device_param->kernel_dynamic_local_mem_size2 = device_param->device_local_mem_size - device_param->kernel_local_mem_size2; + + device_param->kernel_preferred_wgs_multiple2 = device_param->cuda_warp_size; + + // kernel3 + + snprintf (kernel_name, sizeof (kernel_name), "m%05u_m%02d", kern_type, 16); + + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function3, device_param->cuda_module, kernel_name) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function3, &device_param->kernel_wgs3) == -1) return -1; + + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function3, &device_param->kernel_local_mem_size3) == -1) return -1; + + device_param->kernel_dynamic_local_mem_size3 = device_param->device_local_mem_size - device_param->kernel_local_mem_size3; + + device_param->kernel_preferred_wgs_multiple3 = device_param->cuda_warp_size; + } + else + { + snprintf (kernel_name, sizeof (kernel_name), "m%05u_mxx", kern_type); + + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function4, device_param->cuda_module, kernel_name) == -1) + { + event_log_warning (hashcat_ctx, 
"* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function4, &device_param->kernel_wgs4) == -1) return -1; + + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function4, &device_param->kernel_local_mem_size4) == -1) return -1; + + device_param->kernel_dynamic_local_mem_size4 = device_param->device_local_mem_size - device_param->kernel_local_mem_size4; + + device_param->kernel_preferred_wgs_multiple4 = device_param->cuda_warp_size; + } + } + + if (user_options->slow_candidates == true) + { + } + else + { + if (user_options->attack_mode == ATTACK_MODE_BF) + { + if (hashconfig->opts_type & OPTS_TYPE_TM_KERNEL) + { + snprintf (kernel_name, sizeof (kernel_name), "m%05u_tm", kern_type); + + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_tm, device_param->cuda_module, kernel_name) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_tm, &device_param->kernel_wgs_tm) == -1) return -1; + + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_tm, &device_param->kernel_local_mem_size_tm) == -1) return -1; + + device_param->kernel_dynamic_local_mem_size_tm = device_param->device_local_mem_size - device_param->kernel_local_mem_size_tm; + + device_param->kernel_preferred_wgs_multiple_tm = device_param->cuda_warp_size; + } + } + } + } + else + { + // kernel1 + + snprintf (kernel_name, sizeof (kernel_name), "m%05u_init", kern_type); + + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function1, device_param->cuda_module, kernel_name) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function1, &device_param->kernel_wgs1) == -1) return -1; + + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function1, &device_param->kernel_local_mem_size1) == -1) return -1; + + device_param->kernel_dynamic_local_mem_size1 = device_param->device_local_mem_size - device_param->kernel_local_mem_size1; + + device_param->kernel_preferred_wgs_multiple1 = device_param->cuda_warp_size; + + // kernel2 + + snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop", kern_type); + + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function2, device_param->cuda_module, kernel_name) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function2, &device_param->kernel_wgs2) == -1) return -1; + + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function2, &device_param->kernel_local_mem_size2) == -1) return -1; + + device_param->kernel_dynamic_local_mem_size2 = device_param->device_local_mem_size - device_param->kernel_local_mem_size2; + + device_param->kernel_preferred_wgs_multiple2 = device_param->cuda_warp_size; + + // 
kernel3 + + snprintf (kernel_name, sizeof (kernel_name), "m%05u_comp", kern_type); + + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function3, device_param->cuda_module, kernel_name) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function3, &device_param->kernel_wgs3) == -1) return -1; + + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function3, &device_param->kernel_local_mem_size3) == -1) return -1; + + device_param->kernel_dynamic_local_mem_size3 = device_param->device_local_mem_size - device_param->kernel_local_mem_size3; + + device_param->kernel_preferred_wgs_multiple3 = device_param->cuda_warp_size; + + if (hashconfig->opts_type & OPTS_TYPE_LOOP_PREPARE) + { + // kernel2p + + snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop_prepare", kern_type); + + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function2p, device_param->cuda_module, kernel_name) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function2p, &device_param->kernel_wgs2p) == -1) return -1; + + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function2p, &device_param->kernel_local_mem_size2p) == -1) return -1; + + device_param->kernel_dynamic_local_mem_size2p = device_param->device_local_mem_size - device_param->kernel_local_mem_size2p; + + device_param->kernel_preferred_wgs_multiple2p = device_param->cuda_warp_size; + } + + if (hashconfig->opts_type & OPTS_TYPE_LOOP_EXTENDED) + { + // kernel2e - if (device_param->is_hip == true) - { - device_param->kernel_params[ 0] = NULL; // &device_param->hip_d_pws_buf; - device_param->kernel_params[ 1] = &device_param->hip_d_rules_c; - device_param->kernel_params[ 2] = &device_param->hip_d_combs_c; - device_param->kernel_params[ 3] = &device_param->hip_d_bfs_c; - device_param->kernel_params[ 4] = NULL; // &device_param->hip_d_tmps; - device_param->kernel_params[ 5] = NULL; // &device_param->hip_d_hooks; - device_param->kernel_params[ 6] = &device_param->hip_d_bitmap_s1_a; - device_param->kernel_params[ 7] = &device_param->hip_d_bitmap_s1_b; - device_param->kernel_params[ 8] = &device_param->hip_d_bitmap_s1_c; - device_param->kernel_params[ 9] = &device_param->hip_d_bitmap_s1_d; - device_param->kernel_params[10] = &device_param->hip_d_bitmap_s2_a; - device_param->kernel_params[11] = &device_param->hip_d_bitmap_s2_b; - device_param->kernel_params[12] = &device_param->hip_d_bitmap_s2_c; - device_param->kernel_params[13] = &device_param->hip_d_bitmap_s2_d; - device_param->kernel_params[14] = &device_param->hip_d_plain_bufs; - device_param->kernel_params[15] = &device_param->hip_d_digests_buf; - device_param->kernel_params[16] = &device_param->hip_d_digests_shown; - device_param->kernel_params[17] = &device_param->hip_d_salt_bufs; - device_param->kernel_params[18] = &device_param->hip_d_esalt_bufs; - device_param->kernel_params[19] = &device_param->hip_d_result; - device_param->kernel_params[20] = &device_param->hip_d_extra0_buf; - device_param->kernel_params[21] = &device_param->hip_d_extra1_buf; - device_param->kernel_params[22] = 
&device_param->hip_d_extra2_buf; - device_param->kernel_params[23] = &device_param->hip_d_extra3_buf; - device_param->kernel_params[24] = &device_param->hip_d_kernel_param; - } + snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop_extended", kern_type); - if (device_param->is_opencl == true) - { - device_param->kernel_params[ 0] = NULL; // &device_param->opencl_d_pws_buf; - device_param->kernel_params[ 1] = &device_param->opencl_d_rules_c; - device_param->kernel_params[ 2] = &device_param->opencl_d_combs_c; - device_param->kernel_params[ 3] = &device_param->opencl_d_bfs_c; - device_param->kernel_params[ 4] = NULL; // &device_param->opencl_d_tmps; - device_param->kernel_params[ 5] = NULL; // &device_param->opencl_d_hooks; - device_param->kernel_params[ 6] = &device_param->opencl_d_bitmap_s1_a; - device_param->kernel_params[ 7] = &device_param->opencl_d_bitmap_s1_b; - device_param->kernel_params[ 8] = &device_param->opencl_d_bitmap_s1_c; - device_param->kernel_params[ 9] = &device_param->opencl_d_bitmap_s1_d; - device_param->kernel_params[10] = &device_param->opencl_d_bitmap_s2_a; - device_param->kernel_params[11] = &device_param->opencl_d_bitmap_s2_b; - device_param->kernel_params[12] = &device_param->opencl_d_bitmap_s2_c; - device_param->kernel_params[13] = &device_param->opencl_d_bitmap_s2_d; - device_param->kernel_params[14] = &device_param->opencl_d_plain_bufs; - device_param->kernel_params[15] = &device_param->opencl_d_digests_buf; - device_param->kernel_params[16] = &device_param->opencl_d_digests_shown; - device_param->kernel_params[17] = &device_param->opencl_d_salt_bufs; - device_param->kernel_params[18] = &device_param->opencl_d_esalt_bufs; - device_param->kernel_params[19] = &device_param->opencl_d_result; - device_param->kernel_params[20] = &device_param->opencl_d_extra0_buf; - device_param->kernel_params[21] = &device_param->opencl_d_extra1_buf; - device_param->kernel_params[22] = &device_param->opencl_d_extra2_buf; - device_param->kernel_params[23] = &device_param->opencl_d_extra3_buf; - device_param->kernel_params[24] = &device_param->opencl_d_kernel_param; - } + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function2e, device_param->cuda_module, kernel_name) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function2e, &device_param->kernel_wgs2e) == -1) return -1; + + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function2e, &device_param->kernel_local_mem_size2e) == -1) return -1; + + device_param->kernel_dynamic_local_mem_size2e = device_param->device_local_mem_size - device_param->kernel_local_mem_size2e; + + device_param->kernel_preferred_wgs_multiple2e = device_param->cuda_warp_size; + } + + // kernel12 + + if (hashconfig->opts_type & OPTS_TYPE_HOOK12) + { + snprintf (kernel_name, sizeof (kernel_name), "m%05u_hook12", kern_type); + + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function12, device_param->cuda_module, kernel_name) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function12, &device_param->kernel_wgs12) == -1) return -1; + + if 
(get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function12, &device_param->kernel_local_mem_size12) == -1) return -1; + + device_param->kernel_dynamic_local_mem_size12 = device_param->device_local_mem_size - device_param->kernel_local_mem_size12; + + device_param->kernel_preferred_wgs_multiple12 = device_param->cuda_warp_size; + } + + // kernel23 + + if (hashconfig->opts_type & OPTS_TYPE_HOOK23) + { + snprintf (kernel_name, sizeof (kernel_name), "m%05u_hook23", kern_type); + + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function23, device_param->cuda_module, kernel_name) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function23, &device_param->kernel_wgs23) == -1) return -1; + + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function23, &device_param->kernel_local_mem_size23) == -1) return -1; + + device_param->kernel_dynamic_local_mem_size23 = device_param->device_local_mem_size - device_param->kernel_local_mem_size23; + + device_param->kernel_preferred_wgs_multiple23 = device_param->cuda_warp_size; + } + + // init2 + + if (hashconfig->opts_type & OPTS_TYPE_INIT2) + { + snprintf (kernel_name, sizeof (kernel_name), "m%05u_init2", kern_type); + + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_init2, device_param->cuda_module, kernel_name) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_init2, &device_param->kernel_wgs_init2) == -1) return -1; + + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_init2, &device_param->kernel_local_mem_size_init2) == -1) return -1; + + device_param->kernel_dynamic_local_mem_size_init2 = device_param->device_local_mem_size - device_param->kernel_local_mem_size_init2; + + device_param->kernel_preferred_wgs_multiple_init2 = device_param->cuda_warp_size; + } + + // loop2 prepare + + if (hashconfig->opts_type & OPTS_TYPE_LOOP2_PREPARE) + { + snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop2_prepare", kern_type); + + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_loop2p, device_param->cuda_module, kernel_name) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_loop2p, &device_param->kernel_wgs_loop2p) == -1) return -1; + + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_loop2p, &device_param->kernel_local_mem_size_loop2p) == -1) return -1; + + device_param->kernel_dynamic_local_mem_size_loop2p = device_param->device_local_mem_size - device_param->kernel_local_mem_size_loop2p; + + device_param->kernel_preferred_wgs_multiple_loop2p = device_param->cuda_warp_size; + } + + // loop2 + + if (hashconfig->opts_type & OPTS_TYPE_LOOP2) + { + snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop2", kern_type); + + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_loop2, 
device_param->cuda_module, kernel_name) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_loop2, &device_param->kernel_wgs_loop2) == -1) return -1; + + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_loop2, &device_param->kernel_local_mem_size_loop2) == -1) return -1; + + device_param->kernel_dynamic_local_mem_size_loop2 = device_param->device_local_mem_size - device_param->kernel_local_mem_size_loop2; + + device_param->kernel_preferred_wgs_multiple_loop2 = device_param->cuda_warp_size; + } + + // aux1 + + if (hashconfig->opts_type & OPTS_TYPE_AUX1) + { + snprintf (kernel_name, sizeof (kernel_name), "m%05u_aux1", kern_type); + + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_aux1, device_param->cuda_module, kernel_name) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_aux1, &device_param->kernel_wgs_aux1) == -1) return -1; + + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_aux1, &device_param->kernel_local_mem_size_aux1) == -1) return -1; + + device_param->kernel_dynamic_local_mem_size_aux1 = device_param->device_local_mem_size - device_param->kernel_local_mem_size_aux1; + + device_param->kernel_preferred_wgs_multiple_aux1 = device_param->cuda_warp_size; + } - if (user_options->slow_candidates == true) - { - } - else - { - device_param->kernel_params_mp_buf64[3] = 0; - device_param->kernel_params_mp_buf32[4] = 0; - device_param->kernel_params_mp_buf32[5] = 0; - device_param->kernel_params_mp_buf32[6] = 0; - device_param->kernel_params_mp_buf32[7] = 0; - device_param->kernel_params_mp_buf64[8] = 0; + // aux2 - if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL) - { - if (device_param->is_cuda == true) + if (hashconfig->opts_type & OPTS_TYPE_AUX2) { - device_param->kernel_params_mp[0] = &device_param->cuda_d_combs; + snprintf (kernel_name, sizeof (kernel_name), "m%05u_aux2", kern_type); + + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_aux2, device_param->cuda_module, kernel_name) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_aux2, &device_param->kernel_wgs_aux2) == -1) return -1; + + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_aux2, &device_param->kernel_local_mem_size_aux2) == -1) return -1; + + device_param->kernel_dynamic_local_mem_size_aux2 = device_param->device_local_mem_size - device_param->kernel_local_mem_size_aux2; + + device_param->kernel_preferred_wgs_multiple_aux2 = device_param->cuda_warp_size; } - if (device_param->is_hip == true) + // aux3 + + if (hashconfig->opts_type & OPTS_TYPE_AUX3) { - device_param->kernel_params_mp[0] = &device_param->hip_d_combs; + snprintf (kernel_name, sizeof (kernel_name), "m%05u_aux3", kern_type); + + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_aux3, 
device_param->cuda_module, kernel_name) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_aux3, &device_param->kernel_wgs_aux3) == -1) return -1; + + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_aux3, &device_param->kernel_local_mem_size_aux3) == -1) return -1; + + device_param->kernel_dynamic_local_mem_size_aux3 = device_param->device_local_mem_size - device_param->kernel_local_mem_size_aux3; + + device_param->kernel_preferred_wgs_multiple_aux3 = device_param->cuda_warp_size; } - if (device_param->is_opencl == true) + // aux4 + + if (hashconfig->opts_type & OPTS_TYPE_AUX4) { - device_param->kernel_params_mp[0] = &device_param->opencl_d_combs; + snprintf (kernel_name, sizeof (kernel_name), "m%05u_aux4", kern_type); + + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_aux4, device_param->cuda_module, kernel_name) == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; + } + + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_aux4, &device_param->kernel_wgs_aux4) == -1) return -1; + + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_aux4, &device_param->kernel_local_mem_size_aux4) == -1) return -1; + + device_param->kernel_dynamic_local_mem_size_aux4 = device_param->device_local_mem_size - device_param->kernel_local_mem_size_aux4; + + device_param->kernel_preferred_wgs_multiple_aux4 = device_param->cuda_warp_size; } } + + //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 0, sizeof (cl_mem), device_param->kernel_params_decompress[0]); if (CL_rc == -1) return -1; + //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 1, sizeof (cl_mem), device_param->kernel_params_decompress[1]); if (CL_rc == -1) return -1; + //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 2, sizeof (cl_mem), device_param->kernel_params_decompress[2]); if (CL_rc == -1) return -1; + //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 3, sizeof (cl_ulong), device_param->kernel_params_decompress[3]); if (CL_rc == -1) return -1; + + // MP start + + if (user_options->slow_candidates == true) + { + } else { - if (user_options->attack_mode == ATTACK_MODE_HYBRID1) + if (user_options->attack_mode == ATTACK_MODE_BF) { - if (device_param->is_cuda == true) + // mp_l + + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_mp_l, device_param->cuda_module_mp, "l_markov") == -1) { - device_param->kernel_params_mp[0] = &device_param->cuda_d_combs; + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "l_markov"); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; } - if (device_param->is_hip == true) + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_mp_l, &device_param->kernel_wgs_mp_l) == -1) return -1; + + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_mp_l, &device_param->kernel_local_mem_size_mp_l) == -1) return -1; + + 
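/*
 * Every kernel entry point handled in these hunks (m%05u_loop_extended,
 * _hook12, _hook23, _init2, _loop2_prepare, _loop2, _aux1.._aux4, plus the
 * l_markov / r_markov candidate generators) follows the same five steps:
 * resolve the function in the compiled module, query its maximum workgroup
 * size, query its static local-memory usage, derive the dynamic local-memory
 * headroom from the device total, and take the warp size as the preferred
 * workgroup-size multiple. The hypothetical helper below sketches the query
 * step against the raw CUDA driver API; the hc_cuModuleGetFunction and
 * get_cuda_kernel_* wrappers used above add hashcat's error reporting on top,
 * so their exact internals are an assumption here, not a quote of them.
 */

#include <cuda.h>

// illustrative helper, not part of the hashcat sources
static int query_kernel_limits_sketch (CUmodule module, const char *name, int *max_threads, int *static_lmem)
{
  CUfunction func;

  // resolve the kernel by its generated name, e.g. "m00000_loop2"
  if (cuModuleGetFunction (&func, module, name) != CUDA_SUCCESS) return -1;

  // largest workgroup (thread block) the kernel can be launched with
  if (cuFuncGetAttribute (max_threads, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, func) != CUDA_SUCCESS) return -1;

  // statically allocated shared/local memory of the kernel, in bytes
  if (cuFuncGetAttribute (static_lmem, CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, func) != CUDA_SUCCESS) return -1;

  return 0;
}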
device_param->kernel_dynamic_local_mem_size_mp_l = device_param->device_local_mem_size - device_param->kernel_local_mem_size_mp_l; + + device_param->kernel_preferred_wgs_multiple_mp_l = device_param->cuda_warp_size; + + // mp_r + + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_mp_r, device_param->cuda_module_mp, "r_markov") == -1) { - device_param->kernel_params_mp[0] = &device_param->hip_d_combs; + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "r_markov"); + + backend_kernel_create_warnings++; + + device_param->skipped_warning = true; + continue; } - if (device_param->is_opencl == true) + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_mp_r, &device_param->kernel_wgs_mp_r) == -1) return -1; + + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_mp_r, &device_param->kernel_local_mem_size_mp_r) == -1) return -1; + + device_param->kernel_dynamic_local_mem_size_mp_r = device_param->device_local_mem_size - device_param->kernel_local_mem_size_mp_r; + + device_param->kernel_preferred_wgs_multiple_mp_r = device_param->cuda_warp_size; + + if (user_options->attack_mode == ATTACK_MODE_BF) { - device_param->kernel_params_mp[0] = &device_param->opencl_d_combs; + if (hashconfig->opts_type & OPTS_TYPE_TM_KERNEL) + { + //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_tm, 0, sizeof (cl_mem), device_param->kernel_params_tm[0]); if (CL_rc == -1) return -1; + //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_tm, 1, sizeof (cl_mem), device_param->kernel_params_tm[1]); if (CL_rc == -1) return -1; + } } } - else + else if (user_options->attack_mode == ATTACK_MODE_HYBRID1) { - device_param->kernel_params_mp[0] = NULL; // (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) - // ? 
&device_param->opencl_d_pws_buf - // : &device_param->opencl_d_pws_amp_buf; - } - } + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_mp, device_param->cuda_module_mp, "C_markov") == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "C_markov"); - if (device_param->is_cuda == true) - { - device_param->kernel_params_mp[1] = &device_param->cuda_d_root_css_buf; - device_param->kernel_params_mp[2] = &device_param->cuda_d_markov_css_buf; - } + backend_kernel_create_warnings++; - if (device_param->is_hip == true) - { - device_param->kernel_params_mp[1] = &device_param->hip_d_root_css_buf; - device_param->kernel_params_mp[2] = &device_param->hip_d_markov_css_buf; - } + device_param->skipped_warning = true; + continue; + } - if (device_param->is_opencl == true) - { - device_param->kernel_params_mp[1] = &device_param->opencl_d_root_css_buf; - device_param->kernel_params_mp[2] = &device_param->opencl_d_markov_css_buf; - } + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_mp, &device_param->kernel_wgs_mp) == -1) return -1; - device_param->kernel_params_mp[3] = &device_param->kernel_params_mp_buf64[3]; - device_param->kernel_params_mp[4] = &device_param->kernel_params_mp_buf32[4]; - device_param->kernel_params_mp[5] = &device_param->kernel_params_mp_buf32[5]; - device_param->kernel_params_mp[6] = &device_param->kernel_params_mp_buf32[6]; - device_param->kernel_params_mp[7] = &device_param->kernel_params_mp_buf32[7]; - device_param->kernel_params_mp[8] = &device_param->kernel_params_mp_buf64[8]; + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_mp, &device_param->kernel_local_mem_size_mp) == -1) return -1; - device_param->kernel_params_mp_l_buf64[3] = 0; - device_param->kernel_params_mp_l_buf32[4] = 0; - device_param->kernel_params_mp_l_buf32[5] = 0; - device_param->kernel_params_mp_l_buf32[6] = 0; - device_param->kernel_params_mp_l_buf32[7] = 0; - device_param->kernel_params_mp_l_buf32[8] = 0; - device_param->kernel_params_mp_l_buf64[9] = 0; + device_param->kernel_dynamic_local_mem_size_mp = device_param->device_local_mem_size - device_param->kernel_local_mem_size_mp; - device_param->kernel_params_mp_l[0] = NULL; // (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) - // ? 
&device_param->opencl_d_pws_buf - // : &device_param->opencl_d_pws_amp_buf; + device_param->kernel_preferred_wgs_multiple_mp = device_param->cuda_warp_size; + } + else if (user_options->attack_mode == ATTACK_MODE_HYBRID2) + { + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_mp, device_param->cuda_module_mp, "C_markov") == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "C_markov"); - if (device_param->is_cuda == true) - { - device_param->kernel_params_mp_l[1] = &device_param->cuda_d_root_css_buf; - device_param->kernel_params_mp_l[2] = &device_param->cuda_d_markov_css_buf; - } + backend_kernel_create_warnings++; - if (device_param->is_hip == true) - { - device_param->kernel_params_mp_l[1] = &device_param->hip_d_root_css_buf; - device_param->kernel_params_mp_l[2] = &device_param->hip_d_markov_css_buf; + device_param->skipped_warning = true; + continue; + } + + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_mp, &device_param->kernel_wgs_mp) == -1) return -1; + + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_mp, &device_param->kernel_local_mem_size_mp) == -1) return -1; + + device_param->kernel_dynamic_local_mem_size_mp = device_param->device_local_mem_size - device_param->kernel_local_mem_size_mp; + + device_param->kernel_preferred_wgs_multiple_mp = device_param->cuda_warp_size; + } } - if (device_param->is_opencl == true) + if (user_options->slow_candidates == true) { - device_param->kernel_params_mp_l[1] = &device_param->opencl_d_root_css_buf; - device_param->kernel_params_mp_l[2] = &device_param->opencl_d_markov_css_buf; } + else + { + if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + { + // nothing to do + } + else + { + if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_amp, device_param->cuda_module_amp, "amp") == -1) + { + event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "amp"); - device_param->kernel_params_mp_l[3] = &device_param->kernel_params_mp_l_buf64[3]; - device_param->kernel_params_mp_l[4] = &device_param->kernel_params_mp_l_buf32[4]; - device_param->kernel_params_mp_l[5] = &device_param->kernel_params_mp_l_buf32[5]; - device_param->kernel_params_mp_l[6] = &device_param->kernel_params_mp_l_buf32[6]; - device_param->kernel_params_mp_l[7] = &device_param->kernel_params_mp_l_buf32[7]; - device_param->kernel_params_mp_l[8] = &device_param->kernel_params_mp_l_buf32[8]; - device_param->kernel_params_mp_l[9] = &device_param->kernel_params_mp_l_buf64[9]; + backend_kernel_create_warnings++; - device_param->kernel_params_mp_r_buf64[3] = 0; - device_param->kernel_params_mp_r_buf32[4] = 0; - device_param->kernel_params_mp_r_buf32[5] = 0; - device_param->kernel_params_mp_r_buf32[6] = 0; - device_param->kernel_params_mp_r_buf32[7] = 0; - device_param->kernel_params_mp_r_buf64[8] = 0; + device_param->skipped_warning = true; + continue; + } + + if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_amp, &device_param->kernel_wgs_amp) == -1) return -1; - if (device_param->is_cuda == true) - { - device_param->kernel_params_mp_r[0] = &device_param->cuda_d_bfs; - device_param->kernel_params_mp_r[1] = &device_param->cuda_d_root_css_buf; - device_param->kernel_params_mp_r[2] = &device_param->cuda_d_markov_css_buf; - } + if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_amp, &device_param->kernel_local_mem_size_amp) == -1) return -1; - if 
(device_param->is_hip == true) - { - device_param->kernel_params_mp_r[0] = &device_param->hip_d_bfs; - device_param->kernel_params_mp_r[1] = &device_param->hip_d_root_css_buf; - device_param->kernel_params_mp_r[2] = &device_param->hip_d_markov_css_buf; - } + device_param->kernel_dynamic_local_mem_size_amp = device_param->device_local_mem_size - device_param->kernel_local_mem_size_amp; - if (device_param->is_opencl == true) - { - device_param->kernel_params_mp_r[0] = &device_param->opencl_d_bfs; - device_param->kernel_params_mp_r[1] = &device_param->opencl_d_root_css_buf; - device_param->kernel_params_mp_r[2] = &device_param->opencl_d_markov_css_buf; - } + device_param->kernel_preferred_wgs_multiple_amp = device_param->cuda_warp_size; + } - device_param->kernel_params_mp_r[3] = &device_param->kernel_params_mp_r_buf64[3]; - device_param->kernel_params_mp_r[4] = &device_param->kernel_params_mp_r_buf32[4]; - device_param->kernel_params_mp_r[5] = &device_param->kernel_params_mp_r_buf32[5]; - device_param->kernel_params_mp_r[6] = &device_param->kernel_params_mp_r_buf32[6]; - device_param->kernel_params_mp_r[7] = &device_param->kernel_params_mp_r_buf32[7]; - device_param->kernel_params_mp_r[8] = &device_param->kernel_params_mp_r_buf64[8]; + /* + if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + { + // nothing to do + } + else + { + for (u32 i = 0; i < 5; i++) + { + //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_amp, i, sizeof (cl_mem), device_param->kernel_params_amp[i]); - device_param->kernel_params_amp_buf32[5] = 0; // combs_mode - device_param->kernel_params_amp_buf64[6] = 0; // gid_max + //if (CL_rc == -1) return -1; + } - if (device_param->is_cuda == true) - { - device_param->kernel_params_amp[0] = NULL; // &device_param->cuda_d_pws_buf; - device_param->kernel_params_amp[1] = NULL; // &device_param->cuda_d_pws_amp_buf; - device_param->kernel_params_amp[2] = &device_param->cuda_d_rules_c; - device_param->kernel_params_amp[3] = &device_param->cuda_d_combs_c; - device_param->kernel_params_amp[4] = &device_param->cuda_d_bfs_c; - } + for (u32 i = 5; i < 6; i++) + { + //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_amp, i, sizeof (cl_uint), device_param->kernel_params_amp[i]); - if (device_param->is_hip == true) - { - device_param->kernel_params_amp[0] = NULL; // &device_param->hip_d_pws_buf; - device_param->kernel_params_amp[1] = NULL; // &device_param->hip_d_pws_amp_buf; - device_param->kernel_params_amp[2] = &device_param->hip_d_rules_c; - device_param->kernel_params_amp[3] = &device_param->hip_d_combs_c; - device_param->kernel_params_amp[4] = &device_param->hip_d_bfs_c; - } + //if (CL_rc == -1) return -1; + } - if (device_param->is_opencl == true) - { - device_param->kernel_params_amp[0] = NULL; // &device_param->opencl_d_pws_buf; - device_param->kernel_params_amp[1] = NULL; // &device_param->opencl_d_pws_amp_buf; - device_param->kernel_params_amp[2] = &device_param->opencl_d_rules_c; - device_param->kernel_params_amp[3] = &device_param->opencl_d_combs_c; - device_param->kernel_params_amp[4] = &device_param->opencl_d_bfs_c; + for (u32 i = 6; i < 7; i++) + { + //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_amp, i, sizeof (cl_ulong), device_param->kernel_params_amp[i]); + + //if (CL_rc == -1) return -1; + } + } + */ } - device_param->kernel_params_amp[5] = &device_param->kernel_params_amp_buf32[5]; - device_param->kernel_params_amp[6] = &device_param->kernel_params_amp_buf64[6]; + // zero some data buffers - if 
(device_param->is_cuda == true) + if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_plain_bufs, device_param->size_plains) == -1) return -1; + if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_digests_shown, device_param->size_shown) == -1) return -1; + if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_result, device_param->size_results) == -1) return -1; + + /** + * special buffers + */ + + if (user_options->slow_candidates == true) { - device_param->kernel_params_tm[0] = &device_param->cuda_d_bfs_c; - device_param->kernel_params_tm[1] = &device_param->cuda_d_tm_c; + if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_rules_c, size_rules_c) == -1) return -1; } - - if (device_param->is_hip == true) + else { - device_param->kernel_params_tm[0] = &device_param->hip_d_bfs_c; - device_param->kernel_params_tm[1] = &device_param->hip_d_tm_c; + if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT) + { + if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_rules_c, size_rules_c) == -1) return -1; + } + else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI) + { + if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_combs, size_combs) == -1) return -1; + if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_combs_c, size_combs) == -1) return -1; + if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_root_css_buf, size_root_css) == -1) return -1; + if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_markov_css_buf, size_markov_css) == -1) return -1; + } + else if (user_options_extra->attack_kern == ATTACK_KERN_BF) + { + if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_bfs, size_bfs) == -1) return -1; + if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_bfs_c, size_bfs) == -1) return -1; + if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_tm_c, size_tm) == -1) return -1; + if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_root_css_buf, size_root_css) == -1) return -1; + if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_markov_css_buf, size_markov_css) == -1) return -1; + } } - if (device_param->is_opencl == true) + if (user_options->slow_candidates == true) { - device_param->kernel_params_tm[0] = &device_param->opencl_d_bfs_c; - device_param->kernel_params_tm[1] = &device_param->opencl_d_tm_c; } - } - - device_param->kernel_params_memset_buf32[1] = 0; // value - device_param->kernel_params_memset_buf64[2] = 0; // gid_max - - device_param->kernel_params_memset[0] = NULL; - device_param->kernel_params_memset[1] = &device_param->kernel_params_memset_buf32[1]; - device_param->kernel_params_memset[2] = &device_param->kernel_params_memset_buf64[2]; - - device_param->kernel_params_bzero_buf64[1] = 0; // gid_max - - device_param->kernel_params_bzero[0] = NULL; - device_param->kernel_params_bzero[1] = &device_param->kernel_params_bzero_buf64[1]; + else + { + if ((user_options->attack_mode == ATTACK_MODE_HYBRID1) || (user_options->attack_mode == ATTACK_MODE_HYBRID2)) + { + /** + * prepare mp + */ - device_param->kernel_params_atinit_buf64[1] = 0; // gid_max + if (user_options->attack_mode == ATTACK_MODE_HYBRID1) + { + device_param->kernel_params_mp_buf32[5] = 0; + device_param->kernel_params_mp_buf32[6] = 0; + device_param->kernel_params_mp_buf32[7] = 0; - 
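/*
 * Before the first launch, backend_session_begin () zeroes the result-carrying
 * buffers (plain bufs, digests_shown, result) and, depending on the selected
 * attack kernel, the rules/combs/bfs and Markov css buffers, so stale device
 * memory can never surface as a false crack. run_cuda_kernel_bzero () routes
 * this through a device-side bzero kernel; functionally it amounts to a
 * byte-wise clear of the allocation, roughly like the hypothetical driver-API
 * sketch below (an illustration only, not the wrapper's actual implementation).
 */

#include <cuda.h>

// illustrative helper, not part of the hashcat sources
static int bzero_device_buffer_sketch (CUdeviceptr buf, size_t size)
{
  // clear the whole device allocation to zero
  if (cuMemsetD8 (buf, 0, size) != CUDA_SUCCESS) return -1;

  return 0;
}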
device_param->kernel_params_atinit[0] = NULL; - device_param->kernel_params_atinit[1] = &device_param->kernel_params_atinit_buf64[1]; + if (hashconfig->opts_type & OPTS_TYPE_PT_ADD01) device_param->kernel_params_mp_buf32[5] = full01; + if (hashconfig->opts_type & OPTS_TYPE_PT_ADD06) device_param->kernel_params_mp_buf32[5] = full06; + if (hashconfig->opts_type & OPTS_TYPE_PT_ADD80) device_param->kernel_params_mp_buf32[5] = full80; + if (hashconfig->opts_type & OPTS_TYPE_PT_ADDBITS14) device_param->kernel_params_mp_buf32[6] = 1; + if (hashconfig->opts_type & OPTS_TYPE_PT_ADDBITS15) device_param->kernel_params_mp_buf32[7] = 1; + } + else if (user_options->attack_mode == ATTACK_MODE_HYBRID2) + { + device_param->kernel_params_mp_buf32[5] = 0; + device_param->kernel_params_mp_buf32[6] = 0; + device_param->kernel_params_mp_buf32[7] = 0; + } - device_param->kernel_params_utf8toutf16le_buf64[1] = 0; // gid_max + //for (u32 i = 0; i < 3; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp, i, sizeof (cl_mem), device_param->kernel_params_mp[i]); if (CL_rc == -1) return -1; } + } + else if (user_options->attack_mode == ATTACK_MODE_BF) + { + /** + * prepare mp_r and mp_l + */ - device_param->kernel_params_utf8toutf16le[0] = NULL; - device_param->kernel_params_utf8toutf16le[1] = &device_param->kernel_params_utf8toutf16le_buf64[1]; + device_param->kernel_params_mp_l_buf32[6] = 0; + device_param->kernel_params_mp_l_buf32[7] = 0; + device_param->kernel_params_mp_l_buf32[8] = 0; - device_param->kernel_params_decompress_buf64[3] = 0; // gid_max + if (hashconfig->opts_type & OPTS_TYPE_PT_ADD01) device_param->kernel_params_mp_l_buf32[6] = full01; + if (hashconfig->opts_type & OPTS_TYPE_PT_ADD06) device_param->kernel_params_mp_l_buf32[6] = full06; + if (hashconfig->opts_type & OPTS_TYPE_PT_ADD80) device_param->kernel_params_mp_l_buf32[6] = full80; + if (hashconfig->opts_type & OPTS_TYPE_PT_ADDBITS14) device_param->kernel_params_mp_l_buf32[7] = 1; + if (hashconfig->opts_type & OPTS_TYPE_PT_ADDBITS15) device_param->kernel_params_mp_l_buf32[8] = 1; - if (device_param->is_cuda == true) - { - device_param->kernel_params_decompress[0] = NULL; // &device_param->cuda_d_pws_idx; - device_param->kernel_params_decompress[1] = NULL; // &device_param->cuda_d_pws_comp_buf; - device_param->kernel_params_decompress[2] = NULL; // (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) - // ? &device_param->cuda_d_pws_buf - // : &device_param->cuda_d_pws_amp_buf; + //for (u32 i = 0; i < 3; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_l, i, sizeof (cl_mem), device_param->kernel_params_mp_l[i]); if (CL_rc == -1) return -1; } + //for (u32 i = 0; i < 3; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_r, i, sizeof (cl_mem), device_param->kernel_params_mp_r[i]); if (CL_rc == -1) return -1; } + } + } } if (device_param->is_hip == true) - { - device_param->kernel_params_decompress[0] = NULL; // &device_param->hip_d_pws_idx; - device_param->kernel_params_decompress[1] = NULL; // &device_param->hip_d_pws_comp_buf; - device_param->kernel_params_decompress[2] = NULL; // (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) - // ? 
&device_param->hip_d_pws_buf - // : &device_param->hip_d_pws_amp_buf; - } - - if (device_param->is_opencl == true) - { - device_param->kernel_params_decompress[0] = NULL; // &device_param->opencl_d_pws_idx; - device_param->kernel_params_decompress[1] = NULL; // &device_param->opencl_d_pws_comp_buf; - device_param->kernel_params_decompress[2] = NULL; // (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) - // ? &device_param->opencl_d_pws_buf - // : &device_param->opencl_d_pws_amp_buf; - } - - device_param->kernel_params_decompress[3] = &device_param->kernel_params_decompress_buf64[3]; - - /** - * kernel name - */ - - if (device_param->is_cuda == true) { char kernel_name[64] = { 0 }; @@ -9511,7 +11966,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) snprintf (kernel_name, sizeof (kernel_name), "m%05u_s%02d", kern_type, 4); - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function1, device_param->cuda_module, kernel_name) == -1) + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function1, device_param->hip_module, kernel_name) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -9521,19 +11976,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function1, &device_param->kernel_wgs1) == -1) return -1; + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function1, &device_param->kernel_wgs1) == -1) return -1; - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function1, &device_param->kernel_local_mem_size1) == -1) return -1; + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function1, &device_param->kernel_local_mem_size1) == -1) return -1; device_param->kernel_dynamic_local_mem_size1 = device_param->device_local_mem_size - device_param->kernel_local_mem_size1; - device_param->kernel_preferred_wgs_multiple1 = device_param->cuda_warp_size; + device_param->kernel_preferred_wgs_multiple1 = device_param->hip_warp_size; // kernel2 snprintf (kernel_name, sizeof (kernel_name), "m%05u_s%02d", kern_type, 8); - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function2, device_param->cuda_module, kernel_name) == -1) + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function2, device_param->hip_module, kernel_name) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -9543,19 +11998,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function2, &device_param->kernel_wgs2) == -1) return -1; + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function2, &device_param->kernel_wgs2) == -1) return -1; - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function2, &device_param->kernel_local_mem_size2) == -1) return -1; + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function2, &device_param->kernel_local_mem_size2) == -1) return -1; device_param->kernel_dynamic_local_mem_size2 = device_param->device_local_mem_size - device_param->kernel_local_mem_size2; - device_param->kernel_preferred_wgs_multiple2 = device_param->cuda_warp_size; + device_param->kernel_preferred_wgs_multiple2 = device_param->hip_warp_size; // kernel3 snprintf (kernel_name, sizeof (kernel_name), "m%05u_s%02d", kern_type, 16); - if (hc_cuModuleGetFunction (hashcat_ctx, 
&device_param->cuda_function3, device_param->cuda_module, kernel_name) == -1) + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function3, device_param->hip_module, kernel_name) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -9565,19 +12020,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function3, &device_param->kernel_wgs3) == -1) return -1; + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function3, &device_param->kernel_wgs3) == -1) return -1; - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function3, &device_param->kernel_local_mem_size3) == -1) return -1; + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function3, &device_param->kernel_local_mem_size3) == -1) return -1; device_param->kernel_dynamic_local_mem_size3 = device_param->device_local_mem_size - device_param->kernel_local_mem_size3; - device_param->kernel_preferred_wgs_multiple3 = device_param->cuda_warp_size; + device_param->kernel_preferred_wgs_multiple3 = device_param->hip_warp_size; } else { snprintf (kernel_name, sizeof (kernel_name), "m%05u_sxx", kern_type); - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function4, device_param->cuda_module, kernel_name) == -1) + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function4, device_param->hip_module, kernel_name) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -9587,13 +12042,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function4, &device_param->kernel_wgs4) == -1) return -1; + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function4, &device_param->kernel_wgs4) == -1) return -1; - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function4, &device_param->kernel_local_mem_size4) == -1) return -1; + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function4, &device_param->kernel_local_mem_size4) == -1) return -1; device_param->kernel_dynamic_local_mem_size4 = device_param->device_local_mem_size - device_param->kernel_local_mem_size4; - device_param->kernel_preferred_wgs_multiple4 = device_param->cuda_warp_size; + device_param->kernel_preferred_wgs_multiple4 = device_param->hip_warp_size; } } else @@ -9604,7 +12059,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) snprintf (kernel_name, sizeof (kernel_name), "m%05u_m%02d", kern_type, 4); - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function1, device_param->cuda_module, kernel_name) == -1) + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function1, device_param->hip_module, kernel_name) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -9614,19 +12069,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function1, &device_param->kernel_wgs1) == -1) return -1; + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function1, &device_param->kernel_wgs1) == -1) return -1; - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function1, &device_param->kernel_local_mem_size1) == -1) return -1; + if (get_hip_kernel_local_mem_size (hashcat_ctx, 
device_param->hip_function1, &device_param->kernel_local_mem_size1) == -1) return -1; device_param->kernel_dynamic_local_mem_size1 = device_param->device_local_mem_size - device_param->kernel_local_mem_size1; - device_param->kernel_preferred_wgs_multiple1 = device_param->cuda_warp_size; + device_param->kernel_preferred_wgs_multiple1 = device_param->hip_warp_size; // kernel2 snprintf (kernel_name, sizeof (kernel_name), "m%05u_m%02d", kern_type, 8); - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function2, device_param->cuda_module, kernel_name) == -1) + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function2, device_param->hip_module, kernel_name) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -9636,19 +12091,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function2, &device_param->kernel_wgs2) == -1) return -1; + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function2, &device_param->kernel_wgs2) == -1) return -1; - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function2, &device_param->kernel_local_mem_size2) == -1) return -1; + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function2, &device_param->kernel_local_mem_size2) == -1) return -1; device_param->kernel_dynamic_local_mem_size2 = device_param->device_local_mem_size - device_param->kernel_local_mem_size2; - device_param->kernel_preferred_wgs_multiple2 = device_param->cuda_warp_size; + device_param->kernel_preferred_wgs_multiple2 = device_param->hip_warp_size; // kernel3 snprintf (kernel_name, sizeof (kernel_name), "m%05u_m%02d", kern_type, 16); - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function3, device_param->cuda_module, kernel_name) == -1) + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function3, device_param->hip_module, kernel_name) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -9658,19 +12113,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function3, &device_param->kernel_wgs3) == -1) return -1; + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function3, &device_param->kernel_wgs3) == -1) return -1; - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function3, &device_param->kernel_local_mem_size3) == -1) return -1; + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function3, &device_param->kernel_local_mem_size3) == -1) return -1; device_param->kernel_dynamic_local_mem_size3 = device_param->device_local_mem_size - device_param->kernel_local_mem_size3; - device_param->kernel_preferred_wgs_multiple3 = device_param->cuda_warp_size; + device_param->kernel_preferred_wgs_multiple3 = device_param->hip_warp_size; } else { snprintf (kernel_name, sizeof (kernel_name), "m%05u_mxx", kern_type); - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function4, device_param->cuda_module, kernel_name) == -1) + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function4, device_param->hip_module, kernel_name) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -9680,13 +12135,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if 
(get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function4, &device_param->kernel_wgs4) == -1) return -1; + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function4, &device_param->kernel_wgs4) == -1) return -1; - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function4, &device_param->kernel_local_mem_size4) == -1) return -1; + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function4, &device_param->kernel_local_mem_size4) == -1) return -1; device_param->kernel_dynamic_local_mem_size4 = device_param->device_local_mem_size - device_param->kernel_local_mem_size4; - device_param->kernel_preferred_wgs_multiple4 = device_param->cuda_warp_size; + device_param->kernel_preferred_wgs_multiple4 = device_param->hip_warp_size; } } @@ -9701,7 +12156,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) { snprintf (kernel_name, sizeof (kernel_name), "m%05u_tm", kern_type); - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_tm, device_param->cuda_module, kernel_name) == -1) + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_tm, device_param->hip_module, kernel_name) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -9711,13 +12166,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_tm, &device_param->kernel_wgs_tm) == -1) return -1; + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_tm, &device_param->kernel_wgs_tm) == -1) return -1; - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_tm, &device_param->kernel_local_mem_size_tm) == -1) return -1; + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_tm, &device_param->kernel_local_mem_size_tm) == -1) return -1; device_param->kernel_dynamic_local_mem_size_tm = device_param->device_local_mem_size - device_param->kernel_local_mem_size_tm; - device_param->kernel_preferred_wgs_multiple_tm = device_param->cuda_warp_size; + device_param->kernel_preferred_wgs_multiple_tm = device_param->hip_warp_size; } } } @@ -9728,7 +12183,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) snprintf (kernel_name, sizeof (kernel_name), "m%05u_init", kern_type); - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function1, device_param->cuda_module, kernel_name) == -1) + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function1, device_param->hip_module, kernel_name) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -9738,19 +12193,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function1, &device_param->kernel_wgs1) == -1) return -1; + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function1, &device_param->kernel_wgs1) == -1) return -1; - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function1, &device_param->kernel_local_mem_size1) == -1) return -1; + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function1, &device_param->kernel_local_mem_size1) == -1) return -1; device_param->kernel_dynamic_local_mem_size1 = device_param->device_local_mem_size - device_param->kernel_local_mem_size1; - device_param->kernel_preferred_wgs_multiple1 = device_param->cuda_warp_size; + 
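/*
 * These hunks, and the ones that follow, mechanically mirror the CUDA block
 * for the HIP backend: hc_hipModuleGetFunction / get_hip_kernel_wgs /
 * get_hip_kernel_local_mem_size replace their cuda_ counterparts, the
 * function and module handles move to the hip_* fields, and the preferred
 * workgroup multiple comes from hip_warp_size instead of cuda_warp_size,
 * while the warning-and-skip control flow stays identical. The HIP module
 * API mirrors the CUDA driver API almost one-to-one, so the query sketch
 * shown earlier translates as in the hypothetical helper below (attribute
 * names assumed to be the usual HIP equivalents).
 */

#include <hip/hip_runtime.h>

// illustrative helper, not part of the hashcat sources
static int query_hip_kernel_limits_sketch (hipModule_t module, const char *name, int *max_threads, int *static_lmem)
{
  hipFunction_t func;

  // resolve the kernel by its generated name
  if (hipModuleGetFunction (&func, module, name) != hipSuccess) return -1;

  // largest workgroup the kernel can be launched with
  if (hipFuncGetAttribute (max_threads, HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, func) != hipSuccess) return -1;

  // statically allocated local memory of the kernel, in bytes
  if (hipFuncGetAttribute (static_lmem, HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, func) != hipSuccess) return -1;

  return 0;
}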
device_param->kernel_preferred_wgs_multiple1 = device_param->hip_warp_size; // kernel2 snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop", kern_type); - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function2, device_param->cuda_module, kernel_name) == -1) + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function2, device_param->hip_module, kernel_name) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -9760,19 +12215,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function2, &device_param->kernel_wgs2) == -1) return -1; + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function2, &device_param->kernel_wgs2) == -1) return -1; - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function2, &device_param->kernel_local_mem_size2) == -1) return -1; + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function2, &device_param->kernel_local_mem_size2) == -1) return -1; device_param->kernel_dynamic_local_mem_size2 = device_param->device_local_mem_size - device_param->kernel_local_mem_size2; - device_param->kernel_preferred_wgs_multiple2 = device_param->cuda_warp_size; + device_param->kernel_preferred_wgs_multiple2 = device_param->hip_warp_size; // kernel3 snprintf (kernel_name, sizeof (kernel_name), "m%05u_comp", kern_type); - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function3, device_param->cuda_module, kernel_name) == -1) + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function3, device_param->hip_module, kernel_name) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -9782,13 +12237,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function3, &device_param->kernel_wgs3) == -1) return -1; + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function3, &device_param->kernel_wgs3) == -1) return -1; - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function3, &device_param->kernel_local_mem_size3) == -1) return -1; + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function3, &device_param->kernel_local_mem_size3) == -1) return -1; device_param->kernel_dynamic_local_mem_size3 = device_param->device_local_mem_size - device_param->kernel_local_mem_size3; - device_param->kernel_preferred_wgs_multiple3 = device_param->cuda_warp_size; + device_param->kernel_preferred_wgs_multiple3 = device_param->hip_warp_size; if (hashconfig->opts_type & OPTS_TYPE_LOOP_PREPARE) { @@ -9796,7 +12251,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop_prepare", kern_type); - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function2p, device_param->cuda_module, kernel_name) == -1) + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function2p, device_param->hip_module, kernel_name) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -9806,13 +12261,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function2p, &device_param->kernel_wgs2p) == -1) return -1; + if (get_hip_kernel_wgs (hashcat_ctx, 
device_param->hip_function2p, &device_param->kernel_wgs2p) == -1) return -1; - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function2p, &device_param->kernel_local_mem_size2p) == -1) return -1; + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function2p, &device_param->kernel_local_mem_size2p) == -1) return -1; device_param->kernel_dynamic_local_mem_size2p = device_param->device_local_mem_size - device_param->kernel_local_mem_size2p; - device_param->kernel_preferred_wgs_multiple2p = device_param->cuda_warp_size; + device_param->kernel_preferred_wgs_multiple2p = device_param->hip_warp_size; } if (hashconfig->opts_type & OPTS_TYPE_LOOP_EXTENDED) @@ -9821,7 +12276,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop_extended", kern_type); - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function2e, device_param->cuda_module, kernel_name) == -1) + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function2e, device_param->hip_module, kernel_name) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -9831,13 +12286,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function2e, &device_param->kernel_wgs2e) == -1) return -1; + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function2e, &device_param->kernel_wgs2e) == -1) return -1; - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function2e, &device_param->kernel_local_mem_size2e) == -1) return -1; + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function2e, &device_param->kernel_local_mem_size2e) == -1) return -1; device_param->kernel_dynamic_local_mem_size2e = device_param->device_local_mem_size - device_param->kernel_local_mem_size2e; - device_param->kernel_preferred_wgs_multiple2e = device_param->cuda_warp_size; + device_param->kernel_preferred_wgs_multiple2e = device_param->hip_warp_size; } // kernel12 @@ -9846,7 +12301,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) { snprintf (kernel_name, sizeof (kernel_name), "m%05u_hook12", kern_type); - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function12, device_param->cuda_module, kernel_name) == -1) + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function12, device_param->hip_module, kernel_name) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -9856,13 +12311,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function12, &device_param->kernel_wgs12) == -1) return -1; + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function12, &device_param->kernel_wgs12) == -1) return -1; - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function12, &device_param->kernel_local_mem_size12) == -1) return -1; + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function12, &device_param->kernel_local_mem_size12) == -1) return -1; device_param->kernel_dynamic_local_mem_size12 = device_param->device_local_mem_size - device_param->kernel_local_mem_size12; - device_param->kernel_preferred_wgs_multiple12 = device_param->cuda_warp_size; + device_param->kernel_preferred_wgs_multiple12 = device_param->hip_warp_size; } // kernel23 @@ -9871,7 +12326,7 
@@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) { snprintf (kernel_name, sizeof (kernel_name), "m%05u_hook23", kern_type); - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function23, device_param->cuda_module, kernel_name) == -1) + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function23, device_param->hip_module, kernel_name) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -9881,13 +12336,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function23, &device_param->kernel_wgs23) == -1) return -1; + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function23, &device_param->kernel_wgs23) == -1) return -1; - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function23, &device_param->kernel_local_mem_size23) == -1) return -1; + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function23, &device_param->kernel_local_mem_size23) == -1) return -1; device_param->kernel_dynamic_local_mem_size23 = device_param->device_local_mem_size - device_param->kernel_local_mem_size23; - device_param->kernel_preferred_wgs_multiple23 = device_param->cuda_warp_size; + device_param->kernel_preferred_wgs_multiple23 = device_param->hip_warp_size; } // init2 @@ -9896,7 +12351,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) { snprintf (kernel_name, sizeof (kernel_name), "m%05u_init2", kern_type); - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_init2, device_param->cuda_module, kernel_name) == -1) + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_init2, device_param->hip_module, kernel_name) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -9906,13 +12361,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_init2, &device_param->kernel_wgs_init2) == -1) return -1; + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_init2, &device_param->kernel_wgs_init2) == -1) return -1; - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_init2, &device_param->kernel_local_mem_size_init2) == -1) return -1; + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_init2, &device_param->kernel_local_mem_size_init2) == -1) return -1; device_param->kernel_dynamic_local_mem_size_init2 = device_param->device_local_mem_size - device_param->kernel_local_mem_size_init2; - device_param->kernel_preferred_wgs_multiple_init2 = device_param->cuda_warp_size; + device_param->kernel_preferred_wgs_multiple_init2 = device_param->hip_warp_size; } // loop2 prepare @@ -9921,7 +12376,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) { snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop2_prepare", kern_type); - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_loop2p, device_param->cuda_module, kernel_name) == -1) + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_loop2p, device_param->hip_module, kernel_name) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -9931,13 +12386,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_cuda_kernel_wgs (hashcat_ctx, 
device_param->cuda_function_loop2p, &device_param->kernel_wgs_loop2p) == -1) return -1; + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_loop2p, &device_param->kernel_wgs_loop2p) == -1) return -1; - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_loop2p, &device_param->kernel_local_mem_size_loop2p) == -1) return -1; + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_loop2p, &device_param->kernel_local_mem_size_loop2p) == -1) return -1; device_param->kernel_dynamic_local_mem_size_loop2p = device_param->device_local_mem_size - device_param->kernel_local_mem_size_loop2p; - device_param->kernel_preferred_wgs_multiple_loop2p = device_param->cuda_warp_size; + device_param->kernel_preferred_wgs_multiple_loop2p = device_param->hip_warp_size; } // loop2 @@ -9946,7 +12401,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) { snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop2", kern_type); - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_loop2, device_param->cuda_module, kernel_name) == -1) + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_loop2, device_param->hip_module, kernel_name) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -9956,13 +12411,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_loop2, &device_param->kernel_wgs_loop2) == -1) return -1; + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_loop2, &device_param->kernel_wgs_loop2) == -1) return -1; - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_loop2, &device_param->kernel_local_mem_size_loop2) == -1) return -1; + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_loop2, &device_param->kernel_local_mem_size_loop2) == -1) return -1; device_param->kernel_dynamic_local_mem_size_loop2 = device_param->device_local_mem_size - device_param->kernel_local_mem_size_loop2; - device_param->kernel_preferred_wgs_multiple_loop2 = device_param->cuda_warp_size; + device_param->kernel_preferred_wgs_multiple_loop2 = device_param->hip_warp_size; } // aux1 @@ -9971,7 +12426,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) { snprintf (kernel_name, sizeof (kernel_name), "m%05u_aux1", kern_type); - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_aux1, device_param->cuda_module, kernel_name) == -1) + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_aux1, device_param->hip_module, kernel_name) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -9981,13 +12436,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_aux1, &device_param->kernel_wgs_aux1) == -1) return -1; + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_aux1, &device_param->kernel_wgs_aux1) == -1) return -1; - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_aux1, &device_param->kernel_local_mem_size_aux1) == -1) return -1; + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_aux1, &device_param->kernel_local_mem_size_aux1) == -1) return -1; device_param->kernel_dynamic_local_mem_size_aux1 = device_param->device_local_mem_size - 
device_param->kernel_local_mem_size_aux1; - device_param->kernel_preferred_wgs_multiple_aux1 = device_param->cuda_warp_size; + device_param->kernel_preferred_wgs_multiple_aux1 = device_param->hip_warp_size; } // aux2 @@ -9996,7 +12451,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) { snprintf (kernel_name, sizeof (kernel_name), "m%05u_aux2", kern_type); - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_aux2, device_param->cuda_module, kernel_name) == -1) + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_aux2, device_param->hip_module, kernel_name) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -10006,13 +12461,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_aux2, &device_param->kernel_wgs_aux2) == -1) return -1; + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_aux2, &device_param->kernel_wgs_aux2) == -1) return -1; - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_aux2, &device_param->kernel_local_mem_size_aux2) == -1) return -1; + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_aux2, &device_param->kernel_local_mem_size_aux2) == -1) return -1; device_param->kernel_dynamic_local_mem_size_aux2 = device_param->device_local_mem_size - device_param->kernel_local_mem_size_aux2; - device_param->kernel_preferred_wgs_multiple_aux2 = device_param->cuda_warp_size; + device_param->kernel_preferred_wgs_multiple_aux2 = device_param->hip_warp_size; } // aux3 @@ -10021,7 +12476,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) { snprintf (kernel_name, sizeof (kernel_name), "m%05u_aux3", kern_type); - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_aux3, device_param->cuda_module, kernel_name) == -1) + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_aux3, device_param->hip_module, kernel_name) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -10031,13 +12486,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_aux3, &device_param->kernel_wgs_aux3) == -1) return -1; + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_aux3, &device_param->kernel_wgs_aux3) == -1) return -1; - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_aux3, &device_param->kernel_local_mem_size_aux3) == -1) return -1; + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_aux3, &device_param->kernel_local_mem_size_aux3) == -1) return -1; device_param->kernel_dynamic_local_mem_size_aux3 = device_param->device_local_mem_size - device_param->kernel_local_mem_size_aux3; - device_param->kernel_preferred_wgs_multiple_aux3 = device_param->cuda_warp_size; + device_param->kernel_preferred_wgs_multiple_aux3 = device_param->hip_warp_size; } // aux4 @@ -10046,7 +12501,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) { snprintf (kernel_name, sizeof (kernel_name), "m%05u_aux4", kern_type); - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_aux4, device_param->cuda_module, kernel_name) == -1) + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_aux4, device_param->hip_module, kernel_name) == -1) { event_log_warning 
(hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -10056,13 +12511,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_aux4, &device_param->kernel_wgs_aux4) == -1) return -1; + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_aux4, &device_param->kernel_wgs_aux4) == -1) return -1; - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_aux4, &device_param->kernel_local_mem_size_aux4) == -1) return -1; + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_aux4, &device_param->kernel_local_mem_size_aux4) == -1) return -1; device_param->kernel_dynamic_local_mem_size_aux4 = device_param->device_local_mem_size - device_param->kernel_local_mem_size_aux4; - device_param->kernel_preferred_wgs_multiple_aux4 = device_param->cuda_warp_size; + device_param->kernel_preferred_wgs_multiple_aux4 = device_param->hip_warp_size; } } @@ -10082,7 +12537,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) { // mp_l - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_mp_l, device_param->cuda_module_mp, "l_markov") == -1) + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_mp_l, device_param->hip_module_mp, "l_markov") == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "l_markov"); @@ -10092,17 +12547,17 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_mp_l, &device_param->kernel_wgs_mp_l) == -1) return -1; + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_mp_l, &device_param->kernel_wgs_mp_l) == -1) return -1; - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_mp_l, &device_param->kernel_local_mem_size_mp_l) == -1) return -1; + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_mp_l, &device_param->kernel_local_mem_size_mp_l) == -1) return -1; device_param->kernel_dynamic_local_mem_size_mp_l = device_param->device_local_mem_size - device_param->kernel_local_mem_size_mp_l; - device_param->kernel_preferred_wgs_multiple_mp_l = device_param->cuda_warp_size; + device_param->kernel_preferred_wgs_multiple_mp_l = device_param->hip_warp_size; // mp_r - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_mp_r, device_param->cuda_module_mp, "r_markov") == -1) + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_mp_r, device_param->hip_module_mp, "r_markov") == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "r_markov"); @@ -10112,13 +12567,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_mp_r, &device_param->kernel_wgs_mp_r) == -1) return -1; + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_mp_r, &device_param->kernel_wgs_mp_r) == -1) return -1; - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_mp_r, &device_param->kernel_local_mem_size_mp_r) == -1) return -1; + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_mp_r, &device_param->kernel_local_mem_size_mp_r) == -1) return -1; device_param->kernel_dynamic_local_mem_size_mp_r = device_param->device_local_mem_size - device_param->kernel_local_mem_size_mp_r; - 
device_param->kernel_preferred_wgs_multiple_mp_r = device_param->cuda_warp_size; + device_param->kernel_preferred_wgs_multiple_mp_r = device_param->hip_warp_size; if (user_options->attack_mode == ATTACK_MODE_BF) { @@ -10131,7 +12586,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) } else if (user_options->attack_mode == ATTACK_MODE_HYBRID1) { - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_mp, device_param->cuda_module_mp, "C_markov") == -1) + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_mp, device_param->hip_module_mp, "C_markov") == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "C_markov"); @@ -10141,17 +12596,17 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_mp, &device_param->kernel_wgs_mp) == -1) return -1; + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_mp, &device_param->kernel_wgs_mp) == -1) return -1; - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_mp, &device_param->kernel_local_mem_size_mp) == -1) return -1; + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_mp, &device_param->kernel_local_mem_size_mp) == -1) return -1; device_param->kernel_dynamic_local_mem_size_mp = device_param->device_local_mem_size - device_param->kernel_local_mem_size_mp; - device_param->kernel_preferred_wgs_multiple_mp = device_param->cuda_warp_size; + device_param->kernel_preferred_wgs_multiple_mp = device_param->hip_warp_size; } else if (user_options->attack_mode == ATTACK_MODE_HYBRID2) { - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_mp, device_param->cuda_module_mp, "C_markov") == -1) + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_mp, device_param->hip_module_mp, "C_markov") == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "C_markov"); @@ -10161,13 +12616,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_mp, &device_param->kernel_wgs_mp) == -1) return -1; + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_mp, &device_param->kernel_wgs_mp) == -1) return -1; - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_mp, &device_param->kernel_local_mem_size_mp) == -1) return -1; + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_mp, &device_param->kernel_local_mem_size_mp) == -1) return -1; device_param->kernel_dynamic_local_mem_size_mp = device_param->device_local_mem_size - device_param->kernel_local_mem_size_mp; - device_param->kernel_preferred_wgs_multiple_mp = device_param->cuda_warp_size; + device_param->kernel_preferred_wgs_multiple_mp = device_param->hip_warp_size; } } @@ -10182,7 +12637,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) } else { - if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_amp, device_param->cuda_module_amp, "amp") == -1) + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_amp, device_param->hip_module_amp, "amp") == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "amp"); @@ -10192,13 +12647,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_cuda_kernel_wgs (hashcat_ctx, 
device_param->cuda_function_amp, &device_param->kernel_wgs_amp) == -1) return -1; + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_amp, &device_param->kernel_wgs_amp) == -1) return -1; - if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_amp, &device_param->kernel_local_mem_size_amp) == -1) return -1; + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_amp, &device_param->kernel_local_mem_size_amp) == -1) return -1; device_param->kernel_dynamic_local_mem_size_amp = device_param->device_local_mem_size - device_param->kernel_local_mem_size_amp; - device_param->kernel_preferred_wgs_multiple_amp = device_param->cuda_warp_size; + device_param->kernel_preferred_wgs_multiple_amp = device_param->hip_warp_size; } /* @@ -10234,9 +12689,9 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) // zero some data buffers - if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_plain_bufs, device_param->size_plains) == -1) return -1; - if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_digests_shown, device_param->size_shown) == -1) return -1; - if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_result, device_param->size_results) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_plain_bufs, device_param->size_plains) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_digests_shown, device_param->size_shown) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_result, device_param->size_results) == -1) return -1; /** * special buffers @@ -10244,28 +12699,28 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) if (user_options->slow_candidates == true) { - if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_rules_c, size_rules_c) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_rules_c, size_rules_c) == -1) return -1; } else { if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT) { - if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_rules_c, size_rules_c) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_rules_c, size_rules_c) == -1) return -1; } else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI) { - if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_combs, size_combs) == -1) return -1; - if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_combs_c, size_combs) == -1) return -1; - if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_root_css_buf, size_root_css) == -1) return -1; - if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_markov_css_buf, size_markov_css) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_combs, size_combs) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_combs_c, size_combs) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_root_css_buf, size_root_css) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_markov_css_buf, size_markov_css) == -1) return -1; } else if (user_options_extra->attack_kern == ATTACK_KERN_BF) { - if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_bfs, size_bfs) 
== -1) return -1; - if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_bfs_c, size_bfs) == -1) return -1; - if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_tm_c, size_tm) == -1) return -1; - if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_root_css_buf, size_root_css) == -1) return -1; - if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_markov_css_buf, size_markov_css) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_bfs, size_bfs) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_bfs_c, size_bfs) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_tm_c, size_tm) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_root_css_buf, size_root_css) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_markov_css_buf, size_markov_css) == -1) return -1; } } @@ -10323,7 +12778,8 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) } } - if (device_param->is_hip == true) + #if defined (__APPLE__) + if (device_param->is_metal == true) { char kernel_name[64] = { 0 }; @@ -10333,11 +12789,11 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) { if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL) { - // kernel1 + // kernel1: m%05u_s%02d snprintf (kernel_name, sizeof (kernel_name), "m%05u_s%02d", kern_type, 4); - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function1, device_param->hip_module, kernel_name) == -1) + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library, kernel_name, &device_param->metal_function1, &device_param->metal_pipeline1) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -10347,19 +12803,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function1, &device_param->kernel_wgs1) == -1) return -1; + if (hc_mtlGetMaxTotalThreadsPerThreadgroup (hashcat_ctx, device_param->metal_pipeline1, &device_param->kernel_wgs1) == -1) return -1; - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function1, &device_param->kernel_local_mem_size1) == -1) return -1; + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline1, &device_param->kernel_preferred_wgs_multiple1) == -1) return -1; - device_param->kernel_dynamic_local_mem_size1 = device_param->device_local_mem_size - device_param->kernel_local_mem_size1; + device_param->kernel_local_mem_size1 = 0; - device_param->kernel_preferred_wgs_multiple1 = device_param->hip_warp_size; + device_param->kernel_dynamic_local_mem_size1 = 0; - // kernel2 + // kernel2: m%05u_s%02d snprintf (kernel_name, sizeof (kernel_name), "m%05u_s%02d", kern_type, 8); - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function2, device_param->hip_module, kernel_name) == -1) + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library, kernel_name, &device_param->metal_function2, &device_param->metal_pipeline2) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -10369,19 +12825,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_hip_kernel_wgs (hashcat_ctx, 
device_param->hip_function2, &device_param->kernel_wgs2) == -1) return -1; + if (hc_mtlGetMaxTotalThreadsPerThreadgroup (hashcat_ctx, device_param->metal_pipeline2, &device_param->kernel_wgs2) == -1) return -1; - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function2, &device_param->kernel_local_mem_size2) == -1) return -1; + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline2, &device_param->kernel_preferred_wgs_multiple2) == -1) return -1; - device_param->kernel_dynamic_local_mem_size2 = device_param->device_local_mem_size - device_param->kernel_local_mem_size2; + device_param->kernel_local_mem_size2 = 0; - device_param->kernel_preferred_wgs_multiple2 = device_param->hip_warp_size; + device_param->kernel_dynamic_local_mem_size2 = 0; - // kernel3 + // kernel3: m%05u_s%02d snprintf (kernel_name, sizeof (kernel_name), "m%05u_s%02d", kern_type, 16); - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function3, device_param->hip_module, kernel_name) == -1) + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library, kernel_name, &device_param->metal_function3, &device_param->metal_pipeline3) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -10391,19 +12847,21 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function3, &device_param->kernel_wgs3) == -1) return -1; + if (hc_mtlGetMaxTotalThreadsPerThreadgroup (hashcat_ctx, device_param->metal_pipeline3, &device_param->kernel_wgs3) == -1) return -1; - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function3, &device_param->kernel_local_mem_size3) == -1) return -1; + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline3, &device_param->kernel_preferred_wgs_multiple3) == -1) return -1; - device_param->kernel_dynamic_local_mem_size3 = device_param->device_local_mem_size - device_param->kernel_local_mem_size3; + device_param->kernel_local_mem_size3 = 0; - device_param->kernel_preferred_wgs_multiple3 = device_param->hip_warp_size; + device_param->kernel_dynamic_local_mem_size3 = 0; } else { + // kernel4: m%05u_sxx + snprintf (kernel_name, sizeof (kernel_name), "m%05u_sxx", kern_type); - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function4, device_param->hip_module, kernel_name) == -1) + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library, kernel_name, &device_param->metal_function4, &device_param->metal_pipeline4) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -10413,16 +12871,16 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function4, &device_param->kernel_wgs4) == -1) return -1; + if (hc_mtlGetMaxTotalThreadsPerThreadgroup (hashcat_ctx, device_param->metal_pipeline4, &device_param->kernel_wgs4) == -1) return -1; - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function4, &device_param->kernel_local_mem_size4) == -1) return -1; + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline4, &device_param->kernel_preferred_wgs_multiple4) == -1) return -1; - device_param->kernel_dynamic_local_mem_size4 = device_param->device_local_mem_size - device_param->kernel_local_mem_size4; + 
device_param->kernel_local_mem_size4 = 0; - device_param->kernel_preferred_wgs_multiple4 = device_param->hip_warp_size; + device_param->kernel_dynamic_local_mem_size4 = 0; } } - else + else // multi { if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL) { @@ -10430,7 +12888,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) snprintf (kernel_name, sizeof (kernel_name), "m%05u_m%02d", kern_type, 4); - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function1, device_param->hip_module, kernel_name) == -1) + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library, kernel_name, &device_param->metal_function1, &device_param->metal_pipeline1) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -10440,19 +12898,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function1, &device_param->kernel_wgs1) == -1) return -1; + if (hc_mtlGetMaxTotalThreadsPerThreadgroup (hashcat_ctx, device_param->metal_pipeline1, &device_param->kernel_wgs1) == -1) return -1; - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function1, &device_param->kernel_local_mem_size1) == -1) return -1; + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline1, &device_param->kernel_preferred_wgs_multiple1) == -1) return -1; - device_param->kernel_dynamic_local_mem_size1 = device_param->device_local_mem_size - device_param->kernel_local_mem_size1; + device_param->kernel_local_mem_size1 = 0; - device_param->kernel_preferred_wgs_multiple1 = device_param->hip_warp_size; + device_param->kernel_dynamic_local_mem_size1 = 0; // kernel2 snprintf (kernel_name, sizeof (kernel_name), "m%05u_m%02d", kern_type, 8); - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function2, device_param->hip_module, kernel_name) == -1) + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library, kernel_name, &device_param->metal_function2, &device_param->metal_pipeline2) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -10462,19 +12920,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function2, &device_param->kernel_wgs2) == -1) return -1; + if (hc_mtlGetMaxTotalThreadsPerThreadgroup (hashcat_ctx, device_param->metal_pipeline2, &device_param->kernel_wgs2) == -1) return -1; - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function2, &device_param->kernel_local_mem_size2) == -1) return -1; + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline2, &device_param->kernel_preferred_wgs_multiple2) == -1) return -1; - device_param->kernel_dynamic_local_mem_size2 = device_param->device_local_mem_size - device_param->kernel_local_mem_size2; + device_param->kernel_local_mem_size2 = 0; - device_param->kernel_preferred_wgs_multiple2 = device_param->hip_warp_size; + device_param->kernel_dynamic_local_mem_size2 = 0; // kernel3 snprintf (kernel_name, sizeof (kernel_name), "m%05u_m%02d", kern_type, 16); - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function3, device_param->hip_module, kernel_name) == -1) + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library, kernel_name, &device_param->metal_function3, 
&device_param->metal_pipeline3) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -10484,19 +12942,21 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function3, &device_param->kernel_wgs3) == -1) return -1; + if (hc_mtlGetMaxTotalThreadsPerThreadgroup (hashcat_ctx, device_param->metal_pipeline3, &device_param->kernel_wgs3) == -1) return -1; - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function3, &device_param->kernel_local_mem_size3) == -1) return -1; + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline3, &device_param->kernel_preferred_wgs_multiple3) == -1) return -1; - device_param->kernel_dynamic_local_mem_size3 = device_param->device_local_mem_size - device_param->kernel_local_mem_size3; + device_param->kernel_local_mem_size3 = 0; - device_param->kernel_preferred_wgs_multiple3 = device_param->hip_warp_size; + device_param->kernel_dynamic_local_mem_size3 = 0; } else { + // kernel4 + snprintf (kernel_name, sizeof (kernel_name), "m%05u_mxx", kern_type); - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function4, device_param->hip_module, kernel_name) == -1) + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library, kernel_name, &device_param->metal_function4, &device_param->metal_pipeline4) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -10506,13 +12966,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function4, &device_param->kernel_wgs4) == -1) return -1; + if (hc_mtlGetMaxTotalThreadsPerThreadgroup (hashcat_ctx, device_param->metal_pipeline4, &device_param->kernel_wgs4) == -1) return -1; - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function4, &device_param->kernel_local_mem_size4) == -1) return -1; + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline4, &device_param->kernel_preferred_wgs_multiple4) == -1) return -1; - device_param->kernel_dynamic_local_mem_size4 = device_param->device_local_mem_size - device_param->kernel_local_mem_size4; + device_param->kernel_local_mem_size4 = 0; - device_param->kernel_preferred_wgs_multiple4 = device_param->hip_warp_size; + device_param->kernel_dynamic_local_mem_size4 = 0; } } @@ -10527,7 +12987,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) { snprintf (kernel_name, sizeof (kernel_name), "m%05u_tm", kern_type); - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_tm, device_param->hip_module, kernel_name) == -1) + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library, kernel_name, &device_param->metal_function_tm, &device_param->metal_pipeline_tm) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -10537,24 +12997,24 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_tm, &device_param->kernel_wgs_tm) == -1) return -1; + if (hc_mtlGetMaxTotalThreadsPerThreadgroup (hashcat_ctx, device_param->metal_pipeline_tm, &device_param->kernel_wgs_tm) == -1) return -1; - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_tm, 
&device_param->kernel_local_mem_size_tm) == -1) return -1; + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline_tm, &device_param->kernel_preferred_wgs_multiple_tm) == -1) return -1; - device_param->kernel_dynamic_local_mem_size_tm = device_param->device_local_mem_size - device_param->kernel_local_mem_size_tm; + device_param->kernel_local_mem_size_tm = 0; - device_param->kernel_preferred_wgs_multiple_tm = device_param->hip_warp_size; + device_param->kernel_dynamic_local_mem_size_tm = 0; } } } } else { - // kernel1 + // kernel1: m%05u_init snprintf (kernel_name, sizeof (kernel_name), "m%05u_init", kern_type); - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function1, device_param->hip_module, kernel_name) == -1) + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library, kernel_name, &device_param->metal_function1, &device_param->metal_pipeline1) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -10564,19 +13024,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function1, &device_param->kernel_wgs1) == -1) return -1; + if (hc_mtlGetMaxTotalThreadsPerThreadgroup (hashcat_ctx, device_param->metal_pipeline1, &device_param->kernel_wgs1) == -1) return -1; - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function1, &device_param->kernel_local_mem_size1) == -1) return -1; + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline1, &device_param->kernel_preferred_wgs_multiple1) == -1) return -1; - device_param->kernel_dynamic_local_mem_size1 = device_param->device_local_mem_size - device_param->kernel_local_mem_size1; + device_param->kernel_local_mem_size1 = 0; - device_param->kernel_preferred_wgs_multiple1 = device_param->hip_warp_size; + device_param->kernel_dynamic_local_mem_size1 = 0; - // kernel2 + // kernel2: m%05u_loop snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop", kern_type); - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function2, device_param->hip_module, kernel_name) == -1) + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library, kernel_name, &device_param->metal_function2, &device_param->metal_pipeline2) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -10586,19 +13046,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function2, &device_param->kernel_wgs2) == -1) return -1; + if (hc_mtlGetMaxTotalThreadsPerThreadgroup (hashcat_ctx, device_param->metal_pipeline2, &device_param->kernel_wgs2) == -1) return -1; - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function2, &device_param->kernel_local_mem_size2) == -1) return -1; + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline2, &device_param->kernel_preferred_wgs_multiple2) == -1) return -1; - device_param->kernel_dynamic_local_mem_size2 = device_param->device_local_mem_size - device_param->kernel_local_mem_size2; + device_param->kernel_local_mem_size2 = 0; - device_param->kernel_preferred_wgs_multiple2 = device_param->hip_warp_size; + device_param->kernel_dynamic_local_mem_size2 = 0; - // kernel3 + // kernel3: m%05u_comp snprintf (kernel_name, sizeof (kernel_name), "m%05u_comp", kern_type); - if 
(hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function3, device_param->hip_module, kernel_name) == -1) + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library, kernel_name, &device_param->metal_function3, &device_param->metal_pipeline3) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -10608,21 +13068,21 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function3, &device_param->kernel_wgs3) == -1) return -1; + if (hc_mtlGetMaxTotalThreadsPerThreadgroup (hashcat_ctx, device_param->metal_pipeline3, &device_param->kernel_wgs3) == -1) return -1; - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function3, &device_param->kernel_local_mem_size3) == -1) return -1; + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline3, &device_param->kernel_preferred_wgs_multiple3) == -1) return -1; - device_param->kernel_dynamic_local_mem_size3 = device_param->device_local_mem_size - device_param->kernel_local_mem_size3; + device_param->kernel_local_mem_size3 = 0; - device_param->kernel_preferred_wgs_multiple3 = device_param->hip_warp_size; + device_param->kernel_dynamic_local_mem_size3 = 0; if (hashconfig->opts_type & OPTS_TYPE_LOOP_PREPARE) { - // kernel2p + // kernel2p: m%05u_loop_prepare snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop_prepare", kern_type); - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function2p, device_param->hip_module, kernel_name) == -1) + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library, kernel_name, &device_param->metal_function2p, &device_param->metal_pipeline2p) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -10632,22 +13092,22 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function2p, &device_param->kernel_wgs2p) == -1) return -1; + if (hc_mtlGetMaxTotalThreadsPerThreadgroup (hashcat_ctx, device_param->metal_pipeline2p, &device_param->kernel_wgs2p) == -1) return -1; - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function2p, &device_param->kernel_local_mem_size2p) == -1) return -1; + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline2p, &device_param->kernel_preferred_wgs_multiple2p) == -1) return -1; - device_param->kernel_dynamic_local_mem_size2p = device_param->device_local_mem_size - device_param->kernel_local_mem_size2p; + device_param->kernel_local_mem_size2p = 0; - device_param->kernel_preferred_wgs_multiple2p = device_param->hip_warp_size; + device_param->kernel_dynamic_local_mem_size2p = 0; } if (hashconfig->opts_type & OPTS_TYPE_LOOP_EXTENDED) { - // kernel2e + // kernel2e: m%05u_loop_extended snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop_extended", kern_type); - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function2e, device_param->hip_module, kernel_name) == -1) + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library, kernel_name, &device_param->metal_function2e, &device_param->metal_pipeline2e) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -10657,22 +13117,22 @@ int backend_session_begin (hashcat_ctx_t 
*hashcat_ctx) continue; } - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function2e, &device_param->kernel_wgs2e) == -1) return -1; + if (hc_mtlGetMaxTotalThreadsPerThreadgroup (hashcat_ctx, device_param->metal_pipeline2e, &device_param->kernel_wgs2e) == -1) return -1; - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function2e, &device_param->kernel_local_mem_size2e) == -1) return -1; + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline2e, &device_param->kernel_preferred_wgs_multiple2e) == -1) return -1; - device_param->kernel_dynamic_local_mem_size2e = device_param->device_local_mem_size - device_param->kernel_local_mem_size2e; + device_param->kernel_local_mem_size2e = 0; - device_param->kernel_preferred_wgs_multiple2e = device_param->hip_warp_size; + device_param->kernel_dynamic_local_mem_size2e = 0; } - // kernel12 - if (hashconfig->opts_type & OPTS_TYPE_HOOK12) { + // kernel12: m%05u_hook12 + snprintf (kernel_name, sizeof (kernel_name), "m%05u_hook12", kern_type); - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function12, device_param->hip_module, kernel_name) == -1) + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library, kernel_name, &device_param->metal_function12, &device_param->metal_pipeline12) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -10682,22 +13142,22 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function12, &device_param->kernel_wgs12) == -1) return -1; + if (hc_mtlGetMaxTotalThreadsPerThreadgroup (hashcat_ctx, device_param->metal_pipeline12, &device_param->kernel_wgs12) == -1) return -1; - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function12, &device_param->kernel_local_mem_size12) == -1) return -1; + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline12, &device_param->kernel_preferred_wgs_multiple12) == -1) return -1; - device_param->kernel_dynamic_local_mem_size12 = device_param->device_local_mem_size - device_param->kernel_local_mem_size12; + device_param->kernel_local_mem_size12 = 0; - device_param->kernel_preferred_wgs_multiple12 = device_param->hip_warp_size; + device_param->kernel_dynamic_local_mem_size12 = 0; } - // kernel23 - if (hashconfig->opts_type & OPTS_TYPE_HOOK23) { + // kernel23: m%05u_hook23 + snprintf (kernel_name, sizeof (kernel_name), "m%05u_hook23", kern_type); - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function23, device_param->hip_module, kernel_name) == -1) + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library, kernel_name, &device_param->metal_function23, &device_param->metal_pipeline23) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -10707,22 +13167,22 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function23, &device_param->kernel_wgs23) == -1) return -1; + if (hc_mtlGetMaxTotalThreadsPerThreadgroup (hashcat_ctx, device_param->metal_pipeline23, &device_param->kernel_wgs23) == -1) return -1; - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function23, &device_param->kernel_local_mem_size23) == -1) return -1; + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, 
device_param->metal_pipeline23, &device_param->kernel_preferred_wgs_multiple23) == -1) return -1; - device_param->kernel_dynamic_local_mem_size23 = device_param->device_local_mem_size - device_param->kernel_local_mem_size23; + device_param->kernel_local_mem_size23 = 0; - device_param->kernel_preferred_wgs_multiple23 = device_param->hip_warp_size; + device_param->kernel_dynamic_local_mem_size23 = 0; } - // init2 - if (hashconfig->opts_type & OPTS_TYPE_INIT2) { + // init2: m%05u_init2 + snprintf (kernel_name, sizeof (kernel_name), "m%05u_init2", kern_type); - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_init2, device_param->hip_module, kernel_name) == -1) + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library, kernel_name, &device_param->metal_function_init2, &device_param->metal_pipeline_init2) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -10732,22 +13192,22 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_init2, &device_param->kernel_wgs_init2) == -1) return -1; + if (hc_mtlGetMaxTotalThreadsPerThreadgroup (hashcat_ctx, device_param->metal_pipeline_init2, &device_param->kernel_wgs_init2) == -1) return -1; - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_init2, &device_param->kernel_local_mem_size_init2) == -1) return -1; + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline_init2, &device_param->kernel_preferred_wgs_multiple_init2) == -1) return -1; - device_param->kernel_dynamic_local_mem_size_init2 = device_param->device_local_mem_size - device_param->kernel_local_mem_size_init2; + device_param->kernel_local_mem_size_init2 = 0; - device_param->kernel_preferred_wgs_multiple_init2 = device_param->hip_warp_size; + device_param->kernel_dynamic_local_mem_size_init2 = 0; } - // loop2 prepare - if (hashconfig->opts_type & OPTS_TYPE_LOOP2_PREPARE) { + // loop2 prepare: m%05u_loop2_prepare + snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop2_prepare", kern_type); - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_loop2p, device_param->hip_module, kernel_name) == -1) + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library, kernel_name, &device_param->metal_function_loop2p, &device_param->metal_pipeline_loop2p) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -10757,22 +13217,22 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_loop2p, &device_param->kernel_wgs_loop2p) == -1) return -1; + if (hc_mtlGetMaxTotalThreadsPerThreadgroup (hashcat_ctx, device_param->metal_pipeline_loop2p, &device_param->kernel_wgs_loop2p) == -1) return -1; - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_loop2p, &device_param->kernel_local_mem_size_loop2p) == -1) return -1; + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline_loop2p, &device_param->kernel_preferred_wgs_multiple_loop2p) == -1) return -1; - device_param->kernel_dynamic_local_mem_size_loop2p = device_param->device_local_mem_size - device_param->kernel_local_mem_size_loop2p; + device_param->kernel_local_mem_size_loop2p = 0; - device_param->kernel_preferred_wgs_multiple_loop2p = 
device_param->hip_warp_size; + device_param->kernel_dynamic_local_mem_size_loop2p = 0; } - // loop2 - if (hashconfig->opts_type & OPTS_TYPE_LOOP2) { + // loop2: m%05u_loop2 + snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop2", kern_type); - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_loop2, device_param->hip_module, kernel_name) == -1) + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library, kernel_name, &device_param->metal_function_loop2, &device_param->metal_pipeline_loop2) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -10782,22 +13242,22 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_loop2, &device_param->kernel_wgs_loop2) == -1) return -1; + if (hc_mtlGetMaxTotalThreadsPerThreadgroup (hashcat_ctx, device_param->metal_pipeline_loop2, &device_param->kernel_wgs_loop2) == -1) return -1; - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_loop2, &device_param->kernel_local_mem_size_loop2) == -1) return -1; + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline_loop2, &device_param->kernel_preferred_wgs_multiple_loop2) == -1) return -1; - device_param->kernel_dynamic_local_mem_size_loop2 = device_param->device_local_mem_size - device_param->kernel_local_mem_size_loop2; + device_param->kernel_local_mem_size_loop2 = 0; - device_param->kernel_preferred_wgs_multiple_loop2 = device_param->hip_warp_size; + device_param->kernel_dynamic_local_mem_size_loop2 = 0; } - // aux1 - if (hashconfig->opts_type & OPTS_TYPE_AUX1) { + // aux1: m%05u_aux1 + snprintf (kernel_name, sizeof (kernel_name), "m%05u_aux1", kern_type); - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_aux1, device_param->hip_module, kernel_name) == -1) + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library, kernel_name, &device_param->metal_function_aux1, &device_param->metal_pipeline_aux1) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -10807,22 +13267,22 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_aux1, &device_param->kernel_wgs_aux1) == -1) return -1; + if (hc_mtlGetMaxTotalThreadsPerThreadgroup (hashcat_ctx, device_param->metal_pipeline_aux1, &device_param->kernel_wgs_aux1) == -1) return -1; - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_aux1, &device_param->kernel_local_mem_size_aux1) == -1) return -1; + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline_aux1, &device_param->kernel_preferred_wgs_multiple_aux1) == -1) return -1; - device_param->kernel_dynamic_local_mem_size_aux1 = device_param->device_local_mem_size - device_param->kernel_local_mem_size_aux1; + device_param->kernel_local_mem_size_aux1 = 0; - device_param->kernel_preferred_wgs_multiple_aux1 = device_param->hip_warp_size; + device_param->kernel_dynamic_local_mem_size_aux1 = 0; } - // aux2 - if (hashconfig->opts_type & OPTS_TYPE_AUX2) { + // aux2: m%05u_aux2 + snprintf (kernel_name, sizeof (kernel_name), "m%05u_aux2", kern_type); - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_aux2, device_param->hip_module, kernel_name) == -1) + if (hc_mtlCreateKernel (hashcat_ctx, 
device_param->metal_device, device_param->metal_library, kernel_name, &device_param->metal_function_aux2, &device_param->metal_pipeline_aux2) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -10832,22 +13292,22 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_aux2, &device_param->kernel_wgs_aux2) == -1) return -1; + if (hc_mtlGetMaxTotalThreadsPerThreadgroup (hashcat_ctx, device_param->metal_pipeline_aux2, &device_param->kernel_wgs_aux2) == -1) return -1; - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_aux2, &device_param->kernel_local_mem_size_aux2) == -1) return -1; + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline_aux2, &device_param->kernel_preferred_wgs_multiple_aux2) == -1) return -1; - device_param->kernel_dynamic_local_mem_size_aux2 = device_param->device_local_mem_size - device_param->kernel_local_mem_size_aux2; + device_param->kernel_local_mem_size_aux2 = 0; - device_param->kernel_preferred_wgs_multiple_aux2 = device_param->hip_warp_size; + device_param->kernel_dynamic_local_mem_size_aux2 = 0; } - // aux3 - if (hashconfig->opts_type & OPTS_TYPE_AUX3) { + // aux3: m%05u_aux3 + snprintf (kernel_name, sizeof (kernel_name), "m%05u_aux3", kern_type); - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_aux3, device_param->hip_module, kernel_name) == -1) + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library, kernel_name, &device_param->metal_function_aux3, &device_param->metal_pipeline_aux3) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -10857,22 +13317,22 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_aux3, &device_param->kernel_wgs_aux3) == -1) return -1; + if (hc_mtlGetMaxTotalThreadsPerThreadgroup (hashcat_ctx, device_param->metal_pipeline_aux3, &device_param->kernel_wgs_aux3) == -1) return -1; - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_aux3, &device_param->kernel_local_mem_size_aux3) == -1) return -1; + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline_aux3, &device_param->kernel_preferred_wgs_multiple_aux3) == -1) return -1; - device_param->kernel_dynamic_local_mem_size_aux3 = device_param->device_local_mem_size - device_param->kernel_local_mem_size_aux3; + device_param->kernel_local_mem_size_aux3 = 0; - device_param->kernel_preferred_wgs_multiple_aux3 = device_param->hip_warp_size; + device_param->kernel_dynamic_local_mem_size_aux3 = 0; } - // aux4 - if (hashconfig->opts_type & OPTS_TYPE_AUX4) { + // aux4: m%05u_aux4 + snprintf (kernel_name, sizeof (kernel_name), "m%05u_aux4", kern_type); - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_aux4, device_param->hip_module, kernel_name) == -1) + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library, kernel_name, &device_param->metal_function_aux4, &device_param->metal_pipeline_aux4) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, kernel_name); @@ -10882,21 +13342,16 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_hip_kernel_wgs (hashcat_ctx, 
device_param->hip_function_aux4, &device_param->kernel_wgs_aux4) == -1) return -1; + if (hc_mtlGetMaxTotalThreadsPerThreadgroup (hashcat_ctx, device_param->metal_pipeline_aux4, &device_param->kernel_wgs_aux4) == -1) return -1; - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_aux4, &device_param->kernel_local_mem_size_aux4) == -1) return -1; + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline_aux4, &device_param->kernel_preferred_wgs_multiple_aux4) == -1) return -1; - device_param->kernel_dynamic_local_mem_size_aux4 = device_param->device_local_mem_size - device_param->kernel_local_mem_size_aux4; + device_param->kernel_local_mem_size_aux4 = 0; - device_param->kernel_preferred_wgs_multiple_aux4 = device_param->hip_warp_size; + device_param->kernel_dynamic_local_mem_size_aux4 = 0; } } - //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 0, sizeof (cl_mem), device_param->kernel_params_decompress[0]); if (CL_rc == -1) return -1; - //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 1, sizeof (cl_mem), device_param->kernel_params_decompress[1]); if (CL_rc == -1) return -1; - //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 2, sizeof (cl_mem), device_param->kernel_params_decompress[2]); if (CL_rc == -1) return -1; - //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 3, sizeof (cl_ulong), device_param->kernel_params_decompress[3]); if (CL_rc == -1) return -1; - // MP start if (user_options->slow_candidates == true) @@ -10906,9 +13361,9 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) { if (user_options->attack_mode == ATTACK_MODE_BF) { - // mp_l + // mp_l: l_markov - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_mp_l, device_param->hip_module_mp, "l_markov") == -1) + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library_mp, "l_markov", &device_param->metal_function_mp_l, &device_param->metal_pipeline_mp_l) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "l_markov"); @@ -10918,17 +13373,17 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_mp_l, &device_param->kernel_wgs_mp_l) == -1) return -1; + if (hc_mtlGetMaxTotalThreadsPerThreadgroup (hashcat_ctx, device_param->metal_pipeline_mp_l, &device_param->kernel_wgs_mp_l) == -1) return -1; - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_mp_l, &device_param->kernel_local_mem_size_mp_l) == -1) return -1; + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline_mp_l, &device_param->kernel_preferred_wgs_multiple_mp_l) == -1) return -1; - device_param->kernel_dynamic_local_mem_size_mp_l = device_param->device_local_mem_size - device_param->kernel_local_mem_size_mp_l; + device_param->kernel_local_mem_size_mp_l = 0; - device_param->kernel_preferred_wgs_multiple_mp_l = device_param->hip_warp_size; + device_param->kernel_dynamic_local_mem_size_mp_l = 0; - // mp_r + // mp_r: r_markov - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_mp_r, device_param->hip_module_mp, "r_markov") == -1) + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library_mp, "r_markov", &device_param->metal_function_mp_r, &device_param->metal_pipeline_mp_r) == -1) { event_log_warning 
(hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "r_markov"); @@ -10938,26 +13393,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_mp_r, &device_param->kernel_wgs_mp_r) == -1) return -1; - - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_mp_r, &device_param->kernel_local_mem_size_mp_r) == -1) return -1; + if (hc_mtlGetMaxTotalThreadsPerThreadgroup (hashcat_ctx, device_param->metal_pipeline_mp_r, &device_param->kernel_wgs_mp_r) == -1) return -1; - device_param->kernel_dynamic_local_mem_size_mp_r = device_param->device_local_mem_size - device_param->kernel_local_mem_size_mp_r; + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline_mp_r, &device_param->kernel_preferred_wgs_multiple_mp_r) == -1) return -1; - device_param->kernel_preferred_wgs_multiple_mp_r = device_param->hip_warp_size; + device_param->kernel_local_mem_size_mp_r = 0; - if (user_options->attack_mode == ATTACK_MODE_BF) - { - if (hashconfig->opts_type & OPTS_TYPE_TM_KERNEL) - { - //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_tm, 0, sizeof (cl_mem), device_param->kernel_params_tm[0]); if (CL_rc == -1) return -1; - //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_tm, 1, sizeof (cl_mem), device_param->kernel_params_tm[1]); if (CL_rc == -1) return -1; - } - } + device_param->kernel_dynamic_local_mem_size_mp_r = 0; } else if (user_options->attack_mode == ATTACK_MODE_HYBRID1) { - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_mp, device_param->hip_module_mp, "C_markov") == -1) + // mp_c: C_markov + + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library_mp, "C_markov", &device_param->metal_function_mp, &device_param->metal_pipeline_mp) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "C_markov"); @@ -10967,17 +13415,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_mp, &device_param->kernel_wgs_mp) == -1) return -1; + if (hc_mtlGetMaxTotalThreadsPerThreadgroup (hashcat_ctx, device_param->metal_pipeline_mp, &device_param->kernel_wgs_mp) == -1) return -1; - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_mp, &device_param->kernel_local_mem_size_mp) == -1) return -1; + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline_mp, &device_param->kernel_preferred_wgs_multiple_mp) == -1) return -1; - device_param->kernel_dynamic_local_mem_size_mp = device_param->device_local_mem_size - device_param->kernel_local_mem_size_mp; + device_param->kernel_local_mem_size_mp = 0; - device_param->kernel_preferred_wgs_multiple_mp = device_param->hip_warp_size; + device_param->kernel_dynamic_local_mem_size_mp = 0; } else if (user_options->attack_mode == ATTACK_MODE_HYBRID2) { - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_mp, device_param->hip_module_mp, "C_markov") == -1) + // mp_c: C_markov + + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library_mp, "C_markov", &device_param->metal_function_mp, &device_param->metal_pipeline_mp) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "C_markov"); @@ -10987,13 +13437,13 @@ int backend_session_begin 
(hashcat_ctx_t *hashcat_ctx) continue; } - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_mp, &device_param->kernel_wgs_mp) == -1) return -1; + if (hc_mtlGetMaxTotalThreadsPerThreadgroup (hashcat_ctx, device_param->metal_pipeline_mp, &device_param->kernel_wgs_mp) == -1) return -1; - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_mp, &device_param->kernel_local_mem_size_mp) == -1) return -1; + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline_mp, &device_param->kernel_preferred_wgs_multiple_mp) == -1) return -1; - device_param->kernel_dynamic_local_mem_size_mp = device_param->device_local_mem_size - device_param->kernel_local_mem_size_mp; + device_param->kernel_local_mem_size_mp = 0; - device_param->kernel_preferred_wgs_multiple_mp = device_param->hip_warp_size; + device_param->kernel_dynamic_local_mem_size_mp = 0; } } @@ -11008,7 +13458,9 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) } else { - if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_amp, device_param->hip_module_amp, "amp") == -1) + // amp + + if (hc_mtlCreateKernel (hashcat_ctx, device_param->metal_device, device_param->metal_library_amp, "amp", &device_param->metal_function_amp, &device_param->metal_pipeline_amp) == -1) { event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "amp"); @@ -11018,51 +13470,21 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_amp, &device_param->kernel_wgs_amp) == -1) return -1; - - if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_amp, &device_param->kernel_local_mem_size_amp) == -1) return -1; - - device_param->kernel_dynamic_local_mem_size_amp = device_param->device_local_mem_size - device_param->kernel_local_mem_size_amp; - - device_param->kernel_preferred_wgs_multiple_amp = device_param->hip_warp_size; - } - - /* - if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) - { - // nothing to do - } - else - { - for (u32 i = 0; i < 5; i++) - { - //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_amp, i, sizeof (cl_mem), device_param->kernel_params_amp[i]); - - //if (CL_rc == -1) return -1; - } - - for (u32 i = 5; i < 6; i++) - { - //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_amp, i, sizeof (cl_uint), device_param->kernel_params_amp[i]); + if (hc_mtlGetMaxTotalThreadsPerThreadgroup (hashcat_ctx, device_param->metal_pipeline_amp, &device_param->kernel_wgs_amp) == -1) return -1; - //if (CL_rc == -1) return -1; - } + if (hc_mtlGetThreadExecutionWidth (hashcat_ctx, device_param->metal_pipeline_amp, &device_param->kernel_preferred_wgs_multiple_amp) == -1) return -1; - for (u32 i = 6; i < 7; i++) - { - //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_amp, i, sizeof (cl_ulong), device_param->kernel_params_amp[i]); + device_param->kernel_local_mem_size_amp = 0; - //if (CL_rc == -1) return -1; - } + device_param->kernel_dynamic_local_mem_size_amp = 0; } - */ } // zero some data buffers - if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_plain_bufs, device_param->size_plains) == -1) return -1; - if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_digests_shown, device_param->size_shown) == -1) return -1; - if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_result, device_param->size_results) == -1) return -1; + if 
(run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_plain_bufs, device_param->size_plains) == -1) return -1; + if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_digests_shown, device_param->size_shown) == -1) return -1; + if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_result, device_param->size_results) == -1) return -1; /** * special buffers @@ -11070,28 +13492,28 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) if (user_options->slow_candidates == true) { - if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_rules_c, size_rules_c) == -1) return -1; + if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_rules_c, size_rules_c) == -1) return -1; } else { if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT) { - if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_rules_c, size_rules_c) == -1) return -1; + if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_rules_c, size_rules_c) == -1) return -1; } else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI) { - if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_combs, size_combs) == -1) return -1; - if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_combs_c, size_combs) == -1) return -1; - if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_root_css_buf, size_root_css) == -1) return -1; - if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_markov_css_buf, size_markov_css) == -1) return -1; + if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_combs, size_combs) == -1) return -1; + if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_combs_c, size_combs) == -1) return -1; + if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_root_css_buf, size_root_css) == -1) return -1; + if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_markov_css_buf, size_markov_css) == -1) return -1; } else if (user_options_extra->attack_kern == ATTACK_KERN_BF) { - if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_bfs, size_bfs) == -1) return -1; - if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_bfs_c, size_bfs) == -1) return -1; - if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_tm_c, size_tm) == -1) return -1; - if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_root_css_buf, size_root_css) == -1) return -1; - if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_markov_css_buf, size_markov_css) == -1) return -1; + if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_bfs, size_bfs) == -1) return -1; + if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_bfs_c, size_bfs) == -1) return -1; + if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_tm_c, size_tm) == -1) return -1; + if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_root_css_buf, size_root_css) == -1) return -1; + if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_markov_css_buf, size_markov_css) == -1) return -1; } } @@ -11124,8 +13546,6 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) device_param->kernel_params_mp_buf32[6] = 0; device_param->kernel_params_mp_buf32[7] = 0; } - - //for (u32 i = 0; 
i < 3; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp, i, sizeof (cl_mem), device_param->kernel_params_mp[i]); if (CL_rc == -1) return -1; } } else if (user_options->attack_mode == ATTACK_MODE_BF) { @@ -11142,12 +13562,10 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) if (hashconfig->opts_type & OPTS_TYPE_PT_ADD80) device_param->kernel_params_mp_l_buf32[6] = full80; if (hashconfig->opts_type & OPTS_TYPE_PT_ADDBITS14) device_param->kernel_params_mp_l_buf32[7] = 1; if (hashconfig->opts_type & OPTS_TYPE_PT_ADDBITS15) device_param->kernel_params_mp_l_buf32[8] = 1; - - //for (u32 i = 0; i < 3; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_l, i, sizeof (cl_mem), device_param->kernel_params_mp_l[i]); if (CL_rc == -1) return -1; } - //for (u32 i = 0; i < 3; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_r, i, sizeof (cl_mem), device_param->kernel_params_mp_r[i]); if (CL_rc == -1) return -1; } } } } + #endif // __APPLE__ if (device_param->is_opencl == true) { @@ -12164,13 +14582,6 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) { const size_t undocumented_single_allocation_apple = 0x7fffffff; - if (bitmap_ctx->bitmap_size > undocumented_single_allocation_apple) memory_limit_hit = 1; - if (bitmap_ctx->bitmap_size > undocumented_single_allocation_apple) memory_limit_hit = 1; - if (bitmap_ctx->bitmap_size > undocumented_single_allocation_apple) memory_limit_hit = 1; - if (bitmap_ctx->bitmap_size > undocumented_single_allocation_apple) memory_limit_hit = 1; - if (bitmap_ctx->bitmap_size > undocumented_single_allocation_apple) memory_limit_hit = 1; - if (bitmap_ctx->bitmap_size > undocumented_single_allocation_apple) memory_limit_hit = 1; - if (bitmap_ctx->bitmap_size > undocumented_single_allocation_apple) memory_limit_hit = 1; if (bitmap_ctx->bitmap_size > undocumented_single_allocation_apple) memory_limit_hit = 1; if (size_bfs > undocumented_single_allocation_apple) memory_limit_hit = 1; if (size_combs > undocumented_single_allocation_apple) memory_limit_hit = 1; @@ -12337,6 +14748,25 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_hooks, device_param->size_hooks) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_pws, NULL, &device_param->metal_d_pws_buf) == -1) return -1; + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_pws_amp, NULL, &device_param->metal_d_pws_amp_buf) == -1) return -1; + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_pws_comp, NULL, &device_param->metal_d_pws_comp_buf) == -1) return -1; + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_pws_idx, NULL, &device_param->metal_d_pws_idx) == -1) return -1; + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_tmps, NULL, &device_param->metal_d_tmps) == -1) return -1; + if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, size_hooks, NULL, &device_param->metal_d_hooks) == -1) return -1; + + if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_pws_buf, device_param->size_pws) == -1) return -1; + if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_pws_amp_buf, device_param->size_pws_amp) == -1) return -1; + if (run_metal_kernel_bzero (hashcat_ctx, device_param, 
device_param->metal_d_pws_comp_buf, device_param->size_pws_comp) == -1) return -1; + if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_pws_idx, device_param->size_pws_idx) == -1) return -1; + if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_tmps, device_param->size_tmps) == -1) return -1; + if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_hooks, device_param->size_hooks) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE, size_pws, NULL, &device_param->opencl_d_pws_buf) == -1) return -1; @@ -12379,7 +14809,6 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) device_param->scratch_buf = scratch_buf; #ifdef WITH_BRAIN - u8 *brain_link_in_buf = (u8 *) hcmalloc (size_brain_link_in); device_param->brain_link_in_buf = brain_link_in_buf; @@ -12415,6 +14844,15 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) device_param->kernel_params[ 5] = &device_param->hip_d_hooks; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + device_param->kernel_params[ 0] = device_param->metal_d_pws_buf; + device_param->kernel_params[ 4] = device_param->metal_d_tmps; + device_param->kernel_params[ 5] = device_param->metal_d_hooks; + } + #endif + if (device_param->is_opencl == true) { device_param->kernel_params[ 0] = &device_param->opencl_d_pws_buf; @@ -12453,6 +14891,15 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp, 0, sizeof (cl_mem), device_param->kernel_params_mp[0]); if (CL_rc == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + device_param->kernel_params_mp[0] = (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + ? device_param->metal_d_pws_buf + : device_param->metal_d_pws_amp_buf; + } + #endif + if (device_param->is_opencl == true) { device_param->kernel_params_mp[0] = (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) @@ -12484,6 +14931,15 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_l, 0, sizeof (cl_mem), device_param->kernel_params_mp_l[0]); if (CL_rc == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + device_param->kernel_params_mp_l[0] = (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + ? 
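For context, the hc_mtlCreateBuffer() calls above parallel the hc_clCreateBuffer() calls in the OpenCL branch that follows; the wrapper itself (defined later in this patch, in src/ext_metal.m) requests MTLResourceStorageModeShared. A reduced sketch of what a single allocation amounts to, with the function and variable names below being placeholders:

#import <Metal/Metal.h>

// Sketch: what one metal_d_pws_buf-style allocation reduces to.
// 'device' stands for device_param->metal_device, 'size_pws' for the same
// size value used by the CUDA/HIP/OpenCL branches.
static id<MTLBuffer> example_alloc_pws (id<MTLDevice> device, const size_t size_pws)
{
  // Shared storage keeps the buffer visible to both CPU and GPU, which is
  // what hc_mtlCreateBuffer() asks for.
  return [device newBufferWithLength: size_pws
                             options: MTLResourceStorageModeShared];
}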
device_param->metal_d_pws_buf + : device_param->metal_d_pws_amp_buf; + } + #endif + if (device_param->is_opencl == true) { device_param->kernel_params_mp_l[0] = (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) @@ -12518,6 +14974,14 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_amp, 1, sizeof (cl_mem), device_param->kernel_params_amp[1]); if (CL_rc == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + device_param->kernel_params_amp[0] = device_param->metal_d_pws_buf; + device_param->kernel_params_amp[1] = device_param->metal_d_pws_amp_buf; + } + #endif + if (device_param->is_opencl == true) { device_param->kernel_params_amp[0] = &device_param->opencl_d_pws_buf; @@ -12555,6 +15019,17 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 2, sizeof (cl_mem), device_param->kernel_params_decompress[2]); if (CL_rc == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + device_param->kernel_params_decompress[0] = device_param->metal_d_pws_idx; + device_param->kernel_params_decompress[1] = device_param->metal_d_pws_comp_buf; + device_param->kernel_params_decompress[2] = (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + ? device_param->metal_d_pws_buf + : device_param->metal_d_pws_amp_buf; + } + #endif + if (device_param->is_opencl == true) { device_param->kernel_params_decompress[0] = &device_param->opencl_d_pws_idx; @@ -12919,6 +15394,153 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx) device_param->hip_context = NULL; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (device_param->metal_d_pws_buf) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_pws_buf); + if (device_param->metal_d_pws_amp_buf) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_pws_amp_buf); + if (device_param->metal_d_pws_comp_buf) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_pws_comp_buf); + if (device_param->metal_d_pws_idx) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_pws_idx); + if (device_param->metal_d_rules) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_rules); + if (device_param->metal_d_rules_c) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_rules_c); + if (device_param->metal_d_combs) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_combs); + if (device_param->metal_d_combs_c) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_combs_c); + if (device_param->metal_d_bfs) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_bfs); + if (device_param->metal_d_bfs_c) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_bfs_c); + if (device_param->metal_d_bitmap_s1_a) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_bitmap_s1_a); + if (device_param->metal_d_bitmap_s1_b) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_bitmap_s1_b); + if (device_param->metal_d_bitmap_s1_c) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_bitmap_s1_c); + if (device_param->metal_d_bitmap_s1_d) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_bitmap_s1_d); + if (device_param->metal_d_bitmap_s2_a) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_bitmap_s2_a); + if (device_param->metal_d_bitmap_s2_b) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_bitmap_s2_b); + if (device_param->metal_d_bitmap_s2_c) 
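One difference worth noting against the CUDA/HIP/OpenCL branches: the Metal blocks store the mtl_mem handle itself in kernel_params[], kernel_params_mp*[], kernel_params_amp[] and kernel_params_decompress[], not its address. That is consistent with hc_mtlSetCommandEncoderArg() (later in this patch) taking the buffer handle directly and binding it with setBuffer:offset:atIndex:. A minimal sketch of that binding, with hypothetical names:

#import <Metal/Metal.h>

// Sketch: bind the pws buffer (stored directly in kernel_params[0] by the
// hunk above) to compute-argument slot 0. 'enc' would come from
// hc_mtlEncodeComputeCommand_pre().
static void example_bind_pws (id<MTLComputeCommandEncoder> enc, id<MTLBuffer> pws_buf)
{
  [enc setBuffer: pws_buf offset: 0 atIndex: 0];
}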
hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_bitmap_s2_c); + if (device_param->metal_d_bitmap_s2_d) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_bitmap_s2_d); + if (device_param->metal_d_plain_bufs) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_plain_bufs); + if (device_param->metal_d_digests_buf) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_digests_buf); + if (device_param->metal_d_digests_shown) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_digests_shown); + if (device_param->metal_d_salt_bufs) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_salt_bufs); + if (device_param->metal_d_esalt_bufs) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_esalt_bufs); + if (device_param->metal_d_tmps) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_tmps); + if (device_param->metal_d_hooks) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_hooks); + if (device_param->metal_d_result) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_result); + if (device_param->metal_d_extra0_buf) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_extra0_buf); + if (device_param->metal_d_extra1_buf) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_extra1_buf); + if (device_param->metal_d_extra2_buf) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_extra2_buf); + if (device_param->metal_d_extra3_buf) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_extra3_buf); + if (device_param->metal_d_root_css_buf) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_root_css_buf); + if (device_param->metal_d_markov_css_buf) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_markov_css_buf); + if (device_param->metal_d_tm_c) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_tm_c); + if (device_param->metal_d_st_digests_buf) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_st_digests_buf); + if (device_param->metal_d_st_salts_buf) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_st_salts_buf); + if (device_param->metal_d_st_esalts_buf) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_st_esalts_buf); + if (device_param->metal_d_kernel_param) hc_mtlReleaseMemObject (hashcat_ctx, device_param->metal_d_kernel_param); + + if (device_param->metal_function1) hc_mtlReleaseFunction (hashcat_ctx, device_param->metal_function1); + if (device_param->metal_function12) hc_mtlReleaseFunction (hashcat_ctx, device_param->metal_function12); + if (device_param->metal_function2p) hc_mtlReleaseFunction (hashcat_ctx, device_param->metal_function2p); + if (device_param->metal_function2) hc_mtlReleaseFunction (hashcat_ctx, device_param->metal_function2); + if (device_param->metal_function2e) hc_mtlReleaseFunction (hashcat_ctx, device_param->metal_function2e); + if (device_param->metal_function23) hc_mtlReleaseFunction (hashcat_ctx, device_param->metal_function23); + if (device_param->metal_function3) hc_mtlReleaseFunction (hashcat_ctx, device_param->metal_function3); + if (device_param->metal_function4) hc_mtlReleaseFunction (hashcat_ctx, device_param->metal_function4); + if (device_param->metal_function_init2) hc_mtlReleaseFunction (hashcat_ctx, device_param->metal_function_init2); + if (device_param->metal_function_loop2p) hc_mtlReleaseFunction (hashcat_ctx, device_param->metal_function_loop2p); + if (device_param->metal_function_loop2) hc_mtlReleaseFunction (hashcat_ctx, device_param->metal_function_loop2); + if (device_param->metal_function_mp) 
hc_mtlReleaseFunction (hashcat_ctx, device_param->metal_function_mp); + if (device_param->metal_function_mp_l) hc_mtlReleaseFunction (hashcat_ctx, device_param->metal_function_mp_l); + if (device_param->metal_function_mp_r) hc_mtlReleaseFunction (hashcat_ctx, device_param->metal_function_mp_r); + if (device_param->metal_function_tm) hc_mtlReleaseFunction (hashcat_ctx, device_param->metal_function_tm); + if (device_param->metal_function_amp) hc_mtlReleaseFunction (hashcat_ctx, device_param->metal_function_amp); + if (device_param->metal_function_memset) hc_mtlReleaseFunction (hashcat_ctx, device_param->metal_function_memset); + if (device_param->metal_function_bzero) hc_mtlReleaseFunction (hashcat_ctx, device_param->metal_function_bzero); + if (device_param->metal_function_atinit) hc_mtlReleaseFunction (hashcat_ctx, device_param->metal_function_atinit); + if (device_param->metal_function_utf8toutf16le) hc_mtlReleaseFunction (hashcat_ctx, device_param->metal_function_utf8toutf16le); + if (device_param->metal_function_decompress) hc_mtlReleaseFunction (hashcat_ctx, device_param->metal_function_decompress); + if (device_param->metal_function_aux1) hc_mtlReleaseFunction (hashcat_ctx, device_param->metal_function_aux1); + if (device_param->metal_function_aux2) hc_mtlReleaseFunction (hashcat_ctx, device_param->metal_function_aux2); + if (device_param->metal_function_aux3) hc_mtlReleaseFunction (hashcat_ctx, device_param->metal_function_aux3); + if (device_param->metal_function_aux4) hc_mtlReleaseFunction (hashcat_ctx, device_param->metal_function_aux4); + + if (device_param->metal_library) hc_mtlReleaseLibrary (hashcat_ctx, device_param->metal_library); + if (device_param->metal_library_mp) hc_mtlReleaseLibrary (hashcat_ctx, device_param->metal_library_mp); + if (device_param->metal_library_amp) hc_mtlReleaseLibrary (hashcat_ctx, device_param->metal_library_amp); + if (device_param->metal_library_shared) hc_mtlReleaseLibrary (hashcat_ctx, device_param->metal_library_shared); + + if (device_param->metal_command_queue) hc_mtlReleaseCommandQueue (hashcat_ctx, device_param->metal_command_queue); + + //if (device_param->metal_device) hc_mtlReleaseDevice (hashcat_ctx, device_param->metal_device); + + device_param->metal_d_pws_buf = NULL; + device_param->metal_d_pws_amp_buf = NULL; + device_param->metal_d_pws_comp_buf = NULL; + device_param->metal_d_pws_idx = NULL; + device_param->metal_d_rules = NULL; + device_param->metal_d_rules_c = NULL; + device_param->metal_d_combs = NULL; + device_param->metal_d_combs_c = NULL; + device_param->metal_d_bfs = NULL; + device_param->metal_d_bfs_c = NULL; + device_param->metal_d_bitmap_s1_a = NULL; + device_param->metal_d_bitmap_s1_b = NULL; + device_param->metal_d_bitmap_s1_c = NULL; + device_param->metal_d_bitmap_s1_d = NULL; + device_param->metal_d_bitmap_s2_a = NULL; + device_param->metal_d_bitmap_s2_b = NULL; + device_param->metal_d_bitmap_s2_c = NULL; + device_param->metal_d_bitmap_s2_d = NULL; + device_param->metal_d_plain_bufs = NULL; + device_param->metal_d_digests_buf = NULL; + device_param->metal_d_digests_shown = NULL; + device_param->metal_d_salt_bufs = NULL; + device_param->metal_d_esalt_bufs = NULL; + device_param->metal_d_tmps = NULL; + device_param->metal_d_hooks = NULL; + device_param->metal_d_result = NULL; + device_param->metal_d_extra0_buf = NULL; + device_param->metal_d_extra1_buf = NULL; + device_param->metal_d_extra2_buf = NULL; + device_param->metal_d_extra3_buf = NULL; + device_param->metal_d_root_css_buf = NULL; + 
device_param->metal_d_markov_css_buf = NULL; + device_param->metal_d_tm_c = NULL; + device_param->metal_d_st_digests_buf = NULL; + device_param->metal_d_st_salts_buf = NULL; + device_param->metal_d_st_esalts_buf = NULL; + device_param->metal_d_kernel_param = NULL; + device_param->metal_function1 = NULL; + device_param->metal_function12 = NULL; + device_param->metal_function2p = NULL; + device_param->metal_function2 = NULL; + device_param->metal_function2e = NULL; + device_param->metal_function23 = NULL; + device_param->metal_function3 = NULL; + device_param->metal_function4 = NULL; + device_param->metal_function_init2 = NULL; + device_param->metal_function_loop2p = NULL; + device_param->metal_function_loop2 = NULL; + device_param->metal_function_mp = NULL; + device_param->metal_function_mp_l = NULL; + device_param->metal_function_mp_r = NULL; + device_param->metal_function_tm = NULL; + device_param->metal_function_amp = NULL; + device_param->metal_function_memset = NULL; + device_param->metal_function_bzero = NULL; + device_param->metal_function_atinit = NULL; + device_param->metal_function_utf8toutf16le = NULL; + device_param->metal_function_decompress = NULL; + device_param->metal_function_aux1 = NULL; + device_param->metal_function_aux2 = NULL; + device_param->metal_function_aux3 = NULL; + device_param->metal_function_aux4 = NULL; + device_param->metal_library = NULL; + device_param->metal_library_mp = NULL; + device_param->metal_library_amp = NULL; + device_param->metal_library_shared = NULL; + device_param->metal_command_queue = NULL; + //device_param->metal_device = NULL; + } + #endif // __APPLE__ + if (device_param->is_opencl == true) { if (device_param->opencl_d_pws_buf) hc_clReleaseMemObject (hashcat_ctx, device_param->opencl_d_pws_buf); @@ -13207,6 +15829,14 @@ int backend_session_update_mp (hashcat_ctx_t *hashcat_ctx) if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_markov_css_buf, mask_ctx->markov_css_buf, device_param->size_markov_css, device_param->hip_stream) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_root_css_buf, 0, mask_ctx->root_css_buf, device_param->size_root_css) == -1) return -1; + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_markov_css_buf, 0, mask_ctx->markov_css_buf, device_param->size_markov_css) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_root_css_buf, CL_FALSE, 0, device_param->size_root_css, mask_ctx->root_css_buf, 0, NULL, NULL) == -1) return -1; @@ -13255,6 +15885,14 @@ int backend_session_update_mp_rl (hashcat_ctx_t *hashcat_ctx, const u32 css_cnt_ if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_markov_css_buf, mask_ctx->markov_css_buf, device_param->size_markov_css, device_param->hip_stream) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_root_css_buf, 0, mask_ctx->root_css_buf, device_param->size_root_css) == -1) return -1; + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_markov_css_buf, 0, mask_ctx->markov_css_buf, device_param->size_markov_css) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer 
(hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_root_css_buf, CL_FALSE, 0, device_param->size_root_css, mask_ctx->root_css_buf, 0, NULL, NULL) == -1) return -1; diff --git a/src/ext_metal.m b/src/ext_metal.m new file mode 100644 index 000000000..c4e8a3a03 --- /dev/null +++ b/src/ext_metal.m @@ -0,0 +1,1416 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#include "common.h" +#include "types.h" +#include "memory.h" +#include "event.h" +#include "timer.h" +#include "ext_metal.h" + +#include + +#include +#include +#include + +typedef NS_ENUM(NSUInteger, hc_mtlFeatureSet) +{ + MTL_FEATURESET_MACOS_GPUFAMILY_1_V1 = 10000, + MTL_FEATURESET_MACOS_GPUFAMILY_1_V2 = 10001, + MTL_FEATURESET_MACOS_GPUFAMILY_1_V3 = 10003, + MTL_FEATURESET_MACOS_GPUFAMILY_1_V4 = 10004, + MTL_FEATURESET_MACOS_GPUFAMILY_2_V1 = 10005, + +} metalDeviceFeatureSet_macOS_t; + +typedef NS_ENUM(NSUInteger, hc_mtlLanguageVersion) +{ + MTL_LANGUAGEVERSION_1_0 = (1 << 16), + MTL_LANGUAGEVERSION_1_1 = (1 << 16) + 1, + MTL_LANGUAGEVERSION_1_2 = (1 << 16) + 2, + MTL_LANGUAGEVERSION_2_0 = (2 << 16), + MTL_LANGUAGEVERSION_2_1 = (2 << 16) + 1, + MTL_LANGUAGEVERSION_2_2 = (2 << 16) + 2, + MTL_LANGUAGEVERSION_2_3 = (2 << 16) + 3, + MTL_LANGUAGEVERSION_2_4 = (2 << 16) + 4, + +} metalLanguageVersion_t; + +static bool iokit_getGPUCore (void *hashcat_ctx, int *gpu_core) +{ + bool rc = false; + + CFMutableDictionaryRef matching = IOServiceMatching ("IOAccelerator"); + + io_service_t service = IOServiceGetMatchingService (kIOMasterPortDefault, matching); + + if (!service) + { + event_log_error (hashcat_ctx, "IOServiceGetMatchingService(): %08x", service); + + return rc; + } + + // "gpu-core-count" is present only on Apple Silicon + + CFNumberRef num = IORegistryEntryCreateCFProperty(service, CFSTR("gpu-core-count"), kCFAllocatorDefault, 0); + + int gc = 0; + + if (num == nil || CFNumberGetValue (num, kCFNumberIntType, &gc) == false) + { + //event_log_error (hashcat_ctx, "IORegistryEntryCreateCFProperty(): 'gpu-core-count' entry not found"); + } + else + { + *gpu_core = gc; + + rc = true; + } + + IOObjectRelease (service); + + return rc; +} + +static int hc_mtlInvocationHelper (id target, SEL selector, void *returnValue) +{ + if (target == nil) return -1; + if (selector == nil) return -1; + + if ([target respondsToSelector: selector]) + { + NSMethodSignature *signature = [object_getClass (target) instanceMethodSignatureForSelector: selector]; + NSInvocation *invocation = [NSInvocation invocationWithMethodSignature: signature]; + [invocation setTarget: target]; + [invocation setSelector: selector]; + [invocation invoke]; + [invocation getReturnValue: returnValue]; + + return 0; + } + + return -1; +} + +static int hc_mtlBuildOptionsToDict (void *hashcat_ctx, const char *build_options_buf, const char *include_path, NSMutableDictionary *build_options_dict) +{ + if (build_options_buf == nil) + { + event_log_error (hashcat_ctx, "%s(): build_options_buf is NULL", __func__); + return -1; + } + + if (build_options_dict == nil) + { + event_log_error (hashcat_ctx, "%s(): build_options_dict is NULL", __func__); + return -1; + } + + // NSString from build_options_buf + + NSString *options = [NSString stringWithCString: build_options_buf encoding: NSUTF8StringEncoding]; + + if (options == nil) + { + event_log_error (hashcat_ctx, "%s(): stringWithCString failed", __func__); + return -1; + } + + // replace '-D ' to '' + + options = [options stringByReplacingOccurrencesOfString:@"-D " withString:@""]; + + if 
(options == nil) + { + event_log_error (hashcat_ctx, "%s(): stringByReplacingOccurrencesOfString(-D) failed", __func__); + return -1; + } + + // replace '-I OpenCL ' to '' + + options = [options stringByReplacingOccurrencesOfString:@"-I OpenCL " withString:@""]; + + if (options == nil) + { + event_log_error (hashcat_ctx, "%s(): stringByReplacingOccurrencesOfString(-I OpenCL) failed", __func__); + return -1; + } + + //NSLog(@"options: '%@'", options); + + // creating NSDictionary from options + + NSArray *lines = [options componentsSeparatedByCharactersInSet:[NSCharacterSet whitespaceCharacterSet]]; + + for (NSString *aKeyValue in lines) + { + NSArray *components = [aKeyValue componentsSeparatedByString:@"="]; + + NSString *key = [components[0] stringByTrimmingCharactersInSet:[NSCharacterSet whitespaceCharacterSet]]; + NSString *value = nil; + + if ([components count] != 2) + { + if ([key isEqualToString:[NSString stringWithUTF8String:"KERNEL_STATIC"]] || + [key isEqualToString:[NSString stringWithUTF8String:"IS_APPLE_SILICON"]] || + [key isEqualToString:[NSString stringWithUTF8String:"DYNAMIC_LOCAL"]] || + [key isEqualToString:[NSString stringWithUTF8String:"_unroll"]] || + [key isEqualToString:[NSString stringWithUTF8String:"NO_UNROLL"]] || + [key isEqualToString:[NSString stringWithUTF8String:"FORCE_DISABLE_SHM"]]) + { + value = @"1"; + } + else + { + //event_log_warning (hashcat_ctx, "%s(): skipping malformed build option: %s", __func__, [key UTF8String]); + + continue; + } + } + else + { + value = [components[1] stringByTrimmingCharactersInSet:[NSCharacterSet whitespaceCharacterSet]]; + } + + [build_options_dict setObject:value forKey:key]; + } + + // if set, add INCLUDE_PATH to hack Apple kernel build from source limitation on -I usage + if (include_path != nil) + { + NSString *path_key = @"INCLUDE_PATH"; + NSString *path_value = [NSString stringWithCString: include_path encoding: NSUTF8StringEncoding]; + + [build_options_dict setObject:path_value forKey:path_key]; + } + + //NSLog(@"Dict:\n%@", build_options_dict); + + return 0; +} + +int mtl_init (void *hashcat_ctx) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + MTL_PTR *mtl = (MTL_PTR *) backend_ctx->mtl; + + memset (mtl, 0, sizeof (MTL_PTR)); + + mtl->devices = nil; + + if (MTLCreateSystemDefaultDevice() == nil) + { + event_log_error (hashcat_ctx, "Metal is not supported on this computer"); + + return -1; + } + + return 0; +} + +void mtl_close (void *hashcat_ctx) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + MTL_PTR *mtl = (MTL_PTR *) backend_ctx->mtl; + + if (mtl) + { + if (mtl->devices) + { + int count = (int) CFArrayGetCount (mtl->devices); + for (int i = 0; i < count; i++) + { + mtl_device_id device = (mtl_device_id) CFArrayGetValueAtIndex (mtl->devices, i); + if (device != nil) + { + hc_mtlReleaseDevice (hashcat_ctx, device); + } + } + mtl->devices = nil; + } + + hcfree (backend_ctx->mtl); + + backend_ctx->mtl = NULL; + } +} + +int hc_mtlDeviceGetCount (void *hashcat_ctx, int *count) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + MTL_PTR *mtl = (MTL_PTR *) backend_ctx->mtl; + + if (mtl == nil) return -1; + + CFArrayRef devices = (CFArrayRef) MTLCopyAllDevices(); + + if (devices == nil) + { + event_log_error (hashcat_ctx, "metalDeviceGetCount(): empty device objects"); + + return -1; + } + + mtl->devices = devices; + + *count = CFArrayGetCount (devices); + + return 0; +} + +int hc_mtlDeviceGet (void *hashcat_ctx, 
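The hc_mtlBuildOptionsToDict() routine above converts an OpenCL-style option string into the preprocessorMacros dictionary that MTLCompileOptions expects: "-D " and "-I OpenCL " are stripped, KEY=VALUE tokens are split on '=', the known value-less flags map to "1", and the optional include path is carried as INCLUDE_PATH. A worked example under those rules (the option string, path and function name are illustrative only):

#import <Foundation/Foundation.h>
#import <Metal/Metal.h>

// Worked example: the option string
//   "-D KERN_TYPE=1500 -D _unroll -I OpenCL -D KERNEL_STATIC"
// plus an include path is reduced to the equivalent of:
static MTLCompileOptions *example_compile_options (void)
{
  MTLCompileOptions *opts = [MTLCompileOptions new];

  opts.preprocessorMacros = @{ @"KERN_TYPE"     : @"1500",
                               @"_unroll"       : @"1",
                               @"KERNEL_STATIC" : @"1",
                               @"INCLUDE_PATH"  : @"/path/to/OpenCL" }; // cpath argument, if given

  return opts;
}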
mtl_device_id *metal_device, int ordinal) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + MTL_PTR *mtl = (MTL_PTR *) backend_ctx->mtl; + + if (mtl == nil) return -1; + + if (mtl->devices == nil) + { + event_log_error (hashcat_ctx, "%s(): invalid devices pointer", __func__); + + return -1; + } + + mtl_device_id device = (mtl_device_id) CFArrayGetValueAtIndex (mtl->devices, ordinal); + + if (device == nil) + { + event_log_error (hashcat_ctx, "metalDeviceGet(): invalid index"); + + return -1; + } + + *metal_device = device; + + return 0; +} + +int hc_mtlDeviceGetName (void *hashcat_ctx, char *name, size_t len, mtl_device_id metal_device) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + MTL_PTR *mtl = (MTL_PTR *) backend_ctx->mtl; + + if (mtl == NULL) return -1; + + if (metal_device == nil) + { + event_log_error (hashcat_ctx, "%s(): invalid device", __func__); + + return -1; + } + + if (len <= 0) + { + event_log_error (hashcat_ctx, "%s(): buffer length", __func__); + + return -1; + } + + id device_name_ptr = [metal_device name]; + + if (device_name_ptr == nil) + { + event_log_error (hashcat_ctx, "%s(): failed to get device name", __func__); + + return -1; + } + + const char *device_name_str = [device_name_ptr UTF8String]; + + if (device_name_str == nil) + { + event_log_error (hashcat_ctx, "%s(): failed to get UTF8String from device name", __func__); + + return -1; + } + + const size_t device_name_len = strlen (device_name_str); + + if (device_name_len <= 0) + { + event_log_error (hashcat_ctx, "%s(): invalid device name length", __func__); + + return -1; + } + + if (strncpy (name, device_name_str, (device_name_len > len) ? len : device_name_len) != name) + { + event_log_error (hashcat_ctx, "%s(): strncpy failed", __func__); + + return -1; + } + + return 0; +} + +int hc_mtlDeviceGetAttribute (void *hashcat_ctx, int *pi, metalDeviceAttribute_t attrib, mtl_device_id metal_device) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + MTL_PTR *mtl = (MTL_PTR *) backend_ctx->mtl; + + if (mtl == NULL) return -1; + + if (metal_device == nil) + { + event_log_error (hashcat_ctx, "%s(): invalid device", __func__); + + return -1; + } + + uint64_t val64 = 0; + bool valBool = false; + int valInt = 0; + + switch (attrib) + { + case MTL_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT: + // works only with Apple Silicon + if (iokit_getGPUCore (hashcat_ctx, pi) == false) *pi = 1; + break; + + case MTL_DEVICE_ATTRIBUTE_UNIFIED_MEMORY: + *pi = 0; + + SEL hasUnifiedMemorySelector = NSSelectorFromString(@"hasUnifiedMemory"); + + hc_mtlInvocationHelper (metal_device, hasUnifiedMemorySelector, &valBool); + + *pi = (valBool == true) ? 
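The UNIFIED_MEMORY case above goes through hc_mtlInvocationHelper() rather than referencing the property directly, presumably so the file still compiles against SDKs whose headers do not declare the selector. Expanded into a standalone sketch (hypothetical function name, same NSInvocation pattern as the helper defined earlier in this file):

#import <Foundation/Foundation.h>
#import <Metal/Metal.h>
#import <objc/runtime.h>
#include <stdbool.h>

// Sketch: query hasUnifiedMemory without a compile-time dependency on the
// property declaration; returns 0 when the selector is unavailable.
static int example_has_unified_memory (id<MTLDevice> device)
{
  SEL sel = NSSelectorFromString (@"hasUnifiedMemory");

  if ([device respondsToSelector: sel] == NO) return 0;

  bool val = false;

  NSMethodSignature *sig = [object_getClass (device) instanceMethodSignatureForSelector: sel];
  NSInvocation      *inv = [NSInvocation invocationWithMethodSignature: sig];

  [inv setTarget: device];
  [inv setSelector: sel];
  [inv invoke];
  [inv getReturnValue: &val];

  return (val == true) ? 1 : 0;
}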
1 : 0; + + break; + + case MTL_DEVICE_ATTRIBUTE_WARP_SIZE: + // return a fake size of 32, it will be updated later + *pi = 32; + break; + + case MTL_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR: + *pi = 0; + + if (*pi == 0 && [metal_device supportsFeatureSet:MTL_FEATURESET_MACOS_GPUFAMILY_2_V1] == true) *pi = 2; + if (*pi == 0 && [metal_device supportsFeatureSet:MTL_FEATURESET_MACOS_GPUFAMILY_1_V4] == true) *pi = 1; + if (*pi == 0 && [metal_device supportsFeatureSet:MTL_FEATURESET_MACOS_GPUFAMILY_1_V3] == true) *pi = 1; + if (*pi == 0 && [metal_device supportsFeatureSet:MTL_FEATURESET_MACOS_GPUFAMILY_1_V2] == true) *pi = 1; + if (*pi == 0 && [metal_device supportsFeatureSet:MTL_FEATURESET_MACOS_GPUFAMILY_1_V1] == true) *pi = 1; + + if (*pi == 0) + { + //event_log_error (hashcat_ctx, "%s(): no feature sets supported", __func__); + return -1; + } + + break; + + case MTL_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR: + *pi = 0; + + if (*pi == 0 && [metal_device supportsFeatureSet:MTL_FEATURESET_MACOS_GPUFAMILY_2_V1] == true) *pi = 1; + if (*pi == 0 && [metal_device supportsFeatureSet:MTL_FEATURESET_MACOS_GPUFAMILY_1_V4] == true) *pi = 4; + if (*pi == 0 && [metal_device supportsFeatureSet:MTL_FEATURESET_MACOS_GPUFAMILY_1_V3] == true) *pi = 3; + if (*pi == 0 && [metal_device supportsFeatureSet:MTL_FEATURESET_MACOS_GPUFAMILY_1_V2] == true) *pi = 2; + if (*pi == 0 && [metal_device supportsFeatureSet:MTL_FEATURESET_MACOS_GPUFAMILY_1_V1] == true) *pi = 1; + + if (*pi == 0) + { + //event_log_error (hashcat_ctx, "%s(): no feature sets supported", __func__); + return -1; + } + + break; + + case MTL_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK: + // M1 max is 1024 + // [MTLComputePipelineState maxTotalThreadsPerThreadgroup] + *pi = 1024; + break; + + case MTL_DEVICE_ATTRIBUTE_CLOCK_RATE: + // unknown + *pi = 1000000; + break; + + case MTL_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK: + // 32k + *pi = 32768; + break; + + case MTL_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY: + // Maximum function memory allocation for a buffer in the constant address space + // 64k + *pi = 64 * 1024; + break; + + case MTL_DEVICE_ATTRIBUTE_MAX_TRANSFER_RATE: + val64 = 0; + + SEL maxTransferRateSelector = NSSelectorFromString(@"maxTransferRate"); + + hc_mtlInvocationHelper (metal_device, maxTransferRateSelector, &val64); + + *pi = (val64 == 0) ? 0 : val64 / 125; // kb/s + + break; + + case MTL_DEVICE_ATTRIBUTE_HEADLESS: + valBool = [metal_device isHeadless]; + *pi = (valBool == true) ? 1 : 0; + break; + + case MTL_DEVICE_ATTRIBUTE_LOW_POWER: + valBool = [metal_device isLowPower]; + *pi = (valBool == true) ? 1 : 0; + break; + + case MTL_DEVICE_ATTRIBUTE_REMOVABLE: + valBool = [metal_device isRemovable]; + *pi = (valBool == true) ? 
1 : 0; + break; + + case MTL_DEVICE_ATTRIBUTE_REGISTRY_ID: + *pi = (int) [metal_device registryID]; + break; + + case MTL_DEVICE_ATTRIBUTE_PHYSICAL_LOCATION: + *pi = 0; + + SEL locationSelector = NSSelectorFromString(@"location"); + valInt = 0; + + hc_mtlInvocationHelper (metal_device, locationSelector, &valInt); + + *pi = valInt; + + break; + + case MTL_DEVICE_ATTRIBUTE_LOCATION_NUMBER: + *pi = 0; + + SEL locationNumberSelector = NSSelectorFromString(@"locationNumber"); + + valInt = 0; + hc_mtlInvocationHelper (metal_device, locationNumberSelector, &valInt); + + *pi = valInt; + + break; + + default: + event_log_error (hashcat_ctx, "%s(): unknown attribute (%d)", __func__, attrib); + return -1; + } + + return 0; +} + +int hc_mtlMemGetInfo (void *hashcat_ctx, size_t *mem_free, size_t *mem_total) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + MTL_PTR *mtl = (MTL_PTR *) backend_ctx->mtl; + + if (mtl == NULL) return -1; + + struct vm_statistics64 vm_stats; + vm_size_t page_size = 0; + unsigned int count = HOST_VM_INFO64_COUNT; + + mach_port_t port = mach_host_self(); + + if (host_page_size (port, &page_size) != KERN_SUCCESS) + { + event_log_error (hashcat_ctx, "metalMemGetInfo(): cannot get page_size"); + + return -1; + } + + if (host_statistics64 (port, HOST_VM_INFO64, (host_info64_t) &vm_stats, &count) != KERN_SUCCESS) + { + event_log_error (hashcat_ctx, "metalMemGetInfo(): cannot get vm_stats"); + + return -1; + } + + uint64_t mem_free_tmp = (uint64_t) (vm_stats.free_count - vm_stats.speculative_count) * page_size; + uint64_t mem_used_tmp = (uint64_t) (vm_stats.active_count + vm_stats.inactive_count + vm_stats.wire_count) * page_size; + + *mem_free = (size_t) mem_free_tmp; + *mem_total = (size_t) (mem_free_tmp + mem_used_tmp); + + return 0; +} + +int hc_mtlDeviceMaxMemAlloc (void *hashcat_ctx, size_t *bytes, mtl_device_id metal_device) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + MTL_PTR *mtl = (MTL_PTR *) backend_ctx->mtl; + + if (mtl == NULL) return -1; + + if (metal_device == nil) + { + event_log_error (hashcat_ctx, "%s(): invalid device", __func__); + + return -1; + } + + uint64_t memsize = 0; + + SEL maxBufferLengthSelector = NSSelectorFromString(@"maxBufferLength"); + + if (hc_mtlInvocationHelper (metal_device, maxBufferLengthSelector, &memsize) == -1) return -1; + + if (memsize == 0) + { + event_log_error (hashcat_ctx, "%s(): invalid maxBufferLength", __func__); + + return -1; + } + + *bytes = (size_t) memsize; + + return 0; +} + +int hc_mtlDeviceTotalMem (void *hashcat_ctx, size_t *bytes, mtl_device_id metal_device) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + MTL_PTR *mtl = (MTL_PTR *) backend_ctx->mtl; + + if (mtl == NULL) return -1; + + if (metal_device == nil) + { + event_log_error (hashcat_ctx, "%s(): invalid device", __func__); + + return -1; + } + + uint64_t memsize = 0; + + if (true) + { + memsize = [metal_device recommendedMaxWorkingSetSize]; + } + else + { + size_t len = sizeof (memsize); + + if (sysctlbyname ("hw.memsize", &memsize, &len, NULL, 0) != 0) + { + event_log_error (hashcat_ctx, "%s(): sysctlbyname(hw.memsize) failed", __func__); + + return -1; + } + } + + if (memsize == 0) + { + event_log_error (hashcat_ctx, "%s(): invalid memory size", __func__); + + return -1; + } + + *bytes = (size_t) memsize; + + return 0; +} + +int hc_mtlCreateCommandQueue (void *hashcat_ctx, mtl_device_id metal_device, mtl_command_queue *command_queue) +{ + backend_ctx_t 
*backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + MTL_PTR *mtl = (MTL_PTR *) backend_ctx->mtl; + + if (mtl == NULL) return -1; + + if (metal_device == nil) + { + event_log_error (hashcat_ctx, "%s(): invalid device", __func__); + + return -1; + } + + mtl_command_queue queue = [metal_device newCommandQueue]; + + if (queue == nil) + { + event_log_error (hashcat_ctx, "%s(): failed to create newCommandQueue", __func__); + + return -1; + } + + *command_queue = queue; + + return 0; + +} + +int hc_mtlCreateKernel (void *hashcat_ctx, mtl_device_id metal_device, mtl_library metal_library, const char *func_name, mtl_function *metal_function, mtl_pipeline *metal_pipeline) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + MTL_PTR *mtl = (MTL_PTR *) backend_ctx->mtl; + + if (mtl == NULL) return -1; + + if (metal_device == nil) + { + event_log_error (hashcat_ctx, "%s(): invalid device", __func__); + + return -1; + } + + if (metal_library == nil) + { + event_log_error (hashcat_ctx, "%s(): invalid library", __func__); + + return -1; + } + + if (func_name == nil) + { + event_log_error (hashcat_ctx, "%s(): invalid function name", __func__); + + return -1; + } + + NSError *error = nil; + + NSString *f_name = [NSString stringWithCString: func_name encoding: NSUTF8StringEncoding]; + + mtl_function mtl_func = [metal_library newFunctionWithName: f_name]; + + if (mtl_func == nil) + { + event_log_error (hashcat_ctx, "%s(): failed to create '%s' function", __func__, func_name); + + return -1; + } + + mtl_pipeline mtl_pipe = [metal_device newComputePipelineStateWithFunction: mtl_func error: &error]; + + if (error != nil) + { + event_log_error (hashcat_ctx, "%s(): failed to create '%s' pipeline, %s", __func__, func_name, [[error localizedDescription] UTF8String]); + + return -1; + } + + if (mtl_pipe == nil) + { + event_log_error (hashcat_ctx, "%s(): failed to create '%s' pipeline", __func__, func_name); + + return -1; + } + + *metal_function = mtl_func; + *metal_pipeline = mtl_pipe; + + return 0; +} + +int hc_mtlGetMaxTotalThreadsPerThreadgroup (void *hashcat_ctx, mtl_pipeline metal_pipeline, unsigned int *maxTotalThreadsPerThreadgroup) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + MTL_PTR *mtl = (MTL_PTR *) backend_ctx->mtl; + + if (mtl == NULL) return -1; + + if (metal_pipeline == nil) + { + event_log_error (hashcat_ctx, "%s(): invalid pipeline", __func__); + + return -1; + } + + *maxTotalThreadsPerThreadgroup = [metal_pipeline maxTotalThreadsPerThreadgroup]; + + return 0; +} + +int hc_mtlGetThreadExecutionWidth (void *hashcat_ctx, mtl_pipeline metal_pipeline, unsigned int *threadExecutionWidth) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + MTL_PTR *mtl = (MTL_PTR *) backend_ctx->mtl; + + if (mtl == NULL) return -1; + + if (metal_pipeline == nil) + { + event_log_error (hashcat_ctx, "%s(): invalid pipeline", __func__); + + return -1; + } + + *threadExecutionWidth = [metal_pipeline threadExecutionWidth]; + + return 0; +} + +int hc_mtlCreateBuffer (void *hashcat_ctx, mtl_device_id metal_device, size_t size, void *ptr, mtl_mem *metal_buffer) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + MTL_PTR *mtl = (MTL_PTR *) backend_ctx->mtl; + + if (mtl == NULL) return -1; + + if (metal_device == nil) + { + event_log_error (hashcat_ctx, "%s(): invalid device", __func__); + + return -1; + } + + mtl_mem buf = NULL; + + MTLResourceOptions bufferOptions = 
MTLResourceStorageModeShared; + + if (ptr == NULL) + { + buf = [metal_device newBufferWithLength:size options:bufferOptions]; + } + else + { + buf = [metal_device newBufferWithBytes:ptr length:size options:bufferOptions]; + } + + if (buf == nil) + { + event_log_error (hashcat_ctx, "%s(): %s failed (size: %zu)", __func__, (ptr == NULL) ? "newBufferWithLength" : "newBufferWithBytes", size); + + return -1; + } + + *metal_buffer = buf; + + return 0; +} + +int hc_mtlReleaseMemObject (void *hashcat_ctx, mtl_mem metal_buffer) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + MTL_PTR *mtl = (MTL_PTR *) backend_ctx->mtl; + + if (mtl == NULL) return -1; + + if (metal_buffer == nil) + { + event_log_error (hashcat_ctx, "%s(): invalid metal buffer", __func__); + + return -1; + } + + [metal_buffer setPurgeableState:MTLPurgeableStateEmpty]; + [metal_buffer release]; + + return 0; +} + +int hc_mtlReleaseFunction (void *hashcat_ctx, mtl_function metal_function) +{ + if (metal_function == nil) + { + event_log_error (hashcat_ctx, "%s(): invalid metal function", __func__); + + return -1; + } + + [metal_function release]; + + return 0; +} + +int hc_mtlReleaseLibrary (void *hashcat_ctx, mtl_library metal_library) +{ + if (metal_library == nil) + { + event_log_error (hashcat_ctx, "%s(): invalid metal library", __func__); + + return -1; + } + + [metal_library release]; + metal_library = nil; + + return 0; +} + +int hc_mtlReleaseCommandQueue (void *hashcat_ctx, mtl_command_queue command_queue) +{ + if (command_queue == nil) + { + event_log_error (hashcat_ctx, "%s(): invalid metal command queue", __func__); + + return -1; + } + + [command_queue release]; + command_queue = nil; + + return 0; +} + +int hc_mtlReleaseDevice (void *hashcat_ctx, mtl_device_id metal_device) +{ + if (metal_device == nil) + { + event_log_error (hashcat_ctx, "%s(): invalid metal device", __func__); + + return -1; + } + + [metal_device release]; + metal_device = nil; + + return 0; +} + +// device to device + +int hc_mtlMemcpyDtoD (void *hashcat_ctx, mtl_command_queue command_queue, mtl_mem buf_dst, size_t buf_dst_off, mtl_mem buf_src, size_t buf_src_off, size_t buf_size) +{ + if (command_queue == nil) + { + event_log_error (hashcat_ctx, "%s(): metal command queue is invalid", __func__); + return -1; + } + + if (buf_src == nil) + { + event_log_error (hashcat_ctx, "%s(): metal src buffer is invalid", __func__); + return -1; + } + + if (buf_src_off < 0) + { + event_log_error (hashcat_ctx, "%s(): src buffer offset is invalid", __func__); + return -1; + } + + if (buf_dst == nil) + { + event_log_error (hashcat_ctx, "%s(): metal dst buffer is invalid", __func__); + return -1; + } + + if (buf_dst_off < 0) + { + event_log_error (hashcat_ctx, "%s(): dst buffer offset is invalid", __func__); + return -1; + } + + if (buf_size <= 0) + { + event_log_error (hashcat_ctx, "%s(): buffer size is invalid", __func__); + return -1; + } + + id command_buffer = [command_queue commandBuffer]; + + if (command_buffer == nil) + { + event_log_error (hashcat_ctx, "%s(): failed to create a new command buffer", __func__); + return -1; + } + + id blit_encoder = [command_buffer blitCommandEncoder]; + + if (blit_encoder == nil) + { + event_log_error (hashcat_ctx, "%s(): failed to create a blit command encoder", __func__); + return -1; + } + + // copy + + [blit_encoder copyFromBuffer: buf_src sourceOffset: buf_src_off toBuffer: buf_dst destinationOffset: buf_dst_off size: buf_size]; + + // finish encoding and start the data transfer + + 
[blit_encoder endEncoding]; + [command_buffer commit]; + + // Wait for complete + + [command_buffer waitUntilCompleted]; + + return 0; +} + +// host to device + +int hc_mtlMemcpyHtoD (void *hashcat_ctx, mtl_command_queue command_queue, mtl_mem buf_dst, size_t buf_dst_off, const void *buf_src, size_t buf_size) +{ + if (command_queue == nil) + { + event_log_error (hashcat_ctx, "%s(): metal command queue is invalid", __func__); + return -1; + } + + if (buf_src == nil) + { + event_log_error (hashcat_ctx, "%s(): metal src buffer is invalid", __func__); + return -1; + } + + if (buf_dst == nil) + { + event_log_error (hashcat_ctx, "%s(): host dst buffer is invalid", __func__); + return -1; + } + + if (buf_size <= 0) + { + event_log_error (hashcat_ctx, "%s(): buffer size is invalid", __func__); + return -1; + } + + if (buf_dst_off < 0) + { + event_log_error (hashcat_ctx, "%s(): buffer dst offset is invalid", __func__); + return -1; + } + + void *buf_dst_ptr = [buf_dst contents]; + + if (buf_dst_ptr == nil) + { + event_log_error (hashcat_ctx, "%s(): failed to get metal buffer data pointer", __func__); + + return -1; + } + + if (memcpy (buf_dst_ptr + buf_dst_off, buf_src, buf_size) != buf_dst_ptr + buf_dst_off) + { + event_log_error (hashcat_ctx, "%s(): memcpy failed", __func__); + + return -1; + } + + [buf_dst didModifyRange: NSMakeRange (buf_dst_off, buf_size)]; + + return 0; +} + +// device to host + +int hc_mtlMemcpyDtoH (void *hashcat_ctx, mtl_command_queue command_queue, void *buf_dst, mtl_mem buf_src, size_t buf_src_off, size_t buf_size) +{ + if (command_queue == nil) + { + event_log_error (hashcat_ctx, "%s(): metal command queue is invalid", __func__); + return -1; + } + + if (buf_src == nil) + { + event_log_error (hashcat_ctx, "%s(): metal src buffer is invalid", __func__); + return -1; + } + + if (buf_dst == nil) + { + event_log_error (hashcat_ctx, "%s(): host dst buffer is invalid", __func__); + return -1; + } + + if (buf_size <= 0) + { + event_log_error (hashcat_ctx, "%s(): buffer size is invalid", __func__); + return -1; + } + + id command_buffer = [command_queue commandBuffer]; + + if (command_buffer == nil) + { + event_log_error (hashcat_ctx, "%s(): failed to create a new command buffer", __func__); + return -1; + } + + id blit_encoder = [command_buffer blitCommandEncoder]; + + [blit_encoder synchronizeResource: buf_src]; + + // Finish encoding and start the data transfer to the CPU + + [blit_encoder endEncoding]; + [command_buffer commit]; + + // Wait for complete + + [command_buffer waitUntilCompleted]; + + // get src buf ptr + + void *buf_src_ptr = [buf_src contents]; + + if (buf_src_ptr == nil) + { + event_log_error (hashcat_ctx, "%s(): failed to get metal buffer data pointer", __func__); + + return -1; + } + + if (memcpy (buf_dst, buf_src_ptr + buf_src_off, buf_size) != buf_dst) + { + event_log_error (hashcat_ctx, "%s(): memcpy failed", __func__); + + return -1; + } + + return 0; +} + +int hc_mtlRuntimeGetVersionString (void *hashcat_ctx, char *runtimeVersion_str, size_t *size) +{ + CFURLRef plist_url = CFURLCreateWithFileSystemPath (kCFAllocatorDefault, CFSTR("/System/Library/Frameworks/Metal.framework/Versions/Current/Resources/version.plist"), kCFURLPOSIXPathStyle, false); + + if (plist_url == NULL) + { + event_log_error (hashcat_ctx, "%s(): CFURLCreateWithFileSystemPath() failed\n", __func__); + + return -1; + } + + CFReadStreamRef plist_stream = CFReadStreamCreateWithFile (NULL, plist_url); + + if (plist_stream == NULL) + { + event_log_error (hashcat_ctx, "%s(): 
CFReadStreamCreateWithFile() failed\n", __func__); + + CFRelease (plist_url); + + return -1; + } + + if (CFReadStreamOpen (plist_stream) == false) + { + event_log_error (hashcat_ctx, "%s(): CFReadStreamOpen() failed\n", __func__); + + CFRelease (plist_stream); + CFRelease (plist_url); + + return -1; + } + + CFPropertyListRef plist_prop = CFPropertyListCreateWithStream (NULL, plist_stream, 0, kCFPropertyListImmutable, NULL, NULL); + + if (plist_prop == NULL) + { + event_log_error (hashcat_ctx, "%s(): CFPropertyListCreateWithStream() failed\n", __func__); + + CFReadStreamClose (plist_stream); + + CFRelease (plist_stream); + CFRelease (plist_url); + return -1; + } + + CFStringRef runtime_version_str = CFRetain (CFDictionaryGetValue (plist_prop, CFSTR("CFBundleVersion"))); + + if (runtime_version_str != NULL) + { + if (runtimeVersion_str == NULL) + { + CFIndex len = CFStringGetLength (runtime_version_str); + CFIndex maxSize = CFStringGetMaximumSizeForEncoding (len, kCFStringEncodingUTF8) + 1; + *size = maxSize; + return 0; + } + + CFIndex maxSize = *size; + + if (CFStringGetCString (runtime_version_str, runtimeVersion_str, maxSize, kCFStringEncodingUTF8) == false) + { + event_log_error (hashcat_ctx, "%s(): CFStringGetCString() failed\n", __func__); + + hcfree (runtimeVersion_str); + + return -1; + } + + return 0; + } + + return -1; +} + +int hc_mtlEncodeComputeCommand_pre (void *hashcat_ctx, mtl_pipeline metal_pipeline, mtl_command_queue metal_command_queue, mtl_command_buffer *metal_command_buffer, mtl_command_encoder *metal_command_encoder) +{ + if (metal_pipeline == nil) + { + event_log_error (hashcat_ctx, "%s(): invalid metal_pipeline", __func__); + + return -1; + } + + if (metal_command_queue == nil) + { + event_log_error (hashcat_ctx, "%s(): invalid metal_command_queue", __func__); + + return -1; + } + + id metal_commandBuffer = [metal_command_queue commandBuffer]; + + if (metal_commandBuffer == nil) + { + event_log_error (hashcat_ctx, "%s(): invalid metal_commandBuffer", __func__); + + return -1; + } + + id metal_commandEncoder = [metal_commandBuffer computeCommandEncoder]; + + if (metal_commandEncoder == nil) + { + event_log_error (hashcat_ctx, "%s(): invalid metal_commandBuffer", __func__); + + return -1; + } + + [metal_commandEncoder setComputePipelineState: metal_pipeline]; + + *metal_command_buffer = metal_commandBuffer; + *metal_command_encoder = metal_commandEncoder; + + return 0; +} + +int hc_mtlSetCommandEncoderArg (void *hashcat_ctx, mtl_command_encoder metal_command_encoder, size_t off, size_t idx, mtl_mem buf, void *host_data, size_t host_data_size) +{ + if (metal_command_encoder == nil) + { + event_log_error (hashcat_ctx, "%s(): invalid metal_command_encoder", __func__); + + return -1; + } + + if (buf == nil && host_data == nil) + { + event_log_error (hashcat_ctx, "%s(): invalid buf/host_data", __func__); + + return -1; + } + + if (buf == nil) + { + if (host_data_size <= 0) + { + event_log_error (hashcat_ctx, "%s(): invalid host_data size", __func__); + + return -1; + } + } + else + { + if (off < 0) + { + event_log_error (hashcat_ctx, "%s(): invalid buf off", __func__); + + return -1; + } + } + + if (idx < 0) + { + event_log_error (hashcat_ctx, "%s(): invalid buf/host_data idx", __func__); + + return -1; + } + + if (host_data == nil) + { + [metal_command_encoder setBuffer: buf offset: off atIndex: idx]; + } + else + { + [metal_command_encoder setBytes: host_data length: host_data_size atIndex: idx]; + } + + return 0; +} + +int hc_mtlEncodeComputeCommand (void *hashcat_ctx, 
mtl_command_encoder metal_command_encoder, mtl_command_buffer metal_command_buffer, size_t global_work_size, size_t local_work_size, double *ms) +{ + MTLSize numThreadgroups = {local_work_size, 1, 1}; + MTLSize threadsGroup = {global_work_size, 1, 1}; + + if (metal_command_encoder == nil) + { + event_log_error (hashcat_ctx, "%s(): invalid metal_command_encoder", __func__); + + return -1; + } + + if (metal_command_buffer == nil) + { + event_log_error (hashcat_ctx, "%s(): invalid metal_command_buffer", __func__); + + return -1; + } + + [metal_command_encoder dispatchThreadgroups: threadsGroup threadsPerThreadgroup: numThreadgroups]; + + [metal_command_encoder endEncoding]; + [metal_command_buffer commit]; + [metal_command_buffer waitUntilCompleted]; + + CFTimeInterval myGPUStartTime = 0; + CFTimeInterval myGPUEndTime = 0; + + SEL myGPUStartTimeSelector = NSSelectorFromString(@"GPUStartTime"); + SEL myGPUEndTimeSelector = NSSelectorFromString(@"GPUEndTime"); + + if (hc_mtlInvocationHelper (metal_command_buffer, myGPUStartTimeSelector, &myGPUStartTime) == -1) return -1; + if (hc_mtlInvocationHelper (metal_command_buffer, myGPUEndTimeSelector, &myGPUEndTime) == -1) return -1; + + CFTimeInterval elapsed = myGPUEndTime - myGPUStartTime; + + *ms = (1000.0 * elapsed); + + return 0; +} + +int hc_mtlCreateLibraryWithFile (void *hashcat_ctx, mtl_device_id metal_device, const char *cached_file, mtl_library *metal_library) +{ + NSError *error = nil; + + if (metal_device == nil) + { + event_log_error (hashcat_ctx, "%s(): invalid metal device", __func__); + + return -1; + } + + if (cached_file == nil) + { + event_log_error (hashcat_ctx, "%s(): invalid metallib", __func__); + + return -1; + } + + NSString *k_string = [NSString stringWithCString: cached_file encoding: NSUTF8StringEncoding]; + + if (k_string != nil) + { + id r = [metal_device newLibraryWithFile: k_string error: &error]; + + if (error != nil) + { + event_log_error (hashcat_ctx, "%s(): failed to create metal library from metallib, %s", __func__, [[error localizedDescription] UTF8String]); + return -1; + } + + *metal_library = r; + + return 0; + } + + return -1; +} + +int hc_mtlCreateLibraryWithSource (void *hashcat_ctx, mtl_device_id metal_device, const char *kernel_sources, const char *build_options_buf, const char *cpath, mtl_library *metal_library) +{ + NSError *error = nil; + + NSString *k_string = [NSString stringWithCString: kernel_sources encoding: NSUTF8StringEncoding]; + + if (k_string != nil) + { + MTLCompileOptions *compileOptions = [MTLCompileOptions new]; + + NSMutableDictionary *build_options_dict = nil; + + if (build_options_buf != nil) + { + //printf("using build_opts from arg:\n%s\n", build_options_buf); + + build_options_dict = [NSMutableDictionary dictionary]; //[[NSMutableDictionary alloc] init]; + + if (hc_mtlBuildOptionsToDict (hashcat_ctx, build_options_buf, cpath, build_options_dict) == -1) + { + event_log_error (hashcat_ctx, "%s(): failed to build options dictionary", __func__); + + [build_options_dict release]; + return -1; + } + + compileOptions.preprocessorMacros = build_options_dict; + } + + // todo: detect current os version and choose the right +// compileOptions.languageVersion = MTL_LANGUAGEVERSION_2_3; +/* + if (@available(macOS 12.0, *)) + { + compileOptions.languageVersion = MTL_LANGUAGEVERSION_2_4; + } + else if (@available(macOS 11.0, *)) + { + compileOptions.languageVersion = MTL_LANGUAGEVERSION_2_3; + } + else if (@available(macOS 10.15, *)) + { + compileOptions.languageVersion = MTL_LANGUAGEVERSION_2_2; 
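Taken together, hc_mtlEncodeComputeCommand_pre(), hc_mtlSetCommandEncoderArg() and hc_mtlEncodeComputeCommand() form the per-launch dispatch path. A reduced usage sketch under the signatures shown above (the backend.c call sites are not part of this section, so the variable names and work sizes are illustrative):

#include "ext_metal.h" // mtl_* typedefs and the hc_mtl* prototypes from this patch
#include <stddef.h>
#include <stdint.h>

// Sketch: one kernel launch through the wrappers defined in this file.
// 'pipeline', 'queue' and 'd_pws_buf' stand in for device_param fields.
static int example_launch (void *hashcat_ctx, mtl_pipeline pipeline, mtl_command_queue queue, mtl_mem d_pws_buf, uint32_t kernel_param, size_t work_size, size_t threads)
{
  mtl_command_buffer  cb = NULL;
  mtl_command_encoder ce = NULL;

  double ms = 0;

  if (hc_mtlEncodeComputeCommand_pre (hashcat_ctx, pipeline, queue, &cb, &ce) == -1) return -1;

  // slot 0: a device buffer; slot 1: a small host-side constant block
  if (hc_mtlSetCommandEncoderArg (hashcat_ctx, ce, 0, 0, d_pws_buf, NULL, 0) == -1) return -1;
  if (hc_mtlSetCommandEncoderArg (hashcat_ctx, ce, 0, 1, NULL, &kernel_param, sizeof (kernel_param)) == -1) return -1;

  // dispatches 'work_size' threadgroups of 'threads' threads each and
  // returns the GPUStartTime/GPUEndTime delta in 'ms'
  if (hc_mtlEncodeComputeCommand (hashcat_ctx, ce, cb, work_size, threads, &ms) == -1) return -1;

  return 0;
}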
+ } + else if (@available(macOS 10.14, *)) + { + compileOptions.languageVersion = MTL_LANGUAGEVERSION_2_1; + } + else if (@available(macOS 10.13, *)) + { + compileOptions.languageVersion = MTL_LANGUAGEVERSION_2_0; + } + else if (@available(macOS 10.12, *)) + { + compileOptions.languageVersion = MTL_LANGUAGEVERSION_1_2; + } + else if (@available(macOS 10.11, *)) + { + compileOptions.languageVersion = MTL_LANGUAGEVERSION_1_1; + } +*/ + id r = [metal_device newLibraryWithSource: k_string options: compileOptions error: &error]; + + [compileOptions release]; + compileOptions = nil; + + if (build_options_dict != nil) + { + [build_options_dict release]; + build_options_dict = nil; + } + + if (error != nil) + { + event_log_error (hashcat_ctx, "%s(): failed to create metal library, %s", __func__, [[error localizedDescription] UTF8String]); + + return -1; + } + + *metal_library = r; + + return 0; + } + + return -1; +} diff --git a/src/hashes.c b/src/hashes.c index ffb47b182..3ec190812 100644 --- a/src/hashes.c +++ b/src/hashes.c @@ -355,6 +355,20 @@ int check_hash (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, pla } } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + rc = hc_mtlMemcpyDtoH (hashcat_ctx, device_param->metal_command_queue, tmps, device_param->metal_d_tmps, plain->gidvid * hashconfig->tmp_size, hashconfig->tmp_size); + + if (rc == -1) + { + hcfree (tmps); + + return -1; + } + } + #endif + if (device_param->is_opencl == true) { rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_tmps, CL_FALSE, plain->gidvid * hashconfig->tmp_size, hashconfig->tmp_size, tmps, 0, NULL, &opencl_event); @@ -574,6 +588,13 @@ int check_cracked (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param) if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyDtoH (hashcat_ctx, device_param->metal_command_queue, &num_cracked, device_param->metal_d_result, 0, sizeof (u32)) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { /* blocking */ @@ -624,6 +645,20 @@ int check_cracked (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param) } } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + rc = hc_mtlMemcpyDtoH (hashcat_ctx, device_param->metal_command_queue, cracked, device_param->metal_d_plain_bufs, 0, num_cracked * sizeof (plain_t)); + + if (rc == -1) + { + hcfree (cracked); + + return -1; + } + } + #endif + if (device_param->is_opencl == true) { /* blocking */ @@ -703,6 +738,18 @@ int check_cracked (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param) } } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + rc = run_metal_kernel_memset32 (hashcat_ctx, device_param, device_param->metal_d_digests_shown, salt_buf->digests_offset * sizeof (u32), 0, salt_buf->digests_cnt * sizeof (u32)); + + if (rc == -1) + { + break; + } + } + #endif + if (device_param->is_opencl == true) { /* NOTE: run_opencl_kernel_bzero() does not handle buffer offset */ @@ -751,6 +798,13 @@ int check_cracked (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param) if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_result, sizeof (u32)) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_result, sizeof (u32)) == -1) return -1; + } + 
#endif + if (device_param->is_opencl == true) { if (run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_result, sizeof (u32)) == -1) return -1; diff --git a/src/modules/module_01500.c b/src/modules/module_01500.c index 35edcf92b..c4ccbc51c 100644 --- a/src/modules/module_01500.c +++ b/src/modules/module_01500.c @@ -151,7 +151,7 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY { if ((user_options->attack_mode == ATTACK_MODE_BF) && (hashes->salts_cnt == 1) && (user_options->slow_candidates == false)) { - hc_asprintf (&jit_build_options, "-DDESCRYPT_SALT=%u", hashes->salts_buf[0].salt_buf[0] & 0xfff); + hc_asprintf (&jit_build_options, "-D DESCRYPT_SALT=%u", hashes->salts_buf[0].salt_buf[0] & 0xfff); } return jit_build_options; @@ -161,7 +161,7 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY { if ((user_options->attack_mode == ATTACK_MODE_BF) && (hashes->salts_cnt == 1) && (user_options->slow_candidates == false)) { - hc_asprintf (&jit_build_options, "-DDESCRYPT_SALT=%u -D _unroll", hashes->salts_buf[0].salt_buf[0] & 0xfff); + hc_asprintf (&jit_build_options, "-D DESCRYPT_SALT=%u -D _unroll", hashes->salts_buf[0].salt_buf[0] & 0xfff); } } // ROCM @@ -169,7 +169,7 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY { if ((user_options->attack_mode == ATTACK_MODE_BF) && (hashes->salts_cnt == 1) && (user_options->slow_candidates == false)) { - hc_asprintf (&jit_build_options, "-DDESCRYPT_SALT=%u -D _unroll", hashes->salts_buf[0].salt_buf[0] & 0xfff); + hc_asprintf (&jit_build_options, "-D DESCRYPT_SALT=%u -D _unroll", hashes->salts_buf[0].salt_buf[0] & 0xfff); } } // ROCM @@ -177,7 +177,7 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY { if ((user_options->attack_mode == ATTACK_MODE_BF) && (hashes->salts_cnt == 1) && (user_options->slow_candidates == false)) { - hc_asprintf (&jit_build_options, "-DDESCRYPT_SALT=%u -D _unroll -fno-experimental-new-pass-manager", hashes->salts_buf[0].salt_buf[0] & 0xfff); + hc_asprintf (&jit_build_options, "-D DESCRYPT_SALT=%u -D _unroll -fno-experimental-new-pass-manager", hashes->salts_buf[0].salt_buf[0] & 0xfff); } else { @@ -188,7 +188,7 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY { if ((user_options->attack_mode == ATTACK_MODE_BF) && (hashes->salts_cnt == 1) && (user_options->slow_candidates == false)) { - hc_asprintf (&jit_build_options, "-DDESCRYPT_SALT=%u", hashes->salts_buf[0].salt_buf[0] & 0xfff); + hc_asprintf (&jit_build_options, "-D DESCRYPT_SALT=%u", hashes->salts_buf[0].salt_buf[0] & 0xfff); } } diff --git a/src/modules/module_06211.c b/src/modules/module_06211.c index 4217f28ed..43b455dba 100644 --- a/src/modules/module_06211.c +++ b/src/modules/module_06211.c @@ -77,7 +77,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_06212.c b/src/modules/module_06212.c index 3d482b1b4..f610dc775 100644 --- a/src/modules/module_06212.c +++ b/src/modules/module_06212.c @@ -77,7 +77,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X 
Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_06213.c b/src/modules/module_06213.c index d6c164198..04803cc70 100644 --- a/src/modules/module_06213.c +++ b/src/modules/module_06213.c @@ -77,7 +77,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_06241.c b/src/modules/module_06241.c index 752cbf39c..8189b3fc6 100644 --- a/src/modules/module_06241.c +++ b/src/modules/module_06241.c @@ -78,7 +78,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_06242.c b/src/modules/module_06242.c index ee63223ce..6f93913dc 100644 --- a/src/modules/module_06242.c +++ b/src/modules/module_06242.c @@ -78,7 +78,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_06243.c b/src/modules/module_06243.c index acd26004a..164fe982f 100644 --- a/src/modules/module_06243.c +++ b/src/modules/module_06243.c @@ -78,7 +78,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_06800.c b/src/modules/module_06800.c index a897b4f69..d76376921 100644 --- a/src/modules/module_06800.c +++ b/src/modules/module_06800.c @@ -56,7 +56,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_08900.c b/src/modules/module_08900.c index bca25fc39..06507a856 100644 --- a/src/modules/module_08900.c +++ b/src/modules/module_08900.c @@ -56,7 +56,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == 
VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } // amdgpu-pro-20.50-1234664-ubuntu-20.04 (legacy) @@ -266,7 +269,7 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY char *jit_build_options = NULL; - hc_asprintf (&jit_build_options, "-DSCRYPT_N=%u -DSCRYPT_R=%u -DSCRYPT_P=%u -DSCRYPT_TMTO=%" PRIu64 " -DSCRYPT_TMP_ELEM=%" PRIu64, + hc_asprintf (&jit_build_options, "-D SCRYPT_N=%u -D SCRYPT_R=%u -D SCRYPT_P=%u -D SCRYPT_TMTO=%" PRIu64 " -D SCRYPT_TMP_ELEM=%" PRIu64, hashes->salts_buf[0].scrypt_N, hashes->salts_buf[0].scrypt_r, hashes->salts_buf[0].scrypt_p, diff --git a/src/modules/module_09300.c b/src/modules/module_09300.c index cb61a0f6c..74c4fe913 100644 --- a/src/modules/module_09300.c +++ b/src/modules/module_09300.c @@ -56,7 +56,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; @@ -258,7 +261,7 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY char *jit_build_options = NULL; - hc_asprintf (&jit_build_options, "-DSCRYPT_N=%u -DSCRYPT_R=%u -DSCRYPT_P=%u -DSCRYPT_TMTO=%" PRIu64 " -DSCRYPT_TMP_ELEM=%" PRIu64, + hc_asprintf (&jit_build_options, "-D SCRYPT_N=%u -D SCRYPT_R=%u -D SCRYPT_P=%u -D SCRYPT_TMTO=%" PRIu64 " -D SCRYPT_TMP_ELEM=%" PRIu64, hashes->salts_buf[0].scrypt_N, hashes->salts_buf[0].scrypt_r, hashes->salts_buf[0].scrypt_p, diff --git a/src/modules/module_09500.c b/src/modules/module_09500.c index cbbaca530..5fb4be868 100644 --- a/src/modules/module_09500.c +++ b/src/modules/module_09500.c @@ -62,7 +62,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_09600.c b/src/modules/module_09600.c index a9831b9aa..d9753cbe2 100644 --- a/src/modules/module_09600.c +++ b/src/modules/module_09600.c @@ -65,7 +65,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_12700.c b/src/modules/module_12700.c index 419ab38e9..f69858246 100644 --- a/src/modules/module_12700.c +++ b/src/modules/module_12700.c @@ -60,7 +60,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_13711.c 
b/src/modules/module_13711.c index df4b038ef..7d956350f 100644 --- a/src/modules/module_13711.c +++ b/src/modules/module_13711.c @@ -88,7 +88,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_13712.c b/src/modules/module_13712.c index 585fde139..3093e80e2 100644 --- a/src/modules/module_13712.c +++ b/src/modules/module_13712.c @@ -88,7 +88,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_13713.c b/src/modules/module_13713.c index c2d573f4d..3a53a4ef5 100644 --- a/src/modules/module_13713.c +++ b/src/modules/module_13713.c @@ -88,7 +88,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_13721.c b/src/modules/module_13721.c index 2b54cf267..9db377617 100644 --- a/src/modules/module_13721.c +++ b/src/modules/module_13721.c @@ -89,7 +89,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } if (device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) diff --git a/src/modules/module_13722.c b/src/modules/module_13722.c index 4b3242ca4..4a899279e 100644 --- a/src/modules/module_13722.c +++ b/src/modules/module_13722.c @@ -89,7 +89,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } if (device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) diff --git a/src/modules/module_13723.c b/src/modules/module_13723.c index 3d28e8437..4871ca55a 100644 --- a/src/modules/module_13723.c +++ b/src/modules/module_13723.c @@ -89,7 +89,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } if (device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) diff 
--git a/src/modules/module_13733.c b/src/modules/module_13733.c index ac8b7c030..da0dbc473 100644 --- a/src/modules/module_13733.c +++ b/src/modules/module_13733.c @@ -89,7 +89,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AppleM1, OpenCL, MTLCompilerService never-end (pure/optimized kernel) if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_13741.c b/src/modules/module_13741.c index f6df663e8..74c0283fd 100644 --- a/src/modules/module_13741.c +++ b/src/modules/module_13741.c @@ -89,7 +89,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_13742.c b/src/modules/module_13742.c index 59aca8d53..0206b424b 100644 --- a/src/modules/module_13742.c +++ b/src/modules/module_13742.c @@ -89,7 +89,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_13743.c b/src/modules/module_13743.c index cf58c6d44..093fbfd1f 100644 --- a/src/modules/module_13743.c +++ b/src/modules/module_13743.c @@ -89,7 +89,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_13751.c b/src/modules/module_13751.c index fb999023a..57435bca3 100644 --- a/src/modules/module_13751.c +++ b/src/modules/module_13751.c @@ -88,7 +88,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_13752.c b/src/modules/module_13752.c index 02fe05693..6cc3b9153 100644 --- a/src/modules/module_13752.c +++ b/src/modules/module_13752.c @@ -88,7 +88,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_13753.c b/src/modules/module_13753.c index 443ba0274..f5fefbdc1 100644 --- 
a/src/modules/module_13753.c +++ b/src/modules/module_13753.c @@ -88,7 +88,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_13761.c b/src/modules/module_13761.c index dd686887a..bb18d91f8 100644 --- a/src/modules/module_13761.c +++ b/src/modules/module_13761.c @@ -89,7 +89,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_13762.c b/src/modules/module_13762.c index 116db35af..c75b98a49 100644 --- a/src/modules/module_13762.c +++ b/src/modules/module_13762.c @@ -89,7 +89,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_13763.c b/src/modules/module_13763.c index 070b8df21..43fac2ef3 100644 --- a/src/modules/module_13763.c +++ b/src/modules/module_13763.c @@ -89,7 +89,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_14700.c b/src/modules/module_14700.c index 5ce1ee308..8c447a784 100644 --- a/src/modules/module_14700.c +++ b/src/modules/module_14700.c @@ -66,7 +66,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_15200.c b/src/modules/module_15200.c index 976f6e349..66a994216 100644 --- a/src/modules/module_15200.c +++ b/src/modules/module_15200.c @@ -56,7 +56,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_15700.c b/src/modules/module_15700.c index 5f97873af..c448572e1 100644 --- a/src/modules/module_15700.c +++ b/src/modules/module_15700.c @@ -279,7 +279,7 @@ char *module_jit_build_options 
(MAYBE_UNUSED const hashconfig_t *hashconfig, MAY char *jit_build_options = NULL; - hc_asprintf (&jit_build_options, "-DSCRYPT_N=%u -DSCRYPT_R=%u -DSCRYPT_P=%u -DSCRYPT_TMTO=%" PRIu64 " -DSCRYPT_TMP_ELEM=%" PRIu64, + hc_asprintf (&jit_build_options, "-D SCRYPT_N=%u -D SCRYPT_R=%u -D SCRYPT_P=%u -D SCRYPT_TMTO=%" PRIu64 " -D SCRYPT_TMP_ELEM=%" PRIu64, hashes->salts_buf[0].scrypt_N, hashes->salts_buf[0].scrypt_r, hashes->salts_buf[0].scrypt_p, diff --git a/src/modules/module_18900.c b/src/modules/module_18900.c index 436afdfd8..246ec4b46 100644 --- a/src/modules/module_18900.c +++ b/src/modules/module_18900.c @@ -71,7 +71,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_19600.c b/src/modules/module_19600.c index 29ccfe263..1e9861ad7 100644 --- a/src/modules/module_19600.c +++ b/src/modules/module_19600.c @@ -71,7 +71,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_19700.c b/src/modules/module_19700.c index beab1009b..b47ec6e40 100644 --- a/src/modules/module_19700.c +++ b/src/modules/module_19700.c @@ -71,7 +71,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_19800.c b/src/modules/module_19800.c index c7a50985a..5cab711b1 100644 --- a/src/modules/module_19800.c +++ b/src/modules/module_19800.c @@ -71,7 +71,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_19900.c b/src/modules/module_19900.c index 7b56ae41e..7a09d0f68 100644 --- a/src/modules/module_19900.c +++ b/src/modules/module_19900.c @@ -71,7 +71,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_20011.c b/src/modules/module_20011.c index 32b4c2d29..6a6f917d3 100644 --- a/src/modules/module_20011.c +++ b/src/modules/module_20011.c @@ -68,7 +68,10 @@ bool module_unstable_warning (MAYBE_UNUSED const 
hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_20012.c b/src/modules/module_20012.c index 4c81a7ccc..f6ef86149 100644 --- a/src/modules/module_20012.c +++ b/src/modules/module_20012.c @@ -68,7 +68,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_20013.c b/src/modules/module_20013.c index f45e68ee7..d92ad4ca8 100644 --- a/src/modules/module_20013.c +++ b/src/modules/module_20013.c @@ -68,7 +68,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_22700.c b/src/modules/module_22700.c index 0aae58e78..462128712 100644 --- a/src/modules/module_22700.c +++ b/src/modules/module_22700.c @@ -266,7 +266,7 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY char *jit_build_options = NULL; - hc_asprintf (&jit_build_options, "-DSCRYPT_N=%u -DSCRYPT_R=%u -DSCRYPT_P=%u -DSCRYPT_TMTO=%" PRIu64 " -DSCRYPT_TMP_ELEM=%" PRIu64, + hc_asprintf (&jit_build_options, "-D SCRYPT_N=%u -D SCRYPT_R=%u -D SCRYPT_P=%u -D SCRYPT_TMTO=%" PRIu64 " -D SCRYPT_TMP_ELEM=%" PRIu64, hashes->salts_buf[0].scrypt_N, hashes->salts_buf[0].scrypt_r, hashes->salts_buf[0].scrypt_p, diff --git a/src/modules/module_23100.c b/src/modules/module_23100.c index db69c4947..ad5b36b95 100644 --- a/src/modules/module_23100.c +++ b/src/modules/module_23100.c @@ -67,7 +67,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_24500.c b/src/modules/module_24500.c index b6d7c380f..3dad9ce8d 100644 --- a/src/modules/module_24500.c +++ b/src/modules/module_24500.c @@ -68,7 +68,10 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { - return true; + if (device_param->is_metal == false) + { + return true; + } } return false; diff --git a/src/modules/module_27700.c b/src/modules/module_27700.c index 346b9f73d..ce33d36cb 100644 --- a/src/modules/module_27700.c +++ b/src/modules/module_27700.c @@ -264,7 +264,7 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t 
*hashconfig, MAY char *jit_build_options = NULL; - hc_asprintf (&jit_build_options, "-DSCRYPT_N=%u -DSCRYPT_R=%u -DSCRYPT_P=%u -DSCRYPT_TMTO=%" PRIu64 " -DSCRYPT_TMP_ELEM=%" PRIu64, + hc_asprintf (&jit_build_options, "-D SCRYPT_N=%u -D SCRYPT_R=%u -D SCRYPT_P=%u -D SCRYPT_TMTO=%" PRIu64 " -D SCRYPT_TMP_ELEM=%" PRIu64, hashes->salts_buf[0].scrypt_N, hashes->salts_buf[0].scrypt_r, hashes->salts_buf[0].scrypt_p, diff --git a/src/modules/module_28200.c b/src/modules/module_28200.c index 648a26403..c262bb55f 100644 --- a/src/modules/module_28200.c +++ b/src/modules/module_28200.c @@ -258,7 +258,7 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY char *jit_build_options = NULL; - hc_asprintf (&jit_build_options, "-DSCRYPT_N=%u -DSCRYPT_R=%u -DSCRYPT_P=%u -DSCRYPT_TMTO=%" PRIu64 " -DSCRYPT_TMP_ELEM=%" PRIu64, + hc_asprintf (&jit_build_options, "-D SCRYPT_N=%u -D SCRYPT_R=%u -D SCRYPT_P=%u -D SCRYPT_TMTO=%" PRIu64 " -D SCRYPT_TMP_ELEM=%" PRIu64, hashes->salts_buf[0].scrypt_N, hashes->salts_buf[0].scrypt_r, hashes->salts_buf[0].scrypt_p, diff --git a/src/selftest.c b/src/selftest.c index 28b7b6d4f..977c38682 100644 --- a/src/selftest.c +++ b/src/selftest.c @@ -39,6 +39,15 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param device_param->kernel_params[18] = &device_param->hip_d_st_esalts_buf; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + device_param->kernel_params[15] = device_param->metal_d_st_digests_buf; + device_param->kernel_params[17] = device_param->metal_d_st_salts_buf; + device_param->kernel_params[18] = device_param->metal_d_st_esalts_buf; + } + #endif + if (device_param->is_opencl == true) { device_param->kernel_params[15] = &device_param->opencl_d_st_digests_buf; @@ -105,6 +114,13 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_pws_buf, &pw, 1 * sizeof (pw_t), device_param->hip_stream) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_pws_buf, 0, &pw, 1 * sizeof (pw_t)) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_FALSE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL) == -1) return -1; @@ -143,6 +159,13 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_pws_buf, &pw, 1 * sizeof (pw_t), device_param->hip_stream) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_pws_buf, 0, &pw, 1 * sizeof (pw_t)) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_FALSE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL) == -1) return -1; @@ -210,6 +233,15 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_pws_buf, &pw, 1 * sizeof (pw_t), device_param->hip_stream) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, 
device_param->metal_d_combs_c, 0, &comb, 1 * sizeof (pw_t)) == -1) return -1; + + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_pws_buf, 0, &pw, 1 * sizeof (pw_t)) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_combs_c, CL_FALSE, 0, 1 * sizeof (pw_t), &comb, 0, NULL, NULL) == -1) return -1; @@ -248,6 +280,13 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_pws_buf, &pw, 1 * sizeof (pw_t), device_param->hip_stream) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_pws_buf, 0, &pw, 1 * sizeof (pw_t)) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_FALSE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL) == -1) return -1; @@ -302,6 +341,13 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_bfs_c, &bf, 1 * sizeof (bf_t), device_param->hip_stream) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_bfs_c, 0, &bf, 1 * sizeof (bf_t)) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bfs_c, CL_FALSE, 0, 1 * sizeof (bf_t), &bf, 0, NULL, NULL) == -1) return -1; @@ -401,6 +447,13 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_pws_buf, &pw, 1 * sizeof (pw_t), device_param->hip_stream) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_pws_buf, 0, &pw, 1 * sizeof (pw_t)) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_FALSE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL) == -1) return -1; @@ -432,6 +485,13 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_pws_buf, &pw, 1 * sizeof (pw_t), device_param->hip_stream) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_pws_buf, 0, &pw, 1 * sizeof (pw_t)) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_FALSE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL) == -1) return -1; @@ -487,6 +547,13 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param if (run_hip_kernel_utf8toutf16le (hashcat_ctx, device_param, device_param->hip_d_pws_buf, 1) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (run_metal_kernel_utf8toutf16le 
(hashcat_ctx, device_param, device_param->metal_d_pws_buf, 1) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { if (run_opencl_kernel_utf8toutf16le (hashcat_ctx, device_param, device_param->opencl_d_pws_buf, 1) == -1) return -1; @@ -513,6 +580,13 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyDtoH (hashcat_ctx, device_param->metal_command_queue, device_param->hooks_buf, device_param->metal_d_hooks, 0, device_param->size_hooks) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { /* blocking */ @@ -531,6 +605,13 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_hooks, device_param->hooks_buf, device_param->size_hooks, device_param->hip_stream) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_hooks, 0, device_param->hooks_buf, device_param->size_hooks) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_FALSE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL) == -1) return -1; @@ -591,6 +672,13 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyDtoH (hashcat_ctx, device_param->metal_command_queue, device_param->hooks_buf, device_param->metal_d_hooks, 0, device_param->size_hooks) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { /* blocking */ @@ -609,6 +697,13 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_hooks, device_param->hooks_buf, device_param->size_hooks, device_param->hip_stream) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, device_param->metal_d_hooks, 0, device_param->hooks_buf, device_param->size_hooks) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_FALSE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL) == -1) return -1; @@ -701,6 +796,13 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param if (hc_hipEventRecord (hashcat_ctx, device_param->hip_event3, device_param->hip_stream) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyDtoH (hashcat_ctx, device_param->metal_command_queue, &num_cracked, device_param->metal_d_result, 0, sizeof (u32)) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { if (hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_result, CL_FALSE, 0, sizeof (u32), &num_cracked, 0, NULL, &opencl_event) == -1) return -1; @@ -747,6 +849,22 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t 
*device_param if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_result, device_param->size_results) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + device_param->kernel_params[15] = device_param->metal_d_digests_buf; + device_param->kernel_params[17] = device_param->metal_d_salt_bufs; + device_param->kernel_params[18] = device_param->metal_d_esalt_bufs; + + if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_pws_buf, device_param->size_pws) == -1) return -1; + if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_tmps, device_param->size_tmps) == -1) return -1; + if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_hooks, device_param->size_hooks) == -1) return -1; + if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_plain_bufs, device_param->size_plains) == -1) return -1; + if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_digests_shown, device_param->size_shown) == -1) return -1; + if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_result, device_param->size_results) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { device_param->kernel_params[15] = &device_param->opencl_d_digests_buf; @@ -773,6 +891,13 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_rules_c, device_param->size_rules_c) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_rules_c, device_param->size_rules_c) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { if (run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_rules_c, device_param->size_rules_c) == -1) return -1; @@ -792,6 +917,13 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_rules_c, device_param->size_rules_c) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_rules_c, device_param->size_rules_c) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { if (run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_rules_c, device_param->size_rules_c) == -1) return -1; @@ -809,6 +941,13 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_combs_c, device_param->size_combs) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (run_metal_kernel_bzero (hashcat_ctx, device_param, device_param->metal_d_combs_c, device_param->size_combs) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { if (run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_combs_c, device_param->size_combs) == -1) return -1; @@ -826,6 +965,13 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_bfs_c, device_param->size_bfs) == -1) return -1; } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + if (run_metal_kernel_bzero (hashcat_ctx, device_param, 
device_param->metal_d_bfs_c, device_param->size_bfs) == -1) return -1; + } + #endif + if (device_param->is_opencl == true) { if (run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_bfs_c, device_param->size_bfs) == -1) return -1; @@ -866,6 +1012,13 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param event_log_error (hashcat_ctx, "* Device #%u: ATTENTION! HIP kernel self-test failed.", device_param->device_id + 1); } + #if defined (__APPLE__) + if (device_param->is_metal == true) + { + event_log_error (hashcat_ctx, "* Device #%u: ATTENTION! Metal kernel self-test failed.", device_param->device_id + 1); + } + #endif + if (device_param->is_opencl == true) { event_log_error (hashcat_ctx, "* Device #%u: ATTENTION! OpenCL kernel self-test failed.", device_param->device_id + 1); diff --git a/src/terminal.c b/src/terminal.c index e40ae8b11..aab94cc84 100644 --- a/src/terminal.c +++ b/src/terminal.c @@ -972,6 +972,112 @@ void backend_info (hashcat_ctx_t *hashcat_ctx) } } + #if defined (__APPLE__) + if (backend_ctx->mtl) + { + event_log_info (hashcat_ctx, "Metal Info:"); + event_log_info (hashcat_ctx, "==========="); + event_log_info (hashcat_ctx, NULL); + + int metal_devices_cnt = backend_ctx->metal_devices_cnt; + int metal_runtimeVersion = backend_ctx->metal_runtimeVersion; + char *metal_runtimeVersionStr = backend_ctx->metal_runtimeVersionStr; + + if (metal_runtimeVersionStr != NULL) + { + event_log_info (hashcat_ctx, "Metal.Version.: %s", metal_runtimeVersionStr); + } + else + { + event_log_info (hashcat_ctx, "Metal.Version.: %d", metal_runtimeVersion); + } + + event_log_info (hashcat_ctx, NULL); + + for (int metal_devices_idx = 0; metal_devices_idx < metal_devices_cnt; metal_devices_idx++) + { + const int backend_devices_idx = backend_ctx->backend_device_from_metal[metal_devices_idx]; + + const hc_device_param_t *device_param = backend_ctx->devices_param + backend_devices_idx; + + int device_id = device_param->device_id; + int device_mtl_maj = device_param->mtl_major; + int device_mtl_min = device_param->mtl_minor; + int device_max_transfer_rate = device_param->device_max_transfer_rate; + int device_physical_location = device_param->device_physical_location; + int device_location_number = device_param->device_location_number; + int device_registryID = device_param->device_registryID; + int device_is_headless = device_param->device_is_headless; + int device_is_low_power = device_param->device_is_low_power; + int device_is_removable = device_param->device_is_removable; + + char *device_name = device_param->device_name; + + u32 device_processors = device_param->device_processors; + + u64 device_global_mem = device_param->device_global_mem; + u64 device_maxmem_alloc = device_param->device_maxmem_alloc; + u64 device_available_mem = device_param->device_available_mem; + u64 device_local_mem_size = device_param->device_local_mem_size; + + cl_device_type opencl_device_type = device_param->opencl_device_type; + cl_uint opencl_device_vendor_id = device_param->opencl_device_vendor_id; + char *opencl_device_vendor = device_param->opencl_device_vendor; + + if (device_param->device_id_alias_cnt) + { + event_log_info (hashcat_ctx, "Backend Device ID #%d (Alias: #%d)", device_id + 1, device_param->device_id_alias_buf[0] + 1); + } + else + { + event_log_info (hashcat_ctx, "Backend Device ID #%d", device_id + 1); + } + + event_log_info (hashcat_ctx, " Type...........: %s", ((opencl_device_type & CL_DEVICE_TYPE_CPU) ? 
"CPU" : ((opencl_device_type & CL_DEVICE_TYPE_GPU) ? "GPU" : "Accelerator"))); + event_log_info (hashcat_ctx, " Vendor.ID......: %u", opencl_device_vendor_id); + event_log_info (hashcat_ctx, " Vendor.........: %s", opencl_device_vendor); + event_log_info (hashcat_ctx, " Name...........: %s", device_name); + event_log_info (hashcat_ctx, " Processor(s)...: %u", device_processors); + event_log_info (hashcat_ctx, " Clock..........: N/A"); + event_log_info (hashcat_ctx, " Memory.Total...: %" PRIu64 " MB (limited to %" PRIu64 " MB allocatable in one block)", device_global_mem / 1024 / 1024, device_maxmem_alloc / 1024 / 1024); + event_log_info (hashcat_ctx, " Memory.Free....: %" PRIu64 " MB", device_available_mem / 1024 / 1024); + event_log_info (hashcat_ctx, " Local.Memory...: %" PRIu64 " KB", device_local_mem_size / 1024); + + switch (device_physical_location) + { + case MTL_DEVICE_LOCATION_BUILTIN: event_log_info (hashcat_ctx, " Phys.Location..: built-in"); break; + case MTL_DEVICE_LOCATION_SLOT: event_log_info (hashcat_ctx, " Phys.Location..: connected to slot %d", device_location_number); break; + case MTL_DEVICE_LOCATION_EXTERNAL: event_log_info (hashcat_ctx, " Phys.Location..: connected via an external interface (port %d)", device_location_number); break; + case MTL_DEVICE_LOCATION_UNSPECIFIED: event_log_info (hashcat_ctx, " Phys.Location..: unspecified"); break; + default: event_log_info (hashcat_ctx, " Phys.Location..: N/A"); break; + } + + if (device_mtl_maj > 0 && device_mtl_min > 0) + { + event_log_info (hashcat_ctx, " Feature.Set....: macOS GPU Family %d v%d", device_mtl_maj, device_mtl_min); + } + else + { + event_log_info (hashcat_ctx, " Feature.Set....: N/A"); + } + + event_log_info (hashcat_ctx, " Registry.ID....: %d", device_registryID); + + if (device_physical_location != MTL_DEVICE_LOCATION_BUILTIN) + { + event_log_info (hashcat_ctx, " Max.TX.Rate....: %d MB/sec", device_max_transfer_rate); + } + else + { + event_log_info (hashcat_ctx, " Max.TX.Rate....: N/A"); + } + + event_log_info (hashcat_ctx, " GPU.Properties.: headless %d, low-power %d, removable %d", device_is_headless, device_is_low_power, device_is_removable); + event_log_info (hashcat_ctx, NULL); + } + } + #endif + if (backend_ctx->ocl) { event_log_info (hashcat_ctx, "OpenCL Info:"); @@ -1187,6 +1293,59 @@ void backend_info_compact (hashcat_ctx_t *hashcat_ctx) event_log_info (hashcat_ctx, NULL); } + #if defined (__APPLE__) + /** + * Metal + */ + + if (backend_ctx->mtl) + { + int metal_devices_cnt = backend_ctx->metal_devices_cnt; + char *metal_runtimeVersionStr = backend_ctx->metal_runtimeVersionStr; + + size_t len = event_log_info (hashcat_ctx, "METAL API (Metal %s)", metal_runtimeVersionStr); + + char line[HCBUFSIZ_TINY] = { 0 }; + + memset (line, '=', len); + + line[len] = 0; + + event_log_info (hashcat_ctx, "%s", line); + + for (int metal_devices_idx = 0; metal_devices_idx < metal_devices_cnt; metal_devices_idx++) + { + const int backend_devices_idx = backend_ctx->backend_device_from_metal[metal_devices_idx]; + + const hc_device_param_t *device_param = backend_ctx->devices_param + backend_devices_idx; + + int device_id = device_param->device_id; + char *device_name = device_param->device_name; + u32 device_processors = device_param->device_processors; + u64 device_global_mem = device_param->device_global_mem; + u64 device_available_mem = device_param->device_available_mem; + + if ((device_param->skipped == false) && (device_param->skipped_warning == false)) + { + event_log_info (hashcat_ctx, "* Device #%u: %s, %" 
PRIu64 "/%" PRIu64 " MB, %uMCU", + device_id + 1, + device_name, + device_available_mem / 1024 / 1024, + device_global_mem / 1024 / 1024, + device_processors); + } + else + { + event_log_info (hashcat_ctx, "* Device #%u: %s, skipped", + device_id + 1, + device_name); + } + } + + event_log_info (hashcat_ctx, NULL); + } + #endif + /** * OpenCL */ diff --git a/src/usage.c b/src/usage.c index 69c35485a..514728fd2 100644 --- a/src/usage.c +++ b/src/usage.c @@ -95,6 +95,7 @@ static const char *const USAGE_BIG_PRE_HASHMODES[] = " --example-hashes | | Alias of --hash-info |", " --backend-ignore-cuda | | Do not try to open CUDA interface on startup |", " --backend-ignore-hip | | Do not try to open HIP interface on startup |", + " --backend-ignore-metal | | Do not try to open Metal interface on startup |", " --backend-ignore-opencl | | Do not try to open OpenCL interface on startup |", " -I, --backend-info | | Show info about detected backend API devices | -I", " -d, --backend-devices | Str | Backend devices to use, separated with commas | -d 1", diff --git a/src/user_options.c b/src/user_options.c index b55f4d83a..15076a3fe 100644 --- a/src/user_options.c +++ b/src/user_options.c @@ -35,6 +35,9 @@ static const struct option long_options[] = {"backend-devices", required_argument, NULL, IDX_BACKEND_DEVICES}, {"backend-ignore-cuda", no_argument, NULL, IDX_BACKEND_IGNORE_CUDA}, {"backend-ignore-hip", no_argument, NULL, IDX_BACKEND_IGNORE_HIP}, + #if defined (__APPLE__) + {"backend-ignore-metal", no_argument, NULL, IDX_BACKEND_IGNORE_METAL}, + #endif {"backend-ignore-opencl", no_argument, NULL, IDX_BACKEND_IGNORE_OPENCL}, {"backend-info", no_argument, NULL, IDX_BACKEND_INFO}, {"backend-vector-width", required_argument, NULL, IDX_BACKEND_VECTOR_WIDTH}, @@ -170,6 +173,9 @@ int user_options_init (hashcat_ctx_t *hashcat_ctx) user_options->backend_devices = NULL; user_options->backend_ignore_cuda = BACKEND_IGNORE_CUDA; user_options->backend_ignore_hip = BACKEND_IGNORE_HIP; + #if defined (__APPLE__) + user_options->backend_ignore_metal = BACKEND_IGNORE_METAL; + #endif user_options->backend_ignore_opencl = BACKEND_IGNORE_OPENCL; user_options->backend_info = BACKEND_INFO; user_options->backend_vector_width = BACKEND_VECTOR_WIDTH; @@ -455,6 +461,9 @@ int user_options_getopt (hashcat_ctx_t *hashcat_ctx, int argc, char **argv) case IDX_CPU_AFFINITY: user_options->cpu_affinity = optarg; break; case IDX_BACKEND_IGNORE_CUDA: user_options->backend_ignore_cuda = true; break; case IDX_BACKEND_IGNORE_HIP: user_options->backend_ignore_hip = true; break; + #if defined (__APPLE__) + case IDX_BACKEND_IGNORE_METAL: user_options->backend_ignore_metal = true; break; + #endif case IDX_BACKEND_IGNORE_OPENCL: user_options->backend_ignore_opencl = true; break; case IDX_BACKEND_INFO: user_options->backend_info = true; break; case IDX_BACKEND_DEVICES: user_options->backend_devices = optarg; break;