diff --git a/OpenCL/inc_amp.h b/OpenCL/inc_amp.h
index 5db6a21d8..e49b23959 100644
--- a/OpenCL/inc_amp.h
+++ b/OpenCL/inc_amp.h
@@ -16,7 +16,7 @@
   GLOBAL_AS   const bf_t          *bfs_buf,    \
   CONSTANT_AS const u32           &combs_mode, \
   CONSTANT_AS const u64           &gid_max,    \
-                    uint           hc_gid [[ thread_position_in_grid ]]
+                    uint3          hc_gid [[ thread_position_in_grid ]]
 
 #else // CUDA, HIP, OpenCL
 
diff --git a/OpenCL/inc_common.h b/OpenCL/inc_common.h
index c24ecb524..d0b2ed989 100644
--- a/OpenCL/inc_common.h
+++ b/OpenCL/inc_common.h
@@ -124,10 +124,10 @@
 
 #if defined IS_METAL
 #define KERN_ATTR_MAIN_PARAMS                       \
-  uint hc_gid [[ thread_position_in_grid ]],        \
-  uint hc_lid [[ thread_position_in_threadgroup ]], \
-  uint hc_lsz [[ threads_per_threadgroup ]],        \
-  uint hc_bid [[ threadgroup_position_in_grid ]]
+  uint3 hc_gid [[ thread_position_in_grid ]],        \
+  uint3 hc_lid [[ thread_position_in_threadgroup ]], \
+  uint3 hc_lsz [[ threads_per_threadgroup ]],        \
+  uint3 hc_bid [[ threadgroup_position_in_grid ]]
 #endif // IS_METAL
 
 /*
diff --git a/OpenCL/inc_markov.h b/OpenCL/inc_markov.h
index 3aae8f7fc..1ec187b8e 100644
--- a/OpenCL/inc_markov.h
+++ b/OpenCL/inc_markov.h
@@ -19,7 +19,7 @@
   CONSTANT_AS const u32  &bits14,         \
   CONSTANT_AS const u32  &bits15,         \
   CONSTANT_AS const u64  &gid_max,        \
-                    uint  hc_gid [[ thread_position_in_grid ]]
+                    uint3 hc_gid [[ thread_position_in_grid ]]
 
 #define KERN_ATTR_R_MARKOV                \
   GLOBAL_AS         bf_t *pws_buf_r,      \
@@ -31,7 +31,7 @@
   CONSTANT_AS const u32  &bits14,         \
   CONSTANT_AS const u32  &bits15,         \
   CONSTANT_AS const u64  &gid_max,        \
-                    uint  hc_gid [[ thread_position_in_grid ]]
+                    uint3 hc_gid [[ thread_position_in_grid ]]
 
 #define KERN_ATTR_C_MARKOV                \
   GLOBAL_AS         pw_t *pws_buf,        \
@@ -43,7 +43,7 @@
   CONSTANT_AS const u32  &bits14,         \
   CONSTANT_AS const u32  &bits15,         \
   CONSTANT_AS const u64  &gid_max,        \
-                    uint  hc_gid [[ thread_position_in_grid ]]
+                    uint3 hc_gid [[ thread_position_in_grid ]]
 
 #else // CUDA, HIP, OpenCL
 
diff --git a/OpenCL/inc_platform.h b/OpenCL/inc_platform.h
index e1ffdefcf..9729d4fad 100644
--- a/OpenCL/inc_platform.h
+++ b/OpenCL/inc_platform.h
@@ -73,10 +73,25 @@ DECLSPEC u32 hc_atomic_dec (volatile GLOBAL_AS u32 *p);
 DECLSPEC u32 hc_atomic_inc (volatile GLOBAL_AS u32 *p);
 DECLSPEC u32 hc_atomic_or  (volatile GLOBAL_AS u32 *p, volatile const u32 val);
 
-#define get_global_id(param) hc_gid
-#define get_local_id(param) hc_lid
-#define get_local_size(param) hc_lsz
-#define get_group_id(param) hc_bid
+#define get_global_id(dimindx)   \
+  ((dimindx) == 0 ? hc_gid.x :   \
+   (dimindx) == 1 ? hc_gid.y :   \
+   (dimindx) == 2 ? hc_gid.z : -1)
+
+#define get_group_id(dimindx)    \
+  ((dimindx) == 0 ? hc_bid.x :   \
+   (dimindx) == 1 ? hc_bid.y :   \
+   (dimindx) == 2 ? hc_bid.z : -1)
+
+#define get_local_id(dimindx)    \
+  ((dimindx) == 0 ? hc_lid.x :   \
+   (dimindx) == 1 ? hc_lid.y :   \
+   (dimindx) == 2 ? hc_lid.z : -1)
+
+#define get_local_size(dimindx)  \
+  ((dimindx) == 0 ? hc_lsz.x :   \
+   (dimindx) == 1 ? hc_lsz.y :   \
+   (dimindx) == 2 ? hc_lsz.z : -1)
 
 DECLSPEC u32x rotl32   (const u32x a, const int n);
 DECLSPEC u32x rotr32   (const u32x a, const int n);
diff --git a/OpenCL/inc_shared.h b/OpenCL/inc_shared.h
index 16f2e2c4c..6518c30c6 100644
--- a/OpenCL/inc_shared.h
+++ b/OpenCL/inc_shared.h
@@ -13,28 +13,28 @@
   GLOBAL_AS         u32      *pws_comp, \
   GLOBAL_AS         pw_t     *pws_buf,  \
   CONSTANT_AS const u64      &gid_max,  \
-                    uint      hc_gid [[ thread_position_in_grid ]]
+                    uint3     hc_gid [[ thread_position_in_grid ]]
 
 #define KERN_ATTR_GPU_MEMSET            \
   GLOBAL_AS         uint4    *buf,      \
   CONSTANT_AS const u32      &value,    \
   CONSTANT_AS const u64      &gid_max,  \
-                    uint      hc_gid [[ thread_position_in_grid ]]
+                    uint3     hc_gid [[ thread_position_in_grid ]]
 
 #define KERN_ATTR_GPU_BZERO             \
   GLOBAL_AS         uint4    *buf,      \
   CONSTANT_AS const u64      &gid_max,  \
-                    uint      hc_gid [[ thread_position_in_grid ]]
+                    uint3     hc_gid [[ thread_position_in_grid ]]
 
 #define KERN_ATTR_GPU_ATINIT            \
   GLOBAL_AS         pw_t     *buf,      \
   CONSTANT_AS const u64      &gid_max,  \
-                    uint      hc_gid [[ thread_position_in_grid ]]
+                    uint3     hc_gid [[ thread_position_in_grid ]]
 
 #define KERN_ATTR_GPU_UTF8_TO_UTF16     \
   GLOBAL_AS         pw_t     *pws_buf,  \
   CONSTANT_AS const u64      &gid_max,  \
-                    uint      hc_gid [[ thread_position_in_grid ]]
+                    uint3     hc_gid [[ thread_position_in_grid ]]
 
 #else // CUDA, HIP, OpenCL
 
diff --git a/docs/changes.txt b/docs/changes.txt
index 06e1e46bb..cfdba32b4 100644
--- a/docs/changes.txt
+++ b/docs/changes.txt
@@ -148,19 +148,23 @@
 - Status Code: Add specific return code for self-test fail (-11)
 - Scrypt: Increase buffer sizes in module for hash mode 8900 to allow longer scrypt digests
 - Unicode: Update UTF-8 to UTF-16 conversion to match RFC 3629
+- Unit tests: Updated install_modules.sh with Crypt::Argon2
 - User Options: Added error message when mixing --username and --show to warn users of exponential delay
 - MetaMask: update extraction tool to support MetaMask Mobile wallets
 - SecureCRT MasterPassphrase v2: update module, pure kernels and test unit. Add optimized kernels.
 - Metal Backend: added workaround to prevent 'Infinite Loop' bug when build kernels
 - Metal Backend: added workaround to set the true Processor value in Metal devices on Apple Intel
+- Metal Backend: added support to 2D/3D Compute
 - Metal Backend: allow use of devices with Metal if runtime version is >= 200
 - Metal Backend: disable Metal devices only if at least one OpenCL device is active
+- Metal Backend: improved compute workloads calculation
 - Modules: Check UnpackSize to raise false positive with hc_decompress_rar
 - User Options: added --metal-compiler-runtime option
 - Hardware Monitor: avoid sprintf in src/ext_iokit.c
 - Hardware Monitor: Splitting hwmon_ctx_init function into smaller library-specific functions
 - Help: show supported hash-modes only with -hh
 - Makefile: prevent make failure with Apple Silicon in case of partial rebuild
+- Makefile: updated MACOSX_DEPLOYMENT_TARGET to 15.0
 - Rules: Rename best64.rule to best66.rule and remove the unknown section from it
 
 * changes v6.2.5 -> v6.2.6
diff --git a/include/ext_metal.h b/include/ext_metal.h
index 85facc62b..b51f09ff2 100644
--- a/include/ext_metal.h
+++ b/include/ext_metal.h
@@ -111,7 +111,7 @@ int  hc_mtlCreateLibraryWithFile    (void *hashcat_ctx, mtl_device_id metal_devi
 int  hc_mtlEncodeComputeCommand_pre (void *hashcat_ctx, mtl_pipeline metal_pipeline, mtl_command_queue metal_command_queue, mtl_command_buffer *metal_command_buffer, mtl_command_encoder *metal_command_encoder);
 int  hc_mtlSetCommandEncoderArg     (void *hashcat_ctx, mtl_command_encoder metal_command_encoder, size_t off, size_t idx, mtl_mem buf, void *host_data, size_t host_data_size);
 
-int  hc_mtlEncodeComputeCommand     (void *hashcat_ctx, mtl_command_encoder metal_command_encoder, mtl_command_buffer metal_command_buffer, const size_t global_work_size[3], const size_t local_work_size[3], double *ms);
+int  hc_mtlEncodeComputeCommand     (void *hashcat_ctx, mtl_command_encoder metal_command_encoder, mtl_command_buffer metal_command_buffer, const unsigned int work_dim, const size_t global_work_size[3], const size_t local_work_size[3], double *ms);
 
 #endif // __APPLE__
 
diff --git a/src/Makefile b/src/Makefile
index 2ed479330..2cfbf9406 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -358,7 +358,7 @@ LFLAGS_NATIVE           += -lpthread
 endif # NetBSD
 
 ifeq ($(UNAME),Darwin)
-export MACOSX_DEPLOYMENT_TARGET=10.15
+export MACOSX_DEPLOYMENT_TARGET=15.0
 CFLAGS_NATIVE           := $(CFLAGS)
 CFLAGS_NATIVE           += -DWITH_HWMON
 
diff --git a/src/backend.c b/src/backend.c
index 4fa01abb9..3109f8918 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -2206,7 +2206,7 @@ int run_metal_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *devi
 
   double ms = 0;
 
-  if (hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, global_work_size, local_work_size, &ms) == -1) return -1;
+  if (hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, 1, global_work_size, local_work_size, &ms) == -1) return -1;
 
   return 0;
 }
@@ -2234,7 +2234,7 @@ int run_metal_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param_
 
   double ms = 0;
 
-  if (hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, global_work_size, local_work_size, &ms) == -1) return -1;
+  if (hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, 1, global_work_size, local_work_size, &ms) == -1) return -1;
 
   return 0;
 }
@@ -2265,7 +2265,7 @@ int run_metal_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *devic
 
     double ms = 0;
 
-    if (hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, global_work_size, local_work_size, &ms) == -1) return -1;
+    if (hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, 1, global_work_size, local_work_size, &ms) == -1) return -1;
   }
 
   if (num16m)
@@ -2910,29 +2910,34 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
       num_elements = num_elements * kernel_threads;
     }
 
+    unsigned int work_dim = 1;
+
     size_t global_work_size[3] = { num_elements,   1, 1 };
     size_t local_work_size[3]  = { kernel_threads, 1, 1 };
 
     if ((hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_DIMY_INIT) && (kern_run == KERN_RUN_1))
     {
       global_work_size[1] = local_work_size[1] = hashcat_ctx->hashes->salts_buf->salt_dimy;
+      work_dim = 2;
     }
 
     if ((hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_DIMY_LOOP) && (kern_run == KERN_RUN_2))
     {
       global_work_size[1] = local_work_size[1] = hashcat_ctx->hashes->salts_buf->salt_dimy;
+      work_dim = 2;
     }
 
     if ((hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_DIMY_COMP) && (kern_run == KERN_RUN_3))
     {
       global_work_size[1] = local_work_size[1] = hashcat_ctx->hashes->salts_buf->salt_dimy;
+      work_dim = 2;
     }
 
     double ms = 0;
 
     if (is_autotune == true)
     {
-      hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, global_work_size, local_work_size, &ms);
+      hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, work_dim, global_work_size, local_work_size, &ms);
 
       // hc_mtlEncodeComputeCommand_pre() must be called before every hc_mtlEncodeComputeCommand()
       if (hc_mtlEncodeComputeCommand_pre (hashcat_ctx, metal_pipeline, device_param->metal_command_queue, &metal_command_buffer, &metal_command_encoder) == -1) return -1;
@@ -2951,7 +2956,7 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
       }
     }
 
-    const int rc_cc = hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, global_work_size, local_work_size, &ms);
+    const int rc_cc = hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, work_dim, global_work_size, local_work_size, &ms);
 
     if (rc_cc != -1)
     {
@@ -3344,7 +3349,7 @@ int run_kernel_mp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
 
     double ms = 0;
 
-    if (hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, global_work_size, local_work_size, &ms) == -1) return -1;
+    if (hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, 1, global_work_size, local_work_size, &ms) == -1) return -1;
   }
   #endif // __APPLE__
 
@@ -3435,7 +3440,7 @@ int run_kernel_tm (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
 
     double ms = 0;
 
-    if (hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, global_work_size, local_work_size, &ms) == -1) return -1;
+    if (hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, 1, global_work_size, local_work_size, &ms) == -1) return -1;
   }
   #endif // __APPLE__
 
@@ -3519,7 +3524,7 @@ int run_kernel_amp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
 
     double ms = 0;
 
-    const int rc_cc = hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, global_work_size, local_work_size, &ms);
+    const int rc_cc = hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, 1, global_work_size, local_work_size, &ms);
 
     // release tmp_buf
 
@@ -3599,7 +3604,7 @@ int run_kernel_decompress (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device
 
     double ms = 0;
 
-    if (hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, global_work_size, local_work_size, &ms) == -1) return -1;
+    if (hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, 1, global_work_size, local_work_size, &ms) == -1) return -1;
   }
   #endif // __APPLE__
 
diff --git a/src/ext_metal.m b/src/ext_metal.m
index cd23fb062..a05c1a0e7 100644
--- a/src/ext_metal.m
+++ b/src/ext_metal.m
@@ -195,11 +195,14 @@ static int hc_mtlBuildOptionsToDict (void *hashcat_ctx, const char *build_option
   }
 
   // if set, add INCLUDE_PATH to hack Apple kernel build from source limitation on -I usage
+
   if (include_path != nil)
   {
     NSString *path_key = @"INCLUDE_PATH";
     NSString *path_value = [NSString stringWithCString: include_path encoding: NSUTF8StringEncoding];
+
     // Include path may contain spaces, escape them with a backslash
+
     path_value = [path_value stringByReplacingOccurrencesOfString:@" " withString:@"\\ "];
 
     [build_options_dict setObject:path_value forKey:path_key];
@@ -743,6 +746,7 @@ int hc_mtlCreateKernel (void *hashcat_ctx, mtl_device_id metal_device, mtl_libra
   dispatch_queue_t queue = dispatch_get_global_queue (DISPATCH_QUEUE_PRIORITY_DEFAULT, 0);
 
   // if no user-defined runtime, set to METAL_COMPILER_RUNTIME
+
   long timeout = (user_options->metal_compiler_runtime > 0) ? user_options->metal_compiler_runtime : METAL_COMPILER_RUNTIME;
 
   dispatch_time_t when = dispatch_time (DISPATCH_TIME_NOW,NSEC_PER_SEC * timeout);
@@ -1314,10 +1318,21 @@ int hc_mtlSetCommandEncoderArg (void *hashcat_ctx, mtl_command_encoder metal_com
   return 0;
 }
 
-int hc_mtlEncodeComputeCommand (void *hashcat_ctx, mtl_command_encoder metal_command_encoder, mtl_command_buffer metal_command_buffer, const size_t global_work_size[3], const size_t local_work_size[3], double *ms)
+int hc_mtlEncodeComputeCommand (void *hashcat_ctx, mtl_command_encoder metal_command_encoder, mtl_command_buffer metal_command_buffer, const unsigned int work_dim, const size_t global_work_size[3], const size_t local_work_size[3], double *ms)
 {
-  MTLSize numThreadgroups = {local_work_size[0], local_work_size[1], local_work_size[2]};
-  MTLSize threadsGroup = {global_work_size[0], global_work_size[1], global_work_size[2]};
+  MTLSize threadsPerThreadgroup =
+  {
+    local_work_size[0],
+    local_work_size[1],
+    local_work_size[2]
+  };
+
+  MTLSize threadgroupsPerGrid =
+  {
+    (global_work_size[0] + threadsPerThreadgroup.width - 1) / threadsPerThreadgroup.width,
+    work_dim > 1 ? (global_work_size[1] + threadsPerThreadgroup.height - 1) / threadsPerThreadgroup.height : 1,
+    work_dim > 2 ? (global_work_size[2] + threadsPerThreadgroup.depth - 1) / threadsPerThreadgroup.depth : 1
+  };
 
   if (metal_command_encoder == nil)
   {
@@ -1333,7 +1348,7 @@ int hc_mtlEncodeComputeCommand (void *hashcat_ctx, mtl_command_encoder metal_com
     return -1;
   }
 
-  [metal_command_encoder dispatchThreadgroups: threadsGroup threadsPerThreadgroup: numThreadgroups];
+  [metal_command_encoder dispatchThreadgroups: threadgroupsPerGrid threadsPerThreadgroup: threadsPerThreadgroup];
 
   [metal_command_encoder endEncoding];
   [metal_command_buffer commit];
@@ -1377,17 +1392,22 @@ int hc_mtlCreateLibraryWithFile (void *hashcat_ctx, mtl_device_id metal_device,
 
   if (k_string != nil)
   {
-    id <MTLLibrary> r = [metal_device newLibraryWithFile: k_string error: &error];
+    NSURL *libURL = [NSURL fileURLWithPath: k_string];
 
-    if (error != nil)
+    if (libURL != nil)
     {
-      event_log_error (hashcat_ctx, "%s(): failed to create metal library from metallib, %s", __func__, [[error localizedDescription] UTF8String]);
-      return -1;
+      id <MTLLibrary> r = [metal_device newLibraryWithURL: libURL error:&error];
+
+      if (error != nil)
+      {
+        event_log_error (hashcat_ctx, "%s(): failed to create metal library from metallib, %s", __func__, [[error localizedDescription] UTF8String]);
+        return -1;
+      }
+
+      *metal_library = r;
+
+      return 0;
     }
-
-    *metal_library = r;
-
-    return 0;
   }
 
   return -1;
@@ -1420,10 +1440,17 @@ int hc_mtlCreateLibraryWithSource (void *hashcat_ctx, mtl_device_id metal_device
       }
 
       compileOptions.preprocessorMacros = build_options_dict;
+      /*
+      compileOptions.optimizationLevel = MTLLibraryOptimizationLevelSize;
+      compileOptions.mathMode = MTLMathModeSafe;
+      // compileOptions.mathMode = MTLMathModeRelaxed;
+      // compileOptions.enableLogging = true;
+      // compileOptions.fastMathEnabled = false;
+      */
     }
 
     // todo: detect current os version and choose the right
-//    compileOptions.languageVersion = MTL_LANGUAGEVERSION_2_3;
+    // compileOptions.languageVersion = MTL_LANGUAGEVERSION_2_3;
 /*
     if (@available(macOS 12.0, *))
     {
diff --git a/tools/install_modules.sh b/tools/install_modules.sh
index 683d20000..948856e34 100755
--- a/tools/install_modules.sh
+++ b/tools/install_modules.sh
@@ -18,6 +18,7 @@ cpan install Authen::Passphrase::LANManager \
              Bitcoin::Crypto::Base58        \
              Compress::Zlib                 \
              Convert::EBCDIC                \
+             Crypt::Argon2                  \
              Crypt::AuthEnc::GCM            \
              Crypt::Camellia                \
              Crypt::CBC                     \