From 8a3eee3fe5b6cd2a88d154ea95bcd0a5422d57d3 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Sun, 27 Jun 2021 10:18:38 +0200
Subject: [PATCH] OpenCL Runtime: Workaround JiT crash (SC failed. No reason
 given.) on macOS by limiting local memory allocations to 32k

---
 docs/changes.txt |  1 +
 src/backend.c    | 24 +++++++++++++++++++++++-
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/docs/changes.txt b/docs/changes.txt
index 3f7a1ad6d..5ec1ce1a4 100644
--- a/docs/changes.txt
+++ b/docs/changes.txt
@@ -46,6 +46,7 @@
 - Folders: Do not escape the variable cpath_real to prevent certain OpenCL runtimes from running into an error which do not support escape characters
 - LM: Workaround JiT compiler bug in -m 3000 on NV leading to false negatives with large amount of hashes
 - Tests: Changed tests for VeraCrypt from -a 0 to -a 3, because password extension is not available to all shells
+- OpenCL Runtime: Workaround JiT crash (SC failed. No reason given.) on macOS with AMD GPUs by limiting local memory allocations to 32k
 
 * changes v6.2.1 -> v6.2.2
 
diff --git a/src/backend.c b/src/backend.c
index 8c87fe976..da5aae373 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -6506,6 +6506,21 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
           }
         }
 
+        // workaround for AMD GPUs on the Apple OpenCL platform:
+        // allocating all reported local memory causes the JiT to fail with: SC failed. No reason given.
+        // if we limit ourselves to 32k it seems to work
+
+        if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
+        {
+          if (device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE)
+          {
+            if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)
+            {
+              device_local_mem_size = MIN (device_local_mem_size, 32768);
+            }
+          }
+        }
+
         device_param->device_local_mem_size = device_local_mem_size;
 
         // older POCL version and older LLVM versions are known to fail compiling kernels
@@ -7700,7 +7715,14 @@ static u32 get_kernel_threads (const hc_device_param_t *device_param)
     }
     else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)
     {
-      const u32 gpu_prefered_thread_count = 64;
+      u32 gpu_prefered_thread_count = 64;
+
+      if (device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE)
+      {
+        // based on clinfo output: Preferred work group size multiple (kernel)
+
+        gpu_prefered_thread_count = 32;
+      }
 
       kernel_threads_max = MIN (kernel_threads_max, gpu_prefered_thread_count);
     }