diff --git a/docs/changes.txt b/docs/changes.txt
index 2db77be02..f7d5a4113 100644
--- a/docs/changes.txt
+++ b/docs/changes.txt
@@ -60,6 +60,7 @@
 - OpenCL Kernels: Replace bitwise swaps with rotate() versions for AMD
 - OpenCL Kernels: Rewritten Keccak kernel to run fully on registers and partially reversed last round
 - OpenCL Kernels: Rewritten SIP kernel from scratch
+- OpenCL Kernels: Thread count is set to the hardware-native count, except when -w 4 is used, in which case the OpenCL maximum is used
 - OpenCL Kernels: Updated default scrypt TMTO to be ideal for latest NVidia and AMD top models
 - OpenCL Kernels: Vectorized tons of slow kernels to improve CPU cracking speed
 - OpenCL Runtime: Improved detection for AMD and NV devices on macOS
diff --git a/src/interface.c b/src/interface.c
index c0fa7cc8a..19c4f5167 100644
--- a/src/interface.c
+++ b/src/interface.c
@@ -25076,7 +25076,6 @@ u32 hashconfig_forced_kernel_threads (hashcat_ctx_t *hashcat_ctx)
 
 u32 hashconfig_get_kernel_threads (hashcat_ctx_t *hashcat_ctx, const hc_device_param_t *device_param)
 {
-  const hashconfig_t   *hashconfig   = hashcat_ctx->hashconfig;
   const user_options_t *user_options = hashcat_ctx->user_options;
 
   // a kernel can force a fixed value
@@ -25097,21 +25096,7 @@ u32 hashconfig_get_kernel_threads (hashcat_ctx_t *hashcat_ctx, const hc_device_p
   {
     if (device_param->device_vendor_id == VENDOR_ID_NV)
     {
-      if (hashconfig->attack_exec == ATTACK_EXEC_OUTSIDE_KERNEL)
-      {
-        if (user_options->workload_profile < 4)
-        {
-          kernel_threads = KERNEL_THREADS_MAX_GPU_NV;
-        }
-        else
-        {
-          kernel_threads = device_param->device_maxworkgroup_size;
-        }
-      }
-      else
-      {
-        kernel_threads = device_param->device_maxworkgroup_size;
-      }
+      kernel_threads = KERNEL_THREADS_MAX_GPU_NV;
     }
     else if (device_param->device_vendor_id == VENDOR_ID_AMD)
     {
@@ -25127,6 +25112,11 @@ u32 hashconfig_get_kernel_threads (hashcat_ctx_t *hashcat_ctx, const hc_device_p
     kernel_threads = KERNEL_THREADS_MAX_OTHER;
   }
 
+  if (user_options->workload_profile == 4)
+  {
+    kernel_threads = device_param->device_maxworkgroup_size;
+  }
+
   // and (2) an opencl device can force an lower value (limited resources on device)
 
   kernel_threads = MIN (kernel_threads, device_param->device_maxworkgroup_size);