From bbed0cd67ad21f82d590c27dd096f7170e4a3556 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Mon, 6 May 2019 15:06:02 +0200
Subject: [PATCH] Fix test.sh and bitsliced algos

---
 OpenCL/m01500_a3-pure.cl |  8 ++++++++
 OpenCL/m03000_a3-pure.cl |  8 ++++++++
 OpenCL/m14000_a3-pure.cl |  8 ++++++++
 src/backend.c            | 17 +++++++++++++----
 tools/test.sh            |  6 +++---
 5 files changed, 40 insertions(+), 7 deletions(-)

diff --git a/OpenCL/m01500_a3-pure.cl b/OpenCL/m01500_a3-pure.cl
index 28c9f2573..5c534cd4e 100644
--- a/OpenCL/m01500_a3-pure.cl
+++ b/OpenCL/m01500_a3-pure.cl
@@ -1998,7 +1998,11 @@ KERNEL_FQ void m01500_mxx (KERN_ATTR_BITSLICE ())
    * inner loop
    */
 
+  #ifdef IS_CUDA
+  const u32 pc_pos = (blockIdx.y * blockDim.y) + threadIdx.y;
+  #else
   const u32 pc_pos = get_global_id (1);
+  #endif
 
   const u32 il_pos = pc_pos * 32;
 
@@ -2446,7 +2450,11 @@ KERNEL_FQ void m01500_sxx (KERN_ATTR_BITSLICE ())
    * inner loop
    */
 
+  #ifdef IS_CUDA
+  const u32 pc_pos = (blockIdx.y * blockDim.y) + threadIdx.y;
+  #else
   const u32 pc_pos = get_global_id (1);
+  #endif
 
   const u32 il_pos = pc_pos * 32;
 
diff --git a/OpenCL/m03000_a3-pure.cl b/OpenCL/m03000_a3-pure.cl
index 8af1fad35..8f1449780 100644
--- a/OpenCL/m03000_a3-pure.cl
+++ b/OpenCL/m03000_a3-pure.cl
@@ -1830,7 +1830,11 @@ KERNEL_FQ void m03000_mxx (KERN_ATTR_BITSLICE ())
    * inner loop
    */
 
+  #ifdef IS_CUDA
+  const u32 pc_pos = (blockIdx.y * blockDim.y) + threadIdx.y;
+  #else
   const u32 pc_pos = get_global_id (1);
+  #endif
 
   const u32 il_pos = pc_pos * 32;
 
@@ -2278,7 +2282,11 @@ KERNEL_FQ void m03000_sxx (KERN_ATTR_BITSLICE ())
    * inner loop
    */
 
+  #ifdef IS_CUDA
+  const u32 pc_pos = (blockIdx.y * blockDim.y) + threadIdx.y;
+  #else
   const u32 pc_pos = get_global_id (1);
+  #endif
 
   const u32 il_pos = pc_pos * 32;
 
diff --git a/OpenCL/m14000_a3-pure.cl b/OpenCL/m14000_a3-pure.cl
index 6cfaee1db..cc98a5dce 100644
--- a/OpenCL/m14000_a3-pure.cl
+++ b/OpenCL/m14000_a3-pure.cl
@@ -2043,7 +2043,11 @@ KERNEL_FQ void m14000_mxx (KERN_ATTR_BITSLICE ())
    * inner loop
    */
 
+  #ifdef IS_CUDA
+  const u32 pc_pos = (blockIdx.y * blockDim.y) + threadIdx.y;
+  #else
   const u32 pc_pos = get_global_id (1);
+  #endif
 
   const u32 il_pos = pc_pos * 32;
 
@@ -2555,7 +2559,11 @@ KERNEL_FQ void m14000_sxx (KERN_ATTR_BITSLICE ())
    * inner loop
    */
 
+  #ifdef IS_CUDA
+  const u32 pc_pos = (blockIdx.y * blockDim.y) + threadIdx.y;
+  #else
   const u32 pc_pos = get_global_id (1);
+  #endif
 
   const u32 il_pos = pc_pos * 32;
 
diff --git a/src/backend.c b/src/backend.c
index eee101e6c..000e826b1 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -3119,7 +3119,7 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
 
       if (rc_cuEventRecord1 == -1) return -1;
 
-      const int rc_cuLaunchKernel = hc_cuLaunchKernel (hashcat_ctx, cuda_function, num_elements, 32, 1, kernel_threads, 1, 1, 0, device_param->cuda_stream, device_param->kernel_params, NULL);
+      const int rc_cuLaunchKernel = hc_cuLaunchKernel (hashcat_ctx, cuda_function, num_elements / 32, 32, 1, kernel_threads, 1, 1, 0, device_param->cuda_stream, device_param->kernel_params, NULL);
 
       if (rc_cuLaunchKernel == -1) return -1;
 
@@ -3525,7 +3525,7 @@ int run_kernel_tm (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
   {
     CUfunction cuda_function = device_param->cuda_function_tm;
 
-    const int rc_cuLaunchKernel = hc_cuLaunchKernel (hashcat_ctx, cuda_function, num_elements, 1, 1, kernel_threads, 1, 1, 0, device_param->cuda_stream, device_param->kernel_params_tm, NULL);
+    const int rc_cuLaunchKernel = hc_cuLaunchKernel (hashcat_ctx, cuda_function, num_elements / kernel_threads, 1, 1, kernel_threads, 1, 1, 0, device_param->cuda_stream, device_param->kernel_params_tm, NULL);
 
     if (rc_cuLaunchKernel == -1) return -1;
 
@@ -8526,8 +8526,17 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       device_param->kernel_params_amp[5] = &device_param->kernel_params_amp_buf32[5];
       device_param->kernel_params_amp[6] = &device_param->kernel_params_amp_buf64[6];
 
-      device_param->kernel_params_tm[0] = &device_param->opencl_d_bfs_c;
-      device_param->kernel_params_tm[1] = &device_param->opencl_d_tm_c;
+      if (device_param->is_cuda == true)
+      {
+        device_param->kernel_params_tm[0] = &device_param->cuda_d_bfs_c;
+        device_param->kernel_params_tm[1] = &device_param->cuda_d_tm_c;
+      }
+
+      if (device_param->is_opencl == true)
+      {
+        device_param->kernel_params_tm[0] = &device_param->opencl_d_bfs_c;
+        device_param->kernel_params_tm[1] = &device_param->opencl_d_tm_c;
+      }
     }
 
     device_param->kernel_params_memset_buf32[1] = 0; // value
diff --git a/tools/test.sh b/tools/test.sh
index 6f8b28d5b..d0546a219 100755
--- a/tools/test.sh
+++ b/tools/test.sh
@@ -2477,7 +2477,7 @@ cat << EOF
 
 OPTIONS:
 
-  -V    OpenCL vector-width (either 1, 2, 4 or 8), overrides value from device query :
+  -V    Backend vector-width (either 1, 2, 4 or 8), overrides value from device query :
         '1'         => vector-width 1
         '2'         => vector-width 2 (default)
         '4'         => vector-width 4
@@ -2507,7 +2507,7 @@ OPTIONS:
         'linux'     => Linux operating system (use .bin file extension)
         'macos'     => macOS operating system (use .app file extension)
 
-  -d    Select the OpenCL device :
+  -d    Select the Backend device :
         (int)[,int] => comma separated list of devices (default : 1)
 
   -D    Select the OpenCL device types :
@@ -2866,7 +2866,7 @@ if [ "${PACKAGE}" -eq 0 -o -z "${PACKAGE_FOLDER}" ]; then
           fi
 
           VECTOR=${CUR_WIDTH}
-          OPTS="${OPTS_OLD} --opencl-vector-width ${VECTOR}"
+          OPTS="${OPTS_OLD} --backend-vector-width ${VECTOR}"
 
           if [[ ${IS_SLOW} -eq 1 ]]; then