From 4b986de5fb4fd84ab247f4d33328d1b2de7a2d73 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Thu, 25 Apr 2019 14:45:17 +0200
Subject: [PATCH] Prepare native CUDA hybrid integration

---
 OpenCL/inc_common.cl            |   31 +-
 OpenCL/inc_platform.cl          |   44 ++
 OpenCL/inc_platform.h           |   24 +
 OpenCL/inc_types.h              |    4 +
 OpenCL/inc_vendor.h             |   21 +-
 OpenCL/m00000_a3-optimized.cl   |    1 +
 OpenCL/m02500-pure.cl           |    4 +
 include/{opencl.h => backend.h} |   39 +-
 include/ext_cuda.h              | 1042 +++++++++++++++++++++++++++++++
 include/types.h                 |    8 +-
 src/Makefile                    |    4 +-
 src/autotune.c                  |   12 +-
 src/{opencl.c => backend.c}     |  596 +++++++++++-------
 src/dispatch.c                  |   52 +-
 src/emu_inc_platform.c          |   11 +
 src/ext_cuda.c                  |    8 +
 src/hashcat.c                   |   94 +--
 src/hashes.c                    |    2 +-
 src/hwmon.c                     |  138 ++--
 src/interface.c                 |    2 +-
 src/main.c                      |    8 +-
 src/monitor.c                   |   16 +-
 src/mpsp.c                      |   10 +-
 src/outfile.c                   |    2 +-
 src/selftest.c                  |    8 +-
 src/status.c                    |  120 ++--
 src/stdout.c                    |    2 +-
 src/terminal.c                  |   30 +-
 src/user_options.c              |    2 +-
 29 files changed, 1825 insertions(+), 510 deletions(-)
 create mode 100644 OpenCL/inc_platform.cl
 create mode 100644 OpenCL/inc_platform.h
 rename include/{opencl.h => backend.h} (88%)
 create mode 100644 include/ext_cuda.h
 rename src/{opencl.c => backend.c} (92%)
 create mode 100644 src/emu_inc_platform.c
 create mode 100644 src/ext_cuda.c

diff --git a/OpenCL/inc_common.cl b/OpenCL/inc_common.cl
index d497d349e..a87649b3e 100644
--- a/OpenCL/inc_common.cl
+++ b/OpenCL/inc_common.cl
@@ -5,6 +5,7 @@
 
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.h"
 #include "inc_common.h"
 
 /**
@@ -1415,8 +1416,8 @@ DECLSPEC int is_valid_hex_8 (const u8 v)
 {
   // direct lookup table is slower thanks to CMOV
 
-  if ((v >= '0') && (v <= '9')) return 1;
-  if ((v >= 'a') && (v <= 'f')) return 1;
+  if ((v >= (u8) '0') && (v <= (u8) '9')) return 1;
+  if ((v >= (u8) 'a') && (v <= (u8) 'f')) return 1;
 
   return 0;
 }
@@ -1433,10 +1434,10 @@ DECLSPEC int is_valid_hex_32 (const u32 v)
 
 DECLSPEC int is_valid_base58_8 (const u8 v)
 {
-  if (v > 'z') return 0;
-  if (v < '1') return 0;
-  if ((v > '9') && (v < 'A')) return 0;
-  if ((v > 'Z') && (v < 'a')) return 0;
+  if (v > (u8) 'z') return 0;
+  if (v < (u8) '1') return 0;
+  if ((v > (u8) '9') && (v < (u8) 'A')) return 0;
+  if ((v > (u8) 'Z') && (v < (u8) 'a')) return 0;
 
   return 1;
 }
@@ -60860,7 +60861,23 @@ KERNEL_FQ void gpu_memset (GLOBAL_AS uint4 *buf, const u32 value, const u64 gid_
 
   if (gid >= gid_max) return;
 
-  buf[gid] = (uint4) (value);
+  uint4 r;
+
+  #if   defined IS_NATIVE
+  r = value;
+  #elif defined IS_OPENCL
+  r.s0 = value;
+  r.s1 = value;
+  r.s2 = value;
+  r.s3 = value;
+  #elif defined IS_CUDA
+  r.x = value;
+  r.y = value;
+  r.z = value;
+  r.w = value;
+  #endif
+
+  buf[gid] = r;
 }
 
 KERNEL_FQ void gpu_atinit (GLOBAL_AS pw_t *buf, const u64 gid_max)
diff --git a/OpenCL/inc_platform.cl b/OpenCL/inc_platform.cl
new file mode 100644
index 000000000..5c6d9780e
--- /dev/null
+++ b/OpenCL/inc_platform.cl
@@ -0,0 +1,44 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+#include "inc_vendor.h"
+#include "inc_types.h"
+#include "inc_platform.h"
+
+#ifdef IS_NATIVE
+#endif
+
+#ifdef IS_CUDA
+
+DECLSPEC u32 atomic_dec (u32 *p)
+{
+  return atomicSub (p, 1);
+}
+
+DECLSPEC u32 atomic_inc (u32 *p)
+{
+  return atomicAdd (p, 1);
+}
+
+DECLSPEC size_t get_global_id  (const u32 dimindx __attribute__((unused)))
+{
+  return blockDim.x * blockIdx.x + threadIdx.x;
+}
+
+DECLSPEC size_t get_local_id (const u32 dimindx __attribute__((unused)))
+{
+  return threadIdx.x;
+}
+
+DECLSPEC size_t get_local_size (const u32 dimindx __attribute__((unused)))
+{
+  // verify
+  return blockDim.x;
+}
+
+#endif
+
+#ifdef IS_OPENCL
+#endif
diff --git a/OpenCL/inc_platform.h b/OpenCL/inc_platform.h
new file mode 100644
index 000000000..fd3d310d1
--- /dev/null
+++ b/OpenCL/inc_platform.h
@@ -0,0 +1,24 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+#ifndef _INC_PLATFORM_H
+
+#ifdef IS_CUDA
+DECLSPEC u32    atomic_dec      (u32 *p);
+DECLSPEC u32    atomic_inc      (u32 *p);
+DECLSPEC size_t get_global_id   (const u32 dimindx __attribute__((unused)));
+DECLSPEC size_t get_local_id    (const u32 dimindx __attribute__((unused)));
+DECLSPEC size_t get_local_size  (const u32 dimindx __attribute__((unused)));
+DECLSPEC uint4  uint4_init      (const u32 a);
+DECLSPEC uint4  uint4_init      (const u32 a, const u32 b, const u32 c, const u32 d);
+DECLSPEC __inline__ u8    rotate (const u8  v, const int i);
+DECLSPEC __inline__ u32   rotate (const u32 v, const int i);
+DECLSPEC __inline__ u64   rotate (const u64 v, const int i);
+
+#define rotate(a,n) (((a) << (n)) | ((a) >> (32 - (n))))
+#define bitselect(a,b,c) ((a) ^ ((c) & ((b) ^ (a))))
+#endif
+
+#endif // _INC_PLATFORM_H
diff --git a/OpenCL/inc_types.h b/OpenCL/inc_types.h
index b9eb3bd03..0e254c24a 100644
--- a/OpenCL/inc_types.h
+++ b/OpenCL/inc_types.h
@@ -6,6 +6,10 @@
 #ifndef _INC_TYPES_H
 #define _INC_TYPES_H
 
+#ifdef IS_CUDA
+typedef unsigned char uchar;
+#endif
+
 #ifdef KERNEL_STATIC
 typedef uchar  u8;
 typedef ushort u16;
diff --git a/OpenCL/inc_vendor.h b/OpenCL/inc_vendor.h
index ba85cbc74..b2bbd9037 100644
--- a/OpenCL/inc_vendor.h
+++ b/OpenCL/inc_vendor.h
@@ -6,12 +6,25 @@
 #ifndef _INC_VENDOR_H
 #define _INC_VENDOR_H
 
-#ifdef _CPU_OPENCL_EMU_H
+#if defined _CPU_OPENCL_EMU_H
+#define IS_NATIVE
+#elif defined __CUDACC__
+#define IS_CUDA
+#else
+#define IS_OPENCL
+#endif
+
+#if defined IS_NATIVE
 #define CONSTANT_AS
 #define GLOBAL_AS
 #define LOCAL_AS
 #define KERNEL_FQ
-#else
+#elif defined IS_CUDA
+#define CONSTANT_AS
+#define GLOBAL_AS
+#define LOCAL_AS
+#define KERNEL_FQ   __global__
+#elif defined IS_OPENCL
 #define CONSTANT_AS __constant
 #define GLOBAL_AS   __global
 #define LOCAL_AS    __local
@@ -90,11 +103,15 @@
 #if defined IS_CPU
 #define DECLSPEC inline
 #elif defined IS_GPU
+#if defined IS_CUDA
+#define DECLSPEC __device__
+#else
 #if defined IS_AMD
 #define DECLSPEC inline static
 #else
 #define DECLSPEC
 #endif
+#endif
 #else
 #define DECLSPEC
 #endif
diff --git a/OpenCL/m00000_a3-optimized.cl b/OpenCL/m00000_a3-optimized.cl
index 72403afb8..ef27d52f1 100644
--- a/OpenCL/m00000_a3-optimized.cl
+++ b/OpenCL/m00000_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m02500-pure.cl b/OpenCL/m02500-pure.cl
index a4ebce4d5..8066d3298 100644
--- a/OpenCL/m02500-pure.cl
+++ b/OpenCL/m02500-pure.cl
@@ -679,7 +679,11 @@ KERNEL_FQ void m02500_aux3 (KERN_ATTR_TMPS_ESALT (wpa_pbkdf2_tmp_t, wpa_eapol_t)
     s_te4[i] = te4[i];
   }
 
+  #ifdef IS_CUDA
+  __syncthreads();
+  #else
   barrier (CLK_LOCAL_MEM_FENCE);
+  #endif
 
   #else
 
diff --git a/include/opencl.h b/include/backend.h
similarity index 88%
rename from include/opencl.h
rename to include/backend.h
index bd45111c2..9b25a3496 100644
--- a/include/opencl.h
+++ b/include/backend.h
@@ -3,8 +3,8 @@
  * License.....: MIT
  */
 
-#ifndef _OPENCL_H
-#define _OPENCL_H
+#ifndef _BACKEND_H
+#define _BACKEND_H
 
 #include <stdio.h>
 #include <errno.h>
@@ -22,8 +22,11 @@ static const char CL_VENDOR_MESA[]            = "Mesa";
 static const char CL_VENDOR_NV[]              = "NVIDIA Corporation";
 static const char CL_VENDOR_POCL[]            = "The pocl project";
 
-int  ocl_init  (hashcat_ctx_t *hashcat_ctx);
-void ocl_close (hashcat_ctx_t *hashcat_ctx);
+int  cuda_init  (hashcat_ctx_t *hashcat_ctx);
+void cuda_close (hashcat_ctx_t *hashcat_ctx);
+
+int  ocl_init   (hashcat_ctx_t *hashcat_ctx);
+void ocl_close  (hashcat_ctx_t *hashcat_ctx);
 
 int hc_clBuildProgram            (hashcat_ctx_t *hashcat_ctx, cl_program program, cl_uint num_devices, const cl_device_id *device_list, const char *options, void (CL_CALLBACK *pfn_notify) (cl_program program, void *user_data), void *user_data);
 int hc_clCreateBuffer            (hashcat_ctx_t *hashcat_ctx, cl_context context, cl_mem_flags flags, size_t size, void *host_ptr, cl_mem *mem);
@@ -82,20 +85,20 @@ void generate_cached_kernel_mp_filename  (const u32 opti_type, const u64 opts_ty
 void generate_source_kernel_amp_filename (const u32 attack_kern, char *shared_dir, char *source_file);
 void generate_cached_kernel_amp_filename (const u32 attack_kern, char *profile_dir, const char *device_name_chksum, char *cached_file);
 
-int  opencl_ctx_init                  (hashcat_ctx_t *hashcat_ctx);
-void opencl_ctx_destroy               (hashcat_ctx_t *hashcat_ctx);
+int  backend_ctx_init                  (hashcat_ctx_t *hashcat_ctx);
+void backend_ctx_destroy               (hashcat_ctx_t *hashcat_ctx);
 
-int  opencl_ctx_devices_init          (hashcat_ctx_t *hashcat_ctx, const int comptime);
-void opencl_ctx_devices_destroy       (hashcat_ctx_t *hashcat_ctx);
-void opencl_ctx_devices_sync_tuning   (hashcat_ctx_t *hashcat_ctx);
-void opencl_ctx_devices_update_power  (hashcat_ctx_t *hashcat_ctx);
-void opencl_ctx_devices_kernel_loops  (hashcat_ctx_t *hashcat_ctx);
+int  backend_ctx_devices_init          (hashcat_ctx_t *hashcat_ctx, const int comptime);
+void backend_ctx_devices_destroy       (hashcat_ctx_t *hashcat_ctx);
+void backend_ctx_devices_sync_tuning   (hashcat_ctx_t *hashcat_ctx);
+void backend_ctx_devices_update_power  (hashcat_ctx_t *hashcat_ctx);
+void backend_ctx_devices_kernel_loops  (hashcat_ctx_t *hashcat_ctx);
 
-int  opencl_session_begin             (hashcat_ctx_t *hashcat_ctx);
-void opencl_session_destroy           (hashcat_ctx_t *hashcat_ctx);
-void opencl_session_reset             (hashcat_ctx_t *hashcat_ctx);
-int  opencl_session_update_combinator (hashcat_ctx_t *hashcat_ctx);
-int  opencl_session_update_mp         (hashcat_ctx_t *hashcat_ctx);
-int  opencl_session_update_mp_rl      (hashcat_ctx_t *hashcat_ctx, const u32 css_cnt_l, const u32 css_cnt_r);
+int  backend_session_begin             (hashcat_ctx_t *hashcat_ctx);
+void backend_session_destroy           (hashcat_ctx_t *hashcat_ctx);
+void backend_session_reset             (hashcat_ctx_t *hashcat_ctx);
+int  backend_session_update_combinator (hashcat_ctx_t *hashcat_ctx);
+int  backend_session_update_mp         (hashcat_ctx_t *hashcat_ctx);
+int  backend_session_update_mp_rl      (hashcat_ctx_t *hashcat_ctx, const u32 css_cnt_l, const u32 css_cnt_r);
 
-#endif // _OPENCL_H
+#endif // _BACKEND_H
diff --git a/include/ext_cuda.h b/include/ext_cuda.h
new file mode 100644
index 000000000..d51fd2286
--- /dev/null
+++ b/include/ext_cuda.h
@@ -0,0 +1,1042 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+#ifndef _EXT_CUDA_H
+#define _EXT_CUDA_H
+
+/**
+ * from cuda.h (/usr/local/cuda-10.1/targets/x86_64-linux/include/cuda.h)
+ */
+
+#define __CUDA_API_VERSION 10010
+
+/**
+ * CUDA device pointer
+ * CUdeviceptr is defined as an unsigned integer type whose size matches the size of a pointer on the target platform.
+ */
+#if __CUDA_API_VERSION >= 3020
+
+#if defined(_WIN64) || defined(__LP64__)
+typedef unsigned long long CUdeviceptr;
+#else
+typedef unsigned int CUdeviceptr;
+#endif
+
+#endif /* __CUDA_API_VERSION >= 3020 */
+
+typedef int CUdevice;                                     /**< CUDA device */
+typedef struct CUctx_st *CUcontext;                       /**< CUDA context */
+typedef struct CUevent_st *CUevent;                       /**< CUDA event */
+typedef struct CUfunc_st *CUfunction;                     /**< CUDA function */
+typedef struct CUmod_st *CUmodule;                        /**< CUDA module */
+typedef struct CUstream_st *CUstream;                     /**< CUDA stream */
+
+typedef enum cudaError_enum {
+    /**
+     * The API call returned with no errors. In the case of query calls, this
+     * also means that the operation being queried is complete (see
+     * ::cuEventQuery() and ::cuStreamQuery()).
+     */
+    CUDA_SUCCESS                              = 0,
+
+    /**
+     * This indicates that one or more of the parameters passed to the API call
+     * is not within an acceptable range of values.
+     */
+    CUDA_ERROR_INVALID_VALUE                  = 1,
+
+    /**
+     * The API call failed because it was unable to allocate enough memory to
+     * perform the requested operation.
+     */
+    CUDA_ERROR_OUT_OF_MEMORY                  = 2,
+
+    /**
+     * This indicates that the CUDA driver has not been initialized with
+     * ::cuInit() or that initialization has failed.
+     */
+    CUDA_ERROR_NOT_INITIALIZED                = 3,
+
+    /**
+     * This indicates that the CUDA driver is in the process of shutting down.
+     */
+    CUDA_ERROR_DEINITIALIZED                  = 4,
+
+    /**
+     * This indicates profiler is not initialized for this run. This can
+     * happen when the application is running with external profiling tools
+     * like visual profiler.
+     */
+    CUDA_ERROR_PROFILER_DISABLED              = 5,
+
+    /**
+     * \deprecated
+     * This error return is deprecated as of CUDA 5.0. It is no longer an error
+     * to attempt to enable/disable the profiling via ::cuProfilerStart or
+     * ::cuProfilerStop without initialization.
+     */
+    CUDA_ERROR_PROFILER_NOT_INITIALIZED       = 6,
+
+    /**
+     * \deprecated
+     * This error return is deprecated as of CUDA 5.0. It is no longer an error
+     * to call cuProfilerStart() when profiling is already enabled.
+     */
+    CUDA_ERROR_PROFILER_ALREADY_STARTED       = 7,
+
+    /**
+     * \deprecated
+     * This error return is deprecated as of CUDA 5.0. It is no longer an error
+     * to call cuProfilerStop() when profiling is already disabled.
+     */
+    CUDA_ERROR_PROFILER_ALREADY_STOPPED       = 8,
+
+    /**
+     * This indicates that no CUDA-capable devices were detected by the installed
+     * CUDA driver.
+     */
+    CUDA_ERROR_NO_DEVICE                      = 100,
+
+    /**
+     * This indicates that the device ordinal supplied by the user does not
+     * correspond to a valid CUDA device.
+     */
+    CUDA_ERROR_INVALID_DEVICE                 = 101,
+
+
+    /**
+     * This indicates that the device kernel image is invalid. This can also
+     * indicate an invalid CUDA module.
+     */
+    CUDA_ERROR_INVALID_IMAGE                  = 200,
+
+    /**
+     * This most frequently indicates that there is no context bound to the
+     * current thread. This can also be returned if the context passed to an
+     * API call is not a valid handle (such as a context that has had
+     * ::cuCtxDestroy() invoked on it). This can also be returned if a user
+     * mixes different API versions (i.e. 3010 context with 3020 API calls).
+     * See ::cuCtxGetApiVersion() for more details.
+     */
+    CUDA_ERROR_INVALID_CONTEXT                = 201,
+
+    /**
+     * This indicated that the context being supplied as a parameter to the
+     * API call was already the active context.
+     * \deprecated
+     * This error return is deprecated as of CUDA 3.2. It is no longer an
+     * error to attempt to push the active context via ::cuCtxPushCurrent().
+     */
+    CUDA_ERROR_CONTEXT_ALREADY_CURRENT        = 202,
+
+    /**
+     * This indicates that a map or register operation has failed.
+     */
+    CUDA_ERROR_MAP_FAILED                     = 205,
+
+    /**
+     * This indicates that an unmap or unregister operation has failed.
+     */
+    CUDA_ERROR_UNMAP_FAILED                   = 206,
+
+    /**
+     * This indicates that the specified array is currently mapped and thus
+     * cannot be destroyed.
+     */
+    CUDA_ERROR_ARRAY_IS_MAPPED                = 207,
+
+    /**
+     * This indicates that the resource is already mapped.
+     */
+    CUDA_ERROR_ALREADY_MAPPED                 = 208,
+
+    /**
+     * This indicates that there is no kernel image available that is suitable
+     * for the device. This can occur when a user specifies code generation
+     * options for a particular CUDA source file that do not include the
+     * corresponding device configuration.
+     */
+    CUDA_ERROR_NO_BINARY_FOR_GPU              = 209,
+
+    /**
+     * This indicates that a resource has already been acquired.
+     */
+    CUDA_ERROR_ALREADY_ACQUIRED               = 210,
+
+    /**
+     * This indicates that a resource is not mapped.
+     */
+    CUDA_ERROR_NOT_MAPPED                     = 211,
+
+    /**
+     * This indicates that a mapped resource is not available for access as an
+     * array.
+     */
+    CUDA_ERROR_NOT_MAPPED_AS_ARRAY            = 212,
+
+    /**
+     * This indicates that a mapped resource is not available for access as a
+     * pointer.
+     */
+    CUDA_ERROR_NOT_MAPPED_AS_POINTER          = 213,
+
+    /**
+     * This indicates that an uncorrectable ECC error was detected during
+     * execution.
+     */
+    CUDA_ERROR_ECC_UNCORRECTABLE              = 214,
+
+    /**
+     * This indicates that the ::CUlimit passed to the API call is not
+     * supported by the active device.
+     */
+    CUDA_ERROR_UNSUPPORTED_LIMIT              = 215,
+
+    /**
+     * This indicates that the ::CUcontext passed to the API call can
+     * only be bound to a single CPU thread at a time but is already
+     * bound to a CPU thread.
+     */
+    CUDA_ERROR_CONTEXT_ALREADY_IN_USE         = 216,
+
+    /**
+     * This indicates that peer access is not supported across the given
+     * devices.
+     */
+    CUDA_ERROR_PEER_ACCESS_UNSUPPORTED        = 217,
+
+    /**
+     * This indicates that a PTX JIT compilation failed.
+     */
+    CUDA_ERROR_INVALID_PTX                    = 218,
+
+    /**
+     * This indicates an error with OpenGL or DirectX context.
+     */
+    CUDA_ERROR_INVALID_GRAPHICS_CONTEXT       = 219,
+
+    /**
+    * This indicates that an uncorrectable NVLink error was detected during the
+    * execution.
+    */
+    CUDA_ERROR_NVLINK_UNCORRECTABLE           = 220,
+
+    /**
+    * This indicates that the PTX JIT compiler library was not found.
+    */
+    CUDA_ERROR_JIT_COMPILER_NOT_FOUND         = 221,
+
+    /**
+     * This indicates that the device kernel source is invalid.
+     */
+    CUDA_ERROR_INVALID_SOURCE                 = 300,
+
+    /**
+     * This indicates that the file specified was not found.
+     */
+    CUDA_ERROR_FILE_NOT_FOUND                 = 301,
+
+    /**
+     * This indicates that a link to a shared object failed to resolve.
+     */
+    CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302,
+
+    /**
+     * This indicates that initialization of a shared object failed.
+     */
+    CUDA_ERROR_SHARED_OBJECT_INIT_FAILED      = 303,
+
+    /**
+     * This indicates that an OS call failed.
+     */
+    CUDA_ERROR_OPERATING_SYSTEM               = 304,
+
+    /**
+     * This indicates that a resource handle passed to the API call was not
+     * valid. Resource handles are opaque types like ::CUstream and ::CUevent.
+     */
+    CUDA_ERROR_INVALID_HANDLE                 = 400,
+
+    /**
+     * This indicates that a resource required by the API call is not in a
+     * valid state to perform the requested operation.
+     */
+    CUDA_ERROR_ILLEGAL_STATE                  = 401,
+
+    /**
+     * This indicates that a named symbol was not found. Examples of symbols
+     * are global/constant variable names, texture names, and surface names.
+     */
+    CUDA_ERROR_NOT_FOUND                      = 500,
+
+    /**
+     * This indicates that asynchronous operations issued previously have not
+     * completed yet. This result is not actually an error, but must be indicated
+     * differently than ::CUDA_SUCCESS (which indicates completion). Calls that
+     * may return this value include ::cuEventQuery() and ::cuStreamQuery().
+     */
+    CUDA_ERROR_NOT_READY                      = 600,
+
+    /**
+     * While executing a kernel, the device encountered a
+     * load or store instruction on an invalid memory address.
+     * This leaves the process in an inconsistent state and any further CUDA work
+     * will return the same error. To continue using CUDA, the process must be terminated
+     * and relaunched.
+     */
+    CUDA_ERROR_ILLEGAL_ADDRESS                = 700,
+
+    /**
+     * This indicates that a launch did not occur because it did not have
+     * appropriate resources. This error usually indicates that the user has
+     * attempted to pass too many arguments to the device kernel, or the
+     * kernel launch specifies too many threads for the kernel's register
+     * count. Passing arguments of the wrong size (i.e. a 64-bit pointer
+     * when a 32-bit int is expected) is equivalent to passing too many
+     * arguments and can also result in this error.
+     */
+    CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES        = 701,
+
+    /**
+     * This indicates that the device kernel took too long to execute. This can
+     * only occur if timeouts are enabled - see the device attribute
+     * ::CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT for more information.
+     * This leaves the process in an inconsistent state and any further CUDA work
+     * will return the same error. To continue using CUDA, the process must be terminated
+     * and relaunched.
+     */
+    CUDA_ERROR_LAUNCH_TIMEOUT                 = 702,
+
+    /**
+     * This error indicates a kernel launch that uses an incompatible texturing
+     * mode.
+     */
+    CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING  = 703,
+
+    /**
+     * This error indicates that a call to ::cuCtxEnablePeerAccess() is
+     * trying to re-enable peer access to a context which has already
+     * had peer access to it enabled.
+     */
+    CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED    = 704,
+
+    /**
+     * This error indicates that ::cuCtxDisablePeerAccess() is
+     * trying to disable peer access which has not been enabled yet
+     * via ::cuCtxEnablePeerAccess().
+     */
+    CUDA_ERROR_PEER_ACCESS_NOT_ENABLED        = 705,
+
+    /**
+     * This error indicates that the primary context for the specified device
+     * has already been initialized.
+     */
+    CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE         = 708,
+
+    /**
+     * This error indicates that the context current to the calling thread
+     * has been destroyed using ::cuCtxDestroy, or is a primary context which
+     * has not yet been initialized.
+     */
+    CUDA_ERROR_CONTEXT_IS_DESTROYED           = 709,
+
+    /**
+     * A device-side assert triggered during kernel execution. The context
+     * cannot be used anymore, and must be destroyed. All existing device
+     * memory allocations from this context are invalid and must be
+     * reconstructed if the program is to continue using CUDA.
+     */
+    CUDA_ERROR_ASSERT                         = 710,
+
+    /**
+     * This error indicates that the hardware resources required to enable
+     * peer access have been exhausted for one or more of the devices
+     * passed to ::cuCtxEnablePeerAccess().
+     */
+    CUDA_ERROR_TOO_MANY_PEERS                 = 711,
+
+    /**
+     * This error indicates that the memory range passed to ::cuMemHostRegister()
+     * has already been registered.
+     */
+    CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED = 712,
+
+    /**
+     * This error indicates that the pointer passed to ::cuMemHostUnregister()
+     * does not correspond to any currently registered memory region.
+     */
+    CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED     = 713,
+
+    /**
+     * While executing a kernel, the device encountered a stack error.
+     * This can be due to stack corruption or exceeding the stack size limit.
+     * This leaves the process in an inconsistent state and any further CUDA work
+     * will return the same error. To continue using CUDA, the process must be terminated
+     * and relaunched.
+     */
+    CUDA_ERROR_HARDWARE_STACK_ERROR           = 714,
+
+    /**
+     * While executing a kernel, the device encountered an illegal instruction.
+     * This leaves the process in an inconsistent state and any further CUDA work
+     * will return the same error. To continue using CUDA, the process must be terminated
+     * and relaunched.
+     */
+    CUDA_ERROR_ILLEGAL_INSTRUCTION            = 715,
+
+    /**
+     * While executing a kernel, the device encountered a load or store instruction
+     * on a memory address which is not aligned.
+     * This leaves the process in an inconsistent state and any further CUDA work
+     * will return the same error. To continue using CUDA, the process must be terminated
+     * and relaunched.
+     */
+    CUDA_ERROR_MISALIGNED_ADDRESS             = 716,
+
+    /**
+     * While executing a kernel, the device encountered an instruction
+     * which can only operate on memory locations in certain address spaces
+     * (global, shared, or local), but was supplied a memory address not
+     * belonging to an allowed address space.
+     * This leaves the process in an inconsistent state and any further CUDA work
+     * will return the same error. To continue using CUDA, the process must be terminated
+     * and relaunched.
+     */
+    CUDA_ERROR_INVALID_ADDRESS_SPACE          = 717,
+
+    /**
+     * While executing a kernel, the device program counter wrapped its address space.
+     * This leaves the process in an inconsistent state and any further CUDA work
+     * will return the same error. To continue using CUDA, the process must be terminated
+     * and relaunched.
+     */
+    CUDA_ERROR_INVALID_PC                     = 718,
+
+    /**
+     * An exception occurred on the device while executing a kernel. Common
+     * causes include dereferencing an invalid device pointer and accessing
+     * out of bounds shared memory. Less common cases can be system specific - more
+     * information about these cases can be found in the system specific user guide.
+     * This leaves the process in an inconsistent state and any further CUDA work
+     * will return the same error. To continue using CUDA, the process must be terminated
+     * and relaunched.
+     */
+    CUDA_ERROR_LAUNCH_FAILED                  = 719,
+
+    /**
+     * This error indicates that the number of blocks launched per grid for a kernel that was
+     * launched via either ::cuLaunchCooperativeKernel or ::cuLaunchCooperativeKernelMultiDevice
+     * exceeds the maximum number of blocks as allowed by ::cuOccupancyMaxActiveBlocksPerMultiprocessor
+     * or ::cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags times the number of multiprocessors
+     * as specified by the device attribute ::CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT.
+     */
+    CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE   = 720,
+
+    /**
+     * This error indicates that the attempted operation is not permitted.
+     */
+    CUDA_ERROR_NOT_PERMITTED                  = 800,
+
+    /**
+     * This error indicates that the attempted operation is not supported
+     * on the current system or device.
+     */
+    CUDA_ERROR_NOT_SUPPORTED                  = 801,
+
+    /**
+     * This error indicates that the system is not yet ready to start any CUDA
+     * work.  To continue using CUDA, verify the system configuration is in a
+     * valid state and all required driver daemons are actively running.
+     * More information about this error can be found in the system specific
+     * user guide.
+     */
+    CUDA_ERROR_SYSTEM_NOT_READY               = 802,
+
+    /**
+     * This error indicates that there is a mismatch between the versions of
+     * the display driver and the CUDA driver. Refer to the compatibility documentation
+     * for supported versions.
+     */
+    CUDA_ERROR_SYSTEM_DRIVER_MISMATCH         = 803,
+
+    /**
+     * This error indicates that the system was upgraded to run with forward compatibility
+     * but the visible hardware detected by CUDA does not support this configuration.
+     * Refer to the compatibility documentation for the supported hardware matrix or ensure
+     * that only supported hardware is visible during initialization via the CUDA_VISIBLE_DEVICES
+     * environment variable.
+     */
+    CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE = 804,
+
+    /**
+     * This error indicates that the operation is not permitted when
+     * the stream is capturing.
+     */
+    CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED     = 900,
+
+    /**
+     * This error indicates that the current capture sequence on the stream
+     * has been invalidated due to a previous error.
+     */
+    CUDA_ERROR_STREAM_CAPTURE_INVALIDATED     = 901,
+
+    /**
+     * This error indicates that the operation would have resulted in a merge
+     * of two independent capture sequences.
+     */
+    CUDA_ERROR_STREAM_CAPTURE_MERGE           = 902,
+
+    /**
+     * This error indicates that the capture was not initiated in this stream.
+     */
+    CUDA_ERROR_STREAM_CAPTURE_UNMATCHED       = 903,
+
+    /**
+     * This error indicates that the capture sequence contains a fork that was
+     * not joined to the primary stream.
+     */
+    CUDA_ERROR_STREAM_CAPTURE_UNJOINED        = 904,
+
+    /**
+     * This error indicates that a dependency would have been created which
+     * crosses the capture sequence boundary. Only implicit in-stream ordering
+     * dependencies are allowed to cross the boundary.
+     */
+    CUDA_ERROR_STREAM_CAPTURE_ISOLATION       = 905,
+
+    /**
+     * This error indicates a disallowed implicit dependency on a current capture
+     * sequence from cudaStreamLegacy.
+     */
+    CUDA_ERROR_STREAM_CAPTURE_IMPLICIT        = 906,
+
+    /**
+     * This error indicates that the operation is not permitted on an event which
+     * was last recorded in a capturing stream.
+     */
+    CUDA_ERROR_CAPTURED_EVENT                 = 907,
+
+    /**
+     * A stream capture sequence not initiated with the ::CU_STREAM_CAPTURE_MODE_RELAXED
+     * argument to ::cuStreamBeginCapture was passed to ::cuStreamEndCapture in a
+     * different thread.
+     */
+    CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD    = 908,
+
+    /**
+     * This indicates that an unknown internal error has occurred.
+     */
+    CUDA_ERROR_UNKNOWN                        = 999
+} CUresult;
+
+/**
+ * Online compiler and linker options
+ */
+typedef enum CUjit_option_enum
+{
+    /**
+     * Max number of registers that a thread may use.\n
+     * Option type: unsigned int\n
+     * Applies to: compiler only
+     */
+    CU_JIT_MAX_REGISTERS = 0,
+
+    /**
+     * IN: Specifies minimum number of threads per block to target compilation
+     * for\n
+     * OUT: Returns the number of threads the compiler actually targeted.
+     * This restricts the resource utilization fo the compiler (e.g. max
+     * registers) such that a block with the given number of threads should be
+     * able to launch based on register limitations. Note, this option does not
+     * currently take into account any other resource limitations, such as
+     * shared memory utilization.\n
+     * Cannot be combined with ::CU_JIT_TARGET.\n
+     * Option type: unsigned int\n
+     * Applies to: compiler only
+     */
+    CU_JIT_THREADS_PER_BLOCK,
+
+    /**
+     * Overwrites the option value with the total wall clock time, in
+     * milliseconds, spent in the compiler and linker\n
+     * Option type: float\n
+     * Applies to: compiler and linker
+     */
+    CU_JIT_WALL_TIME,
+
+    /**
+     * Pointer to a buffer in which to print any log messages
+     * that are informational in nature (the buffer size is specified via
+     * option ::CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES)\n
+     * Option type: char *\n
+     * Applies to: compiler and linker
+     */
+    CU_JIT_INFO_LOG_BUFFER,
+
+    /**
+     * IN: Log buffer size in bytes.  Log messages will be capped at this size
+     * (including null terminator)\n
+     * OUT: Amount of log buffer filled with messages\n
+     * Option type: unsigned int\n
+     * Applies to: compiler and linker
+     */
+    CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES,
+
+    /**
+     * Pointer to a buffer in which to print any log messages that
+     * reflect errors (the buffer size is specified via option
+     * ::CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES)\n
+     * Option type: char *\n
+     * Applies to: compiler and linker
+     */
+    CU_JIT_ERROR_LOG_BUFFER,
+
+    /**
+     * IN: Log buffer size in bytes.  Log messages will be capped at this size
+     * (including null terminator)\n
+     * OUT: Amount of log buffer filled with messages\n
+     * Option type: unsigned int\n
+     * Applies to: compiler and linker
+     */
+    CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES,
+
+    /**
+     * Level of optimizations to apply to generated code (0 - 4), with 4
+     * being the default and highest level of optimizations.\n
+     * Option type: unsigned int\n
+     * Applies to: compiler only
+     */
+    CU_JIT_OPTIMIZATION_LEVEL,
+
+    /**
+     * No option value required. Determines the target based on the current
+     * attached context (default)\n
+     * Option type: No option value needed\n
+     * Applies to: compiler and linker
+     */
+    CU_JIT_TARGET_FROM_CUCONTEXT,
+
+    /**
+     * Target is chosen based on supplied ::CUjit_target.  Cannot be
+     * combined with ::CU_JIT_THREADS_PER_BLOCK.\n
+     * Option type: unsigned int for enumerated type ::CUjit_target\n
+     * Applies to: compiler and linker
+     */
+    CU_JIT_TARGET,
+
+    /**
+     * Specifies choice of fallback strategy if matching cubin is not found.
+     * Choice is based on supplied ::CUjit_fallback.  This option cannot be
+     * used with cuLink* APIs as the linker requires exact matches.\n
+     * Option type: unsigned int for enumerated type ::CUjit_fallback\n
+     * Applies to: compiler only
+     */
+    CU_JIT_FALLBACK_STRATEGY,
+
+    /**
+     * Specifies whether to create debug information in output (-g)
+     * (0: false, default)\n
+     * Option type: int\n
+     * Applies to: compiler and linker
+     */
+    CU_JIT_GENERATE_DEBUG_INFO,
+
+    /**
+     * Generate verbose log messages (0: false, default)\n
+     * Option type: int\n
+     * Applies to: compiler and linker
+     */
+    CU_JIT_LOG_VERBOSE,
+
+    /**
+     * Generate line number information (-lineinfo) (0: false, default)\n
+     * Option type: int\n
+     * Applies to: compiler only
+     */
+    CU_JIT_GENERATE_LINE_INFO,
+
+    /**
+     * Specifies whether to enable caching explicitly (-dlcm) \n
+     * Choice is based on supplied ::CUjit_cacheMode_enum.\n
+     * Option type: unsigned int for enumerated type ::CUjit_cacheMode_enum\n
+     * Applies to: compiler only
+     */
+    CU_JIT_CACHE_MODE,
+
+    /**
+     * The below jit options are used for internal purposes only, in this version of CUDA
+     */
+    CU_JIT_NEW_SM3X_OPT,
+    CU_JIT_FAST_COMPILE,
+
+    /**
+     * Array of device symbol names that will be relocated to the corresponing
+     * host addresses stored in ::CU_JIT_GLOBAL_SYMBOL_ADDRESSES.\n
+     * Must contain ::CU_JIT_GLOBAL_SYMBOL_COUNT entries.\n
+     * When loding a device module, driver will relocate all encountered
+     * unresolved symbols to the host addresses.\n
+     * It is only allowed to register symbols that correspond to unresolved
+     * global variables.\n
+     * It is illegal to register the same device symbol at multiple addresses.\n
+     * Option type: const char **\n
+     * Applies to: dynamic linker only
+     */
+    CU_JIT_GLOBAL_SYMBOL_NAMES,
+
+    /**
+     * Array of host addresses that will be used to relocate corresponding
+     * device symbols stored in ::CU_JIT_GLOBAL_SYMBOL_NAMES.\n
+     * Must contain ::CU_JIT_GLOBAL_SYMBOL_COUNT entries.\n
+     * Option type: void **\n
+     * Applies to: dynamic linker only
+     */
+    CU_JIT_GLOBAL_SYMBOL_ADDRESSES,
+
+    /**
+     * Number of entries in ::CU_JIT_GLOBAL_SYMBOL_NAMES and
+     * ::CU_JIT_GLOBAL_SYMBOL_ADDRESSES arrays.\n
+     * Option type: unsigned int\n
+     * Applies to: dynamic linker only
+     */
+    CU_JIT_GLOBAL_SYMBOL_COUNT,
+
+    CU_JIT_NUM_OPTIONS
+
+} CUjit_option;
+
+/**
+ * Device properties
+ */
+typedef enum CUdevice_attribute_enum {
+    CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 1,              /**< Maximum number of threads per block */
+    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 2,                    /**< Maximum block dimension X */
+    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 3,                    /**< Maximum block dimension Y */
+    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 4,                    /**< Maximum block dimension Z */
+    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 5,                     /**< Maximum grid dimension X */
+    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = 6,                     /**< Maximum grid dimension Y */
+    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = 7,                     /**< Maximum grid dimension Z */
+    CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 8,        /**< Maximum shared memory available per block in bytes */
+    CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK = 8,            /**< Deprecated, use CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK */
+    CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = 9,              /**< Memory available on device for __constant__ variables in a CUDA C kernel in bytes */
+    CU_DEVICE_ATTRIBUTE_WARP_SIZE = 10,                         /**< Warp size in threads */
+    CU_DEVICE_ATTRIBUTE_MAX_PITCH = 11,                         /**< Maximum pitch in bytes allowed by memory copies */
+    CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 12,           /**< Maximum number of 32-bit registers available per block */
+    CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK = 12,               /**< Deprecated, use CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK */
+    CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13,                        /**< Typical clock frequency in kilohertz */
+    CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 14,                 /**< Alignment requirement for textures */
+    CU_DEVICE_ATTRIBUTE_GPU_OVERLAP = 15,                       /**< Device can possibly copy memory and execute a kernel concurrently. Deprecated. Use instead CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT. */
+    CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16,              /**< Number of multiprocessors on device */
+    CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = 17,               /**< Specifies whether there is a run time limit on kernels */
+    CU_DEVICE_ATTRIBUTE_INTEGRATED = 18,                        /**< Device is integrated with host memory */
+    CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 19,               /**< Device can map host memory into CUDA address space */
+    CU_DEVICE_ATTRIBUTE_COMPUTE_MODE = 20,                      /**< Compute mode (See ::CUcomputemode for details) */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH = 21,           /**< Maximum 1D texture width */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH = 22,           /**< Maximum 2D texture width */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT = 23,          /**< Maximum 2D texture height */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH = 24,           /**< Maximum 3D texture width */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT = 25,          /**< Maximum 3D texture height */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH = 26,           /**< Maximum 3D texture depth */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH = 27,   /**< Maximum 2D layered texture width */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT = 28,  /**< Maximum 2D layered texture height */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS = 29,  /**< Maximum layers in a 2D layered texture */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH = 27,     /**< Deprecated, use CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT = 28,    /**< Deprecated, use CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES = 29, /**< Deprecated, use CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS */
+    CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT = 30,                 /**< Alignment requirement for surfaces */
+    CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 31,                /**< Device can possibly execute multiple kernels concurrently */
+    CU_DEVICE_ATTRIBUTE_ECC_ENABLED = 32,                       /**< Device has ECC support enabled */
+    CU_DEVICE_ATTRIBUTE_PCI_BUS_ID = 33,                        /**< PCI bus ID of the device */
+    CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID = 34,                     /**< PCI device ID of the device */
+    CU_DEVICE_ATTRIBUTE_TCC_DRIVER = 35,                        /**< Device is using TCC driver model */
+    CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE = 36,                 /**< Peak memory clock frequency in kilohertz */
+    CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH = 37,           /**< Global memory bus width in bits */
+    CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE = 38,                     /**< Size of L2 cache in bytes */
+    CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 39,    /**< Maximum resident threads per multiprocessor */
+    CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT = 40,                /**< Number of asynchronous engines */
+    CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING = 41,                /**< Device shares a unified address space with the host */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH = 42,   /**< Maximum 1D layered texture width */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS = 43,  /**< Maximum layers in a 1D layered texture */
+    CU_DEVICE_ATTRIBUTE_CAN_TEX2D_GATHER = 44,                  /**< Deprecated, do not use. */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH = 45,    /**< Maximum 2D texture width if CUDA_ARRAY3D_TEXTURE_GATHER is set */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT = 46,   /**< Maximum 2D texture height if CUDA_ARRAY3D_TEXTURE_GATHER is set */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE = 47, /**< Alternate maximum 3D texture width */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE = 48,/**< Alternate maximum 3D texture height */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE = 49, /**< Alternate maximum 3D texture depth */
+    CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID = 50,                     /**< PCI domain ID of the device */
+    CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT = 51,           /**< Pitch alignment requirement for textures */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH = 52,      /**< Maximum cubemap texture width/height */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH = 53,  /**< Maximum cubemap layered texture width/height */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS = 54, /**< Maximum layers in a cubemap layered texture */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH = 55,           /**< Maximum 1D surface width */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH = 56,           /**< Maximum 2D surface width */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT = 57,          /**< Maximum 2D surface height */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH = 58,           /**< Maximum 3D surface width */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT = 59,          /**< Maximum 3D surface height */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH = 60,           /**< Maximum 3D surface depth */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH = 61,   /**< Maximum 1D layered surface width */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS = 62,  /**< Maximum layers in a 1D layered surface */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH = 63,   /**< Maximum 2D layered surface width */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT = 64,  /**< Maximum 2D layered surface height */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS = 65,  /**< Maximum layers in a 2D layered surface */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH = 66,      /**< Maximum cubemap surface width */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH = 67,  /**< Maximum cubemap layered surface width */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS = 68, /**< Maximum layers in a cubemap layered surface */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH = 69,    /**< Maximum 1D linear texture width */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH = 70,    /**< Maximum 2D linear texture width */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT = 71,   /**< Maximum 2D linear texture height */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH = 72,    /**< Maximum 2D linear texture pitch in bytes */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH = 73, /**< Maximum mipmapped 2D texture width */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT = 74,/**< Maximum mipmapped 2D texture height */
+    CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 75,          /**< Major compute capability version number */
+    CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 76,          /**< Minor compute capability version number */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH = 77, /**< Maximum mipmapped 1D texture width */
+    CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED = 78,       /**< Device supports stream priorities */
+    CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED = 79,         /**< Device supports caching globals in L1 */
+    CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED = 80,          /**< Device supports caching locals in L1 */
+    CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR = 81,  /**< Maximum shared memory available per multiprocessor in bytes */
+    CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR = 82,  /**< Maximum number of 32-bit registers available per multiprocessor */
+    CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY = 83,                    /**< Device can allocate managed memory on this system */
+    CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD = 84,                    /**< Device is on a multi-GPU board */
+    CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID = 85,           /**< Unique id for a group of devices on the same multi-GPU board */
+    CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED = 86,       /**< Link between the device and the host supports native atomic operations (this is a placeholder attribute, and is not supported on any current hardware)*/
+    CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO = 87,  /**< Ratio of single precision performance (in floating-point operations per second) to double precision performance */
+    CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS = 88,            /**< Device supports coherently accessing pageable memory without calling cudaHostRegister on it */
+    CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS = 89,         /**< Device can coherently access managed memory concurrently with the CPU */
+    CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED = 90,      /**< Device supports compute preemption. */
+    CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM = 91, /**< Device can access host registered memory at the same virtual address as the CPU */
+    CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS = 92,            /**< ::cuStreamBatchMemOp and related APIs are supported. */
+    CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS = 93,     /**< 64-bit operations are supported in ::cuStreamBatchMemOp and related APIs. */
+    CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR = 94,     /**< ::CU_STREAM_WAIT_VALUE_NOR is supported. */
+    CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH = 95,                /**< Device supports launching cooperative kernels via ::cuLaunchCooperativeKernel */
+    CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH = 96,   /**< Device can participate in cooperative kernels launched via ::cuLaunchCooperativeKernelMultiDevice */
+    CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN = 97, /**< Maximum optin shared memory per block */
+    CU_DEVICE_ATTRIBUTE_CAN_FLUSH_REMOTE_WRITES = 98,           /**< Both the ::CU_STREAM_WAIT_VALUE_FLUSH flag and the ::CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES MemOp are supported on the device. See \ref CUDA_MEMOP for additional details. */
+    CU_DEVICE_ATTRIBUTE_HOST_REGISTER_SUPPORTED = 99,           /**< Device supports host memory registration via ::cudaHostRegister. */
+    CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES = 100, /**< Device accesses pageable memory via the host's page tables. */
+    CU_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST = 101, /**< The host can directly access managed memory on the device without migration. */
+    CU_DEVICE_ATTRIBUTE_MAX
+} CUdevice_attribute;
+
+/**
+ * Function cache configurations
+ */
+typedef enum CUfunc_cache_enum {
+    CU_FUNC_CACHE_PREFER_NONE    = 0x00, /**< no preference for shared memory or L1 (default) */
+    CU_FUNC_CACHE_PREFER_SHARED  = 0x01, /**< prefer larger shared memory and smaller L1 cache */
+    CU_FUNC_CACHE_PREFER_L1      = 0x02, /**< prefer larger L1 cache and smaller shared memory */
+    CU_FUNC_CACHE_PREFER_EQUAL   = 0x03  /**< prefer equal sized L1 cache and shared memory */
+} CUfunc_cache;
+
+/**
+ * Shared memory configurations
+ */
+typedef enum CUsharedconfig_enum {
+    CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE    = 0x00, /**< set default shared memory bank size */
+    CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE  = 0x01, /**< set shared memory bank width to four bytes */
+    CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE = 0x02  /**< set shared memory bank width to eight bytes */
+} CUsharedconfig;
+
+/**
+ * Function properties
+ */
+typedef enum CUfunction_attribute_enum {
+    /**
+     * The maximum number of threads per block, beyond which a launch of the
+     * function would fail. This number depends on both the function and the
+     * device on which the function is currently loaded.
+     */
+    CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0,
+
+    /**
+     * The size in bytes of statically-allocated shared memory required by
+     * this function. This does not include dynamically-allocated shared
+     * memory requested by the user at runtime.
+     */
+    CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES = 1,
+
+    /**
+     * The size in bytes of user-allocated constant memory required by this
+     * function.
+     */
+    CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES = 2,
+
+    /**
+     * The size in bytes of local memory used by each thread of this function.
+     */
+    CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES = 3,
+
+    /**
+     * The number of registers used by each thread of this function.
+     */
+    CU_FUNC_ATTRIBUTE_NUM_REGS = 4,
+
+    /**
+     * The PTX virtual architecture version for which the function was
+     * compiled. This value is the major PTX version * 10 + the minor PTX
+     * version, so a PTX version 1.3 function would return the value 13.
+     * Note that this may return the undefined value of 0 for cubins
+     * compiled prior to CUDA 3.0.
+     */
+    CU_FUNC_ATTRIBUTE_PTX_VERSION = 5,
+
+    /**
+     * The binary architecture version for which the function was compiled.
+     * This value is the major binary version * 10 + the minor binary version,
+     * so a binary version 1.3 function would return the value 13. Note that
+     * this will return a value of 10 for legacy cubins that do not have a
+     * properly-encoded binary architecture version.
+     */
+    CU_FUNC_ATTRIBUTE_BINARY_VERSION = 6,
+
+    /**
+     * The attribute to indicate whether the function has been compiled with
+     * user specified option "-Xptxas --dlcm=ca" set .
+     */
+    CU_FUNC_ATTRIBUTE_CACHE_MODE_CA = 7,
+
+    /**
+     * The maximum size in bytes of dynamically-allocated shared memory that can be used by
+     * this function. If the user-specified dynamic shared memory size is larger than this
+     * value, the launch will fail.
+     * See ::cuFuncSetAttribute
+     */
+    CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES = 8,
+
+    /**
+     * On devices where the L1 cache and shared memory use the same hardware resources,
+     * this sets the shared memory carveout preference, in percent of the total shared memory.
+     * Refer to ::CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR.
+     * This is only a hint, and the driver can choose a different ratio if required to execute the function.
+     * See ::cuFuncSetAttribute
+     */
+    CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT = 9,
+
+    CU_FUNC_ATTRIBUTE_MAX
+} CUfunction_attribute;
+
+#ifdef _WIN32
+#define CUDAAPI __stdcall
+#else
+#define CUDAAPI
+#endif
+
+#define CUDA_API_CALL CUDAAPI
+
+typedef CUresult (CUDA_API_CALL *CUDA_CUCTXCREATE)              (CUcontext *, unsigned int, CUdevice);
+typedef CUresult (CUDA_API_CALL *CUDA_CUCTXDESTROY)             (CUcontext);
+typedef CUresult (CUDA_API_CALL *CUDA_CUCTXGETCACHECONFIG)      (CUfunc_cache *);
+typedef CUresult (CUDA_API_CALL *CUDA_CUCTXGETCURRENT)          (CUcontext *);
+typedef CUresult (CUDA_API_CALL *CUDA_CUCTXGETSHAREDMEMCONFIG)  (CUsharedconfig *);
+typedef CUresult (CUDA_API_CALL *CUDA_CUCTXPOPCURRENT)          (CUcontext *);
+typedef CUresult (CUDA_API_CALL *CUDA_CUCTXPUSHCURRENT)         (CUcontext);
+typedef CUresult (CUDA_API_CALL *CUDA_CUCTXSETCACHECONFIG)      (CUfunc_cache);
+typedef CUresult (CUDA_API_CALL *CUDA_CUCTXSETCURRENT)          (CUcontext);
+typedef CUresult (CUDA_API_CALL *CUDA_CUCTXSETSHAREDMEMCONFIG)  (CUsharedconfig);
+typedef CUresult (CUDA_API_CALL *CUDA_CUCTXSYNCHRONIZE)         ();
+typedef CUresult (CUDA_API_CALL *CUDA_CUDEVICEGETATTRIBUTE)     (int *, CUdevice_attribute, CUdevice);
+typedef CUresult (CUDA_API_CALL *CUDA_CUDEVICEGETCOUNT)         (int *);
+typedef CUresult (CUDA_API_CALL *CUDA_CUDEVICEGET)              (CUdevice *, int);
+typedef CUresult (CUDA_API_CALL *CUDA_CUDEVICEGETNAME)          (char *, int, CUdevice);
+typedef CUresult (CUDA_API_CALL *CUDA_CUDEVICETOTALMEM)         (size_t *, CUdevice);
+typedef CUresult (CUDA_API_CALL *CUDA_CUDRIVERGETVERSION)       (int *);
+typedef CUresult (CUDA_API_CALL *CUDA_CUEVENTCREATE)            (CUevent *, unsigned int);
+typedef CUresult (CUDA_API_CALL *CUDA_CUEVENTDESTROY)           (CUevent);
+typedef CUresult (CUDA_API_CALL *CUDA_CUEVENTELAPSEDTIME)       (float *, CUevent, CUevent);
+typedef CUresult (CUDA_API_CALL *CUDA_CUEVENTQUERY)             (CUevent);
+typedef CUresult (CUDA_API_CALL *CUDA_CUEVENTRECORD)            (CUevent, CUstream);
+typedef CUresult (CUDA_API_CALL *CUDA_CUEVENTSYNCHRONIZE)       (CUevent);
+typedef CUresult (CUDA_API_CALL *CUDA_CUFUNCGETATTRIBUTE)       (int *, CUfunction_attribute, CUfunction);
+typedef CUresult (CUDA_API_CALL *CUDA_CUFUNCSETATTRIBUTE)       (CUfunction, CUfunction_attribute, int);
+typedef CUresult (CUDA_API_CALL *CUDA_CUFUNCSETCACHECONFIG)     (CUfunction, CUfunc_cache);
+typedef CUresult (CUDA_API_CALL *CUDA_CUFUNCSETSHAREDMEMCONFIG) (CUfunction, CUsharedconfig);
+typedef CUresult (CUDA_API_CALL *CUDA_CUGETERRORNAME)           (CUresult, const char **);
+typedef CUresult (CUDA_API_CALL *CUDA_CUGETERRORSTRING)         (CUresult, const char **);
+typedef CUresult (CUDA_API_CALL *CUDA_CUINIT)                   (unsigned int);
+typedef CUresult (CUDA_API_CALL *CUDA_CULAUNCHKERNEL)           (CUfunction, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, CUstream, void **, void **);
+typedef CUresult (CUDA_API_CALL *CUDA_CUMEMALLOC)               (CUdeviceptr *, size_t);
+typedef CUresult (CUDA_API_CALL *CUDA_CUMEMALLOCHOST)           (void **, size_t);
+typedef CUresult (CUDA_API_CALL *CUDA_CUMEMCPYDTOD)             (CUdeviceptr, CUdeviceptr, size_t);
+typedef CUresult (CUDA_API_CALL *CUDA_CUMEMCPYDTOH)             (void *, CUdeviceptr, size_t);
+typedef CUresult (CUDA_API_CALL *CUDA_CUMEMCPYHTOD)             (CUdeviceptr, const void *, size_t);
+typedef CUresult (CUDA_API_CALL *CUDA_CUMEMFREE)                (CUdeviceptr);
+typedef CUresult (CUDA_API_CALL *CUDA_CUMEMFREEHOST)            (void *);
+typedef CUresult (CUDA_API_CALL *CUDA_CUMEMGETINFO)             (size_t *, size_t *);
+typedef CUresult (CUDA_API_CALL *CUDA_CUMEMSETD32)              (CUdeviceptr, unsigned int, size_t);
+typedef CUresult (CUDA_API_CALL *CUDA_CUMEMSETD8)               (CUdeviceptr, unsigned char, size_t);
+typedef CUresult (CUDA_API_CALL *CUDA_CUMODULEGETFUNCTION)      (CUfunction *, CUmodule, const char *);
+typedef CUresult (CUDA_API_CALL *CUDA_CUMODULEGETGLOBAL)        (CUdeviceptr *, size_t *, CUmodule, const char *);
+typedef CUresult (CUDA_API_CALL *CUDA_CUMODULELOAD)             (CUmodule *, const char *);
+typedef CUresult (CUDA_API_CALL *CUDA_CUMODULELOADDATA)         (CUmodule *, const void *);
+typedef CUresult (CUDA_API_CALL *CUDA_CUMODULELOADDATAEX)       (CUmodule *, const void *, unsigned int, CUjit_option *, void **);
+typedef CUresult (CUDA_API_CALL *CUDA_CUMODULEUNLOAD)           (CUmodule);
+typedef CUresult (CUDA_API_CALL *CUDA_CUPROFILERSTART)          ();
+typedef CUresult (CUDA_API_CALL *CUDA_CUPROFILERSTOP)           ();
+typedef CUresult (CUDA_API_CALL *CUDA_CUSTREAMCREATE)           (CUstream *, unsigned int);
+typedef CUresult (CUDA_API_CALL *CUDA_CUSTREAMDESTROY)          (CUstream);
+typedef CUresult (CUDA_API_CALL *CUDA_CUSTREAMSYNCHRONIZE)      (CUstream);
+typedef CUresult (CUDA_API_CALL *CUDA_CUSTREAMWAITEVENT)        (CUstream, CUevent, unsigned int);
+
+typedef struct hc_cuda_lib
+{
+  hc_dynlib_t lib;
+
+  CUDA_CUCTXCREATE              cuCtxCreate;
+  CUDA_CUCTXDESTROY             cuCtxDestroy;
+  CUDA_CUCTXGETCACHECONFIG      cuCtxGetCacheConfig;
+  CUDA_CUCTXGETCURRENT          cuCtxGetCurrent;
+  CUDA_CUCTXGETSHAREDMEMCONFIG  cuCtxGetSharedMemConfig;
+  CUDA_CUCTXPOPCURRENT          cuCtxPopCurrent;
+  CUDA_CUCTXPUSHCURRENT         cuCtxPushCurrent;
+  CUDA_CUCTXSETCURRENT          cuCtxSetCurrent;
+  CUDA_CUCTXSETSHAREDMEMCONFIG  cuCtxSetSharedMemConfig;
+  CUDA_CUCTXSYNCHRONIZE         cuCtxSynchronize;
+  CUDA_CUDEVICEGETATTRIBUTE     cuDeviceGetAttribute;
+  CUDA_CUDEVICEGETCOUNT         cuDeviceGetCount;
+  CUDA_CUDEVICEGET              cuDeviceGet;
+  CUDA_CUDEVICEGETNAME          cuDeviceGetName;
+  CUDA_CUDEVICETOTALMEM         cuDeviceTotalMem;
+  CUDA_CUDRIVERGETVERSION       cuDriverGetVersion;
+  CUDA_CUEVENTCREATE            cuEventCreate;
+  CUDA_CUEVENTDESTROY           cuEventDestroy;
+  CUDA_CUEVENTELAPSEDTIME       cuEventElapsedTime;
+  CUDA_CUEVENTQUERY             cuEventQuery;
+  CUDA_CUEVENTRECORD            cuEventRecord;
+  CUDA_CUEVENTSYNCHRONIZE       cuEventSynchronize;
+  CUDA_CUFUNCGETATTRIBUTE       cuFuncGetAttribute;
+  CUDA_CUFUNCSETATTRIBUTE       cuFuncSetAttribute;
+  CUDA_CUFUNCSETCACHECONFIG     cuFuncSetCacheConfig;
+  CUDA_CUFUNCSETSHAREDMEMCONFIG cuFuncSetSharedMemConfig;
+  CUDA_CUGETERRORNAME           cuGetErrorName;
+  CUDA_CUGETERRORSTRING         cuGetErrorString;
+  CUDA_CUINIT                   cuInit;
+  CUDA_CULAUNCHKERNEL           cuLaunchKernel;
+  CUDA_CUMEMALLOC               cuMemAlloc;
+  CUDA_CUMEMALLOCHOST           cuMemAllocHost;
+  CUDA_CUMEMCPYDTOD             cuMemcpyDtoD;
+  CUDA_CUMEMCPYDTOH             cuMemcpyDtoH;
+  CUDA_CUMEMCPYHTOD             cuMemcpyHtoD;
+  CUDA_CUMEMFREE                cuMemFree;
+  CUDA_CUMEMFREEHOST            cuMemFreeHost;
+  CUDA_CUMEMGETINFO             cuMemGetInfo;
+  CUDA_CUMEMSETD32              cuMemsetD32;
+  CUDA_CUMEMSETD8               cuMemsetD8;
+  CUDA_CUMODULEGETFUNCTION      cuModuleGetFunction;
+  CUDA_CUMODULEGETGLOBAL        cuModuleGetGlobal;
+  CUDA_CUMODULELOAD             cuModuleLoad;
+  CUDA_CUMODULELOADDATA         cuModuleLoadData;
+  CUDA_CUMODULELOADDATAEX       cuModuleLoadDataEx;
+  CUDA_CUMODULEUNLOAD           cuModuleUnload;
+  CUDA_CUPROFILERSTART          cuProfilerStart;
+  CUDA_CUPROFILERSTOP           cuProfilerStop;
+  CUDA_CUSTREAMCREATE           cuStreamCreate;
+  CUDA_CUSTREAMDESTROY          cuStreamDestroy;
+  CUDA_CUSTREAMSYNCHRONIZE      cuStreamSynchronize;
+  CUDA_CUSTREAMWAITEVENT        cuStreamWaitEvent;
+
+} hc_cuda_lib_t;
+
+typedef hc_cuda_lib_t CUDA_PTR;
+
+#endif // _EXT_CUDA_H
diff --git a/include/types.h b/include/types.h
index 7c1960147..f35eca3f7 100644
--- a/include/types.h
+++ b/include/types.h
@@ -989,6 +989,7 @@ typedef struct link_speed
 
 } link_speed_t;
 
+#include "ext_cuda.h"
 #include "ext_OpenCL.h"
 
 typedef struct hc_device_param
@@ -1328,11 +1329,12 @@ typedef struct hc_device_param
 
 } hc_device_param_t;
 
-typedef struct opencl_ctx
+typedef struct backend_ctx
 {
   bool                enabled;
 
   void               *ocl;
+  void               *cuda;
 
   cl_uint             platforms_cnt;
   cl_platform_id     *platforms;
@@ -1369,7 +1371,7 @@ typedef struct opencl_ctx
 
   int                 force_jit_compilation;
 
-} opencl_ctx_t;
+} backend_ctx_t;
 
 typedef enum kernel_workload
 {
@@ -2299,7 +2301,7 @@ typedef struct hashcat_ctx
   loopback_ctx_t        *loopback_ctx;
   mask_ctx_t            *mask_ctx;
   module_ctx_t          *module_ctx;
-  opencl_ctx_t          *opencl_ctx;
+  backend_ctx_t         *backend_ctx;
   outcheck_ctx_t        *outcheck_ctx;
   outfile_ctx_t         *outfile_ctx;
   pidfile_ctx_t         *pidfile_ctx;
diff --git a/src/Makefile b/src/Makefile
index c2a3c2f8a..196ad6c89 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -274,13 +274,13 @@ endif # MSYS2
 ## Objects
 ##
 
-EMU_OBJS_ALL            := emu_general emu_inc_common emu_inc_scalar emu_inc_simd
+EMU_OBJS_ALL            := emu_general emu_inc_common emu_inc_platform emu_inc_scalar emu_inc_simd
 EMU_OBJS_ALL            += emu_inc_rp emu_inc_rp_optimized
 EMU_OBJS_ALL            += emu_inc_truecrypt_crc32 emu_inc_truecrypt_keyfile emu_inc_truecrypt_xts emu_inc_veracrypt_xts
 EMU_OBJS_ALL            += emu_inc_hash_md4 emu_inc_hash_md5 emu_inc_hash_ripemd160 emu_inc_hash_sha1 emu_inc_hash_sha256 emu_inc_hash_sha384 emu_inc_hash_sha512 emu_inc_hash_streebog256 emu_inc_hash_streebog512
 EMU_OBJS_ALL            += emu_inc_cipher_aes emu_inc_cipher_camellia emu_inc_cipher_des emu_inc_cipher_kuznyechik emu_inc_cipher_serpent emu_inc_cipher_twofish
 
-OBJS_ALL                := affinity autotune benchmark bitmap bitops combinator common convert cpt cpu_crc32 debugfile dictstat dispatch dynloader event ext_ADL ext_nvapi ext_nvml ext_OpenCL ext_sysfs ext_lzma filehandling folder hashcat hashes hlfmt hwmon induct interface keyboard_layout locking logfile loopback memory monitor mpsp opencl outfile_check outfile pidfile potfile restore rp rp_cpu selftest slow_candidates shared status stdout straight terminal thread timer tuningdb usage user_options wordlist $(EMU_OBJS_ALL)
+OBJS_ALL                := affinity autotune backend benchmark bitmap bitops combinator common convert cpt cpu_crc32 debugfile dictstat dispatch dynloader event ext_ADL ext_cuda ext_nvapi ext_nvml ext_OpenCL ext_sysfs ext_lzma filehandling folder hashcat hashes hlfmt hwmon induct interface keyboard_layout locking logfile loopback memory monitor mpsp outfile_check outfile pidfile potfile restore rp rp_cpu selftest slow_candidates shared status stdout straight terminal thread timer tuningdb usage user_options wordlist $(EMU_OBJS_ALL)
 
 ifeq ($(ENABLE_BRAIN),1)
 OBJS_ALL                += brain
diff --git a/src/autotune.c b/src/autotune.c
index de54fd063..ab383e63f 100644
--- a/src/autotune.c
+++ b/src/autotune.c
@@ -6,7 +6,7 @@
 #include "common.h"
 #include "types.h"
 #include "event.h"
-#include "opencl.h"
+#include "backend.h"
 #include "status.h"
 #include "autotune.h"
 
@@ -50,11 +50,11 @@ static double try_run (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_par
 static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
 {
   const hashconfig_t    *hashconfig   = hashcat_ctx->hashconfig;
-  const opencl_ctx_t    *opencl_ctx   = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t   *backend_ctx  = hashcat_ctx->backend_ctx;
   const straight_ctx_t  *straight_ctx = hashcat_ctx->straight_ctx;
   const user_options_t  *user_options = hashcat_ctx->user_options;
 
-  const double target_msec = opencl_ctx->target_msec;
+  const double target_msec = backend_ctx->target_msec;
 
   const u32 kernel_accel_min = device_param->kernel_accel_min;
   const u32 kernel_accel_max = device_param->kernel_accel_max;
@@ -283,11 +283,11 @@ HC_API_CALL void *thread_autotune (void *p)
 
   hashcat_ctx_t *hashcat_ctx = thread_param->hashcat_ctx;
 
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  if (opencl_ctx->enabled == false) return NULL;
+  if (backend_ctx->enabled == false) return NULL;
 
-  hc_device_param_t *device_param = opencl_ctx->devices_param + thread_param->tid;
+  hc_device_param_t *device_param = backend_ctx->devices_param + thread_param->tid;
 
   if (device_param->skipped == true) return NULL;
 
diff --git a/src/opencl.c b/src/backend.c
similarity index 92%
rename from src/opencl.c
rename to src/backend.c
index 4086a00f6..41cf2b645 100644
--- a/src/opencl.c
+++ b/src/backend.c
@@ -22,7 +22,7 @@
 #include "emu_inc_hash_md5.h"
 #include "event.h"
 #include "dynloader.h"
-#include "opencl.h"
+#include "backend.h"
 
 #if defined (__linux__)
 static const char *dri_card0_path = "/dev/dri/card0";
@@ -340,9 +340,9 @@ static bool test_instruction (hashcat_ctx_t *hashcat_ctx, cl_context context, cl
 
   if (CL_rc == -1) return false;
 
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   // LLVM seems to write an error message (if there's an error) directly to stderr
   // and not (as supposted to) into buffer for later request using clGetProgramBuildInfo()
@@ -543,11 +543,120 @@ void generate_cached_kernel_amp_filename (const u32 attack_kern, char *profile_d
   snprintf (cached_file, 255, "%s/kernels/amp_a%u.%s.kernel", profile_dir, attack_kern, device_name_chksum_amp_mp);
 }
 
+int cuda_init (hashcat_ctx_t *hashcat_ctx)
+{
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
+
+  CUDA_PTR *cuda = backend_ctx->cuda;
+
+  memset (cuda, 0, sizeof (CUDA_PTR));
+
+  #if   defined (_WIN)
+  cuda->lib = hc_dlopen ("cuda");
+  #elif defined (__APPLE__)
+  cuda->lib = hc_dlopen ("/System/Library/Frameworks/CUDA.framework/CUDA");
+  #elif defined (__CYGWIN__)
+  cuda->lib = hc_dlopen ("cuda.dll");
+
+  if (cuda->lib == NULL) cuda->lib = hc_dlopen ("cygcuda-1.dll");
+  #else
+  cuda->lib = hc_dlopen ("libcuda.so");
+
+  if (cuda->lib == NULL) cuda->lib = hc_dlopen ("libcuda.so.1");
+  #endif
+
+  if (cuda->lib == NULL)
+  {
+    event_log_error (hashcat_ctx, "Cannot find CUDA library.");
+
+    event_log_warning (hashcat_ctx, "You are probably missing the native CUDA runtime or driver for your platform.");
+    event_log_warning (hashcat_ctx, "NVIDIA GPUs require this runtime and/or driver:");
+    event_log_warning (hashcat_ctx, "  \"NVIDIA Driver\" (418.56 or later)");
+    event_log_warning (hashcat_ctx, NULL);
+
+    return -1;
+  }
+
+  HC_LOAD_FUNC (cuda, cuCtxCreate,              CUDA_CUCTXCREATE,               CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuCtxDestroy,             CUDA_CUCTXDESTROY,              CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuCtxGetCacheConfig,      CUDA_CUCTXGETCACHECONFIG,       CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuCtxGetCurrent,          CUDA_CUCTXGETCURRENT,           CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuCtxGetSharedMemConfig,  CUDA_CUCTXGETSHAREDMEMCONFIG,   CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuCtxPopCurrent,          CUDA_CUCTXPOPCURRENT,           CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuCtxPushCurrent,         CUDA_CUCTXPUSHCURRENT,          CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuCtxSetCurrent,          CUDA_CUCTXSETCURRENT,           CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuCtxSetSharedMemConfig,  CUDA_CUCTXSETSHAREDMEMCONFIG,   CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuCtxSynchronize,         CUDA_CUCTXSYNCHRONIZE,          CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuDeviceGetAttribute,     CUDA_CUDEVICEGETATTRIBUTE,      CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuDeviceGetCount,         CUDA_CUDEVICEGETCOUNT,          CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuDeviceGet,              CUDA_CUDEVICEGET,               CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuDeviceGetName,          CUDA_CUDEVICEGETNAME,           CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuDeviceTotalMem,         CUDA_CUDEVICETOTALMEM,          CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuDriverGetVersion,       CUDA_CUDRIVERGETVERSION,        CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuEventCreate,            CUDA_CUEVENTCREATE,             CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuEventDestroy,           CUDA_CUEVENTDESTROY,            CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuEventElapsedTime,       CUDA_CUEVENTELAPSEDTIME,        CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuEventQuery,             CUDA_CUEVENTQUERY,              CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuEventRecord,            CUDA_CUEVENTRECORD,             CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuEventSynchronize,       CUDA_CUEVENTSYNCHRONIZE,        CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuFuncGetAttribute,       CUDA_CUFUNCGETATTRIBUTE,        CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuFuncSetAttribute,       CUDA_CUFUNCSETATTRIBUTE,        CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuFuncSetCacheConfig,     CUDA_CUFUNCSETCACHECONFIG,      CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuFuncSetSharedMemConfig, CUDA_CUFUNCSETSHAREDMEMCONFIG,  CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuGetErrorName,           CUDA_CUGETERRORNAME,            CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuGetErrorString,         CUDA_CUGETERRORSTRING,          CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuInit,                   CUDA_CUINIT,                    CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuLaunchKernel,           CUDA_CULAUNCHKERNEL,            CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuMemAlloc,               CUDA_CUMEMALLOC,                CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuMemAllocHost,           CUDA_CUMEMALLOCHOST,            CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuMemcpyDtoD,             CUDA_CUMEMCPYDTOD,              CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuMemcpyDtoH,             CUDA_CUMEMCPYDTOH,              CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuMemcpyHtoD,             CUDA_CUMEMCPYHTOD,              CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuMemFree,                CUDA_CUMEMFREE,                 CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuMemFreeHost,            CUDA_CUMEMFREEHOST,             CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuMemGetInfo,             CUDA_CUMEMGETINFO,              CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuMemsetD32,              CUDA_CUMEMSETD32,               CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuMemsetD8,               CUDA_CUMEMSETD8,                CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuModuleGetFunction,      CUDA_CUMODULEGETFUNCTION,       CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuModuleGetGlobal,        CUDA_CUMODULEGETGLOBAL,         CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuModuleLoad,             CUDA_CUMODULELOAD,              CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuModuleLoadData,         CUDA_CUMODULELOADDATA,          CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuModuleLoadDataEx,       CUDA_CUMODULELOADDATAEX,        CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuModuleUnload,           CUDA_CUMODULEUNLOAD,            CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuProfilerStart,          CUDA_CUPROFILERSTART,           CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuProfilerStop,           CUDA_CUPROFILERSTOP,            CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuStreamCreate,           CUDA_CUSTREAMCREATE,            CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuStreamDestroy,          CUDA_CUSTREAMDESTROY,           CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuStreamSynchronize,      CUDA_CUSTREAMSYNCHRONIZE,       CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuStreamWaitEvent,        CUDA_CUSTREAMWAITEVENT,         CUDA, 1);
+
+  return 0;
+}
+
+void cuda_close (hashcat_ctx_t *hashcat_ctx)
+{
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
+
+  CUDA_PTR *cuda = backend_ctx->cuda;
+
+  if (cuda)
+  {
+    if (cuda->lib)
+    {
+      hc_dlclose (cuda->lib);
+    }
+
+    hcfree (backend_ctx->cuda);
+
+    backend_ctx->cuda = NULL;
+  }
+}
+
 int ocl_init (hashcat_ctx_t *hashcat_ctx)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   memset (ocl, 0, sizeof (OCL_PTR));
 
@@ -598,47 +707,47 @@ int ocl_init (hashcat_ctx_t *hashcat_ctx)
     return -1;
   }
 
-  HC_LOAD_FUNC(ocl, clBuildProgram, OCL_CLBUILDPROGRAM, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clCreateBuffer, OCL_CLCREATEBUFFER, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clCreateCommandQueue, OCL_CLCREATECOMMANDQUEUE, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clCreateContext, OCL_CLCREATECONTEXT, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clCreateKernel, OCL_CLCREATEKERNEL, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clCreateProgramWithBinary, OCL_CLCREATEPROGRAMWITHBINARY, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clCreateProgramWithSource, OCL_CLCREATEPROGRAMWITHSOURCE, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clEnqueueCopyBuffer, OCL_CLENQUEUECOPYBUFFER, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clEnqueueMapBuffer, OCL_CLENQUEUEMAPBUFFER, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clEnqueueNDRangeKernel, OCL_CLENQUEUENDRANGEKERNEL, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clEnqueueReadBuffer, OCL_CLENQUEUEREADBUFFER, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clEnqueueUnmapMemObject, OCL_CLENQUEUEUNMAPMEMOBJECT, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clEnqueueWriteBuffer, OCL_CLENQUEUEWRITEBUFFER, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clFinish, OCL_CLFINISH, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clFlush, OCL_CLFLUSH, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clGetDeviceIDs, OCL_CLGETDEVICEIDS, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clGetDeviceInfo, OCL_CLGETDEVICEINFO, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clGetEventInfo, OCL_CLGETEVENTINFO, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clGetKernelWorkGroupInfo, OCL_CLGETKERNELWORKGROUPINFO, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clGetPlatformIDs, OCL_CLGETPLATFORMIDS, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clGetPlatformInfo, OCL_CLGETPLATFORMINFO, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clGetProgramBuildInfo, OCL_CLGETPROGRAMBUILDINFO, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clGetProgramInfo, OCL_CLGETPROGRAMINFO, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clReleaseCommandQueue, OCL_CLRELEASECOMMANDQUEUE, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clReleaseContext, OCL_CLRELEASECONTEXT, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clReleaseKernel, OCL_CLRELEASEKERNEL, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clReleaseMemObject, OCL_CLRELEASEMEMOBJECT, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clReleaseProgram, OCL_CLRELEASEPROGRAM, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clSetKernelArg, OCL_CLSETKERNELARG, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clWaitForEvents, OCL_CLWAITFOREVENTS, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clGetEventProfilingInfo, OCL_CLGETEVENTPROFILINGINFO, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clReleaseEvent, OCL_CLRELEASEEVENT, OpenCL, 1)
+  HC_LOAD_FUNC (ocl, clBuildProgram,            OCL_CLBUILDPROGRAM,             OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clCreateBuffer,            OCL_CLCREATEBUFFER,             OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clCreateCommandQueue,      OCL_CLCREATECOMMANDQUEUE,       OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clCreateContext,           OCL_CLCREATECONTEXT,            OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clCreateKernel,            OCL_CLCREATEKERNEL,             OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clCreateProgramWithBinary, OCL_CLCREATEPROGRAMWITHBINARY,  OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clCreateProgramWithSource, OCL_CLCREATEPROGRAMWITHSOURCE,  OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clEnqueueCopyBuffer,       OCL_CLENQUEUECOPYBUFFER,        OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clEnqueueMapBuffer,        OCL_CLENQUEUEMAPBUFFER,         OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clEnqueueNDRangeKernel,    OCL_CLENQUEUENDRANGEKERNEL,     OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clEnqueueReadBuffer,       OCL_CLENQUEUEREADBUFFER,        OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clEnqueueUnmapMemObject,   OCL_CLENQUEUEUNMAPMEMOBJECT,    OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clEnqueueWriteBuffer,      OCL_CLENQUEUEWRITEBUFFER,       OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clFinish,                  OCL_CLFINISH,                   OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clFlush,                   OCL_CLFLUSH,                    OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clGetDeviceIDs,            OCL_CLGETDEVICEIDS,             OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clGetDeviceInfo,           OCL_CLGETDEVICEINFO,            OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clGetEventInfo,            OCL_CLGETEVENTINFO,             OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clGetKernelWorkGroupInfo,  OCL_CLGETKERNELWORKGROUPINFO,   OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clGetPlatformIDs,          OCL_CLGETPLATFORMIDS,           OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clGetPlatformInfo,         OCL_CLGETPLATFORMINFO,          OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clGetProgramBuildInfo,     OCL_CLGETPROGRAMBUILDINFO,      OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clGetProgramInfo,          OCL_CLGETPROGRAMINFO,           OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clReleaseCommandQueue,     OCL_CLRELEASECOMMANDQUEUE,      OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clReleaseContext,          OCL_CLRELEASECONTEXT,           OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clReleaseKernel,           OCL_CLRELEASEKERNEL,            OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clReleaseMemObject,        OCL_CLRELEASEMEMOBJECT,         OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clReleaseProgram,          OCL_CLRELEASEPROGRAM,           OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clSetKernelArg,            OCL_CLSETKERNELARG,             OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clWaitForEvents,           OCL_CLWAITFOREVENTS,            OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clGetEventProfilingInfo,   OCL_CLGETEVENTPROFILINGINFO,    OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clReleaseEvent,            OCL_CLRELEASEEVENT,             OpenCL, 1);
 
   return 0;
 }
 
 void ocl_close (hashcat_ctx_t *hashcat_ctx)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   if (ocl)
   {
@@ -647,15 +756,17 @@ void ocl_close (hashcat_ctx_t *hashcat_ctx)
       hc_dlclose (ocl->lib);
     }
 
-    hcfree (opencl_ctx->ocl);
+    hcfree (backend_ctx->ocl);
+
+    backend_ctx->ocl = NULL;
   }
 }
 
 int hc_clEnqueueNDRangeKernel (hashcat_ctx_t *hashcat_ctx, cl_command_queue command_queue, cl_kernel kernel, cl_uint work_dim, const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   const cl_int CL_err = ocl->clEnqueueNDRangeKernel (command_queue, kernel, work_dim, global_work_offset, global_work_size, local_work_size, num_events_in_wait_list, event_wait_list, event);
 
@@ -671,9 +782,9 @@ int hc_clEnqueueNDRangeKernel (hashcat_ctx_t *hashcat_ctx, cl_command_queue comm
 
 int hc_clGetEventInfo (hashcat_ctx_t *hashcat_ctx, cl_event event, cl_event_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   const cl_int CL_err = ocl->clGetEventInfo (event, param_name, param_value_size, param_value, param_value_size_ret);
 
@@ -689,9 +800,9 @@ int hc_clGetEventInfo (hashcat_ctx_t *hashcat_ctx, cl_event event, cl_event_info
 
 int hc_clFlush (hashcat_ctx_t *hashcat_ctx, cl_command_queue command_queue)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   const cl_int CL_err = ocl->clFlush (command_queue);
 
@@ -707,9 +818,9 @@ int hc_clFlush (hashcat_ctx_t *hashcat_ctx, cl_command_queue command_queue)
 
 int hc_clFinish (hashcat_ctx_t *hashcat_ctx, cl_command_queue command_queue)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   const cl_int CL_err = ocl->clFinish (command_queue);
 
@@ -725,9 +836,9 @@ int hc_clFinish (hashcat_ctx_t *hashcat_ctx, cl_command_queue command_queue)
 
 int hc_clSetKernelArg (hashcat_ctx_t *hashcat_ctx, cl_kernel kernel, cl_uint arg_index, size_t arg_size, const void *arg_value)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   const cl_int CL_err = ocl->clSetKernelArg (kernel, arg_index, arg_size, arg_value);
 
@@ -743,9 +854,9 @@ int hc_clSetKernelArg (hashcat_ctx_t *hashcat_ctx, cl_kernel kernel, cl_uint arg
 
 int hc_clEnqueueWriteBuffer (hashcat_ctx_t *hashcat_ctx, cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_write, size_t offset, size_t size, const void *ptr, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   const cl_int CL_err = ocl->clEnqueueWriteBuffer (command_queue, buffer, blocking_write, offset, size, ptr, num_events_in_wait_list, event_wait_list, event);
 
@@ -761,9 +872,9 @@ int hc_clEnqueueWriteBuffer (hashcat_ctx_t *hashcat_ctx, cl_command_queue comman
 
 int hc_clEnqueueCopyBuffer (hashcat_ctx_t *hashcat_ctx, cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_buffer, size_t src_offset, size_t dst_offset, size_t size, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   const cl_int CL_err = ocl->clEnqueueCopyBuffer (command_queue, src_buffer, dst_buffer, src_offset, dst_offset, size, num_events_in_wait_list, event_wait_list, event);
 
@@ -779,9 +890,9 @@ int hc_clEnqueueCopyBuffer (hashcat_ctx_t *hashcat_ctx, cl_command_queue command
 
 int hc_clEnqueueReadBuffer (hashcat_ctx_t *hashcat_ctx, cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_read, size_t offset, size_t size, void *ptr, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   const cl_int CL_err = ocl->clEnqueueReadBuffer (command_queue, buffer, blocking_read, offset, size, ptr, num_events_in_wait_list, event_wait_list, event);
 
@@ -797,9 +908,9 @@ int hc_clEnqueueReadBuffer (hashcat_ctx_t *hashcat_ctx, cl_command_queue command
 
 int hc_clGetPlatformIDs (hashcat_ctx_t *hashcat_ctx, cl_uint num_entries, cl_platform_id *platforms, cl_uint *num_platforms)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   const cl_int CL_err = ocl->clGetPlatformIDs (num_entries, platforms, num_platforms);
 
@@ -815,9 +926,9 @@ int hc_clGetPlatformIDs (hashcat_ctx_t *hashcat_ctx, cl_uint num_entries, cl_pla
 
 int hc_clGetPlatformInfo (hashcat_ctx_t *hashcat_ctx, cl_platform_id platform, cl_platform_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   const cl_int CL_err = ocl->clGetPlatformInfo (platform, param_name, param_value_size, param_value, param_value_size_ret);
 
@@ -833,9 +944,9 @@ int hc_clGetPlatformInfo (hashcat_ctx_t *hashcat_ctx, cl_platform_id platform, c
 
 int hc_clGetDeviceIDs (hashcat_ctx_t *hashcat_ctx, cl_platform_id platform, cl_device_type device_type, cl_uint num_entries, cl_device_id *devices, cl_uint *num_devices)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   const cl_int CL_err = ocl->clGetDeviceIDs (platform, device_type, num_entries, devices, num_devices);
 
@@ -851,9 +962,9 @@ int hc_clGetDeviceIDs (hashcat_ctx_t *hashcat_ctx, cl_platform_id platform, cl_d
 
 int hc_clGetDeviceInfo (hashcat_ctx_t *hashcat_ctx, cl_device_id device, cl_device_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   const cl_int CL_err = ocl->clGetDeviceInfo (device, param_name, param_value_size, param_value, param_value_size_ret);
 
@@ -869,9 +980,9 @@ int hc_clGetDeviceInfo (hashcat_ctx_t *hashcat_ctx, cl_device_id device, cl_devi
 
 int hc_clCreateContext (hashcat_ctx_t *hashcat_ctx, const cl_context_properties *properties, cl_uint num_devices, const cl_device_id *devices, void (CL_CALLBACK *pfn_notify) (const char *errinfo, const void *private_info, size_t cb, void *user_data), void *user_data, cl_context *context)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   cl_int CL_err;
 
@@ -889,9 +1000,9 @@ int hc_clCreateContext (hashcat_ctx_t *hashcat_ctx, const cl_context_properties
 
 int hc_clCreateCommandQueue (hashcat_ctx_t *hashcat_ctx, cl_context context, cl_device_id device, cl_command_queue_properties properties, cl_command_queue *command_queue)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   cl_int CL_err;
 
@@ -909,9 +1020,9 @@ int hc_clCreateCommandQueue (hashcat_ctx_t *hashcat_ctx, cl_context context, cl_
 
 int hc_clCreateBuffer (hashcat_ctx_t *hashcat_ctx, cl_context context, cl_mem_flags flags, size_t size, void *host_ptr, cl_mem *mem)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   cl_int CL_err;
 
@@ -929,9 +1040,9 @@ int hc_clCreateBuffer (hashcat_ctx_t *hashcat_ctx, cl_context context, cl_mem_fl
 
 int hc_clCreateProgramWithSource (hashcat_ctx_t *hashcat_ctx, cl_context context, cl_uint count, const char **strings, const size_t *lengths, cl_program *program)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   cl_int CL_err;
 
@@ -949,9 +1060,9 @@ int hc_clCreateProgramWithSource (hashcat_ctx_t *hashcat_ctx, cl_context context
 
 int hc_clCreateProgramWithBinary (hashcat_ctx_t *hashcat_ctx, cl_context context, cl_uint num_devices, const cl_device_id *device_list, const size_t *lengths, const unsigned char **binaries, cl_int *binary_status, cl_program *program)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   cl_int CL_err;
 
@@ -969,9 +1080,9 @@ int hc_clCreateProgramWithBinary (hashcat_ctx_t *hashcat_ctx, cl_context context
 
 int hc_clBuildProgram (hashcat_ctx_t *hashcat_ctx, cl_program program, cl_uint num_devices, const cl_device_id *device_list, const char *options, void (CL_CALLBACK *pfn_notify) (cl_program program, void *user_data), void *user_data)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   const cl_int CL_err = ocl->clBuildProgram (program, num_devices, device_list, options, pfn_notify, user_data);
 
@@ -987,9 +1098,9 @@ int hc_clBuildProgram (hashcat_ctx_t *hashcat_ctx, cl_program program, cl_uint n
 
 int hc_clCreateKernel (hashcat_ctx_t *hashcat_ctx, cl_program program, const char *kernel_name, cl_kernel *kernel)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   cl_int CL_err;
 
@@ -1007,9 +1118,9 @@ int hc_clCreateKernel (hashcat_ctx_t *hashcat_ctx, cl_program program, const cha
 
 int hc_clReleaseMemObject (hashcat_ctx_t *hashcat_ctx, cl_mem mem)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   const cl_int CL_err = ocl->clReleaseMemObject (mem);
 
@@ -1025,9 +1136,9 @@ int hc_clReleaseMemObject (hashcat_ctx_t *hashcat_ctx, cl_mem mem)
 
 int hc_clReleaseKernel (hashcat_ctx_t *hashcat_ctx, cl_kernel kernel)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   const cl_int CL_err = ocl->clReleaseKernel (kernel);
 
@@ -1043,9 +1154,9 @@ int hc_clReleaseKernel (hashcat_ctx_t *hashcat_ctx, cl_kernel kernel)
 
 int hc_clReleaseProgram (hashcat_ctx_t *hashcat_ctx, cl_program program)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   const cl_int CL_err = ocl->clReleaseProgram (program);
 
@@ -1061,9 +1172,9 @@ int hc_clReleaseProgram (hashcat_ctx_t *hashcat_ctx, cl_program program)
 
 int hc_clReleaseCommandQueue (hashcat_ctx_t *hashcat_ctx, cl_command_queue command_queue)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   const cl_int CL_err = ocl->clReleaseCommandQueue (command_queue);
 
@@ -1079,9 +1190,9 @@ int hc_clReleaseCommandQueue (hashcat_ctx_t *hashcat_ctx, cl_command_queue comma
 
 int hc_clReleaseContext (hashcat_ctx_t *hashcat_ctx, cl_context context)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   const cl_int CL_err = ocl->clReleaseContext (context);
 
@@ -1097,9 +1208,9 @@ int hc_clReleaseContext (hashcat_ctx_t *hashcat_ctx, cl_context context)
 
 int hc_clEnqueueMapBuffer (hashcat_ctx_t *hashcat_ctx, cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_map, cl_map_flags map_flags, size_t offset, size_t size, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event, void **buf)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   cl_int CL_err;
 
@@ -1117,9 +1228,9 @@ int hc_clEnqueueMapBuffer (hashcat_ctx_t *hashcat_ctx, cl_command_queue command_
 
 int hc_clEnqueueUnmapMemObject (hashcat_ctx_t *hashcat_ctx, cl_command_queue command_queue, cl_mem memobj, void *mapped_ptr, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   const cl_int CL_err = ocl->clEnqueueUnmapMemObject (command_queue, memobj, mapped_ptr, num_events_in_wait_list, event_wait_list, event);
 
@@ -1135,9 +1246,9 @@ int hc_clEnqueueUnmapMemObject (hashcat_ctx_t *hashcat_ctx, cl_command_queue com
 
 int hc_clGetKernelWorkGroupInfo (hashcat_ctx_t *hashcat_ctx, cl_kernel kernel, cl_device_id device, cl_kernel_work_group_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   const cl_int CL_err = ocl->clGetKernelWorkGroupInfo (kernel, device, param_name, param_value_size, param_value, param_value_size_ret);
 
@@ -1153,9 +1264,9 @@ int hc_clGetKernelWorkGroupInfo (hashcat_ctx_t *hashcat_ctx, cl_kernel kernel, c
 
 int hc_clGetProgramBuildInfo (hashcat_ctx_t *hashcat_ctx, cl_program program, cl_device_id device, cl_program_build_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   const cl_int CL_err = ocl->clGetProgramBuildInfo (program, device, param_name, param_value_size, param_value, param_value_size_ret);
 
@@ -1171,9 +1282,9 @@ int hc_clGetProgramBuildInfo (hashcat_ctx_t *hashcat_ctx, cl_program program, cl
 
 int hc_clGetProgramInfo (hashcat_ctx_t *hashcat_ctx, cl_program program, cl_program_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   const cl_int CL_err = ocl->clGetProgramInfo (program, param_name, param_value_size, param_value, param_value_size_ret);
 
@@ -1189,9 +1300,9 @@ int hc_clGetProgramInfo (hashcat_ctx_t *hashcat_ctx, cl_program program, cl_prog
 
 int hc_clWaitForEvents (hashcat_ctx_t *hashcat_ctx, cl_uint num_events, const cl_event *event_list)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   const cl_int CL_err = ocl->clWaitForEvents (num_events, event_list);
 
@@ -1207,9 +1318,9 @@ int hc_clWaitForEvents (hashcat_ctx_t *hashcat_ctx, cl_uint num_events, const cl
 
 int hc_clGetEventProfilingInfo (hashcat_ctx_t *hashcat_ctx, cl_event event, cl_profiling_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   const cl_int CL_err = ocl->clGetEventProfilingInfo (event, param_name, param_value_size, param_value, param_value_size_ret);
 
@@ -1225,9 +1336,9 @@ int hc_clGetEventProfilingInfo (hashcat_ctx_t *hashcat_ctx, cl_event event, cl_p
 
 int hc_clReleaseEvent (hashcat_ctx_t *hashcat_ctx, cl_event event)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   const cl_int CL_err = ocl->clReleaseEvent (event);
 
@@ -2927,12 +3038,12 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co
   return 0;
 }
 
-int opencl_ctx_init (hashcat_ctx_t *hashcat_ctx)
+int backend_ctx_init (hashcat_ctx_t *hashcat_ctx)
 {
-  opencl_ctx_t   *opencl_ctx   = hashcat_ctx->opencl_ctx;
+  backend_ctx_t  *backend_ctx  = hashcat_ctx->backend_ctx;
   user_options_t *user_options = hashcat_ctx->user_options;
 
-  opencl_ctx->enabled = false;
+  backend_ctx->enabled = false;
 
   if (user_options->example_hashes == true) return 0;
   if (user_options->keyspace       == true) return 0;
@@ -2943,7 +3054,22 @@ int opencl_ctx_init (hashcat_ctx_t *hashcat_ctx)
 
   hc_device_param_t *devices_param = (hc_device_param_t *) hccalloc (DEVICES_MAX, sizeof (hc_device_param_t));
 
-  opencl_ctx->devices_param = devices_param;
+  backend_ctx->devices_param = devices_param;
+
+  /**
+   * Load and map CUDA library calls
+   */
+
+  CUDA_PTR *cuda = (CUDA_PTR *) hcmalloc (sizeof (CUDA_PTR));
+
+  backend_ctx->cuda = cuda;
+
+  const int rc_cuda_init = cuda_init (hashcat_ctx);
+
+  if (rc_cuda_init == -1)
+  {
+    cuda_close (hashcat_ctx);
+  }
 
   /**
    * Load and map OpenCL library calls
@@ -2951,11 +3077,23 @@ int opencl_ctx_init (hashcat_ctx_t *hashcat_ctx)
 
   OCL_PTR *ocl = (OCL_PTR *) hcmalloc (sizeof (OCL_PTR));
 
-  opencl_ctx->ocl = ocl;
+  backend_ctx->ocl = ocl;
 
   const int rc_ocl_init = ocl_init (hashcat_ctx);
 
-  if (rc_ocl_init == -1) return -1;
+  if (rc_ocl_init == -1)
+  {
+    ocl_close (hashcat_ctx);
+  }
+
+  /**
+   * return if both CUDA and OpenCL initialization failed
+   */
+
+  if ((rc_cuda_init == -1) && (rc_ocl_init == -1))
+  {
+    return -1;
+  }
 
   /**
    * Some permission pre-check, because AMDGPU-PRO Driver crashes if the user has no permission to do this
@@ -2975,7 +3113,7 @@ int opencl_ctx_init (hashcat_ctx_t *hashcat_ctx)
 
   if (rc_platforms_filter == false) return -1;
 
-  opencl_ctx->opencl_platforms_filter = opencl_platforms_filter;
+  backend_ctx->opencl_platforms_filter = opencl_platforms_filter;
 
   /**
    * OpenCL device selection
@@ -2987,7 +3125,7 @@ int opencl_ctx_init (hashcat_ctx_t *hashcat_ctx)
 
   if (rc_devices_filter == false) return -1;
 
-  opencl_ctx->devices_filter = devices_filter;
+  backend_ctx->devices_filter = devices_filter;
 
   /**
    * OpenCL device type selection
@@ -2999,7 +3137,7 @@ int opencl_ctx_init (hashcat_ctx_t *hashcat_ctx)
 
   if (rc_device_types_filter == false) return -1;
 
-  opencl_ctx->device_types_filter = device_types_filter;
+  backend_ctx->device_types_filter = device_types_filter;
 
   /**
    * OpenCL platforms: detect
@@ -3139,58 +3277,58 @@ int opencl_ctx_init (hashcat_ctx_t *hashcat_ctx)
       }
     }
 
-    opencl_ctx->device_types_filter = device_types_filter;
+    backend_ctx->device_types_filter = device_types_filter;
   }
 
-  opencl_ctx->enabled = true;
+  backend_ctx->enabled = true;
 
-  opencl_ctx->platforms_vendor      = platforms_vendor;
-  opencl_ctx->platforms_name        = platforms_name;
-  opencl_ctx->platforms_version     = platforms_version;
-  opencl_ctx->platforms_skipped     = platforms_skipped;
-  opencl_ctx->platforms_cnt         = platforms_cnt;
-  opencl_ctx->platforms             = platforms;
-  opencl_ctx->platform_devices_cnt  = platform_devices_cnt;
-  opencl_ctx->platform_devices      = platform_devices;
+  backend_ctx->platforms_vendor      = platforms_vendor;
+  backend_ctx->platforms_name        = platforms_name;
+  backend_ctx->platforms_version     = platforms_version;
+  backend_ctx->platforms_skipped     = platforms_skipped;
+  backend_ctx->platforms_cnt         = platforms_cnt;
+  backend_ctx->platforms             = platforms;
+  backend_ctx->platform_devices_cnt  = platform_devices_cnt;
+  backend_ctx->platform_devices      = platform_devices;
 
   return 0;
 }
 
-void opencl_ctx_destroy (hashcat_ctx_t *hashcat_ctx)
+void backend_ctx_destroy (hashcat_ctx_t *hashcat_ctx)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  if (opencl_ctx->enabled == false) return;
+  if (backend_ctx->enabled == false) return;
 
   ocl_close (hashcat_ctx);
 
-  hcfree (opencl_ctx->devices_param);
+  hcfree (backend_ctx->devices_param);
 
-  hcfree (opencl_ctx->platforms);
-  hcfree (opencl_ctx->platform_devices);
-  hcfree (opencl_ctx->platforms_vendor);
-  hcfree (opencl_ctx->platforms_name);
-  hcfree (opencl_ctx->platforms_version);
-  hcfree (opencl_ctx->platforms_skipped);
+  hcfree (backend_ctx->platforms);
+  hcfree (backend_ctx->platform_devices);
+  hcfree (backend_ctx->platforms_vendor);
+  hcfree (backend_ctx->platforms_name);
+  hcfree (backend_ctx->platforms_version);
+  hcfree (backend_ctx->platforms_skipped);
 
-  memset (opencl_ctx, 0, sizeof (opencl_ctx_t));
+  memset (backend_ctx, 0, sizeof (backend_ctx_t));
 }
 
-int opencl_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
+int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 {
-  opencl_ctx_t   *opencl_ctx   = hashcat_ctx->opencl_ctx;
+  backend_ctx_t  *backend_ctx  = hashcat_ctx->backend_ctx;
   user_options_t *user_options = hashcat_ctx->user_options;
 
-  if (opencl_ctx->enabled == false) return 0;
+  if (backend_ctx->enabled == false) return 0;
 
   /**
    * OpenCL devices: simply push all devices from all platforms into the same device array
    */
 
-  cl_uint         platforms_cnt         = opencl_ctx->platforms_cnt;
-  cl_platform_id *platforms             = opencl_ctx->platforms;
-  cl_uint         platform_devices_cnt  = opencl_ctx->platform_devices_cnt;
-  cl_device_id   *platform_devices      = opencl_ctx->platform_devices;
+  cl_uint         platforms_cnt         = backend_ctx->platforms_cnt;
+  cl_platform_id *platforms             = backend_ctx->platforms;
+  cl_uint         platform_devices_cnt  = backend_ctx->platform_devices_cnt;
+  cl_device_id   *platform_devices      = backend_ctx->platform_devices;
 
   bool need_adl     = false;
   bool need_nvml    = false;
@@ -3221,7 +3359,7 @@ int opencl_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
     if (CL_rc == -1) return -1;
 
-    opencl_ctx->platforms_vendor[platform_id] = platform_vendor;
+    backend_ctx->platforms_vendor[platform_id] = platform_vendor;
 
     // platform name
 
@@ -3235,7 +3373,7 @@ int opencl_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
     if (CL_rc == -1) return -1;
 
-    opencl_ctx->platforms_name[platform_id] = platform_name;
+    backend_ctx->platforms_name[platform_id] = platform_name;
 
     // platform version
 
@@ -3249,7 +3387,7 @@ int opencl_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
     if (CL_rc == -1) return -1;
 
-    opencl_ctx->platforms_version[platform_id] = platform_version;
+    backend_ctx->platforms_version[platform_id] = platform_version;
 
     // find our own platform vendor because pocl and mesa are pushing original vendor_id through opencl
     // this causes trouble with vendor id based macros
@@ -3298,7 +3436,7 @@ int opencl_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
       platform_vendor_id = VENDOR_ID_GENERIC;
     }
 
-    bool platform_skipped = ((opencl_ctx->opencl_platforms_filter & (1ULL << platform_id)) == 0);
+    bool platform_skipped = ((backend_ctx->opencl_platforms_filter & (1ULL << platform_id)) == 0);
 
     CL_rc = hc_clGetDeviceIDs (hashcat_ctx, platform, CL_DEVICE_TYPE_ALL, DEVICES_MAX, platform_devices, &platform_devices_cnt);
 
@@ -3311,7 +3449,7 @@ int opencl_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
       platform_skipped = true;
     }
 
-    opencl_ctx->platforms_skipped[platform_id] = platform_skipped;
+    backend_ctx->platforms_skipped[platform_id] = platform_skipped;
 
     if (platform_skipped == true) continue;
 
@@ -3331,7 +3469,7 @@ int opencl_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
       }
     }
 
-    hc_device_param_t *devices_param = opencl_ctx->devices_param;
+    hc_device_param_t *devices_param = backend_ctx->devices_param;
 
     for (u32 platform_devices_id = 0; platform_devices_id < platform_devices_cnt; platform_devices_id++)
     {
@@ -3710,12 +3848,12 @@ int opencl_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
       // skipped
 
-      if ((opencl_ctx->devices_filter & (1ULL << device_id)) == 0)
+      if ((backend_ctx->devices_filter & (1ULL << device_id)) == 0)
       {
         device_param->skipped = true;
       }
 
-      if ((opencl_ctx->device_types_filter & (device_type)) == 0)
+      if ((backend_ctx->device_types_filter & (device_type)) == 0)
       {
         device_param->skipped = true;
       }
@@ -4091,7 +4229,7 @@ int opencl_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
           cl_int CL_err;
 
-          OCL_PTR *ocl = opencl_ctx->ocl;
+          OCL_PTR *ocl = backend_ctx->ocl;
 
           tmp_device[c] = ocl->clCreateBuffer (context, CL_MEM_READ_WRITE, MAX_ALLOC_CHECKS_SIZE, NULL, &CL_err);
 
@@ -4161,11 +4299,11 @@ int opencl_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
   // additional check to see if the user has chosen a device that is not within the range of available devices (i.e. larger than devices_cnt)
 
-  if (opencl_ctx->devices_filter != (u64) -1)
+  if (backend_ctx->devices_filter != (u64) -1)
   {
     const u64 devices_cnt_mask = ~(((u64) -1 >> devices_cnt) << devices_cnt);
 
-    if (opencl_ctx->devices_filter > devices_cnt_mask)
+    if (backend_ctx->devices_filter > devices_cnt_mask)
     {
       event_log_error (hashcat_ctx, "An invalid device was specified using the --opencl-devices parameter.");
       event_log_error (hashcat_ctx, "The specified device was higher than the number of available devices (%u).", devices_cnt);
@@ -4174,37 +4312,37 @@ int opencl_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
     }
   }
 
-  opencl_ctx->target_msec     = TARGET_MSEC_PROFILE[user_options->workload_profile - 1];
+  backend_ctx->target_msec     = TARGET_MSEC_PROFILE[user_options->workload_profile - 1];
 
-  opencl_ctx->devices_cnt     = devices_cnt;
-  opencl_ctx->devices_active  = devices_active;
+  backend_ctx->devices_cnt     = devices_cnt;
+  backend_ctx->devices_active  = devices_active;
 
-  opencl_ctx->need_adl        = need_adl;
-  opencl_ctx->need_nvml       = need_nvml;
-  opencl_ctx->need_nvapi      = need_nvapi;
-  opencl_ctx->need_sysfs      = need_sysfs;
+  backend_ctx->need_adl        = need_adl;
+  backend_ctx->need_nvml       = need_nvml;
+  backend_ctx->need_nvapi      = need_nvapi;
+  backend_ctx->need_sysfs      = need_sysfs;
 
-  opencl_ctx->comptime        = comptime;
+  backend_ctx->comptime        = comptime;
 
   return 0;
 }
 
-void opencl_ctx_devices_destroy (hashcat_ctx_t *hashcat_ctx)
+void backend_ctx_devices_destroy (hashcat_ctx_t *hashcat_ctx)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  if (opencl_ctx->enabled == false) return;
+  if (backend_ctx->enabled == false) return;
 
-  for (u32 platform_id = 0; platform_id < opencl_ctx->platforms_cnt; platform_id++)
+  for (u32 platform_id = 0; platform_id < backend_ctx->platforms_cnt; platform_id++)
   {
-    hcfree (opencl_ctx->platforms_vendor[platform_id]);
-    hcfree (opencl_ctx->platforms_name[platform_id]);
-    hcfree (opencl_ctx->platforms_version[platform_id]);
+    hcfree (backend_ctx->platforms_vendor[platform_id]);
+    hcfree (backend_ctx->platforms_name[platform_id]);
+    hcfree (backend_ctx->platforms_version[platform_id]);
   }
 
-  for (u32 device_id = 0; device_id < opencl_ctx->devices_cnt; device_id++)
+  for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
   {
-    hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+    hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
     if (device_param->skipped == true) continue;
 
@@ -4215,13 +4353,13 @@ void opencl_ctx_devices_destroy (hashcat_ctx_t *hashcat_ctx)
     hcfree (device_param->device_vendor);
   }
 
-  opencl_ctx->devices_cnt    = 0;
-  opencl_ctx->devices_active = 0;
+  backend_ctx->devices_cnt    = 0;
+  backend_ctx->devices_active = 0;
 
-  opencl_ctx->need_adl    = false;
-  opencl_ctx->need_nvml   = false;
-  opencl_ctx->need_nvapi  = false;
-  opencl_ctx->need_sysfs  = false;
+  backend_ctx->need_adl    = false;
+  backend_ctx->need_nvml   = false;
+  backend_ctx->need_nvapi  = false;
+  backend_ctx->need_sysfs  = false;
 }
 
 static bool is_same_device_type (const hc_device_param_t *src, const hc_device_param_t *dst)
@@ -4247,23 +4385,23 @@ static bool is_same_device_type (const hc_device_param_t *src, const hc_device_p
   return true;
 }
 
-void opencl_ctx_devices_sync_tuning (hashcat_ctx_t *hashcat_ctx)
+void backend_ctx_devices_sync_tuning (hashcat_ctx_t *hashcat_ctx)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  if (opencl_ctx->enabled == false) return;
+  if (backend_ctx->enabled == false) return;
 
-  for (u32 device_id_src = 0; device_id_src < opencl_ctx->devices_cnt; device_id_src++)
+  for (u32 device_id_src = 0; device_id_src < backend_ctx->devices_cnt; device_id_src++)
   {
-    hc_device_param_t *device_param_src = &opencl_ctx->devices_param[device_id_src];
+    hc_device_param_t *device_param_src = &backend_ctx->devices_param[device_id_src];
 
     if (device_param_src->skipped == true) continue;
 
     if (device_param_src->skipped_warning == true) continue;
 
-    for (u32 device_id_dst = device_id_src; device_id_dst < opencl_ctx->devices_cnt; device_id_dst++)
+    for (u32 device_id_dst = device_id_src; device_id_dst < backend_ctx->devices_cnt; device_id_dst++)
     {
-      hc_device_param_t *device_param_dst = &opencl_ctx->devices_param[device_id_dst];
+      hc_device_param_t *device_param_dst = &backend_ctx->devices_param[device_id_dst];
 
       if (device_param_dst->skipped == true) continue;
 
@@ -4286,20 +4424,20 @@ void opencl_ctx_devices_sync_tuning (hashcat_ctx_t *hashcat_ctx)
   }
 }
 
-void opencl_ctx_devices_update_power (hashcat_ctx_t *hashcat_ctx)
+void backend_ctx_devices_update_power (hashcat_ctx_t *hashcat_ctx)
 {
-  opencl_ctx_t         *opencl_ctx          = hashcat_ctx->opencl_ctx;
+  backend_ctx_t        *backend_ctx         = hashcat_ctx->backend_ctx;
   status_ctx_t         *status_ctx          = hashcat_ctx->status_ctx;
   user_options_extra_t *user_options_extra  = hashcat_ctx->user_options_extra;
   user_options_t       *user_options        = hashcat_ctx->user_options;
 
-  if (opencl_ctx->enabled == false) return;
+  if (backend_ctx->enabled == false) return;
 
   u32 kernel_power_all = 0;
 
-  for (u32 device_id = 0; device_id < opencl_ctx->devices_cnt; device_id++)
+  for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
   {
-    hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+    hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
     if (device_param->skipped == true) continue;
 
@@ -4308,7 +4446,7 @@ void opencl_ctx_devices_update_power (hashcat_ctx_t *hashcat_ctx)
     kernel_power_all += device_param->kernel_power;
   }
 
-  opencl_ctx->kernel_power_all = kernel_power_all;
+  backend_ctx->kernel_power_all = kernel_power_all;
 
   /*
    * Inform user about possible slow speeds
@@ -4330,22 +4468,22 @@ void opencl_ctx_devices_update_power (hashcat_ctx_t *hashcat_ctx)
   }
 }
 
-void opencl_ctx_devices_kernel_loops (hashcat_ctx_t *hashcat_ctx)
+void backend_ctx_devices_kernel_loops (hashcat_ctx_t *hashcat_ctx)
 {
   combinator_ctx_t     *combinator_ctx      = hashcat_ctx->combinator_ctx;
   hashconfig_t         *hashconfig          = hashcat_ctx->hashconfig;
   hashes_t             *hashes              = hashcat_ctx->hashes;
   mask_ctx_t           *mask_ctx            = hashcat_ctx->mask_ctx;
-  opencl_ctx_t         *opencl_ctx          = hashcat_ctx->opencl_ctx;
+  backend_ctx_t        *backend_ctx         = hashcat_ctx->backend_ctx;
   straight_ctx_t       *straight_ctx        = hashcat_ctx->straight_ctx;
   user_options_t       *user_options        = hashcat_ctx->user_options;
   user_options_extra_t *user_options_extra  = hashcat_ctx->user_options_extra;
 
-  if (opencl_ctx->enabled == false) return;
+  if (backend_ctx->enabled == false) return;
 
-  for (u32 device_id = 0; device_id < opencl_ctx->devices_cnt; device_id++)
+  for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
   {
-    hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+    hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
     if (device_param->skipped == true) continue;
 
@@ -4567,23 +4705,23 @@ static u32 get_kernel_threads (hashcat_ctx_t *hashcat_ctx, const hc_device_param
   return kernel_threads;
 }
 
-int opencl_session_begin (hashcat_ctx_t *hashcat_ctx)
+int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 {
   const bitmap_ctx_t         *bitmap_ctx          = hashcat_ctx->bitmap_ctx;
   const folder_config_t      *folder_config       = hashcat_ctx->folder_config;
   const hashconfig_t         *hashconfig          = hashcat_ctx->hashconfig;
   const hashes_t             *hashes              = hashcat_ctx->hashes;
   const module_ctx_t         *module_ctx          = hashcat_ctx->module_ctx;
-        opencl_ctx_t         *opencl_ctx          = hashcat_ctx->opencl_ctx;
+        backend_ctx_t        *backend_ctx         = hashcat_ctx->backend_ctx;
   const straight_ctx_t       *straight_ctx        = hashcat_ctx->straight_ctx;
   const user_options_extra_t *user_options_extra  = hashcat_ctx->user_options_extra;
   const user_options_t       *user_options        = hashcat_ctx->user_options;
 
-  if (opencl_ctx->enabled == false) return 0;
+  if (backend_ctx->enabled == false) return 0;
 
   u32 hardware_power_all = 0;
 
-  for (u32 device_id = 0; device_id < opencl_ctx->devices_cnt; device_id++)
+  for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
   {
     int CL_rc = CL_SUCCESS;
 
@@ -4591,7 +4729,7 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx)
      * host buffer
      */
 
-    hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+    hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
     if (device_param->skipped == true) continue;
 
@@ -4996,7 +5134,7 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx)
     char *device_name_chksum_amp_mp = (char *) hcmalloc (HCBUFSIZ_TINY);
 
     const size_t dnclen = snprintf (device_name_chksum, HCBUFSIZ_TINY, "%d-%u-%s-%s-%s-%d-%u",
-      opencl_ctx->comptime,
+      backend_ctx->comptime,
       device_param->platform_vendor_id,
       device_param->device_name,
       device_param->device_version,
@@ -5005,7 +5143,7 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx)
       hashconfig->kern_type);
 
     const size_t dnclen_amp_mp = snprintf (device_name_chksum_amp_mp, HCBUFSIZ_TINY, "%d-%u-%s-%s-%s",
-      opencl_ctx->comptime,
+      backend_ctx->comptime,
       device_param->platform_vendor_id,
       device_param->device_name,
       device_param->device_version,
@@ -6700,7 +6838,7 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx)
       #endif
 
       // we assume all devices have the same specs here, which is wrong, it's a start
-      if ((size_total_host * opencl_ctx->devices_cnt) > MAX_HOST_MEMORY) memory_limit_hit = 1;
+      if ((size_total_host * backend_ctx->devices_cnt) > MAX_HOST_MEMORY) memory_limit_hit = 1;
 
       if (memory_limit_hit == 1)
       {
@@ -6864,20 +7002,20 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx)
     if (hardware_power_all == 0) return -1;
   }
 
-  opencl_ctx->hardware_power_all = hardware_power_all;
+  backend_ctx->hardware_power_all = hardware_power_all;
 
   return 0;
 }
 
-void opencl_session_destroy (hashcat_ctx_t *hashcat_ctx)
+void backend_session_destroy (hashcat_ctx_t *hashcat_ctx)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  if (opencl_ctx->enabled == false) return;
+  if (backend_ctx->enabled == false) return;
 
-  for (u32 device_id = 0; device_id < opencl_ctx->devices_cnt; device_id++)
+  for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
   {
-    hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+    hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
     if (device_param->skipped == true) continue;
 
@@ -7035,15 +7173,15 @@ void opencl_session_destroy (hashcat_ctx_t *hashcat_ctx)
   }
 }
 
-void opencl_session_reset (hashcat_ctx_t *hashcat_ctx)
+void backend_session_reset (hashcat_ctx_t *hashcat_ctx)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  if (opencl_ctx->enabled == false) return;
+  if (backend_ctx->enabled == false) return;
 
-  for (u32 device_id = 0; device_id < opencl_ctx->devices_cnt; device_id++)
+  for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
   {
-    hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+    hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
     if (device_param->skipped == true) continue;
 
@@ -7081,22 +7219,22 @@ void opencl_session_reset (hashcat_ctx_t *hashcat_ctx)
     #endif
   }
 
-  opencl_ctx->kernel_power_all   = 0;
-  opencl_ctx->kernel_power_final = 0;
+  backend_ctx->kernel_power_all   = 0;
+  backend_ctx->kernel_power_final = 0;
 }
 
-int opencl_session_update_combinator (hashcat_ctx_t *hashcat_ctx)
+int backend_session_update_combinator (hashcat_ctx_t *hashcat_ctx)
 {
   combinator_ctx_t *combinator_ctx = hashcat_ctx->combinator_ctx;
   hashconfig_t     *hashconfig     = hashcat_ctx->hashconfig;
-  opencl_ctx_t     *opencl_ctx     = hashcat_ctx->opencl_ctx;
+  backend_ctx_t     *backend_ctx     = hashcat_ctx->backend_ctx;
   user_options_t   *user_options   = hashcat_ctx->user_options;
 
-  if (opencl_ctx->enabled == false) return 0;
+  if (backend_ctx->enabled == false) return 0;
 
-  for (u32 device_id = 0; device_id < opencl_ctx->devices_cnt; device_id++)
+  for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
   {
-    hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+    hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
     if (device_param->skipped == true) continue;
 
@@ -7143,19 +7281,19 @@ int opencl_session_update_combinator (hashcat_ctx_t *hashcat_ctx)
   return 0;
 }
 
-int opencl_session_update_mp (hashcat_ctx_t *hashcat_ctx)
+int backend_session_update_mp (hashcat_ctx_t *hashcat_ctx)
 {
   mask_ctx_t     *mask_ctx     = hashcat_ctx->mask_ctx;
-  opencl_ctx_t   *opencl_ctx   = hashcat_ctx->opencl_ctx;
+  backend_ctx_t   *backend_ctx   = hashcat_ctx->backend_ctx;
   user_options_t *user_options = hashcat_ctx->user_options;
 
-  if (opencl_ctx->enabled == false) return 0;
+  if (backend_ctx->enabled == false) return 0;
 
   if (user_options->slow_candidates == true) return 0;
 
-  for (u32 device_id = 0; device_id < opencl_ctx->devices_cnt; device_id++)
+  for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
   {
-    hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+    hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
     if (device_param->skipped == true) continue;
 
@@ -7176,19 +7314,19 @@ int opencl_session_update_mp (hashcat_ctx_t *hashcat_ctx)
   return 0;
 }
 
-int opencl_session_update_mp_rl (hashcat_ctx_t *hashcat_ctx, const u32 css_cnt_l, const u32 css_cnt_r)
+int backend_session_update_mp_rl (hashcat_ctx_t *hashcat_ctx, const u32 css_cnt_l, const u32 css_cnt_r)
 {
   mask_ctx_t     *mask_ctx     = hashcat_ctx->mask_ctx;
-  opencl_ctx_t   *opencl_ctx   = hashcat_ctx->opencl_ctx;
+  backend_ctx_t   *backend_ctx   = hashcat_ctx->backend_ctx;
   user_options_t *user_options = hashcat_ctx->user_options;
 
-  if (opencl_ctx->enabled == false) return 0;
+  if (backend_ctx->enabled == false) return 0;
 
   if (user_options->slow_candidates == true) return 0;
 
-  for (u32 device_id = 0; device_id < opencl_ctx->devices_cnt; device_id++)
+  for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
   {
-    hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+    hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
     if (device_param->skipped == true) continue;
 
diff --git a/src/dispatch.c b/src/dispatch.c
index e43a23ad0..466b16604 100644
--- a/src/dispatch.c
+++ b/src/dispatch.c
@@ -7,7 +7,7 @@
 #include "types.h"
 #include "event.h"
 #include "memory.h"
-#include "opencl.h"
+#include "backend.h"
 #include "wordlist.h"
 #include "shared.h"
 #include "thread.h"
@@ -23,13 +23,13 @@
 
 static u64 get_highest_words_done (const hashcat_ctx_t *hashcat_ctx)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   u64 words_cur = 0;
 
-  for (u32 device_id = 0; device_id < opencl_ctx->devices_cnt; device_id++)
+  for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
   {
-    hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+    hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
     if (device_param->skipped == true) continue;
 
@@ -45,13 +45,13 @@ static u64 get_highest_words_done (const hashcat_ctx_t *hashcat_ctx)
 
 static u64 get_lowest_words_done (const hashcat_ctx_t *hashcat_ctx)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   u64 words_cur = 0xffffffffffffffff;
 
-  for (u32 device_id = 0; device_id < opencl_ctx->devices_cnt; device_id++)
+  for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
   {
-    hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+    hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
     if (device_param->skipped == true) continue;
 
@@ -76,20 +76,20 @@ static int set_kernel_power_final (hashcat_ctx_t *hashcat_ctx, const u64 kernel_
 {
   EVENT (EVENT_SET_KERNEL_POWER_FINAL);
 
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  opencl_ctx->kernel_power_final = kernel_power_final;
+  backend_ctx->kernel_power_final = kernel_power_final;
 
   return 0;
 }
 
-static u64 get_power (opencl_ctx_t *opencl_ctx, hc_device_param_t *device_param)
+static u64 get_power (backend_ctx_t *backend_ctx, hc_device_param_t *device_param)
 {
-  const u64 kernel_power_final = opencl_ctx->kernel_power_final;
+  const u64 kernel_power_final = backend_ctx->kernel_power_final;
 
   if (kernel_power_final)
   {
-    const double device_factor = (double) device_param->hardware_power / opencl_ctx->hardware_power_all;
+    const double device_factor = (double) device_param->hardware_power / backend_ctx->hardware_power_all;
 
     const u64 words_left_device = (u64) CEIL (kernel_power_final * device_factor);
 
@@ -109,7 +109,7 @@ static u64 get_power (opencl_ctx_t *opencl_ctx, hc_device_param_t *device_param)
 
 static u64 get_work (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 max)
 {
-  opencl_ctx_t   *opencl_ctx   = hashcat_ctx->opencl_ctx;
+  backend_ctx_t  *backend_ctx  = hashcat_ctx->backend_ctx;
   status_ctx_t   *status_ctx   = hashcat_ctx->status_ctx;
   user_options_t *user_options = hashcat_ctx->user_options;
 
@@ -120,19 +120,19 @@ static u64 get_work (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
 
   device_param->words_off = words_off;
 
-  const u64 kernel_power_all = opencl_ctx->kernel_power_all;
+  const u64 kernel_power_all = backend_ctx->kernel_power_all;
 
   const u64 words_left = words_base - words_off;
 
   if (words_left < kernel_power_all)
   {
-    if (opencl_ctx->kernel_power_final == 0)
+    if (backend_ctx->kernel_power_final == 0)
     {
       set_kernel_power_final (hashcat_ctx, words_left);
     }
   }
 
-  const u64 kernel_power = get_power (opencl_ctx, device_param);
+  const u64 kernel_power = get_power (backend_ctx, device_param);
 
   u64 work = MIN (words_left, kernel_power);
 
@@ -339,11 +339,11 @@ HC_API_CALL void *thread_calc_stdin (void *p)
 
   hashcat_ctx_t *hashcat_ctx = thread_param->hashcat_ctx;
 
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  if (opencl_ctx->enabled == false) return NULL;
+  if (backend_ctx->enabled == false) return NULL;
 
-  hc_device_param_t *device_param = opencl_ctx->devices_param + thread_param->tid;
+  hc_device_param_t *device_param = backend_ctx->devices_param + thread_param->tid;
 
   if (device_param->skipped) return NULL;
 
@@ -370,7 +370,7 @@ static int calc (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
   mask_ctx_t           *mask_ctx           = hashcat_ctx->mask_ctx;
   straight_ctx_t       *straight_ctx       = hashcat_ctx->straight_ctx;
   combinator_ctx_t     *combinator_ctx     = hashcat_ctx->combinator_ctx;
-  opencl_ctx_t         *opencl_ctx         = hashcat_ctx->opencl_ctx;
+  backend_ctx_t        *backend_ctx        = hashcat_ctx->backend_ctx;
   status_ctx_t         *status_ctx         = hashcat_ctx->status_ctx;
 
   const u32 attack_mode = user_options->attack_mode;
@@ -468,7 +468,7 @@ static int calc (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
 
         // this greatly reduces spam on hashcat console
 
-        const u64 pre_rejects_ignore = get_power (opencl_ctx, device_param) / 2;
+        const u64 pre_rejects_ignore = get_power (backend_ctx, device_param) / 2;
 
         while (pre_rejects > pre_rejects_ignore)
         {
@@ -801,7 +801,7 @@ static int calc (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
 
         // this greatly reduces spam on hashcat console
 
-        const u64 pre_rejects_ignore = get_power (opencl_ctx, device_param) / 2;
+        const u64 pre_rejects_ignore = get_power (backend_ctx, device_param) / 2;
 
         while (pre_rejects > pre_rejects_ignore)
         {
@@ -1082,7 +1082,7 @@ static int calc (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
 
         // this greatly reduces spam on hashcat console
 
-        const u64 pre_rejects_ignore = get_power (opencl_ctx, device_param) / 2;
+        const u64 pre_rejects_ignore = get_power (backend_ctx, device_param) / 2;
 
         while (pre_rejects > pre_rejects_ignore)
         {
@@ -1658,11 +1658,11 @@ HC_API_CALL void *thread_calc (void *p)
 
   hashcat_ctx_t *hashcat_ctx = thread_param->hashcat_ctx;
 
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  if (opencl_ctx->enabled == false) return NULL;
+  if (backend_ctx->enabled == false) return NULL;
 
-  hc_device_param_t *device_param = opencl_ctx->devices_param + thread_param->tid;
+  hc_device_param_t *device_param = backend_ctx->devices_param + thread_param->tid;
 
   if (device_param->skipped) return NULL;
 
diff --git a/src/emu_inc_platform.c b/src/emu_inc_platform.c
new file mode 100644
index 000000000..f390abf0a
--- /dev/null
+++ b/src/emu_inc_platform.c
@@ -0,0 +1,11 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+#include "common.h"
+#include "types.h"
+#include "bitops.h"
+#include "emu_general.h"
+
+#include "inc_platform.cl"
diff --git a/src/ext_cuda.c b/src/ext_cuda.c
new file mode 100644
index 000000000..dc43e1b61
--- /dev/null
+++ b/src/ext_cuda.c
@@ -0,0 +1,8 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+#include "common.h"
+#include "types.h"
+#include "ext_cuda.h"
diff --git a/src/hashcat.c b/src/hashcat.c
index b45319520..68fe33d73 100644
--- a/src/hashcat.c
+++ b/src/hashcat.c
@@ -34,7 +34,7 @@
 #include "loopback.h"
 #include "monitor.h"
 #include "mpsp.h"
-#include "opencl.h"
+#include "backend.h"
 #include "outfile_check.h"
 #include "outfile.h"
 #include "pidfile.h"
@@ -59,7 +59,7 @@ static int inner2_loop (hashcat_ctx_t *hashcat_ctx)
   hashes_t             *hashes              = hashcat_ctx->hashes;
   induct_ctx_t         *induct_ctx          = hashcat_ctx->induct_ctx;
   logfile_ctx_t        *logfile_ctx         = hashcat_ctx->logfile_ctx;
-  opencl_ctx_t         *opencl_ctx          = hashcat_ctx->opencl_ctx;
+  backend_ctx_t        *backend_ctx         = hashcat_ctx->backend_ctx;
   restore_ctx_t        *restore_ctx         = hashcat_ctx->restore_ctx;
   status_ctx_t         *status_ctx          = hashcat_ctx->status_ctx;
   user_options_extra_t *user_options_extra  = hashcat_ctx->user_options_extra;
@@ -109,7 +109,7 @@ static int inner2_loop (hashcat_ctx_t *hashcat_ctx)
     user_options->skip = 0;
   }
 
-  opencl_session_reset (hashcat_ctx);
+  backend_session_reset (hashcat_ctx);
 
   cpt_ctx_reset (hashcat_ctx);
 
@@ -174,15 +174,15 @@ static int inner2_loop (hashcat_ctx_t *hashcat_ctx)
    * this is required for autotune
    */
 
-  opencl_ctx_devices_kernel_loops (hashcat_ctx);
+  backend_ctx_devices_kernel_loops (hashcat_ctx);
 
   /**
    * prepare thread buffers
    */
 
-  thread_param_t *threads_param = (thread_param_t *) hccalloc (opencl_ctx->devices_cnt, sizeof (thread_param_t));
+  thread_param_t *threads_param = (thread_param_t *) hccalloc (backend_ctx->devices_cnt, sizeof (thread_param_t));
 
-  hc_thread_t *c_threads = (hc_thread_t *) hccalloc (opencl_ctx->devices_cnt, sizeof (hc_thread_t));
+  hc_thread_t *c_threads = (hc_thread_t *) hccalloc (backend_ctx->devices_cnt, sizeof (hc_thread_t));
 
   /**
    * create autotune threads
@@ -192,7 +192,7 @@ static int inner2_loop (hashcat_ctx_t *hashcat_ctx)
 
   status_ctx->devices_status = STATUS_AUTOTUNE;
 
-  for (u32 device_id = 0; device_id < opencl_ctx->devices_cnt; device_id++)
+  for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
   {
     thread_param_t *thread_param = threads_param + device_id;
 
@@ -202,7 +202,7 @@ static int inner2_loop (hashcat_ctx_t *hashcat_ctx)
     hc_thread_create (c_threads[device_id], thread_autotune, thread_param);
   }
 
-  hc_thread_wait (opencl_ctx->devices_cnt, c_threads);
+  hc_thread_wait (backend_ctx->devices_cnt, c_threads);
 
   EVENT (EVENT_AUTOTUNE_FINISHED);
 
@@ -210,13 +210,13 @@ static int inner2_loop (hashcat_ctx_t *hashcat_ctx)
    * find same opencl devices and equal results
    */
 
-  opencl_ctx_devices_sync_tuning (hashcat_ctx);
+  backend_ctx_devices_sync_tuning (hashcat_ctx);
 
   /**
-   * autotune modified kernel_accel, which modifies opencl_ctx->kernel_power_all
+   * autotune modified kernel_accel, which modifies backend_ctx->kernel_power_all
    */
 
-  opencl_ctx_devices_update_power (hashcat_ctx);
+  backend_ctx_devices_update_power (hashcat_ctx);
 
   /**
    * Begin loopback recording
@@ -249,7 +249,7 @@ static int inner2_loop (hashcat_ctx_t *hashcat_ctx)
 
   status_ctx->accessible = true;
 
-  for (u32 device_id = 0; device_id < opencl_ctx->devices_cnt; device_id++)
+  for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
   {
     thread_param_t *thread_param = threads_param + device_id;
 
@@ -266,7 +266,7 @@ static int inner2_loop (hashcat_ctx_t *hashcat_ctx)
     }
   }
 
-  hc_thread_wait (opencl_ctx->devices_cnt, c_threads);
+  hc_thread_wait (backend_ctx->devices_cnt, c_threads);
 
   hcfree (c_threads);
 
@@ -438,7 +438,7 @@ static int outer_loop (hashcat_ctx_t *hashcat_ctx)
   hashconfig_t   *hashconfig    = hashcat_ctx->hashconfig;
   hashes_t       *hashes        = hashcat_ctx->hashes;
   mask_ctx_t     *mask_ctx      = hashcat_ctx->mask_ctx;
-  opencl_ctx_t   *opencl_ctx    = hashcat_ctx->opencl_ctx;
+  backend_ctx_t  *backend_ctx   = hashcat_ctx->backend_ctx;
   outcheck_ctx_t *outcheck_ctx  = hashcat_ctx->outcheck_ctx;
   restore_ctx_t  *restore_ctx   = hashcat_ctx->restore_ctx;
   status_ctx_t   *status_ctx    = hashcat_ctx->status_ctx;
@@ -722,7 +722,7 @@ static int outer_loop (hashcat_ctx_t *hashcat_ctx)
 
   EVENT (EVENT_OPENCL_SESSION_PRE);
 
-  const int rc_session_begin = opencl_session_begin (hashcat_ctx);
+  const int rc_session_begin = backend_session_begin (hashcat_ctx);
 
   if (rc_session_begin == -1) return -1;
 
@@ -736,13 +736,13 @@ static int outer_loop (hashcat_ctx_t *hashcat_ctx)
   {
     EVENT (EVENT_SELFTEST_STARTING);
 
-    thread_param_t *threads_param = (thread_param_t *) hccalloc (opencl_ctx->devices_cnt, sizeof (thread_param_t));
+    thread_param_t *threads_param = (thread_param_t *) hccalloc (backend_ctx->devices_cnt, sizeof (thread_param_t));
 
-    hc_thread_t *selftest_threads = (hc_thread_t *) hccalloc (opencl_ctx->devices_cnt, sizeof (hc_thread_t));
+    hc_thread_t *selftest_threads = (hc_thread_t *) hccalloc (backend_ctx->devices_cnt, sizeof (hc_thread_t));
 
     status_ctx->devices_status = STATUS_SELFTEST;
 
-    for (u32 device_id = 0; device_id < opencl_ctx->devices_cnt; device_id++)
+    for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
     {
       thread_param_t *thread_param = threads_param + device_id;
 
@@ -752,7 +752,7 @@ static int outer_loop (hashcat_ctx_t *hashcat_ctx)
       hc_thread_create (selftest_threads[device_id], thread_selftest, thread_param);
     }
 
-    hc_thread_wait (opencl_ctx->devices_cnt, selftest_threads);
+    hc_thread_wait (backend_ctx->devices_cnt, selftest_threads);
 
     hcfree (threads_param);
 
@@ -760,11 +760,11 @@ static int outer_loop (hashcat_ctx_t *hashcat_ctx)
 
     // check for any selftest failures
 
-    for (u32 device_id = 0; device_id < opencl_ctx->devices_cnt; device_id++)
+    for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
     {
-      if (opencl_ctx->enabled == false) continue;
+      if (backend_ctx->enabled == false) continue;
 
-      hc_device_param_t *device_param = opencl_ctx->devices_param + device_id;
+      hc_device_param_t *device_param = backend_ctx->devices_param + device_id;
 
       if (device_param->skipped == true) continue;
 
@@ -881,7 +881,7 @@ static int outer_loop (hashcat_ctx_t *hashcat_ctx)
 
   // finalize opencl session
 
-  opencl_session_destroy (hashcat_ctx);
+  backend_session_destroy (hashcat_ctx);
 
   // clean up
 
@@ -930,7 +930,7 @@ int hashcat_init (hashcat_ctx_t *hashcat_ctx, void (*event) (const u32, struct h
   hashcat_ctx->loopback_ctx       = (loopback_ctx_t *)        hcmalloc (sizeof (loopback_ctx_t));
   hashcat_ctx->mask_ctx           = (mask_ctx_t *)            hcmalloc (sizeof (mask_ctx_t));
   hashcat_ctx->module_ctx         = (module_ctx_t *)          hcmalloc (sizeof (module_ctx_t));
-  hashcat_ctx->opencl_ctx         = (opencl_ctx_t *)          hcmalloc (sizeof (opencl_ctx_t));
+  hashcat_ctx->backend_ctx        = (backend_ctx_t *)         hcmalloc (sizeof (backend_ctx_t));
   hashcat_ctx->outcheck_ctx       = (outcheck_ctx_t *)        hcmalloc (sizeof (outcheck_ctx_t));
   hashcat_ctx->outfile_ctx        = (outfile_ctx_t *)         hcmalloc (sizeof (outfile_ctx_t));
   hashcat_ctx->pidfile_ctx        = (pidfile_ctx_t *)         hcmalloc (sizeof (pidfile_ctx_t));
@@ -964,7 +964,7 @@ void hashcat_destroy (hashcat_ctx_t *hashcat_ctx)
   hcfree (hashcat_ctx->loopback_ctx);
   hcfree (hashcat_ctx->mask_ctx);
   hcfree (hashcat_ctx->module_ctx);
-  hcfree (hashcat_ctx->opencl_ctx);
+  hcfree (hashcat_ctx->backend_ctx);
   hcfree (hashcat_ctx->outcheck_ctx);
   hcfree (hashcat_ctx->outfile_ctx);
   hcfree (hashcat_ctx->pidfile_ctx);
@@ -1172,15 +1172,15 @@ int hashcat_session_init (hashcat_ctx_t *hashcat_ctx, const char *install_folder
    * Init OpenCL library loader
    */
 
-  const int rc_opencl_init = opencl_ctx_init (hashcat_ctx);
+  const int rc_backend_init = backend_ctx_init (hashcat_ctx);
 
-  if (rc_opencl_init == -1) return -1;
+  if (rc_backend_init == -1) return -1;
 
   /**
    * Init OpenCL devices
    */
 
-  const int rc_devices_init = opencl_ctx_devices_init (hashcat_ctx, comptime);
+  const int rc_devices_init = backend_ctx_devices_init (hashcat_ctx, comptime);
 
   if (rc_devices_init == -1) return -1;
 
@@ -1341,25 +1341,25 @@ int hashcat_session_destroy (hashcat_ctx_t *hashcat_ctx)
   #endif
   #endif
 
-  debugfile_destroy          (hashcat_ctx);
-  dictstat_destroy           (hashcat_ctx);
-  folder_config_destroy      (hashcat_ctx);
-  hwmon_ctx_destroy          (hashcat_ctx);
-  induct_ctx_destroy         (hashcat_ctx);
-  logfile_destroy            (hashcat_ctx);
-  loopback_destroy           (hashcat_ctx);
-  opencl_ctx_devices_destroy (hashcat_ctx);
-  opencl_ctx_destroy         (hashcat_ctx);
-  outcheck_ctx_destroy       (hashcat_ctx);
-  outfile_destroy            (hashcat_ctx);
-  pidfile_ctx_destroy        (hashcat_ctx);
-  potfile_destroy            (hashcat_ctx);
-  restore_ctx_destroy        (hashcat_ctx);
-  tuning_db_destroy          (hashcat_ctx);
-  user_options_destroy       (hashcat_ctx);
-  user_options_extra_destroy (hashcat_ctx);
-  status_ctx_destroy         (hashcat_ctx);
-  event_ctx_destroy          (hashcat_ctx);
+  debugfile_destroy           (hashcat_ctx);
+  dictstat_destroy            (hashcat_ctx);
+  folder_config_destroy       (hashcat_ctx);
+  hwmon_ctx_destroy           (hashcat_ctx);
+  induct_ctx_destroy          (hashcat_ctx);
+  logfile_destroy             (hashcat_ctx);
+  loopback_destroy            (hashcat_ctx);
+  backend_ctx_devices_destroy (hashcat_ctx);
+  backend_ctx_destroy         (hashcat_ctx);
+  outcheck_ctx_destroy        (hashcat_ctx);
+  outfile_destroy             (hashcat_ctx);
+  pidfile_ctx_destroy         (hashcat_ctx);
+  potfile_destroy             (hashcat_ctx);
+  restore_ctx_destroy         (hashcat_ctx);
+  tuning_db_destroy           (hashcat_ctx);
+  user_options_destroy        (hashcat_ctx);
+  user_options_extra_destroy  (hashcat_ctx);
+  status_ctx_destroy          (hashcat_ctx);
+  event_ctx_destroy           (hashcat_ctx);
 
   return 0;
 }
diff --git a/src/hashes.c b/src/hashes.c
index 3b1aeb50a..653123d9d 100644
--- a/src/hashes.c
+++ b/src/hashes.c
@@ -14,7 +14,7 @@
 #include "terminal.h"
 #include "logfile.h"
 #include "loopback.h"
-#include "opencl.h"
+#include "backend.h"
 #include "outfile.h"
 #include "potfile.h"
 #include "rp.h"
diff --git a/src/hwmon.c b/src/hwmon.c
index c8a568c49..d38d4f052 100644
--- a/src/hwmon.c
+++ b/src/hwmon.c
@@ -47,9 +47,9 @@ static void sysfs_close (hashcat_ctx_t *hashcat_ctx)
 
 static char *hm_SYSFS_get_syspath_device (hashcat_ctx_t *hashcat_ctx, const int device_id)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
   char *syspath;
 
@@ -1344,16 +1344,16 @@ static int hm_get_adapter_index_nvml (hashcat_ctx_t *hashcat_ctx, HM_ADAPTER_NVM
 
 int hm_get_threshold_slowdown_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 device_id)
 {
-  hwmon_ctx_t  *hwmon_ctx  = hashcat_ctx->hwmon_ctx;
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  hwmon_ctx_t   *hwmon_ctx   = hashcat_ctx->hwmon_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   if (hwmon_ctx->enabled == false) return -1;
 
   if (hwmon_ctx->hm_device[device_id].threshold_slowdown_get_supported == false) return -1;
 
-  if ((opencl_ctx->devices_param[device_id].device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
+  if ((backend_ctx->devices_param[device_id].device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
 
-  if (opencl_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
+  if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
   {
     if (hwmon_ctx->hm_adl)
     {
@@ -1380,7 +1380,7 @@ int hm_get_threshold_slowdown_with_device_id (hashcat_ctx_t *hashcat_ctx, const
     }
   }
 
-  if (opencl_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_NV)
+  if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_NV)
   {
     if (hwmon_ctx->hm_nvml)
     {
@@ -1404,16 +1404,16 @@ int hm_get_threshold_slowdown_with_device_id (hashcat_ctx_t *hashcat_ctx, const
 
 int hm_get_threshold_shutdown_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 device_id)
 {
-  hwmon_ctx_t  *hwmon_ctx  = hashcat_ctx->hwmon_ctx;
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  hwmon_ctx_t   *hwmon_ctx   = hashcat_ctx->hwmon_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   if (hwmon_ctx->enabled == false) return -1;
 
   if (hwmon_ctx->hm_device[device_id].threshold_shutdown_get_supported == false) return -1;
 
-  if ((opencl_ctx->devices_param[device_id].device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
+  if ((backend_ctx->devices_param[device_id].device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
 
-  if (opencl_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
+  if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
   {
     if (hwmon_ctx->hm_adl)
     {
@@ -1428,7 +1428,7 @@ int hm_get_threshold_shutdown_with_device_id (hashcat_ctx_t *hashcat_ctx, const
     }
   }
 
-  if (opencl_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_NV)
+  if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_NV)
   {
     if (hwmon_ctx->hm_nvml)
     {
@@ -1452,16 +1452,16 @@ int hm_get_threshold_shutdown_with_device_id (hashcat_ctx_t *hashcat_ctx, const
 
 int hm_get_temperature_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 device_id)
 {
-  hwmon_ctx_t  *hwmon_ctx  = hashcat_ctx->hwmon_ctx;
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  hwmon_ctx_t   *hwmon_ctx   = hashcat_ctx->hwmon_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   if (hwmon_ctx->enabled == false) return -1;
 
   if (hwmon_ctx->hm_device[device_id].temperature_get_supported == false) return -1;
 
-  if ((opencl_ctx->devices_param[device_id].device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
+  if ((backend_ctx->devices_param[device_id].device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
 
-  if (opencl_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
+  if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
   {
     if (hwmon_ctx->hm_adl)
     {
@@ -1511,7 +1511,7 @@ int hm_get_temperature_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 dev
     }
   }
 
-  if (opencl_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_NV)
+  if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_NV)
   {
     if (hwmon_ctx->hm_nvml)
     {
@@ -1535,16 +1535,16 @@ int hm_get_temperature_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 dev
 
 int hm_get_fanpolicy_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 device_id)
 {
-  hwmon_ctx_t  *hwmon_ctx  = hashcat_ctx->hwmon_ctx;
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  hwmon_ctx_t   *hwmon_ctx   = hashcat_ctx->hwmon_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   if (hwmon_ctx->enabled == false) return -1;
 
   if (hwmon_ctx->hm_device[device_id].fanpolicy_get_supported == false) return -1;
 
-  if ((opencl_ctx->devices_param[device_id].device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
+  if ((backend_ctx->devices_param[device_id].device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
 
-  if (opencl_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
+  if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
   {
     if (hwmon_ctx->hm_adl)
     {
@@ -1580,7 +1580,7 @@ int hm_get_fanpolicy_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 devic
     }
   }
 
-  if (opencl_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_NV)
+  if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_NV)
   {
     return 1;
   }
@@ -1593,16 +1593,16 @@ int hm_get_fanpolicy_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 devic
 
 int hm_get_fanspeed_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 device_id)
 {
-  hwmon_ctx_t  *hwmon_ctx  = hashcat_ctx->hwmon_ctx;
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  hwmon_ctx_t   *hwmon_ctx   = hashcat_ctx->hwmon_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   if (hwmon_ctx->enabled == false) return -1;
 
   if (hwmon_ctx->hm_device[device_id].fanspeed_get_supported == false) return -1;
 
-  if ((opencl_ctx->devices_param[device_id].device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
+  if ((backend_ctx->devices_param[device_id].device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
 
-  if (opencl_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
+  if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
   {
     if (hwmon_ctx->hm_adl)
     {
@@ -1658,7 +1658,7 @@ int hm_get_fanspeed_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 device
     }
   }
 
-  if (opencl_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_NV)
+  if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_NV)
   {
     if (hwmon_ctx->hm_nvml)
     {
@@ -1682,16 +1682,16 @@ int hm_get_fanspeed_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 device
 
 int hm_get_buslanes_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 device_id)
 {
-  hwmon_ctx_t  *hwmon_ctx  = hashcat_ctx->hwmon_ctx;
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  hwmon_ctx_t   *hwmon_ctx   = hashcat_ctx->hwmon_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   if (hwmon_ctx->enabled == false) return -1;
 
   if (hwmon_ctx->hm_device[device_id].buslanes_get_supported == false) return -1;
 
-  if ((opencl_ctx->devices_param[device_id].device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
+  if ((backend_ctx->devices_param[device_id].device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
 
-  if (opencl_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
+  if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
   {
     if (hwmon_ctx->hm_adl)
     {
@@ -1724,7 +1724,7 @@ int hm_get_buslanes_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 device
     }
   }
 
-  if (opencl_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_NV)
+  if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_NV)
   {
     if (hwmon_ctx->hm_nvml)
     {
@@ -1748,16 +1748,16 @@ int hm_get_buslanes_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 device
 
 int hm_get_utilization_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 device_id)
 {
-  hwmon_ctx_t  *hwmon_ctx  = hashcat_ctx->hwmon_ctx;
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  hwmon_ctx_t   *hwmon_ctx   = hashcat_ctx->hwmon_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   if (hwmon_ctx->enabled == false) return -1;
 
   if (hwmon_ctx->hm_device[device_id].utilization_get_supported == false) return -1;
 
-  if ((opencl_ctx->devices_param[device_id].device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
+  if ((backend_ctx->devices_param[device_id].device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
 
-  if (opencl_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
+  if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
   {
     if (hwmon_ctx->hm_adl)
     {
@@ -1776,7 +1776,7 @@ int hm_get_utilization_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 dev
     }
   }
 
-  if (opencl_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_NV)
+  if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_NV)
   {
     if (hwmon_ctx->hm_nvml)
     {
@@ -1800,16 +1800,16 @@ int hm_get_utilization_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 dev
 
 int hm_get_memoryspeed_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 device_id)
 {
-  hwmon_ctx_t  *hwmon_ctx  = hashcat_ctx->hwmon_ctx;
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  hwmon_ctx_t   *hwmon_ctx   = hashcat_ctx->hwmon_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   if (hwmon_ctx->enabled == false) return -1;
 
   if (hwmon_ctx->hm_device[device_id].memoryspeed_get_supported == false) return -1;
 
-  if ((opencl_ctx->devices_param[device_id].device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
+  if ((backend_ctx->devices_param[device_id].device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
 
-  if (opencl_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
+  if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
   {
     if (hwmon_ctx->hm_adl)
     {
@@ -1842,7 +1842,7 @@ int hm_get_memoryspeed_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 dev
     }
   }
 
-  if (opencl_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_NV)
+  if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_NV)
   {
     if (hwmon_ctx->hm_nvml)
     {
@@ -1866,16 +1866,16 @@ int hm_get_memoryspeed_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 dev
 
 int hm_get_corespeed_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 device_id)
 {
-  hwmon_ctx_t  *hwmon_ctx  = hashcat_ctx->hwmon_ctx;
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  hwmon_ctx_t   *hwmon_ctx   = hashcat_ctx->hwmon_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   if (hwmon_ctx->enabled == false) return -1;
 
   if (hwmon_ctx->hm_device[device_id].corespeed_get_supported == false) return -1;
 
-  if ((opencl_ctx->devices_param[device_id].device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
+  if ((backend_ctx->devices_param[device_id].device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
 
-  if (opencl_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
+  if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
   {
     if (hwmon_ctx->hm_adl)
     {
@@ -1908,7 +1908,7 @@ int hm_get_corespeed_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 devic
     }
   }
 
-  if (opencl_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_NV)
+  if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_NV)
   {
     if (hwmon_ctx->hm_nvml)
     {
@@ -1932,20 +1932,20 @@ int hm_get_corespeed_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 devic
 
 int hm_get_throttle_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 device_id)
 {
-  hwmon_ctx_t  *hwmon_ctx  = hashcat_ctx->hwmon_ctx;
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  hwmon_ctx_t   *hwmon_ctx   = hashcat_ctx->hwmon_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   if (hwmon_ctx->enabled == false) return -1;
 
   if (hwmon_ctx->hm_device[device_id].throttle_get_supported == false) return -1;
 
-  if ((opencl_ctx->devices_param[device_id].device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
+  if ((backend_ctx->devices_param[device_id].device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
 
-  if (opencl_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
+  if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
   {
   }
 
-  if (opencl_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_NV)
+  if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_NV)
   {
     if (hwmon_ctx->hm_nvml)
     {
@@ -1961,7 +1961,7 @@ int hm_get_throttle_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 device
       clocksThrottleReasons &= ~nvmlClocksThrottleReasonApplicationsClocksSetting;
       clocksThrottleReasons &= ~nvmlClocksThrottleReasonUnknown;
 
-      if (opencl_ctx->kernel_power_final)
+      if (backend_ctx->kernel_power_final)
       {
         clocksThrottleReasons &= ~nvmlClocksThrottleReasonHwSlowdown;
       }
@@ -1999,7 +1999,7 @@ int hm_get_throttle_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 device
 int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
 {
   hwmon_ctx_t    *hwmon_ctx    = hashcat_ctx->hwmon_ctx;
-  opencl_ctx_t   *opencl_ctx   = hashcat_ctx->opencl_ctx;
+  backend_ctx_t  *backend_ctx  = hashcat_ctx->backend_ctx;
   user_options_t *user_options = hashcat_ctx->user_options;
 
   hwmon_ctx->enabled = false;
@@ -2037,7 +2037,7 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
     hcfree (hm_adapters_sysfs); \
   }
 
-  if (opencl_ctx->need_nvml == true)
+  if (backend_ctx->need_nvml == true)
   {
     hwmon_ctx->hm_nvml = (NVML_PTR *) hcmalloc (sizeof (NVML_PTR));
 
@@ -2049,7 +2049,7 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
     }
   }
 
-  if ((opencl_ctx->need_nvapi == true) && (hwmon_ctx->hm_nvml)) // nvapi can't work alone, we need nvml, too
+  if ((backend_ctx->need_nvapi == true) && (hwmon_ctx->hm_nvml)) // nvapi can't work alone, we need nvml, too
   {
     hwmon_ctx->hm_nvapi = (NVAPI_PTR *) hcmalloc (sizeof (NVAPI_PTR));
 
@@ -2061,7 +2061,7 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
     }
   }
 
-  if (opencl_ctx->need_adl == true)
+  if (backend_ctx->need_adl == true)
   {
     hwmon_ctx->hm_adl = (ADL_PTR *) hcmalloc (sizeof (ADL_PTR));
 
@@ -2073,7 +2073,7 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
     }
   }
 
-  if (opencl_ctx->need_sysfs == true)
+  if (backend_ctx->need_sysfs == true)
   {
     hwmon_ctx->hm_sysfs = (SYSFS_PTR *) hcmalloc (sizeof (SYSFS_PTR));
 
@@ -2102,9 +2102,9 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
 
       int tmp_in = hm_get_adapter_index_nvml (hashcat_ctx, nvmlGPUHandle);
 
-      for (u32 device_id = 0; device_id < opencl_ctx->devices_cnt; device_id++)
+      for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
       {
-        hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+        hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
         if (device_param->skipped == true) continue;
 
@@ -2152,9 +2152,9 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
 
       int tmp_in = hm_get_adapter_index_nvapi (hashcat_ctx, nvGPUHandle);
 
-      for (u32 device_id = 0; device_id < opencl_ctx->devices_cnt; device_id++)
+      for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
       {
-        hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+        hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
         if (device_param->skipped == true) continue;
 
@@ -2221,9 +2221,9 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
         return -1;
       }
 
-      for (u32 device_id = 0; device_id < opencl_ctx->devices_cnt; device_id++)
+      for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
       {
-        hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+        hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
         if (device_param->skipped == true) continue;
 
@@ -2271,9 +2271,9 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
     {
       int hm_adapters_id = 0;
 
-      for (u32 device_id = 0; device_id < opencl_ctx->devices_cnt; device_id++)
+      for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
       {
-        hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+        hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
         if ((device_param->device_type & CL_DEVICE_TYPE_GPU) == 0) continue;
 
@@ -2308,15 +2308,15 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
    * save buffer required for later restores
    */
 
-  hwmon_ctx->od_clock_mem_status = (ADLOD6MemClockState *) hccalloc (opencl_ctx->devices_cnt, sizeof (ADLOD6MemClockState));
+  hwmon_ctx->od_clock_mem_status = (ADLOD6MemClockState *) hccalloc (backend_ctx->devices_cnt, sizeof (ADLOD6MemClockState));
 
   /**
    * HM devices: copy
    */
 
-  for (u32 device_id = 0; device_id < opencl_ctx->devices_cnt; device_id++)
+  for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
   {
-    hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+    hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
     if (device_param->skipped == true) continue;
 
diff --git a/src/interface.c b/src/interface.c
index e8d396cd4..6218da192 100644
--- a/src/interface.c
+++ b/src/interface.c
@@ -8,7 +8,7 @@
 #include "memory.h"
 #include "event.h"
 #include "shared.h"
-#include "opencl.h"
+#include "backend.h"
 #include "modules.h"
 #include "dynloader.h"
 #include "interface.h"
diff --git a/src/main.c b/src/main.c
index 28ea88165..9cce0b835 100644
--- a/src/main.c
+++ b/src/main.c
@@ -549,7 +549,7 @@ static void main_outerloop_mainscreen (MAYBE_UNUSED hashcat_ctx_t *hashcat_ctx,
   event_log_info (hashcat_ctx, NULL);
 }
 
-static void main_opencl_session_pre (MAYBE_UNUSED hashcat_ctx_t *hashcat_ctx, MAYBE_UNUSED const void *buf, MAYBE_UNUSED const size_t len)
+static void main_backend_session_pre (MAYBE_UNUSED hashcat_ctx_t *hashcat_ctx, MAYBE_UNUSED const void *buf, MAYBE_UNUSED const size_t len)
 {
   const user_options_t *user_options = hashcat_ctx->user_options;
 
@@ -558,7 +558,7 @@ static void main_opencl_session_pre (MAYBE_UNUSED hashcat_ctx_t *hashcat_ctx, MA
   event_log_info_nn (hashcat_ctx, "Initializing device kernels and memory...");
 }
 
-static void main_opencl_session_post (MAYBE_UNUSED hashcat_ctx_t *hashcat_ctx, MAYBE_UNUSED const void *buf, MAYBE_UNUSED const size_t len)
+static void main_backend_session_post (MAYBE_UNUSED hashcat_ctx_t *hashcat_ctx, MAYBE_UNUSED const void *buf, MAYBE_UNUSED const size_t len)
 {
   const user_options_t *user_options = hashcat_ctx->user_options;
 
@@ -1022,8 +1022,8 @@ static void event (const u32 id, hashcat_ctx_t *hashcat_ctx, const void *buf, co
     case EVENT_MONITOR_PERFORMANCE_HINT:  main_monitor_performance_hint  (hashcat_ctx, buf, len); break;
     case EVENT_MONITOR_NOINPUT_HINT:      main_monitor_noinput_hint      (hashcat_ctx, buf, len); break;
     case EVENT_MONITOR_NOINPUT_ABORT:     main_monitor_noinput_abort     (hashcat_ctx, buf, len); break;
-    case EVENT_OPENCL_SESSION_POST:       main_opencl_session_post       (hashcat_ctx, buf, len); break;
-    case EVENT_OPENCL_SESSION_PRE:        main_opencl_session_pre        (hashcat_ctx, buf, len); break;
+    case EVENT_OPENCL_SESSION_POST:       main_backend_session_post      (hashcat_ctx, buf, len); break;
+    case EVENT_OPENCL_SESSION_PRE:        main_backend_session_pre       (hashcat_ctx, buf, len); break;
     case EVENT_OPENCL_DEVICE_INIT_POST:   main_opencl_device_init_post   (hashcat_ctx, buf, len); break;
     case EVENT_OPENCL_DEVICE_INIT_PRE:    main_opencl_device_init_pre    (hashcat_ctx, buf, len); break;
     case EVENT_OUTERLOOP_FINISHED:        main_outerloop_finished        (hashcat_ctx, buf, len); break;
diff --git a/src/monitor.c b/src/monitor.c
index 2b942890f..6c5d4577e 100644
--- a/src/monitor.c
+++ b/src/monitor.c
@@ -44,7 +44,7 @@ static int monitor (hashcat_ctx_t *hashcat_ctx)
 {
   hashes_t       *hashes        = hashcat_ctx->hashes;
   hwmon_ctx_t    *hwmon_ctx     = hashcat_ctx->hwmon_ctx;
-  opencl_ctx_t   *opencl_ctx    = hashcat_ctx->opencl_ctx;
+  backend_ctx_t  *backend_ctx   = hashcat_ctx->backend_ctx;
   restore_ctx_t  *restore_ctx   = hashcat_ctx->restore_ctx;
   status_ctx_t   *status_ctx    = hashcat_ctx->status_ctx;
   user_options_t *user_options  = hashcat_ctx->user_options;
@@ -114,13 +114,13 @@ static int monitor (hashcat_ctx_t *hashcat_ctx)
     {
       hc_thread_mutex_lock (status_ctx->mux_hwmon);
 
-      for (u32 device_id = 0; device_id < opencl_ctx->devices_cnt; device_id++)
+      for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
       {
-        hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+        hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
         if (device_param->skipped == true) continue;
 
-        if ((opencl_ctx->devices_param[device_id].device_type & CL_DEVICE_TYPE_GPU) == 0) continue;
+        if ((backend_ctx->devices_param[device_id].device_type & CL_DEVICE_TYPE_GPU) == 0) continue;
 
         const int temperature = hm_get_temperature_with_device_id (hashcat_ctx, device_id);
 
@@ -132,9 +132,9 @@ static int monitor (hashcat_ctx_t *hashcat_ctx)
         }
       }
 
-      for (u32 device_id = 0; device_id < opencl_ctx->devices_cnt; device_id++)
+      for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
       {
-        hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+        hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
         if (device_param->skipped == true) continue;
 
@@ -232,9 +232,9 @@ static int monitor (hashcat_ctx_t *hashcat_ctx)
 
       hc_thread_mutex_lock (status_ctx->mux_hwmon);
 
-      for (u32 device_id = 0; device_id < opencl_ctx->devices_cnt; device_id++)
+      for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
       {
-        hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+        hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
         if (device_param->skipped == true) continue;
 
diff --git a/src/mpsp.c b/src/mpsp.c
index a5072776e..c81f6c2e5 100644
--- a/src/mpsp.c
+++ b/src/mpsp.c
@@ -11,7 +11,7 @@
 #include "logfile.h"
 #include "convert.h"
 #include "filehandling.h"
-#include "opencl.h"
+#include "backend.h"
 #include "shared.h"
 #include "ext_lzma.h"
 #include "mpsp.h"
@@ -1224,7 +1224,7 @@ int mask_ctx_update_loop (hashcat_ctx_t *hashcat_ctx)
           return -1;
         }
 
-        const int rc_update_mp = opencl_session_update_mp (hashcat_ctx);
+        const int rc_update_mp = backend_session_update_mp (hashcat_ctx);
 
         if (rc_update_mp == -1) return -1;
       }
@@ -1257,13 +1257,13 @@ int mask_ctx_update_loop (hashcat_ctx_t *hashcat_ctx)
           return -1;
         }
 
-        const int rc_update_mp = opencl_session_update_mp (hashcat_ctx);
+        const int rc_update_mp = backend_session_update_mp (hashcat_ctx);
 
         if (rc_update_mp == -1) return -1;
       }
     }
 
-    const int rc_update_combinator = opencl_session_update_combinator (hashcat_ctx);
+    const int rc_update_combinator = backend_session_update_combinator (hashcat_ctx);
 
     if (rc_update_combinator == -1) return -1;
   }
@@ -1378,7 +1378,7 @@ int mask_ctx_update_loop (hashcat_ctx_t *hashcat_ctx)
         return -1;
       }
 
-      const int rc_update_mp_rl = opencl_session_update_mp_rl (hashcat_ctx, css_cnt_lr[0], css_cnt_lr[1]);
+      const int rc_update_mp_rl = backend_session_update_mp_rl (hashcat_ctx, css_cnt_lr[0], css_cnt_lr[1]);
 
       if (rc_update_mp_rl == -1) return -1;
     }
diff --git a/src/outfile.c b/src/outfile.c
index a7abbce00..06e5c80ed 100644
--- a/src/outfile.c
+++ b/src/outfile.c
@@ -11,7 +11,7 @@
 #include "rp.h"
 #include "emu_inc_rp.h"
 #include "emu_inc_rp_optimized.h"
-#include "opencl.h"
+#include "backend.h"
 #include "shared.h"
 #include "locking.h"
 #include "outfile.h"
diff --git a/src/selftest.c b/src/selftest.c
index 89a527298..9f97aff2c 100644
--- a/src/selftest.c
+++ b/src/selftest.c
@@ -8,7 +8,7 @@
 #include "event.h"
 #include "bitops.h"
 #include "convert.h"
-#include "opencl.h"
+#include "backend.h"
 #include "thread.h"
 #include "selftest.h"
 
@@ -572,15 +572,15 @@ HC_API_CALL void *thread_selftest (void *p)
 
   hashcat_ctx_t *hashcat_ctx = thread_param->hashcat_ctx;
 
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  if (opencl_ctx->enabled == false) return NULL;
+  if (backend_ctx->enabled == false) return NULL;
 
   user_options_t *user_options = hashcat_ctx->user_options;
 
   if (user_options->self_test_disable == true) return NULL;
 
-  hc_device_param_t *device_param = opencl_ctx->devices_param + thread_param->tid;
+  hc_device_param_t *device_param = backend_ctx->devices_param + thread_param->tid;
 
   if (device_param->skipped == true) return NULL;
 
diff --git a/src/status.c b/src/status.c
index ed4cb9281..3086c6066 100644
--- a/src/status.c
+++ b/src/status.c
@@ -200,32 +200,32 @@ double get_avg_exec_time (hc_device_param_t *device_param, const int last_num_en
 
 int status_get_device_info_cnt (const hashcat_ctx_t *hashcat_ctx)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  return opencl_ctx->devices_cnt;
+  return backend_ctx->devices_cnt;
 }
 
 int status_get_device_info_active (const hashcat_ctx_t *hashcat_ctx)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  return opencl_ctx->devices_active;
+  return backend_ctx->devices_active;
 }
 
 bool status_get_skipped_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
   return device_param->skipped;
 }
 
 bool status_get_skipped_warning_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
   return device_param->skipped_warning;
 }
@@ -836,13 +836,13 @@ int status_get_guess_mask_length (const hashcat_ctx_t *hashcat_ctx)
 char *status_get_guess_candidates_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
 {
   const hashconfig_t         *hashconfig         = hashcat_ctx->hashconfig;
-  const opencl_ctx_t         *opencl_ctx         = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t        *backend_ctx        = hashcat_ctx->backend_ctx;
   const status_ctx_t         *status_ctx         = hashcat_ctx->status_ctx;
   const user_options_extra_t *user_options_extra = hashcat_ctx->user_options_extra;
 
   if (status_ctx->accessible == false) return NULL;
 
-  hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
   char *display = (char *) hcmalloc (HCBUFSIZ_TINY);
 
@@ -1410,11 +1410,11 @@ u64 status_get_progress_end_relative_skip (const hashcat_ctx_t *hashcat_ctx)
 
 double status_get_hashes_msec_all (const hashcat_ctx_t *hashcat_ctx)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   double hashes_all_msec = 0;
 
-  for (u32 device_id = 0; device_id < opencl_ctx->devices_cnt; device_id++)
+  for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
   {
     hashes_all_msec += status_get_hashes_msec_dev (hashcat_ctx, device_id);
   }
@@ -1424,12 +1424,12 @@ double status_get_hashes_msec_all (const hashcat_ctx_t *hashcat_ctx)
 
 double status_get_hashes_msec_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   u64    speed_cnt  = 0;
   double speed_msec = 0;
 
-  hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
   if ((device_param->skipped == false) && (device_param->skipped_warning == false))
   {
@@ -1459,12 +1459,12 @@ double status_get_hashes_msec_dev_benchmark (const hashcat_ctx_t *hashcat_ctx, c
 {
   // this function increases accuracy for benchmark modes
 
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   u64    speed_cnt  = 0;
   double speed_msec = 0;
 
-  hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
   if ((device_param->skipped == false) && (device_param->skipped_warning == false))
   {
@@ -1486,11 +1486,11 @@ double status_get_hashes_msec_dev_benchmark (const hashcat_ctx_t *hashcat_ctx, c
 
 double status_get_exec_msec_all (const hashcat_ctx_t *hashcat_ctx)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   double exec_all_msec = 0;
 
-  for (u32 device_id = 0; device_id < opencl_ctx->devices_cnt; device_id++)
+  for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
   {
     exec_all_msec += status_get_exec_msec_dev (hashcat_ctx, device_id);
   }
@@ -1500,9 +1500,9 @@ double status_get_exec_msec_all (const hashcat_ctx_t *hashcat_ctx)
 
 double status_get_exec_msec_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
   double exec_dev_msec = 0;
 
@@ -1700,9 +1700,9 @@ char *status_get_cpt (const hashcat_ctx_t *hashcat_ctx)
 
 int status_get_salt_pos_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
   int salt_pos = 0;
 
@@ -1716,9 +1716,9 @@ int status_get_salt_pos_dev (const hashcat_ctx_t *hashcat_ctx, const int device_
 
 int status_get_innerloop_pos_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
   int innerloop_pos = 0;
 
@@ -1732,9 +1732,9 @@ int status_get_innerloop_pos_dev (const hashcat_ctx_t *hashcat_ctx, const int de
 
 int status_get_innerloop_left_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
   int innerloop_left = 0;
 
@@ -1748,9 +1748,9 @@ int status_get_innerloop_left_dev (const hashcat_ctx_t *hashcat_ctx, const int d
 
 int status_get_iteration_pos_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
   int iteration_pos = 0;
 
@@ -1764,9 +1764,9 @@ int status_get_iteration_pos_dev (const hashcat_ctx_t *hashcat_ctx, const int de
 
 int status_get_iteration_left_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
   int iteration_left = 0;
 
@@ -1781,9 +1781,9 @@ int status_get_iteration_left_dev (const hashcat_ctx_t *hashcat_ctx, const int d
 #ifdef WITH_BRAIN
 int status_get_brain_link_client_id_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
   int brain_client_id = -1;
 
@@ -1797,9 +1797,9 @@ int status_get_brain_link_client_id_dev (const hashcat_ctx_t *hashcat_ctx, const
 
 int status_get_brain_link_status_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
   int brain_link_status_dev = 0;
 
@@ -1815,9 +1815,9 @@ int status_get_brain_link_status_dev (const hashcat_ctx_t *hashcat_ctx, const in
 
 char *status_get_brain_link_recv_bytes_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
   u64 brain_link_recv_bytes = 0;
 
@@ -1835,9 +1835,9 @@ char *status_get_brain_link_recv_bytes_dev (const hashcat_ctx_t *hashcat_ctx, co
 
 char *status_get_brain_link_send_bytes_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
   u64 brain_link_send_bytes = 0;
 
@@ -1855,9 +1855,9 @@ char *status_get_brain_link_send_bytes_dev (const hashcat_ctx_t *hashcat_ctx, co
 
 char *status_get_brain_link_recv_bytes_sec_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
   u64 brain_link_recv_bytes = 0;
 
@@ -1882,9 +1882,9 @@ char *status_get_brain_link_recv_bytes_sec_dev (const hashcat_ctx_t *hashcat_ctx
 
 char *status_get_brain_link_send_bytes_sec_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
   u64 brain_link_send_bytes = 0;
 
@@ -1910,9 +1910,9 @@ char *status_get_brain_link_send_bytes_sec_dev (const hashcat_ctx_t *hashcat_ctx
 
 char *status_get_hwmon_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
   char *output_buf = (char *) hcmalloc (HCBUFSIZ_TINY);
 
@@ -1983,9 +1983,9 @@ char *status_get_hwmon_dev (const hashcat_ctx_t *hashcat_ctx, const int device_i
 
 int status_get_corespeed_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
   if (device_param->skipped == true) return -1;
 
@@ -2004,9 +2004,9 @@ int status_get_corespeed_dev (const hashcat_ctx_t *hashcat_ctx, const int device
 
 int status_get_memoryspeed_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
   if (device_param->skipped == true) return -1;
 
@@ -2025,9 +2025,9 @@ int status_get_memoryspeed_dev (const hashcat_ctx_t *hashcat_ctx, const int devi
 
 u64 status_get_progress_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
   if (device_param->skipped == true) return 0;
 
@@ -2038,9 +2038,9 @@ u64 status_get_progress_dev (const hashcat_ctx_t *hashcat_ctx, const int device_
 
 double status_get_runtime_msec_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
   if (device_param->skipped == true) return 0;
 
@@ -2051,9 +2051,9 @@ double status_get_runtime_msec_dev (const hashcat_ctx_t *hashcat_ctx, const int
 
 int status_get_kernel_accel_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
   if (device_param->skipped == true) return 0;
 
@@ -2066,9 +2066,9 @@ int status_get_kernel_accel_dev (const hashcat_ctx_t *hashcat_ctx, const int dev
 
 int status_get_kernel_loops_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
   if (device_param->skipped == true) return 0;
 
@@ -2081,9 +2081,9 @@ int status_get_kernel_loops_dev (const hashcat_ctx_t *hashcat_ctx, const int dev
 
 int status_get_kernel_threads_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
   if (device_param->skipped == true) return 0;
 
@@ -2094,9 +2094,9 @@ int status_get_kernel_threads_dev (const hashcat_ctx_t *hashcat_ctx, const int d
 
 int status_get_vector_width_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
   if (device_param->skipped == true) return 0;
 
diff --git a/src/stdout.c b/src/stdout.c
index dd31d3034..d3fbcb0c7 100644
--- a/src/stdout.c
+++ b/src/stdout.c
@@ -10,7 +10,7 @@
 #include "emu_inc_rp.h"
 #include "emu_inc_rp_optimized.h"
 #include "mpsp.h"
-#include "opencl.h"
+#include "backend.h"
 #include "shared.h"
 #include "stdout.h"
 
diff --git a/src/terminal.c b/src/terminal.c
index 94b5b223a..52cf341e3 100644
--- a/src/terminal.c
+++ b/src/terminal.c
@@ -656,17 +656,17 @@ void example_hashes (hashcat_ctx_t *hashcat_ctx)
 
 void opencl_info (hashcat_ctx_t *hashcat_ctx)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   event_log_info (hashcat_ctx, "OpenCL Info:");
   event_log_info (hashcat_ctx, NULL);
 
-  cl_uint         platforms_cnt         = opencl_ctx->platforms_cnt;
-  cl_platform_id *platforms             = opencl_ctx->platforms;
-  char          **platforms_vendor      = opencl_ctx->platforms_vendor;
-  char          **platforms_name        = opencl_ctx->platforms_name;
-  char          **platforms_version     = opencl_ctx->platforms_version;
-  cl_uint         devices_cnt           = opencl_ctx->devices_cnt;
+  cl_uint         platforms_cnt         = backend_ctx->platforms_cnt;
+  cl_platform_id *platforms             = backend_ctx->platforms;
+  char          **platforms_vendor      = backend_ctx->platforms_vendor;
+  char          **platforms_name        = backend_ctx->platforms_name;
+  char          **platforms_version     = backend_ctx->platforms_version;
+  cl_uint         devices_cnt           = backend_ctx->devices_cnt;
 
   for (cl_uint platforms_idx = 0; platforms_idx < platforms_cnt; platforms_idx++)
   {
@@ -683,7 +683,7 @@ void opencl_info (hashcat_ctx_t *hashcat_ctx)
 
     for (cl_uint devices_idx = 0; devices_idx < devices_cnt; devices_idx++)
     {
-      const hc_device_param_t *device_param = opencl_ctx->devices_param + devices_idx;
+      const hc_device_param_t *device_param = backend_ctx->devices_param + devices_idx;
 
       if (device_param->platform != platform_id) continue;
 
@@ -717,18 +717,18 @@ void opencl_info (hashcat_ctx_t *hashcat_ctx)
 
 void opencl_info_compact (hashcat_ctx_t *hashcat_ctx)
 {
-  const opencl_ctx_t   *opencl_ctx   = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t  *backend_ctx  = hashcat_ctx->backend_ctx;
   const user_options_t *user_options = hashcat_ctx->user_options;
 
   if (user_options->quiet            == true) return;
   if (user_options->machine_readable == true) return;
   if (user_options->status_json      == true) return;
 
-  cl_uint         platforms_cnt         = opencl_ctx->platforms_cnt;
-  cl_platform_id *platforms             = opencl_ctx->platforms;
-  char          **platforms_vendor      = opencl_ctx->platforms_vendor;
-  bool           *platforms_skipped     = opencl_ctx->platforms_skipped;
-  cl_uint         devices_cnt           = opencl_ctx->devices_cnt;
+  cl_uint         platforms_cnt         = backend_ctx->platforms_cnt;
+  cl_platform_id *platforms             = backend_ctx->platforms;
+  char          **platforms_vendor      = backend_ctx->platforms_vendor;
+  bool           *platforms_skipped     = backend_ctx->platforms_skipped;
+  cl_uint         devices_cnt           = backend_ctx->devices_cnt;
 
   for (cl_uint platforms_idx = 0; platforms_idx < platforms_cnt; platforms_idx++)
   {
@@ -755,7 +755,7 @@ void opencl_info_compact (hashcat_ctx_t *hashcat_ctx)
 
     for (cl_uint devices_idx = 0; devices_idx < devices_cnt; devices_idx++)
     {
-      const hc_device_param_t *device_param = opencl_ctx->devices_param + devices_idx;
+      const hc_device_param_t *device_param = backend_ctx->devices_param + devices_idx;
 
       if (device_param->platform != platform_id) continue;
 
diff --git a/src/user_options.c b/src/user_options.c
index 755e636db..6b6984ff6 100644
--- a/src/user_options.c
+++ b/src/user_options.c
@@ -11,7 +11,7 @@
 #include "interface.h"
 #include "shared.h"
 #include "usage.h"
-#include "opencl.h"
+#include "backend.h"
 #include "user_options.h"
 
 #ifdef WITH_BRAIN