Added some HIP version checks, fall back to OpenCL automatically

Switched HIP version check from driverVersion to runtimeVersion. Fixed syntax check of HAS_VPERM macro in several kernel includes causing invalid error message for AMD GPUs on Windows. Updated AMD driver requirements. Updated docs/changes.txt with missing changes from previous commits. Fixed invalid vector data type in Murmur Hash in -a 3 mode. Fixed uninitialized variable warning in src/hashes.c. Fixed broken support for --generate-rules-func-min.
2025-07-23 23:18:21 +00:00 · 2021-08-04 20:49:22 +02:00 · 2021-08-04 20:49:22 +02:00 · cb69e2d413
commit cb69e2d413
parent 496805cccf
13 changed files with 197 additions and 73 deletions
--- a/OpenCL/inc_common.cl
+++ b/OpenCL/inc_common.cl
@ -1483,7 +1483,7 @@ DECLSPEC u32 hc_bytealign_S (const u32 a, const u32 b, const int c)
  return r;
 }

-#if HAS_VPERM
+#if HAS_VPERM == 1
 DECLSPEC u32x hc_byte_perm (const u32x a, const u32x b, const int c)
 {
  u32x r = 0;
@ -1533,7 +1533,7 @@ DECLSPEC u32 hc_byte_perm_S (const u32 a, const u32 b, const int c)
 }
 #endif

-#if HAS_VADD3
+#if HAS_VADD3 == 1
 DECLSPEC u32x hc_add3 (const u32x a, const u32x b, const u32x c)
 {
  /*
@ -2781,7 +2781,7 @@ DECLSPEC void make_utf16be (const u32x *in, u32x *out1, u32x *out2)
  out1[1] = hc_byte_perm (in[0], 0, 0x3727);
  out1[0] = hc_byte_perm (in[0], 0, 0x1707);

-  #elif (defined IS_AMD || defined IS_HIP) && defined HAS_VPERM
+  #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1 // test the value, not mere definition: HAS_VPERM=0 must not select hc_byte_perm()

  out2[3] = hc_byte_perm (in[3], 0, 0x03070207);
  out2[2] = hc_byte_perm (in[3], 0, 0x01070007);
@ -2819,7 +2819,7 @@ DECLSPEC void make_utf16beN (const u32x *in, u32x *out1, u32x *out2)
  out1[1] = hc_byte_perm (in[0], 0, 0x1707);
  out1[0] = hc_byte_perm (in[0], 0, 0x3727);

-  #elif (defined IS_AMD || defined IS_HIP) && defined HAS_VPERM
+  #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1 // test the value, not mere definition: HAS_VPERM=0 must not select hc_byte_perm()

  out2[3] = hc_byte_perm (in[3], 0, 0x01070007);
  out2[2] = hc_byte_perm (in[3], 0, 0x03070207);
@ -2857,7 +2857,7 @@ DECLSPEC void make_utf16le (const u32x *in, u32x *out1, u32x *out2)
  out1[1] = hc_byte_perm (in[0], 0, 0x7372);
  out1[0] = hc_byte_perm (in[0], 0, 0x7170);

-  #elif (defined IS_AMD || defined IS_HIP) && defined HAS_VPERM
+  #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1 // test the value, not mere definition: HAS_VPERM=0 must not select hc_byte_perm()

  out2[3] = hc_byte_perm (in[3], 0, 0x07030702);
  out2[2] = hc_byte_perm (in[3], 0, 0x07010700);
@ -2895,7 +2895,7 @@ DECLSPEC void make_utf16leN (const u32x *in, u32x *out1, u32x *out2)
  out1[1] = hc_byte_perm (in[0], 0, 0x7170);
  out1[0] = hc_byte_perm (in[0], 0, 0x7372);

-  #elif (defined IS_AMD || defined IS_HIP) && defined HAS_VPERM
+  #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1 // test the value, not mere definition: HAS_VPERM=0 must not select hc_byte_perm()

  out2[3] = hc_byte_perm (in[3], 0, 0x07010700);
  out2[2] = hc_byte_perm (in[3], 0, 0x07030702);
@ -2929,7 +2929,7 @@ DECLSPEC void undo_utf16be (const u32x *in1, const u32x *in2, u32x *out)
  out[2] = hc_byte_perm (in2[0], in2[1], 0x4602);
  out[3] = hc_byte_perm (in2[2], in2[3], 0x4602);

-  #elif (defined IS_AMD || defined IS_HIP) && defined HAS_VPERM
+  #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1 // test the value, not mere definition: HAS_VPERM=0 must not select hc_byte_perm()

  out[0] = hc_byte_perm (in1[0], in1[1], 0x04060002);
  out[1] = hc_byte_perm (in1[2], in1[3], 0x04060002);
@ -2959,7 +2959,7 @@ DECLSPEC void undo_utf16le (const u32x *in1, const u32x *in2, u32x *out)
  out[2] = hc_byte_perm (in2[0], in2[1], 0x6420);
  out[3] = hc_byte_perm (in2[2], in2[3], 0x6420);

-  #elif (defined IS_AMD || defined IS_HIP) && defined HAS_VPERM
+  #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1 // test the value, not mere definition: HAS_VPERM=0 must not select hc_byte_perm()

  out[0] = hc_byte_perm (in1[0], in1[1], 0x06040200);
  out[1] = hc_byte_perm (in1[2], in1[3], 0x06040200);
@ -36547,7 +36547,7 @@ DECLSPEC void make_utf16be_S (const u32 *in, u32 *out1, u32 *out2)
  out1[1] = hc_byte_perm_S (in[0], 0, 0x3727);
  out1[0] = hc_byte_perm_S (in[0], 0, 0x1707);

-  #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM
+  #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1

  out2[3] = hc_byte_perm_S (in[3], 0, 0x03070207);
  out2[2] = hc_byte_perm_S (in[3], 0, 0x01070007);
@ -36585,7 +36585,7 @@ DECLSPEC void make_utf16le_S (const u32 *in, u32 *out1, u32 *out2)
  out1[1] = hc_byte_perm_S (in[0], 0, 0x7372);
  out1[0] = hc_byte_perm_S (in[0], 0, 0x7170);

-  #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM
+  #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1

  out2[3] = hc_byte_perm_S (in[3], 0, 0x07030702);
  out2[2] = hc_byte_perm_S (in[3], 0, 0x07010700);
@ -36619,7 +36619,7 @@ DECLSPEC void undo_utf16be_S (const u32 *in1, const u32 *in2, u32 *out)
  out[2] = hc_byte_perm_S (in2[0], in2[1], 0x4602);
  out[3] = hc_byte_perm_S (in2[2], in2[3], 0x4602);

-  #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM
+  #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1

  out[0] = hc_byte_perm_S (in1[0], in1[1], 0x04060002);
  out[1] = hc_byte_perm_S (in1[2], in1[3], 0x04060002);
@ -36649,7 +36649,7 @@ DECLSPEC void undo_utf16le_S (const u32 *in1, const u32 *in2, u32 *out)
  out[2] = hc_byte_perm_S (in2[0], in2[1], 0x6420);
  out[3] = hc_byte_perm_S (in2[2], in2[3], 0x6420);

-  #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM
+  #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1

  out[0] = hc_byte_perm_S (in1[0], in1[1], 0x06040200);
  out[1] = hc_byte_perm_S (in1[2], in1[3], 0x06040200);
--- a/OpenCL/inc_hash_blake2b.cl
+++ b/OpenCL/inc_hash_blake2b.cl
@ -24,7 +24,7 @@ DECLSPEC u64 blake2b_rot16_S (const u64 a)

  return out.v64;

-  #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM
+  #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1

  vconv64_t in;

@ -98,7 +98,7 @@ DECLSPEC u64 blake2b_rot24_S (const u64 a)

  return out.v64;

-  #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM
+  #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1

  vconv64_t in;

--- a/OpenCL/inc_types.h
+++ b/OpenCL/inc_types.h
@ -77,7 +77,7 @@ typedef u64  u64x;
 #define make_u64x (u64)

 #else
-#if defined IS_CUDA
+#if defined IS_CUDA || defined IS_HIP

 #if VECT_SIZE == 2

--- a/OpenCL/m25700_a3-optimized.cl
+++ b/OpenCL/m25700_a3-optimized.cl
@ -14,7 +14,7 @@
 #include "inc_hash_md5.cl"
 #endif

-DECLSPEC u32x MurmurHash_w0 (const u32 seed, const u32x w0, const u32x *w, const int pw_len)
+DECLSPEC u32x MurmurHash_w0 (const u32 seed, const u32x w0, const u32 *w, const int pw_len)
 {
  u32x hash = seed;

--- a/docs/changes.txt
+++ b/docs/changes.txt
@ -4,48 +4,62 @@
 ## Features
 ##

- Added option --multiply-accel-disable (short: -M) to disable multiply the kernel-accel with the multiprocessor count automatism
- HIP Backend: Added support to support HIP 4.4 and later, but added check to rule out older versions because they are incompatible
+- Added option --multiply-accel-disable (short: -M) to disable multiplying of the kernel accel with the multiprocessor count
+- Added rule function '3' to change the case of the first letter after the occurrence of N of character X
+- Added support for auto tuning --kernel-threads (-T) at startup
+- Added support for HIP version 4.3 or later and removed support for older HIP versions as they are not compatible

 ##
 ## Bugs
 ##

- Fixed buffer overflow in Stargazer Stellar Wallet XLM module in hash_encode() if a hash was cracked
- Fixed autotune unitialized tmps variable for slow hashes by calling _init kernel before calling _loop kernel
- Fixed datatype in function sha384_hmac_init_vector_128() that could come into effect if vector datatype was manually set
- Fixed false negative in all VeraCrypt hash-modes if both conditions are met: 1. use CPU for cracking and 2. PIM range was used
- Fixed multiple buffer overflow in DPAPI masterkey file v1 and v2 module
- Fixed out-of-boundary read in input_tokenizer() if the signature in the hash is longer than the length of the plugins' signature constant
- Fixed out-of-boundary read in Stuffit5 module in hash_decode()
+- Fixed buffer overflow in DPAPI masterkey file v1/v2 module in hash_encode() and hash_decode()
+- Fixed buffer overflow in Stargazer Stellar Wallet XLM module in hash_encode() when a hash was cracked
+- Fixed false negative in all VeraCrypt hash modes if both conditions are met: 1. Use CPU for cracking and 2. PIM range was used
+- Fixed invalid data type in the sha384_hmac_init_vector_128() function that takes effect if the vector data type was specified manually
+- Fixed out-of-boundary read in input_tokenizer() if the signature in the hash is longer than the length of the plugin's signature constant
+- Fixed out-of-boundary read in the Stuffit5 module in hash_decode()
+- Fixed random rule generator option --generate-rules-func-min by fixing switch() case so that it no longer selects a non-existent option group type
+- Fixed uninitialized tmps variable in autotune for slow hashes by calling _init and _prepare kernel before calling _loop kernel

 ##
-## Improvements
+## Performance
 ##

- AMD GPUs: Add inline assembly code for md5crypt/sha256crypt, PDF 1.7, 7-Zip, RAR3, Samsung Android and Windows Phone 8+
- AMD GPUs: On Apple OpenCL platform, we ask for the preferred kernel thread size rather than hard-coding 32
- Blake Kernels: Optimize BLAKE2B_ROUND() 64 bit rotates giving a 5% performance increase
- Blowfish Kernels: Backport optimizations reducing bank conflicts from bcrypt to Password Safe v2 and Open Document Format (ODF) 1.1
- Brain Session: Adds hashconfig specific opti_type and opts_type parameters to hashcat session computation to cover features like -O and -M
- Kernel Threads: Use warp size / wavefront size query instead of hardcoded values as base for kernel threads
- Shared Memory: Calculate kernel dynamic memory size based on CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN
- Slow Kernels: Set some of the slowest kernels to OPTS_TYPE_MP_MULTI_DISABLE
+- AMD GPUs: Add inline assembly code for md5crypt, sha256crypt, PDF 1.7, 7-Zip, RAR3, Samsung Android and Windows Phone 8+
+- AMD GPUs: On the Apple OpenCL platform, we ask for the preferred kernel thread size rather than hard-coding 32
+- Backend Interface: Replace most of the blocking Compute API functions with asynchronous ones to improve GPU utilization
+- Blake Kernels: Optimize 3/4 BLAKE2B_ROUND() 64-bit rotations with inline assembly hc_byte_perm_S() calls
+- Blowfish Kernels: Backport optimizations to reduce bank conflicts from bcrypt to Password Safe v2 and Open Document Format (ODF) 1.1
+- ECC secp256k1: The inline assembly code for AMD GPUs has been removed as the latest JIT compilers optimize it with the same efficiency
+- HIP Kernels: Enable vector data types for HIP kernels for functionality and performance
+- Kernel threads: Use warp size / wavefront size query instead of hard-coded values as the basis for kernel threads
+- SCRYPT Kernels: Improve hashcat.hctune entries for many NV and AMD GPUs for hash mode 8900, 9300, 15700 and 22700
+- Tuning Database: Add new module function module_extra_tuningdb_block() to extend hashcat.hctune content from a module

 ##
 ## Technical
 ##

- ADL: Updated support for AMD Display Library to 15.0, updated datatypes and added support for OverDrive 7 and 8 based GPUs
- AMD Driver: Updated requirement for AMD Linux driver to ROCm 4.4 or later because of new HIP Interface
- AMD Driver: Updated requirement for AMD Windows driver to Adrenalin 21.2.1 or later because of new ADL library
- Commandline: Throw an error if separator character given by the user with -p option is not exactly 1 byte
- ECC secp256k1: Removed the inline assembly code for AMD GPUs because the latest JIT compilers optimize it with the same efficiency
- HIP Kernels: Got rid of hip/hip_runtime.h dependancy to enable more easy integration of the HIP backend on Windows
- Kernel Cache: Add kernel threads into hash computation which is later used in the kernel cache filename
- Memory Management: Refactored the code responsible for limiting kernel accel in order to avoid out of -host- memory situations
- SCRYPT Kernels: Add more optimized values for some new NV/AMD GPUs
- 7-Zip Hook: Increase supported data length from 320kb to 8mb
+- 7-Zip Hook: Increase the supported data length from 320kb to 8mb
+- ADL: Updated support for AMD Display Library to 15.0, updated data types
+- AMD Driver: Updated requirements for AMD Linux drivers to ROCm 4.3 or later due to new HIP interface
+- AMD Driver: Updated requirements for AMD Windows drivers to Adrenalin 21.2.1 or later due to new ADL library
+- Backend Interface: Implement gpu_bzero() as a gpu_memset() replacement, since all gpu_memset() operations used 0 as the value
+- Backend Interface: Improve the query of the kernel's dynamic memory size based on DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN instead of BF
+- Brain Session: Adds hashconfig-specific opti_type and opts_type parameters to the session calculation to cover features like -O
+- Commandline: Throw an error if the separator specified by the user with the -p option is not exactly 1 byte
+- Constants: Make const char * pointers actually const char * const pointers
+- Filehandling: Use const char for fopen mode to fix -Wwrite-strings warnings
+- Hardware Monitor: Added support for OverDrive 7 and 8 based GPUs
+- HIP Kernels: Dependency on hip/hip_runtime.h has been removed to enable easier integration of the HIP backend under Windows
+- Kernel cache: Add kernel threads for hash calculation, which will later be used in the file name of the kernel cache
+- Memory Management: Refactored the code responsible for limiting kernel accel with the goal to avoid low host memory situations
+- OpenCL Runtime: Workaround for Intel OpenCL runtime: segmentation fault when compiling hc_enc_next() / hc_enc_next_global()
+- RC4 Kernels: Use improved native thread derivation for RC4-based hash modes 7500, 13100, 18200, 25400
+- Shared Memory: Calculate the dynamic memory size of the kernel based on CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN
+- Slow kernels: Set some of the slowest kernels to OPTS_TYPE_MP_MULTI_DISABLE to make it easier to handle small word lists
+- Vendor Discovery: Add "Intel" as a valid vendor name for GPUs on macOS

 ##
 ## Algorithms
--- a/docs/readme.txt
+++ b/docs/readme.txt
@ -10,7 +10,7 @@
 hashcat v6.2.3
 ==============

-AMD GPUs on Linux require "AMD ROCm" (4.4 or later)
+AMD GPUs on Linux require "AMD ROCm" (4.3 or later)
 AMD GPUs on Windows require "AMD Radeon Adrenalin 2020 Edition" (21.2.1 or later)
 Intel CPUs require "OpenCL Runtime for Intel Core and Intel Xeon Processors" (16.1.1 or later)
 NVIDIA GPUs require "NVIDIA Driver" (440.64 or later) and "CUDA Toolkit" (9.0 or later)
--- a/include/backend.h
+++ b/include/backend.h
@ -128,6 +128,7 @@ int hc_hipModuleGetFunction      (hashcat_ctx_t *hashcat_ctx, hipFunction_t *hfu
 int hc_hipModuleGetGlobal        (hashcat_ctx_t *hashcat_ctx, hipDeviceptr_t *dptr, size_t *bytes, hipModule_t hmod, const char *name);
 int hc_hipModuleLoadDataEx       (hashcat_ctx_t *hashcat_ctx, hipModule_t *module, const void *image, unsigned int numOptions, hipJitOption *options, void **optionValues);
 int hc_hipModuleUnload           (hashcat_ctx_t *hashcat_ctx, hipModule_t hmod);
+int hc_hipRuntimeGetVersion      (hashcat_ctx_t *hashcat_ctx, int *runtimeVersion);
 int hc_hipStreamCreate           (hashcat_ctx_t *hashcat_ctx, hipStream_t *phStream, unsigned int Flags);
 int hc_hipStreamDestroy          (hashcat_ctx_t *hashcat_ctx, hipStream_t hStream);
 int hc_hipStreamSynchronize      (hashcat_ctx_t *hashcat_ctx, hipStream_t hStream);
--- a/include/ext_hip.h
+++ b/include/ext_hip.h
@ -344,7 +344,6 @@ typedef enum hipJitOption {

 // stop: hip_runtime_api.h

-
 #ifdef _WIN32
 #define HIPAPI __stdcall
 #else
@ -387,6 +386,7 @@ typedef hipError_t (HIP_API_CALL *HIP_HIPMODULEGETFUNCTION)      (hipFunction_t
 typedef hipError_t (HIP_API_CALL *HIP_HIPMODULEGETGLOBAL)        (hipDeviceptr_t *, size_t *, hipModule_t, const char *);
 typedef hipError_t (HIP_API_CALL *HIP_HIPMODULELOADDATAEX)       (hipModule_t *, const void *, unsigned int, hipJitOption *, void **);
 typedef hipError_t (HIP_API_CALL *HIP_HIPMODULEUNLOAD)           (hipModule_t);
+typedef hipError_t (HIP_API_CALL *HIP_HIPRUNTIMEGETVERSION)      (int *);
 typedef hipError_t (HIP_API_CALL *HIP_HIPSTREAMCREATE)           (hipStream_t *, unsigned int);
 typedef hipError_t (HIP_API_CALL *HIP_HIPSTREAMDESTROY)          (hipStream_t);
 typedef hipError_t (HIP_API_CALL *HIP_HIPSTREAMSYNCHRONIZE)      (hipStream_t);
@ -429,6 +429,7 @@ typedef struct hc_hip_lib
  HIP_HIPMODULEGETGLOBAL        hipModuleGetGlobal;
  HIP_HIPMODULELOADDATAEX       hipModuleLoadDataEx;
  HIP_HIPMODULEUNLOAD           hipModuleUnload;
+  HIP_HIPRUNTIMEGETVERSION      hipRuntimeGetVersion;
  HIP_HIPSTREAMCREATE           hipStreamCreate;
  HIP_HIPSTREAMDESTROY          hipStreamDestroy;
  HIP_HIPSTREAMSYNCHRONIZE      hipStreamSynchronize;
--- a/include/types.h
+++ b/include/types.h
@ -1728,6 +1728,7 @@ typedef struct backend_ctx
  int                 rc_hip_init;
  int                 rc_hiprtc_init;

+  int                 hip_runtimeVersion;
  int                 hip_driverVersion;

  // opencl
--- a/src/backend.c
+++ b/src/backend.c
@ -2434,9 +2434,6 @@ int hip_init (hashcat_ctx_t *hashcat_ctx)
  hip->lib = hc_dlopen ("amdhip64.dll");
  #else
  hip->lib = hc_dlopen ("libamdhip64.so");
-
-  //TODO: grab the 4 from the major RT version
-  if (hip->lib == NULL) hip->lib = hc_dlopen ("libamdhip64.so.4.2.40200");
  #endif

  if (hip->lib == NULL) return -1;
@ -2498,6 +2495,7 @@ int hip_init (hashcat_ctx_t *hashcat_ctx)
  HC_LOAD_FUNC_HIP (hip, hipModuleGetGlobal,        hipModuleGetGlobal,         HIP_HIPMODULEGETGLOBAL,         HIP, 1);
  HC_LOAD_FUNC_HIP (hip, hipModuleLoadDataEx,       hipModuleLoadDataEx,        HIP_HIPMODULELOADDATAEX,        HIP, 1);
  HC_LOAD_FUNC_HIP (hip, hipModuleUnload,           hipModuleUnload,            HIP_HIPMODULEUNLOAD,            HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipRuntimeGetVersion,      hipRuntimeGetVersion,       HIP_HIPRUNTIMEGETVERSION,       HIP, 1);
  HC_LOAD_FUNC_HIP (hip, hipStreamCreate,           hipStreamCreate,            HIP_HIPSTREAMCREATE,            HIP, 1);
  HC_LOAD_FUNC_HIP (hip, hipStreamDestroy,          hipStreamDestroy,           HIP_HIPSTREAMDESTROY,           HIP, 1);
  HC_LOAD_FUNC_HIP (hip, hipStreamSynchronize,      hipStreamSynchronize,       HIP_HIPSTREAMSYNCHRONIZE,       HIP, 1);
@ -3388,6 +3386,33 @@ int hc_hipModuleUnload (hashcat_ctx_t *hashcat_ctx, hipModule_t hmod)
  return 0;
 }

+int hc_hipRuntimeGetVersion (hashcat_ctx_t *hashcat_ctx, int *runtimeVersion) // logging wrapper around hipRuntimeGetVersion(); returns 0 on success, -1 on failure
+{
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
+
+  HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; // table of function pointers loaded from the HIP library
+
+  const hipError_t HIP_err = hip->hipRuntimeGetVersion (runtimeVersion); // caller's pointer is passed straight through to the library
+
+  if (HIP_err != hipSuccess)
+  {
+    const char *pStr = NULL;
+
+    if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess) // prefer the textual error description when the lookup succeeds
+    {
+      event_log_error (hashcat_ctx, "hipRuntimeGetVersion(): %s", pStr);
+    }
+    else // otherwise fall back to logging the numeric error code
+    {
+      event_log_error (hashcat_ctx, "hipRuntimeGetVersion(): %d", HIP_err);
+    }
+
+    return -1;
+  }
+
+  return 0;
+}
+
 int hc_hipStreamCreate (hashcat_ctx_t *hashcat_ctx, hipStream_t *phStream, unsigned int Flags)
 {
  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
@ -7058,23 +7083,70 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx)

      backend_ctx->hip_driverVersion = hip_driverVersion;

-      if (hip_driverVersion < 404)
-      {
-        event_log_error (hashcat_ctx, "Outdated AMD HIP driver version '%d' detected!", hip_driverVersion);
+      int hip_runtimeVersion;

-        event_log_warning (hashcat_ctx, "See hashcat.net for officially supported AMD HIP versions.");
+      if (hc_hipRuntimeGetVersion (hashcat_ctx, &hip_runtimeVersion) == -1) return -1;
+
+      backend_ctx->hip_runtimeVersion = hip_runtimeVersion;
+
+      if (hip_runtimeVersion < 1000)
+      {
+        if (hip_runtimeVersion < 404)
+        {
+          event_log_warning (hashcat_ctx, "Unsupported AMD HIP runtime version '%d' detected! Falling back to OpenCL...", hip_runtimeVersion);
+          event_log_warning (hashcat_ctx, NULL);
+
+          rc_hip_init    = -1;
+          rc_hiprtc_init = -1;
+
+          backend_ctx->rc_hip_init    = rc_hip_init;
+          backend_ctx->rc_hiprtc_init = rc_hiprtc_init;
+
+          backend_ctx->hip    = NULL;
+          backend_ctx->hiprtc = NULL;
+
+          // if we call this, opencl stops working?! so we just zero the pointer
+          // this causes a memleak and an open filehandle but what can we do?
+          // hip_close    (hashcat_ctx);
+          // hiprtc_close (hashcat_ctx);
+        }
+      }
+      else
+      {
+        // we need to wait for 4.4 to be released to continue here
+        // ignore this backend
+
+        event_log_warning (hashcat_ctx, "Unsupported AMD HIP runtime version '%d' detected! Falling back to OpenCL...", hip_runtimeVersion);
        event_log_warning (hashcat_ctx, NULL);

-        return -1;
+        rc_hip_init    = -1;
+        rc_hiprtc_init = -1;
+
+        backend_ctx->rc_hip_init    = rc_hip_init;
+        backend_ctx->rc_hiprtc_init = rc_hiprtc_init;
+
+        backend_ctx->hip = NULL;
+
+        // if we call this, opencl stops working?! so we just zero the pointer
+        // this causes a memleak and an open filehandle but what can we do?
+        // hip_close    (hashcat_ctx);
+        // hiprtc_close (hashcat_ctx);
      }
    }
    else
    {
-      rc_hip_init  = -1;
+      rc_hip_init    = -1;
      rc_hiprtc_init = -1;

-      hip_close  (hashcat_ctx);
-      hiprtc_close (hashcat_ctx);
+      backend_ctx->rc_hip_init    = rc_hip_init;
+      backend_ctx->rc_hiprtc_init = rc_hiprtc_init;
+
+      backend_ctx->hip = NULL;
+
+      // if we call this, opencl stops working?! so we just zero the pointer
+      // this causes a memleak and an open filehandle but what can we do?
+      // hip_close    (hashcat_ctx);
+      // hiprtc_close (hashcat_ctx);
    }
  }

@ -7110,7 +7182,7 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx)

      #if defined (__linux__)
      event_log_warning (hashcat_ctx, "* AMD GPUs on Linux require this driver:");
-      event_log_warning (hashcat_ctx, "  \"AMD ROCm\" (4.4 or later)");
+      event_log_warning (hashcat_ctx, "  \"AMD ROCm\" (4.3 or later)");
      #elif defined (_WIN)
      event_log_warning (hashcat_ctx, "* AMD GPUs on Windows require this driver:");
      event_log_warning (hashcat_ctx, "  \"AMD Radeon Adrenalin 2020 Edition\" (21.2.1 or later)");
@ -7435,7 +7507,7 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx)

    #if defined (__linux__)
    event_log_warning (hashcat_ctx, "* AMD GPUs on Linux require this driver:");
-    event_log_warning (hashcat_ctx, "  \"AMD ROCm\" (4.4 or later)");
+    event_log_warning (hashcat_ctx, "  \"AMD ROCm\" (4.3 or later)");
    #elif defined (_WIN)
    event_log_warning (hashcat_ctx, "* AMD GPUs on Windows require this driver:");
    event_log_warning (hashcat_ctx, "  \"AMD Radeon Adrenalin 2020 Edition\" (21.2.1 or later)");
@ -8178,14 +8250,14 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
      {
        if ((user_options->force == false) && (user_options->backend_info == false))
        {
-          // HIPDA does not support query nvidia driver version, therefore no driver checks here
+          // CUDA does not support query nvidia driver version, therefore no driver checks here
          // IF needed, could be retrieved using nvmlSystemGetDriverVersion()

          if (device_param->sm_major < 5)
          {
-            if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: This hardware has outdated HIPDA compute capability (%u.%u).", device_id + 1, device_param->sm_major, device_param->sm_minor);
+            if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: This hardware has outdated CUDA compute capability (%u.%u).", device_id + 1, device_param->sm_major, device_param->sm_minor);
            if (user_options->quiet == false) event_log_warning (hashcat_ctx, "             For modern OpenCL performance, upgrade to hardware that supports");
-            if (user_options->quiet == false) event_log_warning (hashcat_ctx, "             HIPDA compute capability version 5.0 (Maxwell) or higher.");
+            if (user_options->quiet == false) event_log_warning (hashcat_ctx, "             CUDA compute capability version 5.0 (Maxwell) or higher.");
          }

          if (device_param->kernel_exec_timeout != 0)
@ -10298,6 +10370,14 @@ static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_p

      hc_asprintf (&hiprtc_options[0], "--gpu-max-threads-per-block=%d", (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : device_param->kernel_threads_max);

+      /* 4.3 linux
+      hiprtc_options[1] = "-I";
+      hiprtc_options[2] = "/opt/rocm/hip/bin/include";
+      hiprtc_options[3] = "-I";
+      hiprtc_options[4] = "/opt/rocm/include";
+      hiprtc_options[5] = "-I";
+      */
+
      hiprtc_options[1] = "-nocudainc";
      hiprtc_options[2] = "-nocudalib";
      hiprtc_options[3] = "";
@ -11320,7 +11400,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
    const size_t dnclen_amp_mp = snprintf (device_name_chksum_amp_mp, HCBUFSIZ_TINY, "%d-%d-%d-%d-%u-%s-%s-%s-%u",
      backend_ctx->comptime,
      backend_ctx->cuda_driver_version,
-      backend_ctx->hip_driverVersion,
+      backend_ctx->hip_runtimeVersion,
      device_param->is_opencl,
      device_param->opencl_platform_vendor_id,
      device_param->device_name,
@ -11662,7 +11742,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
      const size_t dnclen = snprintf (device_name_chksum, HCBUFSIZ_TINY, "%d-%d-%d-%d-%u-%s-%s-%s-%d-%u-%u-%u-%s",
        backend_ctx->comptime,
        backend_ctx->cuda_driver_version,
-        backend_ctx->hip_driverVersion,
+        backend_ctx->hip_runtimeVersion,
        device_param->is_opencl,
        device_param->opencl_platform_vendor_id,
        device_param->device_name,
--- a/src/hashes.c
+++ b/src/hashes.c
@ -314,7 +314,8 @@ int check_hash (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, pla
  void *tmps = NULL;

  cl_event opencl_event;
-  int rc;
+
+  int rc = -1;

  if (hashconfig->opts_type & OPTS_TYPE_COPY_TMPS)
  {
@ -556,7 +557,8 @@ int check_cracked (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
  user_options_t *user_options = hashcat_ctx->user_options;

  u32 num_cracked = 0;
-  int rc;
+
+  int rc = -1;

  if (device_param->is_cuda == true)
  {
--- a/src/rp.c
+++ b/src/rp.c
@ -141,7 +141,7 @@ int generate_random_rule (char rule_buf[RP_RULE_SIZE], const u32 rp_gen_func_min
    u32 p1 = 0;
    u32 p2 = 0;

-    switch ((char) get_random_num (0, 9))
+    switch ((char) get_random_num (0, 8))
    {
      case 0:
        r = get_random_num (0, sizeof (grp_op_nop));
--- a/src/terminal.c
+++ b/src/terminal.c
@ -818,10 +818,22 @@ void backend_info (hashcat_ctx_t *hashcat_ctx)
    event_log_info (hashcat_ctx, NULL);

    int hip_devices_cnt    = backend_ctx->hip_devices_cnt;
-    int hip_driverVersion  = backend_ctx->hip_driverVersion;
+    int hip_runtimeVersion = backend_ctx->hip_runtimeVersion;

-    event_log_info (hashcat_ctx, "HIP.Version.: %d.%d", hip_driverVersion / 100, hip_driverVersion % 10);
-    event_log_info (hashcat_ctx, NULL);
+    if (hip_runtimeVersion > 1000)
+    {
+      int hip_version_major = (hip_runtimeVersion - 0) / 10000000;
+      int hip_version_minor = (hip_runtimeVersion - (hip_version_major * 10000000)) / 100000;
+      int hip_version_patch = (hip_runtimeVersion - (hip_version_major * 10000000) - (hip_version_minor * 100000));
+
+      event_log_info (hashcat_ctx, "HIP.Version.: %d.%d.%d", hip_version_major, hip_version_minor, hip_version_patch);
+      event_log_info (hashcat_ctx, NULL);
+    }
+    else
+    {
+      event_log_info (hashcat_ctx, "HIP.Version.: %d.%d", hip_runtimeVersion / 100, hip_runtimeVersion % 10);
+      event_log_info (hashcat_ctx, NULL);
+    }

    for (int hip_devices_idx = 0; hip_devices_idx < hip_devices_cnt; hip_devices_idx++)
    {
@ -1014,10 +1026,23 @@ void backend_info_compact (hashcat_ctx_t *hashcat_ctx)

  if (backend_ctx->hip)
  {
-    int hip_devices_cnt   = backend_ctx->hip_devices_cnt;
-    int hip_driverVersion = backend_ctx->hip_driverVersion;
+    int hip_devices_cnt    = backend_ctx->hip_devices_cnt;
+    int hip_runtimeVersion = backend_ctx->hip_runtimeVersion;

-    const size_t len = event_log_info (hashcat_ctx, "HIP API (HIP %d.%d)", hip_driverVersion / 100, hip_driverVersion % 10);
+    size_t len;
+
+    if (hip_runtimeVersion > 1000)
+    {
+      int hip_version_major = (hip_runtimeVersion - 0) / 10000000;
+      int hip_version_minor = (hip_runtimeVersion - (hip_version_major * 10000000)) / 100000;
+      int hip_version_patch = (hip_runtimeVersion - (hip_version_major * 10000000) - (hip_version_minor * 100000));
+
+      len = event_log_info (hashcat_ctx, "HIP API (HIP %d.%d.%d)", hip_version_major, hip_version_minor, hip_version_patch);
+    }
+    else
+    {
+      len = event_log_info (hashcat_ctx, "HIP API (HIP %d.%d)", hip_runtimeVersion / 100, hip_runtimeVersion % 10);
+    }

    char line[HCBUFSIZ_TINY] = { 0 };