From 14f5a26027399f7178e60b3d652590a02462dccd Mon Sep 17 00:00:00 2001
From: Rosen Penev <rosenp@gmail.com>
Date: Sun, 18 Jul 2021 12:27:21 -0700
Subject: [PATCH 01/24] use const char for fopen mode

Fixes -Wwrite-strings warnings.

Signed-off-by: Rosen Penev <rosenp@gmail.com>
---
 include/filehandling.h | 4 ++--
 include/types.h        | 2 +-
 src/filehandling.c     | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/include/filehandling.h b/include/filehandling.h
index 1d13097e4..db03ac456 100644
--- a/include/filehandling.h
+++ b/include/filehandling.h
@@ -15,8 +15,8 @@
 int    _wopen       (const char *path, int oflag, ...);
 #endif
 
-bool   hc_fopen     (HCFILE *fp, const char *path, char *mode);
-bool   hc_fopen_raw (HCFILE *fp, const char *path, char *mode);
+bool   hc_fopen     (HCFILE *fp, const char *path, const char *mode);
+bool   hc_fopen_raw (HCFILE *fp, const char *path, const char *mode);
 int    hc_fscanf    (HCFILE *fp, const char *format, void *ptr);
 int    hc_fprintf   (HCFILE *fp, const char *format, ...);
 int    hc_vfprintf  (HCFILE *fp, const char *format, va_list ap);
diff --git a/include/types.h b/include/types.h
index ed22a95ee..9b58e6c6a 100644
--- a/include/types.h
+++ b/include/types.h
@@ -1075,7 +1075,7 @@ typedef struct hc_fp
   bool        is_zip;
   int         bom_size;
 
-  char       *mode;
+  const char *mode;
   const char *path;
 
 } HCFILE;
diff --git a/src/filehandling.c b/src/filehandling.c
index 54ba73369..9edd1cb35 100644
--- a/src/filehandling.c
+++ b/src/filehandling.c
@@ -21,7 +21,7 @@ int _wopen (const char *path, int oflag, ...)
 }
 #endif
 
-bool hc_fopen (HCFILE *fp, const char *path, char *mode)
+bool hc_fopen (HCFILE *fp, const char *path, const char *mode)
 {
   if (path == NULL || mode == NULL) return false;
 
@@ -130,7 +130,7 @@ bool hc_fopen (HCFILE *fp, const char *path, char *mode)
   return true;
 }
 
-bool hc_fopen_raw (HCFILE *fp, const char *path, char *mode)
+bool hc_fopen_raw (HCFILE *fp, const char *path, const char *mode)
 {
   if (path == NULL || mode == NULL) return false;
 

From a9ceb6377e9e9f567d418c0bb2992d44eebff985 Mon Sep 17 00:00:00 2001
From: Rosen Penev <rosenp@gmail.com>
Date: Sun, 18 Jul 2021 12:33:36 -0700
Subject: [PATCH 02/24] add missing const

Fixes -Wwrite-strings issue.

Signed-off-by: Rosen Penev <rosenp@gmail.com>
---
 src/terminal.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/terminal.c b/src/terminal.c
index 167b8b821..b72e7d9e4 100644
--- a/src/terminal.c
+++ b/src/terminal.c
@@ -640,7 +640,7 @@ void hash_info_single (hashcat_ctx_t *hashcat_ctx, user_options_extra_t *user_op
     if (hashconfig->is_salted == true)
     {
       u32 t = hashconfig->salt_type;
-      char *t_desc = (t == SALT_TYPE_EMBEDDED) ? "Embedded\0" : (t == SALT_TYPE_GENERIC) ? "Generic\0" : "Virtual\0";
+      const char *t_desc = (t == SALT_TYPE_EMBEDDED) ? "Embedded\0" : (t == SALT_TYPE_GENERIC) ? "Generic\0" : "Virtual\0";
       event_log_info (hashcat_ctx, "  Salt.Type...........: %s", t_desc);
       event_log_info (hashcat_ctx, "  Salt.Len.Min........: %d", hashconfig->salt_min);
       event_log_info (hashcat_ctx, "  Salt.Len.Max........: %d", hashconfig->salt_max);

From adaf3f293b566e2ae12285b698ed6d22bbbebb8c Mon Sep 17 00:00:00 2001
From: Rosen Penev <rosenp@gmail.com>
Date: Sun, 18 Jul 2021 12:48:54 -0700
Subject: [PATCH 03/24] make const char pointers actually const

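`const char *` declares a non-const pointer to const data, so the pointer
itself can still be reassigned. Add the missing `const` after the `*` so
that these string-constant pointers are themselves constant as well.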

Signed-off-by: Rosen Penev <rosenp@gmail.com>
---
 src/backend.c      |   6 +-
 src/brain.c        |   2 +-
 src/hlfmt.c        |  20 ++---
 src/mpsp.c         |   2 +-
 src/shared.c       | 184 ++++++++++++++++++++++-----------------------
 src/status.c       |  40 +++++-----
 src/terminal.c     |   4 +-
 src/user_options.c |  20 ++---
 8 files changed, 139 insertions(+), 139 deletions(-)

diff --git a/src/backend.c b/src/backend.c
index 31d6bade1..6430cef39 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -26,10 +26,10 @@
 #include "terminal.h"
 
 #if defined (__linux__)
-static const char *dri_card0_path = "/dev/dri/card0";
+static const char *const  dri_card0_path = "/dev/dri/card0";
 
-static const char *drm_card0_vendor_path = "/sys/class/drm/card0/device/vendor";
-static const char *drm_card0_driver_path = "/sys/class/drm/card0/device/driver";
+static const char *const  drm_card0_vendor_path = "/sys/class/drm/card0/device/vendor";
+static const char *const  drm_card0_driver_path = "/sys/class/drm/card0/device/driver";
 #endif
 
 static const u32 full01 = 0x01010101;
diff --git a/src/brain.c b/src/brain.c
index 2962eaa2e..a12a1375b 100644
--- a/src/brain.c
+++ b/src/brain.c
@@ -666,7 +666,7 @@ u32 brain_auth_challenge (void)
 
   #else
 
-  static const char *urandom = "/dev/urandom";
+  static const char *const urandom = "/dev/urandom";
 
   HCFILE fp;
 
diff --git a/src/hlfmt.c b/src/hlfmt.c
index 0e803445b..be71742e2 100644
--- a/src/hlfmt.c
+++ b/src/hlfmt.c
@@ -10,16 +10,16 @@
 #include "hlfmt.h"
 #include "shared.h"
 
-static const char *HLFMT_TEXT_HASHCAT  = "native hashcat";
-static const char *HLFMT_TEXT_PWDUMP   = "pwdump";
-static const char *HLFMT_TEXT_PASSWD   = "passwd";
-static const char *HLFMT_TEXT_SHADOW   = "shadow";
-static const char *HLFMT_TEXT_DCC      = "DCC";
-static const char *HLFMT_TEXT_DCC2     = "DCC 2";
-static const char *HLFMT_TEXT_NETNTLM1 = "NetNTLMv1";
-static const char *HLFMT_TEXT_NETNTLM2 = "NetNTLMv2";
-static const char *HLFMT_TEXT_NSLDAP   = "nsldap";
-static const char *HLFMT_TEXT_NSLDAPS  = "nsldaps";
+static const char *const HLFMT_TEXT_HASHCAT  = "native hashcat";
+static const char *const HLFMT_TEXT_PWDUMP   = "pwdump";
+static const char *const HLFMT_TEXT_PASSWD   = "passwd";
+static const char *const HLFMT_TEXT_SHADOW   = "shadow";
+static const char *const HLFMT_TEXT_DCC      = "DCC";
+static const char *const HLFMT_TEXT_DCC2     = "DCC 2";
+static const char *const HLFMT_TEXT_NETNTLM1 = "NetNTLMv1";
+static const char *const HLFMT_TEXT_NETNTLM2 = "NetNTLMv2";
+static const char *const HLFMT_TEXT_NSLDAP   = "nsldap";
+static const char *const HLFMT_TEXT_NSLDAPS  = "nsldaps";
 
 // hlfmt hashcat
 
diff --git a/src/mpsp.c b/src/mpsp.c
index 67168e189..d096b97b5 100644
--- a/src/mpsp.c
+++ b/src/mpsp.c
@@ -16,7 +16,7 @@
 #include "ext_lzma.h"
 #include "mpsp.h"
 
-static const char *DEF_MASK = "?1?2?2?2?2?2?2?3?3?3?3?d?d?d?d";
+static const char *const DEF_MASK = "?1?2?2?2?2?2?2?3?3?3?3?d?d?d?d";
 
 #define MAX_MFS 5 // 4*charset, 1*mask
 
diff --git a/src/shared.c b/src/shared.c
index b101cc01f..7efdd4d53 100644
--- a/src/shared.c
+++ b/src/shared.c
@@ -15,97 +15,97 @@
 #include <sys/cygwin.h>
 #endif
 
-static const char *PA_000 = "OK";
-static const char *PA_001 = "Ignored due to comment";
-static const char *PA_002 = "Ignored due to zero length";
-static const char *PA_003 = "Line-length exception";
-static const char *PA_004 = "Hash-length exception";
-static const char *PA_005 = "Hash-value exception";
-static const char *PA_006 = "Salt-length exception";
-static const char *PA_007 = "Salt-value exception";
-static const char *PA_008 = "Salt-iteration count exception";
-static const char *PA_009 = "Separator unmatched";
-static const char *PA_010 = "Signature unmatched";
-static const char *PA_011 = "Invalid hccapx file size";
-static const char *PA_012 = "Invalid hccapx eapol size";
-static const char *PA_013 = "Invalid psafe2 filesize";
-static const char *PA_014 = "Invalid psafe3 filesize";
-static const char *PA_015 = "Invalid truecrypt filesize";
-static const char *PA_016 = "Invalid veracrypt filesize";
-static const char *PA_017 = "Invalid SIP directive, only MD5 is supported";
-static const char *PA_018 = "Hash-file exception";
-static const char *PA_019 = "Hash-encoding exception";
-static const char *PA_020 = "Salt-encoding exception";
-static const char *PA_021 = "Invalid LUKS filesize";
-static const char *PA_022 = "Invalid LUKS identifier";
-static const char *PA_023 = "Invalid LUKS version";
-static const char *PA_024 = "Invalid or unsupported LUKS cipher type";
-static const char *PA_025 = "Invalid or unsupported LUKS cipher mode";
-static const char *PA_026 = "Invalid or unsupported LUKS hash type";
-static const char *PA_027 = "Invalid LUKS key size";
-static const char *PA_028 = "Disabled LUKS key detected";
-static const char *PA_029 = "Invalid LUKS key AF stripes count";
-static const char *PA_030 = "Invalid combination of LUKS hash type and cipher type";
-static const char *PA_031 = "Invalid hccapx signature";
-static const char *PA_032 = "Invalid hccapx version";
-static const char *PA_033 = "Invalid hccapx message pair";
-static const char *PA_034 = "Token encoding exception";
-static const char *PA_035 = "Token length exception";
-static const char *PA_036 = "Insufficient entropy exception";
-static const char *PA_037 = "Hash contains unsupported compression type for current mode";
-static const char *PA_038 = "Invalid key size";
-static const char *PA_039 = "Invalid block size";
-static const char *PA_040 = "Invalid or unsupported cipher";
-static const char *PA_041 = "Invalid filesize";
-static const char *PA_042 = "IV length exception";
-static const char *PA_043 = "CT length exception";
-static const char *PA_255 = "Unknown error";
-
-static const char *OPTI_STR_OPTIMIZED_KERNEL     = "Optimized-Kernel";
-static const char *OPTI_STR_ZERO_BYTE            = "Zero-Byte";
-static const char *OPTI_STR_PRECOMPUTE_INIT      = "Precompute-Init";
-static const char *OPTI_STR_MEET_IN_MIDDLE       = "Meet-In-The-Middle";
-static const char *OPTI_STR_EARLY_SKIP           = "Early-Skip";
-static const char *OPTI_STR_NOT_SALTED           = "Not-Salted";
-static const char *OPTI_STR_NOT_ITERATED         = "Not-Iterated";
-static const char *OPTI_STR_PREPENDED_SALT       = "Prepended-Salt";
-static const char *OPTI_STR_APPENDED_SALT        = "Appended-Salt";
-static const char *OPTI_STR_SINGLE_HASH          = "Single-Hash";
-static const char *OPTI_STR_SINGLE_SALT          = "Single-Salt";
-static const char *OPTI_STR_BRUTE_FORCE          = "Brute-Force";
-static const char *OPTI_STR_RAW_HASH             = "Raw-Hash";
-static const char *OPTI_STR_SLOW_HASH_SIMD_INIT  = "Slow-Hash-SIMD-INIT";
-static const char *OPTI_STR_SLOW_HASH_SIMD_LOOP  = "Slow-Hash-SIMD-LOOP";
-static const char *OPTI_STR_SLOW_HASH_SIMD_COMP  = "Slow-Hash-SIMD-COMP";
-static const char *OPTI_STR_USES_BITS_8          = "Uses-8-Bit";
-static const char *OPTI_STR_USES_BITS_16         = "Uses-16-Bit";
-static const char *OPTI_STR_USES_BITS_32         = "Uses-32-Bit";
-static const char *OPTI_STR_USES_BITS_64         = "Uses-64-Bit";
-
-static const char *HASH_CATEGORY_UNDEFINED_STR              = "Undefined";
-static const char *HASH_CATEGORY_RAW_HASH_STR               = "Raw Hash";
-static const char *HASH_CATEGORY_RAW_HASH_SALTED_STR        = "Raw Hash, Salted and/or Iterated";
-static const char *HASH_CATEGORY_RAW_HASH_AUTHENTICATED_STR = "Raw Hash, Authenticated";
-static const char *HASH_CATEGORY_RAW_CIPHER_KPA_STR         = "Raw Cipher, Known-Plaintext attack";
-static const char *HASH_CATEGORY_GENERIC_KDF_STR            = "Generic KDF";
-static const char *HASH_CATEGORY_NETWORK_PROTOCOL_STR       = "Network Protocols";
-static const char *HASH_CATEGORY_FORUM_SOFTWARE_STR         = "Forums, CMS, E-Commerce";
-static const char *HASH_CATEGORY_DATABASE_SERVER_STR        = "Database Server";
-static const char *HASH_CATEGORY_NETWORK_SERVER_STR         = "FTP, HTTP, SMTP, LDAP Server";
-static const char *HASH_CATEGORY_RAW_CHECKSUM_STR           = "Raw Checksum";
-static const char *HASH_CATEGORY_OS_STR                     = "Operating System";
-static const char *HASH_CATEGORY_EAS_STR                    = "Enterprise Application Software (EAS)";
-static const char *HASH_CATEGORY_ARCHIVE_STR                = "Archives";
-static const char *HASH_CATEGORY_FDE_STR                    = "Full-Disk Encryption (FDE)";
-static const char *HASH_CATEGORY_FBE_STR                    = "File-Based Encryption (FBE)";
-static const char *HASH_CATEGORY_DOCUMENTS_STR              = "Documents";
-static const char *HASH_CATEGORY_PASSWORD_MANAGER_STR       = "Password Managers";
-static const char *HASH_CATEGORY_OTP_STR                    = "One-Time Passwords";
-static const char *HASH_CATEGORY_PLAIN_STR                  = "Plaintext";
-static const char *HASH_CATEGORY_FRAMEWORK_STR              = "Framework";
-static const char *HASH_CATEGORY_PRIVATE_KEY_STR            = "Private Key";
-static const char *HASH_CATEGORY_IMS_STR                    = "Instant Messaging Service";
-static const char *HASH_CATEGORY_CRYPTOCURRENCY_WALLET_STR  = "Cryptocurrency Wallet";
+static const char *const PA_000 = "OK";
+static const char *const PA_001 = "Ignored due to comment";
+static const char *const PA_002 = "Ignored due to zero length";
+static const char *const PA_003 = "Line-length exception";
+static const char *const PA_004 = "Hash-length exception";
+static const char *const PA_005 = "Hash-value exception";
+static const char *const PA_006 = "Salt-length exception";
+static const char *const PA_007 = "Salt-value exception";
+static const char *const PA_008 = "Salt-iteration count exception";
+static const char *const PA_009 = "Separator unmatched";
+static const char *const PA_010 = "Signature unmatched";
+static const char *const PA_011 = "Invalid hccapx file size";
+static const char *const PA_012 = "Invalid hccapx eapol size";
+static const char *const PA_013 = "Invalid psafe2 filesize";
+static const char *const PA_014 = "Invalid psafe3 filesize";
+static const char *const PA_015 = "Invalid truecrypt filesize";
+static const char *const PA_016 = "Invalid veracrypt filesize";
+static const char *const PA_017 = "Invalid SIP directive, only MD5 is supported";
+static const char *const PA_018 = "Hash-file exception";
+static const char *const PA_019 = "Hash-encoding exception";
+static const char *const PA_020 = "Salt-encoding exception";
+static const char *const PA_021 = "Invalid LUKS filesize";
+static const char *const PA_022 = "Invalid LUKS identifier";
+static const char *const PA_023 = "Invalid LUKS version";
+static const char *const PA_024 = "Invalid or unsupported LUKS cipher type";
+static const char *const PA_025 = "Invalid or unsupported LUKS cipher mode";
+static const char *const PA_026 = "Invalid or unsupported LUKS hash type";
+static const char *const PA_027 = "Invalid LUKS key size";
+static const char *const PA_028 = "Disabled LUKS key detected";
+static const char *const PA_029 = "Invalid LUKS key AF stripes count";
+static const char *const PA_030 = "Invalid combination of LUKS hash type and cipher type";
+static const char *const PA_031 = "Invalid hccapx signature";
+static const char *const PA_032 = "Invalid hccapx version";
+static const char *const PA_033 = "Invalid hccapx message pair";
+static const char *const PA_034 = "Token encoding exception";
+static const char *const PA_035 = "Token length exception";
+static const char *const PA_036 = "Insufficient entropy exception";
+static const char *const PA_037 = "Hash contains unsupported compression type for current mode";
+static const char *const PA_038 = "Invalid key size";
+static const char *const PA_039 = "Invalid block size";
+static const char *const PA_040 = "Invalid or unsupported cipher";
+static const char *const PA_041 = "Invalid filesize";
+static const char *const PA_042 = "IV length exception";
+static const char *const PA_043 = "CT length exception";
+static const char *const PA_255 = "Unknown error";
+
+static const char *const OPTI_STR_OPTIMIZED_KERNEL     = "Optimized-Kernel";
+static const char *const OPTI_STR_ZERO_BYTE            = "Zero-Byte";
+static const char *const OPTI_STR_PRECOMPUTE_INIT      = "Precompute-Init";
+static const char *const OPTI_STR_MEET_IN_MIDDLE       = "Meet-In-The-Middle";
+static const char *const OPTI_STR_EARLY_SKIP           = "Early-Skip";
+static const char *const OPTI_STR_NOT_SALTED           = "Not-Salted";
+static const char *const OPTI_STR_NOT_ITERATED         = "Not-Iterated";
+static const char *const OPTI_STR_PREPENDED_SALT       = "Prepended-Salt";
+static const char *const OPTI_STR_APPENDED_SALT        = "Appended-Salt";
+static const char *const OPTI_STR_SINGLE_HASH          = "Single-Hash";
+static const char *const OPTI_STR_SINGLE_SALT          = "Single-Salt";
+static const char *const OPTI_STR_BRUTE_FORCE          = "Brute-Force";
+static const char *const OPTI_STR_RAW_HASH             = "Raw-Hash";
+static const char *const OPTI_STR_SLOW_HASH_SIMD_INIT  = "Slow-Hash-SIMD-INIT";
+static const char *const OPTI_STR_SLOW_HASH_SIMD_LOOP  = "Slow-Hash-SIMD-LOOP";
+static const char *const OPTI_STR_SLOW_HASH_SIMD_COMP  = "Slow-Hash-SIMD-COMP";
+static const char *const OPTI_STR_USES_BITS_8          = "Uses-8-Bit";
+static const char *const OPTI_STR_USES_BITS_16         = "Uses-16-Bit";
+static const char *const OPTI_STR_USES_BITS_32         = "Uses-32-Bit";
+static const char *const OPTI_STR_USES_BITS_64         = "Uses-64-Bit";
+
+static const char *const HASH_CATEGORY_UNDEFINED_STR              = "Undefined";
+static const char *const HASH_CATEGORY_RAW_HASH_STR               = "Raw Hash";
+static const char *const HASH_CATEGORY_RAW_HASH_SALTED_STR        = "Raw Hash, Salted and/or Iterated";
+static const char *const HASH_CATEGORY_RAW_HASH_AUTHENTICATED_STR = "Raw Hash, Authenticated";
+static const char *const HASH_CATEGORY_RAW_CIPHER_KPA_STR         = "Raw Cipher, Known-Plaintext attack";
+static const char *const HASH_CATEGORY_GENERIC_KDF_STR            = "Generic KDF";
+static const char *const HASH_CATEGORY_NETWORK_PROTOCOL_STR       = "Network Protocols";
+static const char *const HASH_CATEGORY_FORUM_SOFTWARE_STR         = "Forums, CMS, E-Commerce";
+static const char *const HASH_CATEGORY_DATABASE_SERVER_STR        = "Database Server";
+static const char *const HASH_CATEGORY_NETWORK_SERVER_STR         = "FTP, HTTP, SMTP, LDAP Server";
+static const char *const HASH_CATEGORY_RAW_CHECKSUM_STR           = "Raw Checksum";
+static const char *const HASH_CATEGORY_OS_STR                     = "Operating System";
+static const char *const HASH_CATEGORY_EAS_STR                    = "Enterprise Application Software (EAS)";
+static const char *const HASH_CATEGORY_ARCHIVE_STR                = "Archives";
+static const char *const HASH_CATEGORY_FDE_STR                    = "Full-Disk Encryption (FDE)";
+static const char *const HASH_CATEGORY_FBE_STR                    = "File-Based Encryption (FBE)";
+static const char *const HASH_CATEGORY_DOCUMENTS_STR              = "Documents";
+static const char *const HASH_CATEGORY_PASSWORD_MANAGER_STR       = "Password Managers";
+static const char *const HASH_CATEGORY_OTP_STR                    = "One-Time Passwords";
+static const char *const HASH_CATEGORY_PLAIN_STR                  = "Plaintext";
+static const char *const HASH_CATEGORY_FRAMEWORK_STR              = "Framework";
+static const char *const HASH_CATEGORY_PRIVATE_KEY_STR            = "Private Key";
+static const char *const HASH_CATEGORY_IMS_STR                    = "Instant Messaging Service";
+static const char *const HASH_CATEGORY_CRYPTOCURRENCY_WALLET_STR  = "Cryptocurrency Wallet";
 
 int sort_by_string_sized (const void *p1, const void *p2)
 {
@@ -1062,7 +1062,7 @@ static int rounds_count_length (const char *input_buf, const int input_len)
 {
   if (input_len >= 9) // 9 is minimum because of "rounds=X$"
   {
-    static const char *rounds = "rounds=";
+    static const char *const rounds = "rounds=";
 
     if (memcmp (input_buf, rounds, 7) == 0)
     {
diff --git a/src/status.c b/src/status.c
index 6b4084b59..c26e43e51 100644
--- a/src/status.c
+++ b/src/status.c
@@ -18,29 +18,29 @@
 #include "shared.h"
 #include "status.h"
 
-static const char *ST_0000 = "Initializing";
-static const char *ST_0001 = "Autotuning";
-static const char *ST_0002 = "Selftest";
-static const char *ST_0003 = "Running";
-static const char *ST_0004 = "Paused";
-static const char *ST_0005 = "Exhausted";
-static const char *ST_0006 = "Cracked";
-static const char *ST_0007 = "Aborted";
-static const char *ST_0008 = "Quit";
-static const char *ST_0009 = "Bypass";
-static const char *ST_0010 = "Aborted (Checkpoint)";
-static const char *ST_0011 = "Aborted (Runtime)";
-static const char *ST_0012 = "Running (Checkpoint Quit requested)";
-static const char *ST_0013 = "Error";
-static const char *ST_0014 = "Aborted (Finish)";
-static const char *ST_0015 = "Running (Quit after attack requested)";
-static const char *ST_0016 = "Autodetect";
-static const char *ST_9999 = "Unknown! Bug!";
+static const char *const  ST_0000 = "Initializing";
+static const char *const  ST_0001 = "Autotuning";
+static const char *const  ST_0002 = "Selftest";
+static const char *const  ST_0003 = "Running";
+static const char *const  ST_0004 = "Paused";
+static const char *const  ST_0005 = "Exhausted";
+static const char *const  ST_0006 = "Cracked";
+static const char *const  ST_0007 = "Aborted";
+static const char *const  ST_0008 = "Quit";
+static const char *const  ST_0009 = "Bypass";
+static const char *const  ST_0010 = "Aborted (Checkpoint)";
+static const char *const  ST_0011 = "Aborted (Runtime)";
+static const char *const  ST_0012 = "Running (Checkpoint Quit requested)";
+static const char *const  ST_0013 = "Error";
+static const char *const  ST_0014 = "Aborted (Finish)";
+static const char *const  ST_0015 = "Running (Quit after attack requested)";
+static const char *const  ST_0016 = "Autodetect";
+static const char *const  ST_9999 = "Unknown! Bug!";
 
 static const char UNITS[7] = { ' ', 'k', 'M', 'G', 'T', 'P', 'E' };
 
-static const char *ETA_ABSOLUTE_MAX_EXCEEDED = "Next Big Bang"; // in honor of ighashgpu
-static const char *ETA_RELATIVE_MAX_EXCEEDED = "> 10 years";
+static const char *const  ETA_ABSOLUTE_MAX_EXCEEDED = "Next Big Bang"; // in honor of ighashgpu
+static const char *const  ETA_RELATIVE_MAX_EXCEEDED = "> 10 years";
 
 static char *status_get_rules_file (const hashcat_ctx_t *hashcat_ctx)
 {
diff --git a/src/terminal.c b/src/terminal.c
index b72e7d9e4..d4bc98b4e 100644
--- a/src/terminal.c
+++ b/src/terminal.c
@@ -19,8 +19,8 @@
 
 static const size_t TERMINAL_LINE_LENGTH = 79;
 
-static const char *PROMPT_ACTIVE = "[s]tatus [p]ause [b]ypass [c]heckpoint [f]inish [q]uit => ";
-static const char *PROMPT_PAUSED = "[s]tatus [r]esume [b]ypass [c]heckpoint [f]inish [q]uit => ";
+static const char *const PROMPT_ACTIVE = "[s]tatus [p]ause [b]ypass [c]heckpoint [f]inish [q]uit => ";
+static const char *const PROMPT_PAUSED = "[s]tatus [r]esume [b]ypass [c]heckpoint [f]inish [q]uit => ";
 
 void welcome_screen (hashcat_ctx_t *hashcat_ctx, const char *version_tag)
 {
diff --git a/src/user_options.c b/src/user_options.c
index 14a75c8bd..77d989376 100644
--- a/src/user_options.c
+++ b/src/user_options.c
@@ -20,12 +20,12 @@
 #endif
 
 #ifdef WITH_BRAIN
-static const char *short_options = "hVvm:a:r:j:k:g:o:t:d:D:n:u:T:c:p:s:l:1:2:3:4:iIbw:OMSz";
+static const char *const short_options = "hVvm:a:r:j:k:g:o:t:d:D:n:u:T:c:p:s:l:1:2:3:4:iIbw:OMSz";
 #else
-static const char *short_options = "hVvm:a:r:j:k:g:o:t:d:D:n:u:T:c:p:s:l:1:2:3:4:iIbw:OMS";
+static const char *const short_options = "hVvm:a:r:j:k:g:o:t:d:D:n:u:T:c:p:s:l:1:2:3:4:iIbw:OMS";
 #endif
 
-static char *SEPARATOR = ":";
+static char *const SEPARATOR = ":";
 
 static const struct option long_options[] =
 {
@@ -147,15 +147,15 @@ static const struct option long_options[] =
   {NULL,                        0,                 NULL, 0 }
 };
 
-static const char *ENCODING_FROM = "utf-8";
-static const char *ENCODING_TO   = "utf-8";
+static const char *const ENCODING_FROM = "utf-8";
+static const char *const ENCODING_TO   = "utf-8";
 
-static const char *RULE_BUF_R = ":";
-static const char *RULE_BUF_L = ":";
+static const char *const RULE_BUF_R = ":";
+static const char *const RULE_BUF_L = ":";
 
-static const char *DEF_MASK_CS_1 = "?l?d?u";
-static const char *DEF_MASK_CS_2 = "?l?d";
-static const char *DEF_MASK_CS_3 = "?l?d*!$@_";
+static const char *const DEF_MASK_CS_1 = "?l?d?u";
+static const char *const DEF_MASK_CS_2 = "?l?d";
+static const char *const DEF_MASK_CS_3 = "?l?d*!$@_";
 
 int user_options_init (hashcat_ctx_t *hashcat_ctx)
 {

From 5ffcaa980d0e4d95cd573bebffa35e66883b7432 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Fri, 23 Jul 2021 16:04:34 +0200
Subject: [PATCH 04/24] HIP Backend: Added support for HIP 4.4 and later, but
 added a check to rule out older versions because they are incompatible

---
 OpenCL/inc_platform.cl |   25 +-
 OpenCL/inc_types.h     |   87 +--
 OpenCL/m08900-pure.cl  |   13 +-
 OpenCL/m15700-pure.cl  |   13 +-
 OpenCL/m22700-pure.cl  |   13 +-
 docs/changes.txt       |    1 +
 include/backend.h      |   91 ++-
 include/ext_hip.h      | 1437 +++++++++++-----------------------------
 include/ext_hiprtc.h   |   61 +-
 include/types.h        |  151 +++--
 src/backend.c          |  973 ++++++++-------------------
 src/terminal.c         |   10 +-
 12 files changed, 823 insertions(+), 2052 deletions(-)

diff --git a/OpenCL/inc_platform.cl b/OpenCL/inc_platform.cl
index df0e210a4..47cfc9a84 100644
--- a/OpenCL/inc_platform.cl
+++ b/OpenCL/inc_platform.cl
@@ -193,33 +193,21 @@ DECLSPEC u32 hc_atomic_dec (GLOBAL_AS u32 *p)
 {
   volatile const u32 val = 1;
 
-  return __atomic_fetch_sub (p, val, __ATOMIC_RELAXED);
+  return atomicSub (p, val);
 }
 
 DECLSPEC u32 hc_atomic_inc (GLOBAL_AS u32 *p)
 {
   volatile const u32 val = 1;
 
-  return __atomic_fetch_add (p, val, __ATOMIC_RELAXED);
+  return atomicAdd (p, val);
 }
 
 DECLSPEC u32 hc_atomic_or (GLOBAL_AS u32 *p, volatile const u32 val)
 {
-  return __atomic_fetch_or (p, val, __ATOMIC_RELAXED);
-}
-
-extern "C" __device__ __attribute__((pure)) double __ocml_log2_f64(double);
-
-DECLSPEC double log2 (double x)
-{
-  return __ocml_log2_f64 (x);
+  return atomicOr (p, val);
 }
 
-extern "C" __device__ __attribute__((const)) size_t __ockl_get_local_id(uint);
-extern "C" __device__ __attribute__((const)) size_t __ockl_get_group_id(uint);
-extern "C" __device__ __attribute__((const)) size_t __ockl_get_local_size(uint);
-extern "C" __device__ __attribute__((const)) size_t __ockl_get_num_groups(uint);
-
 DECLSPEC size_t get_global_id  (const u32 dimindx)
 {
   return (__ockl_get_group_id (dimindx) * __ockl_get_local_size (dimindx)) + __ockl_get_local_id (dimindx);
@@ -308,11 +296,8 @@ DECLSPEC u64 rotr64_S (const u64 a, const int n)
   return out.v64;
 }
 
-extern "C" __device__ int printf(const char *fmt, ...);
-//int printf(__constant const char* st, ...) __attribute__((format(printf, 1, 2)));
-
-#define FIXED_THREAD_COUNT(n) __attribute__((amdgpu_flat_work_group_size (1, (n))))
-#define SYNC_THREADS() __builtin_amdgcn_s_barrier ()
+#define FIXED_THREAD_COUNT(n) __launch_bounds__((n), 0)
+#define SYNC_THREADS() __syncthreads ()
 #endif
 
 #ifdef IS_OPENCL
diff --git a/OpenCL/inc_types.h b/OpenCL/inc_types.h
index 0c715da66..4caf595de 100644
--- a/OpenCL/inc_types.h
+++ b/OpenCL/inc_types.h
@@ -21,96 +21,19 @@
 typedef unsigned char       uchar;
 typedef unsigned short      ushort;
 typedef unsigned int        uint;
-typedef unsigned long long  ulong;
+typedef unsigned long       ulong;
+typedef unsigned long long  ullong;
 #endif
 
-#ifdef IS_HIP
-// https://github.com/llvm-mirror/clang/blob/master/lib/Headers/opencl-c-base.h
-
-// built-in scalar data types:
-
-/**
- * An unsigned 8-bit integer.
- */
-typedef unsigned char uchar;
-
-/**
- * An unsigned 16-bit integer.
- */
-typedef unsigned short ushort;
-
-/**
- * An unsigned 32-bit integer.
- */
-typedef unsigned int uint;
-
-/**
- * An unsigned 64-bit integer.
- */
-typedef unsigned long ulong;
-
-/**
- * The unsigned integer type of the result of the sizeof operator. This
- * is a 32-bit unsigned integer if CL_DEVICE_ADDRESS_BITS
- * defined in table 4.3 is 32-bits and is a 64-bit unsigned integer if
- * CL_DEVICE_ADDRESS_BITS is 64-bits.
- */
-typedef __SIZE_TYPE__ size_t;
-
-// built-in vector data types:
-typedef char char2 __attribute__((ext_vector_type(2)));
-typedef char char3 __attribute__((ext_vector_type(3)));
-typedef char char4 __attribute__((ext_vector_type(4)));
-typedef char char8 __attribute__((ext_vector_type(8)));
-typedef char char16 __attribute__((ext_vector_type(16)));
-typedef uchar uchar2 __attribute__((ext_vector_type(2)));
-typedef uchar uchar3 __attribute__((ext_vector_type(3)));
-typedef uchar uchar4 __attribute__((ext_vector_type(4)));
-typedef uchar uchar8 __attribute__((ext_vector_type(8)));
-typedef uchar uchar16 __attribute__((ext_vector_type(16)));
-typedef short short2 __attribute__((ext_vector_type(2)));
-typedef short short3 __attribute__((ext_vector_type(3)));
-typedef short short4 __attribute__((ext_vector_type(4)));
-typedef short short8 __attribute__((ext_vector_type(8)));
-typedef short short16 __attribute__((ext_vector_type(16)));
-typedef ushort ushort2 __attribute__((ext_vector_type(2)));
-typedef ushort ushort3 __attribute__((ext_vector_type(3)));
-typedef ushort ushort4 __attribute__((ext_vector_type(4)));
-typedef ushort ushort8 __attribute__((ext_vector_type(8)));
-typedef ushort ushort16 __attribute__((ext_vector_type(16)));
-typedef int int2 __attribute__((ext_vector_type(2)));
-typedef int int3 __attribute__((ext_vector_type(3)));
-typedef int int4 __attribute__((ext_vector_type(4)));
-typedef int int8 __attribute__((ext_vector_type(8)));
-typedef int int16 __attribute__((ext_vector_type(16)));
-typedef uint uint2 __attribute__((ext_vector_type(2)));
-typedef uint uint3 __attribute__((ext_vector_type(3)));
-typedef uint uint4 __attribute__((ext_vector_type(4)));
-typedef uint uint8 __attribute__((ext_vector_type(8)));
-typedef uint uint16 __attribute__((ext_vector_type(16)));
-typedef long long2 __attribute__((ext_vector_type(2)));
-typedef long long3 __attribute__((ext_vector_type(3)));
-typedef long long4 __attribute__((ext_vector_type(4)));
-typedef long long8 __attribute__((ext_vector_type(8)));
-typedef long long16 __attribute__((ext_vector_type(16)));
-typedef ulong ulong2 __attribute__((ext_vector_type(2)));
-typedef ulong ulong3 __attribute__((ext_vector_type(3)));
-typedef ulong ulong4 __attribute__((ext_vector_type(4)));
-typedef ulong ulong8 __attribute__((ext_vector_type(8)));
-typedef ulong ulong16 __attribute__((ext_vector_type(16)));
-typedef float float2 __attribute__((ext_vector_type(2)));
-typedef float float3 __attribute__((ext_vector_type(3)));
-typedef float float4 __attribute__((ext_vector_type(4)));
-typedef float float8 __attribute__((ext_vector_type(8)));
-typedef float float16 __attribute__((ext_vector_type(16)));
-
+#ifdef IS_OPENCL
+typedef ulong               ullong;
 #endif
 
 #ifdef KERNEL_STATIC
 typedef uchar  u8;
 typedef ushort u16;
 typedef uint   u32;
-typedef ulong  u64;
+typedef ullong u64;
 #else
 typedef uint8_t  u8;
 typedef uint16_t u16;
diff --git a/OpenCL/m08900-pure.cl b/OpenCL/m08900-pure.cl
index 2bd1de39a..74f8a0e66 100644
--- a/OpenCL/m08900-pure.cl
+++ b/OpenCL/m08900-pure.cl
@@ -24,7 +24,7 @@ typedef struct
 
 } scrypt_tmp_t;
 
-#if defined IS_CUDA
+#if defined IS_CUDA || defined IS_HIP
 
 inline __device__ uint4 operator &  (const uint4  a, const u32   b) { return make_uint4 ((a.x &  b  ), (a.y &  b  ), (a.z &  b  ), (a.w &  b  ));  }
 inline __device__ uint4 operator << (const uint4  a, const u32   b) { return make_uint4 ((a.x << b  ), (a.y << b  ), (a.z << b  ), (a.w << b  ));  }
@@ -41,15 +41,6 @@ inline __device__ uint4 rotate (const uint4 a, const int n)
 
 #endif
 
-#if defined IS_HIP
-
-inline __device__ uint4 rotate (const uint4 a, const int n)
-{
-  return ((a << n) | ((a >> (32 - n))));
-}
-
-#endif
-
 DECLSPEC uint4 hc_swap32_4 (uint4 v)
 {
   return (rotate ((v & 0x00FF00FF), 24u) | rotate ((v & 0xFF00FF00),  8u));
@@ -66,7 +57,7 @@ DECLSPEC uint4 hc_swap32_4 (uint4 v)
 
 #define ADD_ROTATE_XOR(r,i1,i2,s) (r) ^= rotate ((i1) + (i2), (s));
 
-#if defined IS_CUDA
+#if defined IS_CUDA || defined IS_HIP
 
 #define SALSA20_2R()                        \
 {                                           \
diff --git a/OpenCL/m15700-pure.cl b/OpenCL/m15700-pure.cl
index 10a7aaa14..09819b085 100644
--- a/OpenCL/m15700-pure.cl
+++ b/OpenCL/m15700-pure.cl
@@ -31,7 +31,7 @@ typedef struct ethereum_scrypt
 
 } ethereum_scrypt_t;
 
-#if defined IS_CUDA
+#if defined IS_CUDA || defined IS_HIP
 
 inline __device__ uint4 operator &  (const uint4  a, const u32   b) { return make_uint4 ((a.x &  b  ), (a.y &  b  ), (a.z &  b  ), (a.w &  b  ));  }
 inline __device__ uint4 operator << (const uint4  a, const u32   b) { return make_uint4 ((a.x << b  ), (a.y << b  ), (a.z << b  ), (a.w << b  ));  }
@@ -48,15 +48,6 @@ inline __device__ uint4 rotate (const uint4 a, const int n)
 
 #endif
 
-#if defined IS_HIP
-
-inline __device__ uint4 rotate (const uint4 a, const int n)
-{
-  return ((a << n) | ((a >> (32 - n))));
-}
-
-#endif
-
 DECLSPEC uint4 hc_swap32_4 (uint4 v)
 {
   return (rotate ((v & 0x00FF00FF), 24u) | rotate ((v & 0xFF00FF00),  8u));
@@ -73,7 +64,7 @@ DECLSPEC uint4 hc_swap32_4 (uint4 v)
 
 #define ADD_ROTATE_XOR(r,i1,i2,s) (r) ^= rotate ((i1) + (i2), (s));
 
-#if defined IS_CUDA
+#if defined IS_CUDA || defined IS_HIP
 
 #define SALSA20_2R()                        \
 {                                           \
diff --git a/OpenCL/m22700-pure.cl b/OpenCL/m22700-pure.cl
index 4ecc345ce..a28b458c2 100644
--- a/OpenCL/m22700-pure.cl
+++ b/OpenCL/m22700-pure.cl
@@ -72,7 +72,7 @@ DECLSPEC int is_valid_bitcoinj (const u32 *w)
   return 1;
 }
 
-#if defined IS_CUDA
+#if defined IS_CUDA || defined IS_HIP
 
 inline __device__ uint4 operator &  (const uint4  a, const u32   b) { return make_uint4 ((a.x &  b  ), (a.y &  b  ), (a.z &  b  ), (a.w &  b  ));  }
 inline __device__ uint4 operator << (const uint4  a, const u32   b) { return make_uint4 ((a.x << b  ), (a.y << b  ), (a.z << b  ), (a.w << b  ));  }
@@ -89,15 +89,6 @@ inline __device__ uint4 rotate (const uint4 a, const int n)
 
 #endif
 
-#if defined IS_HIP
-
-inline __device__ uint4 rotate (const uint4 a, const int n)
-{
-  return ((a << n) | ((a >> (32 - n))));
-}
-
-#endif
-
 DECLSPEC uint4 hc_swap32_4 (uint4 v)
 {
   return (rotate ((v & 0x00FF00FF), 24u) | rotate ((v & 0xFF00FF00),  8u));
@@ -114,7 +105,7 @@ DECLSPEC uint4 hc_swap32_4 (uint4 v)
 
 #define ADD_ROTATE_XOR(r,i1,i2,s) (r) ^= rotate ((i1) + (i2), (s));
 
-#if defined IS_CUDA
+#if defined IS_CUDA || defined IS_HIP
 
 #define SALSA20_2R()                        \
 {                                           \
diff --git a/docs/changes.txt b/docs/changes.txt
index 92cebc836..50bbfb5ae 100644
--- a/docs/changes.txt
+++ b/docs/changes.txt
@@ -5,6 +5,7 @@
 ##
 
 - Added option --multiply-accel-disable (short: -M) to disable multiply the kernel-accel with the multiprocessor count automatism
+- HIP Backend: Added support for HIP 4.4 and later, and added a check to rule out older versions because they are incompatible
 
 ##
 ## Bugs
diff --git a/include/backend.h b/include/backend.h
index 957ac229d..30e79bc89 100644
--- a/include/backend.h
+++ b/include/backend.h
@@ -88,53 +88,50 @@ int hc_cuLinkAddData             (hashcat_ctx_t *hashcat_ctx, CUlinkState state,
 int hc_cuLinkDestroy             (hashcat_ctx_t *hashcat_ctx, CUlinkState state);
 int hc_cuLinkComplete            (hashcat_ctx_t *hashcat_ctx, CUlinkState state, void **cubinOut, size_t *sizeOut);
 
-int hc_nvrtcCreateProgram        (hashcat_ctx_t *hashcat_ctx, nvrtcProgram *prog, const char *src, const char *name, int numHeaders, const char * const *headers, const char * const *includeNames);
-int hc_nvrtcDestroyProgram       (hashcat_ctx_t *hashcat_ctx, nvrtcProgram *prog);
-int hc_nvrtcCompileProgram       (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, int numOptions, const char * const *options);
-int hc_nvrtcGetProgramLogSize    (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, size_t *logSizeRet);
-int hc_nvrtcGetProgramLog        (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, char *log);
-int hc_nvrtcGetPTXSize           (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, size_t *ptxSizeRet);
-int hc_nvrtcGetPTX               (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, char *ptx);
-int hc_nvrtcVersion              (hashcat_ctx_t *hashcat_ctx, int *major, int *minor);
-
-int hc_hipCtxCreate              (hashcat_ctx_t *hashcat_ctx, HIPcontext *pctx, unsigned int flags, HIPdevice dev);
-int hc_hipCtxDestroy             (hashcat_ctx_t *hashcat_ctx, HIPcontext ctx);
-int hc_hipCtxSetCurrent          (hashcat_ctx_t *hashcat_ctx, HIPcontext ctx);
-int hc_hipCtxSetCacheConfig      (hashcat_ctx_t *hashcat_ctx, HIPfunc_cache config);
+int hc_hipCreateProgram          (hashcat_ctx_t *hashcat_ctx, hiprtcProgram *prog, const char *src, const char *name, int numHeaders, const char * const *headers, const char * const *includeNames);
+int hc_hipDestroyProgram         (hashcat_ctx_t *hashcat_ctx, hiprtcProgram *prog);
+int hc_hipCompileProgram         (hashcat_ctx_t *hashcat_ctx, hiprtcProgram prog, int numOptions, const char * const *options);
+int hc_hipGetProgramLogSize      (hashcat_ctx_t *hashcat_ctx, hiprtcProgram prog, size_t *logSizeRet);
+int hc_hipGetProgramLog          (hashcat_ctx_t *hashcat_ctx, hiprtcProgram prog, char *log);
+int hc_hipGetCodeSize            (hashcat_ctx_t *hashcat_ctx, hiprtcProgram prog, size_t *codeSizeRet);
+int hc_hipGetCode                (hashcat_ctx_t *hashcat_ctx, hiprtcProgram prog, char *code);
+
+int hc_hipCtxCreate              (hashcat_ctx_t *hashcat_ctx, hipCtx_t *pctx, unsigned int flags, hipDevice_t dev);
+int hc_hipCtxDestroy             (hashcat_ctx_t *hashcat_ctx, hipCtx_t ctx);
+int hc_hipCtxPopCurrent          (hashcat_ctx_t *hashcat_ctx, hipCtx_t *pctx);
+int hc_hipCtxPushCurrent         (hashcat_ctx_t *hashcat_ctx, hipCtx_t ctx);
+int hc_hipCtxSetCurrent          (hashcat_ctx_t *hashcat_ctx, hipCtx_t ctx);
 int hc_hipCtxSynchronize         (hashcat_ctx_t *hashcat_ctx);
-int hc_hipDeviceGetAttribute     (hashcat_ctx_t *hashcat_ctx, int *pi, HIPdevice_attribute attrib, HIPdevice dev);
+int hc_hipDeviceGet              (hashcat_ctx_t *hashcat_ctx, hipDevice_t *device, int ordinal);
+int hc_hipDeviceGetAttribute     (hashcat_ctx_t *hashcat_ctx, int *pi, hipDeviceAttribute_t attrib, hipDevice_t dev);
 int hc_hipDeviceGetCount         (hashcat_ctx_t *hashcat_ctx, int *count);
-int hc_hipDeviceGet              (hashcat_ctx_t *hashcat_ctx, HIPdevice *device, int ordinal);
-int hc_hipDeviceGetName          (hashcat_ctx_t *hashcat_ctx, char *name, int len, HIPdevice dev);
-int hc_hipDeviceTotalMem         (hashcat_ctx_t *hashcat_ctx, size_t *bytes, HIPdevice dev);
+int hc_hipDeviceGetName          (hashcat_ctx_t *hashcat_ctx, char *name, int len, hipDevice_t dev);
+int hc_hipDeviceTotalMem         (hashcat_ctx_t *hashcat_ctx, size_t *bytes, hipDevice_t dev);
 int hc_hipDriverGetVersion       (hashcat_ctx_t *hashcat_ctx, int *driverVersion);
-int hc_hipEventCreate            (hashcat_ctx_t *hashcat_ctx, HIPevent *phEvent, unsigned int Flags);
-int hc_hipEventDestroy           (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent);
-int hc_hipEventElapsedTime       (hashcat_ctx_t *hashcat_ctx, float *pMilliseconds, HIPevent hStart, HIPevent hEnd);
-int hc_hipEventQuery             (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent);
-int hc_hipEventRecord            (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent, HIPstream hStream);
-int hc_hipEventSynchronize       (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent);
-int hc_hipFuncGetAttribute       (hashcat_ctx_t *hashcat_ctx, int *pi, HIPfunction_attribute attrib, HIPfunction hfunc);
-//int hc_hipFuncSetAttribute       (hashcat_ctx_t *hashcat_ctx, HIPfunction hfunc, HIPfunction_attribute attrib, int value);
+int hc_hipEventCreate            (hashcat_ctx_t *hashcat_ctx, hipEvent_t *phEvent, unsigned int Flags);
+int hc_hipEventDestroy           (hashcat_ctx_t *hashcat_ctx, hipEvent_t hEvent);
+int hc_hipEventElapsedTime       (hashcat_ctx_t *hashcat_ctx, float *pMilliseconds, hipEvent_t hStart, hipEvent_t hEnd);
+int hc_hipEventQuery             (hashcat_ctx_t *hashcat_ctx, hipEvent_t hEvent);
+int hc_hipEventRecord            (hashcat_ctx_t *hashcat_ctx, hipEvent_t hEvent, hipStream_t hStream);
+int hc_hipEventSynchronize       (hashcat_ctx_t *hashcat_ctx, hipEvent_t hEvent);
+int hc_hipFuncGetAttribute       (hashcat_ctx_t *hashcat_ctx, int *pi, hipFunction_attribute attrib, hipFunction_t hfunc);
 int hc_hipInit                   (hashcat_ctx_t *hashcat_ctx, unsigned int Flags);
-int hc_hipLaunchKernel           (hashcat_ctx_t *hashcat_ctx, HIPfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, HIPstream hStream, void **kernelParams, void **extra);
-int hc_hipMemAlloc               (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr *dptr, size_t bytesize);
-int hc_hipMemcpyDtoD             (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dstDevice, HIPdeviceptr srcDevice, size_t ByteCount);
-int hc_hipMemcpyDtoH             (hashcat_ctx_t *hashcat_ctx, void *dstHost, HIPdeviceptr srcDevice, size_t ByteCount);
-int hc_hipMemcpyHtoD             (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dstDevice, const void *srcHost, size_t ByteCount);
-int hc_hipMemFree                (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dptr);
-int hc_hipModuleGetFunction      (hashcat_ctx_t *hashcat_ctx, HIPfunction *hfunc, HIPmodule hmod, const char *name);
-int hc_hipModuleLoadDataEx       (hashcat_ctx_t *hashcat_ctx, HIPmodule *module, const void *image, unsigned int numOptions, HIPjit_option *options, void **optionValues);
-int hc_hipModuleUnload           (hashcat_ctx_t *hashcat_ctx, HIPmodule hmod);
-int hc_hipStreamCreate           (hashcat_ctx_t *hashcat_ctx, HIPstream *phStream, unsigned int Flags);
-int hc_hipStreamDestroy          (hashcat_ctx_t *hashcat_ctx, HIPstream hStream);
-int hc_hipStreamSynchronize      (hashcat_ctx_t *hashcat_ctx, HIPstream hStream);
-int hc_hipCtxPushCurrent         (hashcat_ctx_t *hashcat_ctx, HIPcontext ctx);
-int hc_hipCtxPopCurrent          (hashcat_ctx_t *hashcat_ctx, HIPcontext *pctx);
-int hc_hipLinkCreate             (hashcat_ctx_t *hashcat_ctx, unsigned int numOptions, HIPjit_option *options, void **optionValues, HIPlinkState *stateOut);
-int hc_hipLinkAddData            (hashcat_ctx_t *hashcat_ctx, HIPlinkState state, HIPjitInputType type, void *data, size_t size, const char *name, unsigned int numOptions, HIPjit_option *options, void **optionValues);
-int hc_hipLinkDestroy            (hashcat_ctx_t *hashcat_ctx, HIPlinkState state);
-int hc_hipLinkComplete           (hashcat_ctx_t *hashcat_ctx, HIPlinkState state, void **cubinOut, size_t *sizeOut);
+int hc_hipLaunchKernel           (hashcat_ctx_t *hashcat_ctx, hipFunction_t f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, hipStream_t hStream, void **kernelParams, void **extra);
+int hc_hipMemAlloc               (hashcat_ctx_t *hashcat_ctx, hipDeviceptr_t *dptr, size_t bytesize);
+int hc_hipMemFree                (hashcat_ctx_t *hashcat_ctx, hipDeviceptr_t dptr);
+int hc_hipMemcpyDtoD             (hashcat_ctx_t *hashcat_ctx, hipDeviceptr_t dstDevice, hipDeviceptr_t srcDevice, size_t ByteCount);
+int hc_hipMemcpyDtoHAsync        (hashcat_ctx_t *hashcat_ctx, void *dstHost, hipDeviceptr_t srcDevice, size_t ByteCount, hipStream_t hStream);
+int hc_hipMemcpyDtoH             (hashcat_ctx_t *hashcat_ctx, void *dstHost, hipDeviceptr_t srcDevice, size_t ByteCount);
+int hc_hipMemcpyDtoDAsync        (hashcat_ctx_t *hashcat_ctx, hipDeviceptr_t dstDevice, hipDeviceptr_t srcDevice, size_t ByteCount, hipStream_t hStream);
+int hc_hipMemcpyHtoD             (hashcat_ctx_t *hashcat_ctx, hipDeviceptr_t dstDevice, const void *srcHost, size_t ByteCount);
+int hc_hipMemcpyHtoDAsync        (hashcat_ctx_t *hashcat_ctx, hipDeviceptr_t dstDevice, const void *srcHost, size_t ByteCount, hipStream_t hStream);
+int hc_hipModuleGetFunction      (hashcat_ctx_t *hashcat_ctx, hipFunction_t *hfunc, hipModule_t hmod, const char *name);
+int hc_hipModuleGetGlobal        (hashcat_ctx_t *hashcat_ctx, hipDeviceptr_t *dptr, size_t *bytes, hipModule_t hmod, const char *name);
+int hc_hipModuleLoadDataEx       (hashcat_ctx_t *hashcat_ctx, hipModule_t *module, const void *image, unsigned int numOptions, hipJitOption *options, void **optionValues);
+int hc_hipModuleUnload           (hashcat_ctx_t *hashcat_ctx, hipModule_t hmod);
+int hc_hipStreamCreate           (hashcat_ctx_t *hashcat_ctx, hipStream_t *phStream, unsigned int Flags);
+int hc_hipStreamDestroy          (hashcat_ctx_t *hashcat_ctx, hipStream_t hStream);
+int hc_hipStreamSynchronize      (hashcat_ctx_t *hashcat_ctx, hipStream_t hStream);
 
 int hc_clBuildProgram            (hashcat_ctx_t *hashcat_ctx, cl_program program, cl_uint num_devices, const cl_device_id *device_list, const char *options, void (CL_CALLBACK *pfn_notify) (cl_program program, void *user_data), void *user_data);
 int hc_clCompileProgram          (hashcat_ctx_t *hashcat_ctx, cl_program program, cl_uint num_devices, const cl_device_id *device_list, const char *options, cl_uint num_input_headers, const cl_program *input_headers, const char **header_include_names, void (CL_CALLBACK *pfn_notify) (cl_program program, void *user_data), void *user_data);
@@ -183,10 +180,10 @@ int run_cuda_kernel_utf8toutf16le   (hashcat_ctx_t *hashcat_ctx, hc_device_param
 int run_cuda_kernel_memset          (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u32 value, const u64 size);
 int run_cuda_kernel_bzero           (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 size);
 
-int run_hip_kernel_atinit           (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, HIPdeviceptr buf, const u64 num);
-int run_hip_kernel_utf8toutf16le    (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, HIPdeviceptr buf, const u64 num);
-int run_hip_kernel_memset           (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, HIPdeviceptr buf, const u32 value, const u64 size);
-int run_hip_kernel_bzero            (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, HIPdeviceptr buf, const u64 size);
+int run_hip_kernel_atinit           (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, hipDeviceptr_t buf, const u64 num);
+int run_hip_kernel_utf8toutf16le    (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, hipDeviceptr_t buf, const u64 num);
+int run_hip_kernel_memset           (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, hipDeviceptr_t buf, const u32 value, const u64 size);
+int run_hip_kernel_bzero            (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, hipDeviceptr_t buf, const u64 size);
 
 int run_opencl_kernel_atinit        (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 num);
 int run_opencl_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 num);
diff --git a/include/ext_hip.h b/include/ext_hip.h
index a99d1e5a9..1477c20c4 100644
--- a/include/ext_hip.h
+++ b/include/ext_hip.h
@@ -6,995 +6,344 @@
 #ifndef _EXT_HIP_H
 #define _EXT_HIP_H
 
-/**
- * TODO: FIX ME
- */
-
-#define __HIP_API_VERSION 4221131
-
-/**
- * HIP device pointer
- * HIPdeviceptr is defined as an unsigned integer type whose size matches the size of a pointer on the target platform.
- */
-#if __HIP_API_VERSION >= 3020
-
-#if defined(_WIN64) || defined(__LP64__)
-typedef unsigned long long HIPdeviceptr;
-#else
-typedef unsigned int HIPdeviceptr;
-#endif
-
-#endif /* __HIP_API_VERSION >= 3020 */
-
-typedef int HIPdevice;                                     /**< HIP device */
-typedef struct HIPctx_st *HIPcontext;                       /**< HIP context */
-typedef struct HIPevent_st *HIPevent;                       /**< HIP event */
-typedef struct HIPfunc_st *HIPfunction;                     /**< HIP function */
-typedef struct HIPmod_st *HIPmodule;                        /**< HIP module */
-typedef struct HIPstream_st *HIPstream;                     /**< HIP stream */
-typedef struct HIPlinkState_st *HIPlinkState;
-
-
-typedef enum hipError_enum {
-    /**
-     * The API call returned with no errors. In the case of query calls, this
-     * also means that the operation being queried is complete (see
-     * ::hipEventQuery() and ::hipStreamQuery()).
-     */
-    HIP_SUCCESS                              = 0,
-
-    /**
-     * This indicates that one or more of the parameters passed to the API call
-     * is not within an acceptable range of values.
-     */
-    HIP_ERROR_INVALID_VALUE                  = 1,
-
-    /**
-     * The API call failed because it was unable to allocate enough memory to
-     * perform the requested operation.
-     */
-    HIP_ERROR_OUT_OF_MEMORY                  = 2,
-
-    /**
-     * This indicates that the HIP driver has not been initialized with
-     * ::hipInit() or that initialization has failed.
-     */
-    HIP_ERROR_NOT_INITIALIZED                = 3,
-
-    /**
-     * This indicates that the HIP driver is in the process of shutting down.
-     */
-    HIP_ERROR_DEINITIALIZED                  = 4,
-
-    /**
-     * This indicates profiler is not initialized for this run. This can
-     * happen when the application is running with external profiling tools
-     * like visual profiler.
-     */
-    HIP_ERROR_PROFILER_DISABLED              = 5,
-
-    /**
-     * \deprecated
-     * This error return is deprecated as of HIP 5.0. It is no longer an error
-     * to attempt to enable/disable the profiling via ::hipProfilerStart or
-     * ::hipProfilerStop without initialization.
-     */
-    HIP_ERROR_PROFILER_NOT_INITIALIZED       = 6,
-
-    /**
-     * \deprecated
-     * This error return is deprecated as of HIP 5.0. It is no longer an error
-     * to call hipProfilerStart() when profiling is already enabled.
-     */
-    HIP_ERROR_PROFILER_ALREADY_STARTED       = 7,
-
-    /**
-     * \deprecated
-     * This error return is deprecated as of HIP 5.0. It is no longer an error
-     * to call hipProfilerStop() when profiling is already disabled.
-     */
-    HIP_ERROR_PROFILER_ALREADY_STOPPED       = 8,
-
-    /**
-     * This indicates that no HIP-capable devices were detected by the installed
-     * HIP driver.
-     */
-    HIP_ERROR_NO_DEVICE                      = 100,
-
-    /**
-     * This indicates that the device ordinal supplied by the user does not
-     * correspond to a valid HIP device.
-     */
-    HIP_ERROR_INVALID_DEVICE                 = 101,
-
-
-    /**
-     * This indicates that the device kernel image is invalid. This can also
-     * indicate an invalid HIP module.
-     */
-    HIP_ERROR_INVALID_IMAGE                  = 200,
-
-    /**
-     * This most frequently indicates that there is no context bound to the
-     * hiprrent thread. This can also be returned if the context passed to an
-     * API call is not a valid handle (such as a context that has had
-     * ::hipCtxDestroy() invoked on it). This can also be returned if a user
-     * mixes different API versions (i.e. 3010 context with 3020 API calls).
-     * See ::hipCtxGetApiVersion() for more details.
-     */
-    HIP_ERROR_INVALID_CONTEXT                = 201,
-
-    /**
-     * This indicated that the context being supplied as a parameter to the
-     * API call was already the active context.
-     * \deprecated
-     * This error return is deprecated as of HIP 3.2. It is no longer an
-     * error to attempt to push the active context via ::hipCtxPushCurrent().
-     */
-    HIP_ERROR_CONTEXT_ALREADY_CURRENT        = 202,
-
-    /**
-     * This indicates that a map or register operation has failed.
-     */
-    HIP_ERROR_MAP_FAILED                     = 205,
-
-    /**
-     * This indicates that an unmap or unregister operation has failed.
-     */
-    HIP_ERROR_UNMAP_FAILED                   = 206,
-
-    /**
-     * This indicates that the specified array is currently mapped and thus
-     * cannot be destroyed.
-     */
-    HIP_ERROR_ARRAY_IS_MAPPED                = 207,
-
-    /**
-     * This indicates that the resource is already mapped.
-     */
-    HIP_ERROR_ALREADY_MAPPED                 = 208,
-
-    /**
-     * This indicates that there is no kernel image available that is suitable
-     * for the device. This can occur when a user specifies code generation
-     * options for a particular HIP source file that do not include the
-     * corresponding device configuration.
-     */
-    HIP_ERROR_NO_BINARY_FOR_GPU              = 209,
-
-    /**
-     * This indicates that a resource has already been acquired.
-     */
-    HIP_ERROR_ALREADY_ACQUIRED               = 210,
-
-    /**
-     * This indicates that a resource is not mapped.
-     */
-    HIP_ERROR_NOT_MAPPED                     = 211,
-
-    /**
-     * This indicates that a mapped resource is not available for access as an
-     * array.
-     */
-    HIP_ERROR_NOT_MAPPED_AS_ARRAY            = 212,
-
-    /**
-     * This indicates that a mapped resource is not available for access as a
-     * pointer.
-     */
-    HIP_ERROR_NOT_MAPPED_AS_POINTER          = 213,
-
-    /**
-     * This indicates that an uncorrectable ECC error was detected during
-     * execution.
-     */
-    HIP_ERROR_ECC_UNCORRECTABLE              = 214,
-
-    /**
-     * This indicates that the ::HIPlimit passed to the API call is not
-     * supported by the active device.
-     */
-    HIP_ERROR_UNSUPPORTED_LIMIT              = 215,
-
-    /**
-     * This indicates that the ::HIPcontext passed to the API call can
-     * only be bound to a single CPU thread at a time but is already
-     * bound to a CPU thread.
-     */
-    HIP_ERROR_CONTEXT_ALREADY_IN_USE         = 216,
-
-    /**
-     * This indicates that peer access is not supported across the given
-     * devices.
-     */
-    HIP_ERROR_PEER_ACCESS_UNSUPPORTED        = 217,
-
-    /**
-     * This indicates that a PTX JIT compilation failed.
-     */
-    HIP_ERROR_INVALID_PTX                    = 218,
-
-    /**
-     * This indicates an error with OpenGL or DirectX context.
-     */
-    HIP_ERROR_INVALID_GRAPHICS_CONTEXT       = 219,
-
-    /**
-    * This indicates that an uncorrectable NVLink error was detected during the
-    * execution.
-    */
-    HIP_ERROR_NVLINK_UNCORRECTABLE           = 220,
-
-    /**
-    * This indicates that the PTX JIT compiler library was not found.
-    */
-    HIP_ERROR_JIT_COMPILER_NOT_FOUND         = 221,
-
-    /**
-     * This indicates that the device kernel source is invalid.
-     */
-    HIP_ERROR_INVALID_SOURCE                 = 300,
-
-    /**
-     * This indicates that the file specified was not found.
-     */
-    HIP_ERROR_FILE_NOT_FOUND                 = 301,
-
-    /**
-     * This indicates that a link to a shared object failed to resolve.
-     */
-    HIP_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302,
-
-    /**
-     * This indicates that initialization of a shared object failed.
-     */
-    HIP_ERROR_SHARED_OBJECT_INIT_FAILED      = 303,
-
-    /**
-     * This indicates that an OS call failed.
-     */
-    HIP_ERROR_OPERATING_SYSTEM               = 304,
-
-    /**
-     * This indicates that a resource handle passed to the API call was not
-     * valid. Resource handles are opaque types like ::HIPstream and ::HIPevent.
-     */
-    HIP_ERROR_INVALID_HANDLE                 = 400,
-
-    /**
-     * This indicates that a resource required by the API call is not in a
-     * valid state to perform the requested operation.
-     */
-    HIP_ERROR_ILLEGAL_STATE                  = 401,
-
-    /**
-     * This indicates that a named symbol was not found. Examples of symbols
-     * are global/constant variable names, texture names, and surface names.
-     */
-    HIP_ERROR_NOT_FOUND                      = 500,
-
-    /**
-     * This indicates that asynchronous operations issued previously have not
-     * completed yet. This result is not actually an error, but must be indicated
-     * differently than ::HIP_SUCCESS (which indicates completion). Calls that
-     * may return this value include ::hipEventQuery() and ::hipStreamQuery().
-     */
-    HIP_ERROR_NOT_READY                      = 600,
-
-    /**
-     * While executing a kernel, the device encountered a
-     * load or store instruction on an invalid memory address.
-     * This leaves the process in an inconsistent state and any further HIP work
-     * will return the same error. To continue using HIP, the process must be terminated
-     * and relaunched.
-     */
-    HIP_ERROR_ILLEGAL_ADDRESS                = 700,
-
-    /**
-     * This indicates that a launch did not occur because it did not have
-     * appropriate resources. This error usually indicates that the user has
-     * attempted to pass too many arguments to the device kernel, or the
-     * kernel launch specifies too many threads for the kernel's register
-     * count. Passing arguments of the wrong size (i.e. a 64-bit pointer
-     * when a 32-bit int is expected) is equivalent to passing too many
-     * arguments and can also result in this error.
-     */
-    HIP_ERROR_LAUNCH_OUT_OF_RESOURCES        = 701,
-
-    /**
-     * This indicates that the device kernel took too long to execute. This can
-     * only occur if timeouts are enabled - see the device attribute
-     * ::HIP_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT for more information.
-     * This leaves the process in an inconsistent state and any further HIP work
-     * will return the same error. To continue using HIP, the process must be terminated
-     * and relaunched.
-     */
-    HIP_ERROR_LAUNCH_TIMEOUT                 = 702,
-
-    /**
-     * This error indicates a kernel launch that uses an incompatible texturing
-     * mode.
-     */
-    HIP_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING  = 703,
-
-    /**
-     * This error indicates that a call to ::hipCtxEnablePeerAccess() is
-     * trying to re-enable peer access to a context which has already
-     * had peer access to it enabled.
-     */
-    HIP_ERROR_PEER_ACCESS_ALREADY_ENABLED    = 704,
-
-    /**
-     * This error indicates that ::hipCtxDisablePeerAccess() is
-     * trying to disable peer access which has not been enabled yet
-     * via ::hipCtxEnablePeerAccess().
-     */
-    HIP_ERROR_PEER_ACCESS_NOT_ENABLED        = 705,
-
-    /**
-     * This error indicates that the primary context for the specified device
-     * has already been initialized.
-     */
-    HIP_ERROR_PRIMARY_CONTEXT_ACTIVE         = 708,
-
-    /**
-     * This error indicates that the context hiprrent to the calling thread
-     * has been destroyed using ::hipCtxDestroy, or is a primary context which
-     * has not yet been initialized.
-     */
-    HIP_ERROR_CONTEXT_IS_DESTROYED           = 709,
-
-    /**
-     * A device-side assert triggered during kernel execution. The context
-     * cannot be used anymore, and must be destroyed. All existing device
-     * memory allocations from this context are invalid and must be
-     * reconstructed if the program is to continue using HIP.
-     */
-    HIP_ERROR_ASSERT                         = 710,
-
-    /**
-     * This error indicates that the hardware resources required to enable
-     * peer access have been exhausted for one or more of the devices
-     * passed to ::hipCtxEnablePeerAccess().
-     */
-    HIP_ERROR_TOO_MANY_PEERS                 = 711,
-
-    /**
-     * This error indicates that the memory range passed to ::hipMemHostRegister()
-     * has already been registered.
-     */
-    HIP_ERROR_HOST_MEMORY_ALREADY_REGISTERED = 712,
-
-    /**
-     * This error indicates that the pointer passed to ::hipMemHostUnregister()
-     * does not correspond to any currently registered memory region.
-     */
-    HIP_ERROR_HOST_MEMORY_NOT_REGISTERED     = 713,
-
-    /**
-     * While executing a kernel, the device encountered a stack error.
-     * This can be due to stack corruption or exceeding the stack size limit.
-     * This leaves the process in an inconsistent state and any further HIP work
-     * will return the same error. To continue using HIP, the process must be terminated
-     * and relaunched.
-     */
-    HIP_ERROR_HARDWARE_STACK_ERROR           = 714,
-
-    /**
-     * While executing a kernel, the device encountered an illegal instruction.
-     * This leaves the process in an inconsistent state and any further HIP work
-     * will return the same error. To continue using HIP, the process must be terminated
-     * and relaunched.
-     */
-    HIP_ERROR_ILLEGAL_INSTRUCTION            = 715,
-
-    /**
-     * While executing a kernel, the device encountered a load or store instruction
-     * on a memory address which is not aligned.
-     * This leaves the process in an inconsistent state and any further HIP work
-     * will return the same error. To continue using HIP, the process must be terminated
-     * and relaunched.
-     */
-    HIP_ERROR_MISALIGNED_ADDRESS             = 716,
-
-    /**
-     * While executing a kernel, the device encountered an instruction
-     * which can only operate on memory locations in certain address spaces
-     * (global, shared, or local), but was supplied a memory address not
-     * belonging to an allowed address space.
-     * This leaves the process in an inconsistent state and any further HIP work
-     * will return the same error. To continue using HIP, the process must be terminated
-     * and relaunched.
-     */
-    HIP_ERROR_INVALID_ADDRESS_SPACE          = 717,
-
-    /**
-     * While executing a kernel, the device program counter wrapped its address space.
-     * This leaves the process in an inconsistent state and any further HIP work
-     * will return the same error. To continue using HIP, the process must be terminated
-     * and relaunched.
-     */
-    HIP_ERROR_INVALID_PC                     = 718,
-
-    /**
-     * An exception occurred on the device while executing a kernel. Common
-     * causes include dereferencing an invalid device pointer and accessing
-     * out of bounds shared memory. Less common cases can be system specific - more
-     * information about these cases can be found in the system specific user guide.
-     * This leaves the process in an inconsistent state and any further HIP work
-     * will return the same error. To continue using HIP, the process must be terminated
-     * and relaunched.
-     */
-    HIP_ERROR_LAUNCH_FAILED                  = 719,
-
-    /**
-     * This error indicates that the number of blocks launched per grid for a kernel that was
-     * launched via either ::hipLaunchCooperativeKernel or ::hipLaunchCooperativeKernelMultiDevice
-     * exceeds the maximum number of blocks as allowed by ::hipOccupancyMaxActiveBlocksPerMultiprocessor
-     * or ::hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags times the number of multiprocessors
-     * as specified by the device attribute ::HIP_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT.
-     */
-    HIP_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE   = 720,
-
-    /**
-     * This error indicates that the attempted operation is not permitted.
-     */
-    HIP_ERROR_NOT_PERMITTED                  = 800,
-
-    /**
-     * This error indicates that the attempted operation is not supported
-     * on the current system or device.
-     */
-    HIP_ERROR_NOT_SUPPORTED                  = 801,
-
-    /**
-     * This error indicates that the system is not yet ready to start any HIP
-     * work.  To continue using HIP, verify the system configuration is in a
-     * valid state and all required driver daemons are actively running.
-     * More information about this error can be found in the system specific
-     * user guide.
-     */
-    HIP_ERROR_SYSTEM_NOT_READY               = 802,
-
-    /**
-     * This error indicates that there is a mismatch between the versions of
-     * the display driver and the HIP driver. Refer to the compatibility documentation
-     * for supported versions.
-     */
-    HIP_ERROR_SYSTEM_DRIVER_MISMATCH         = 803,
-
-    /**
-     * This error indicates that the system was upgraded to run with forward compatibility
-     * but the visible hardware detected by HIP does not support this configuration.
-     * Refer to the compatibility documentation for the supported hardware matrix or ensure
-     * that only supported hardware is visible during initialization via the HIP_VISIBLE_DEVICES
-     * environment variable.
-     */
-    HIP_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE = 804,
-
-    /**
-     * This error indicates that the operation is not permitted when
-     * the stream is capturing.
-     */
-    HIP_ERROR_STREAM_CAPTURE_UNSUPPORTED     = 900,
-
-    /**
-     * This error indicates that the current capture sequence on the stream
-     * has been invalidated due to a previous error.
-     */
-    HIP_ERROR_STREAM_CAPTURE_INVALIDATED     = 901,
-
-    /**
-     * This error indicates that the operation would have resulted in a merge
-     * of two independent capture sequences.
-     */
-    HIP_ERROR_STREAM_CAPTURE_MERGE           = 902,
-
-    /**
-     * This error indicates that the capture was not initiated in this stream.
-     */
-    HIP_ERROR_STREAM_CAPTURE_UNMATCHED       = 903,
-
-    /**
-     * This error indicates that the capture sequence contains a fork that was
-     * not joined to the primary stream.
-     */
-    HIP_ERROR_STREAM_CAPTURE_UNJOINED        = 904,
-
-    /**
-     * This error indicates that a dependency would have been created which
-     * crosses the capture sequence boundary. Only implicit in-stream ordering
-     * dependencies are allowed to cross the boundary.
-     */
-    HIP_ERROR_STREAM_CAPTURE_ISOLATION       = 905,
-
-    /**
-     * This error indicates a disallowed implicit dependency on a current capture
-     * sequence from HIPStreamLegacy.
-     */
-    HIP_ERROR_STREAM_CAPTURE_IMPLICIT        = 906,
-
-    /**
-     * This error indicates that the operation is not permitted on an event which
-     * was last recorded in a capturing stream.
-     */
-    HIP_ERROR_CAPTURED_EVENT                 = 907,
-
-    /**
-     * A stream capture sequence not initiated with the ::HIP_STREAM_CAPTURE_MODE_RELAXED
-     * argument to ::HIPStreamBeginCapture was passed to ::hipStreamEndCapture in a
-     * different thread.
-     */
-    HIP_ERROR_STREAM_CAPTURE_WRONG_THREAD    = 908,
-
-    /**
-     * This indicates that an unknown internal error has occurred.
-     */
-    HIP_ERROR_UNKNOWN                        = 999
-} HIPresult;
-
-/**
- * Online compiler and linker options
- */
-typedef enum HIPjit_option_enum
-{
-    /**
-     * Max number of registers that a thread may use.\n
-     * Option type: unsigned int\n
-     * Applies to: compiler only
-     */
-    HIP_JIT_MAX_REGISTERS = 0,
-
-    /**
-     * IN: Specifies minimum number of threads per block to target compilation
-     * for\n
-     * OUT: Returns the number of threads the compiler actually targeted.
-     * This restricts the resource utilization fo the compiler (e.g. max
-     * registers) such that a block with the given number of threads should be
-     * able to launch based on register limitations. Note, this option does not
-     * currently take into account any other resource limitations, such as
-     * shared memory utilization.\n
-     * Cannot be combined with ::HIP_JIT_TARGET.\n
-     * Option type: unsigned int\n
-     * Applies to: compiler only
-     */
-    HIP_JIT_THREADS_PER_BLOCK,
-
-    /**
-     * Overwrites the option value with the total wall clock time, in
-     * milliseconds, spent in the compiler and linker\n
-     * Option type: float\n
-     * Applies to: compiler and linker
-     */
-    HIP_JIT_WALL_TIME,
-
-    /**
-     * Pointer to a buffer in which to print any log messages
-     * that are informational in nature (the buffer size is specified via
-     * option ::HIP_JIT_INFO_LOG_BUFFER_SIZE_BYTES)\n
-     * Option type: char *\n
-     * Applies to: compiler and linker
-     */
-    HIP_JIT_INFO_LOG_BUFFER,
-
-    /**
-     * IN: Log buffer size in bytes.  Log messages will be capped at this size
-     * (including null terminator)\n
-     * OUT: Amount of log buffer filled with messages\n
-     * Option type: unsigned int\n
-     * Applies to: compiler and linker
-     */
-    HIP_JIT_INFO_LOG_BUFFER_SIZE_BYTES,
-
-    /**
-     * Pointer to a buffer in which to print any log messages that
-     * reflect errors (the buffer size is specified via option
-     * ::HIP_JIT_ERROR_LOG_BUFFER_SIZE_BYTES)\n
-     * Option type: char *\n
-     * Applies to: compiler and linker
-     */
-    HIP_JIT_ERROR_LOG_BUFFER,
-
-    /**
-     * IN: Log buffer size in bytes.  Log messages will be capped at this size
-     * (including null terminator)\n
-     * OUT: Amount of log buffer filled with messages\n
-     * Option type: unsigned int\n
-     * Applies to: compiler and linker
-     */
-    HIP_JIT_ERROR_LOG_BUFFER_SIZE_BYTES,
-
-    /**
-     * Level of optimizations to apply to generated code (0 - 4), with 4
-     * being the default and highest level of optimizations.\n
-     * Option type: unsigned int\n
-     * Applies to: compiler only
-     */
-    HIP_JIT_OPTIMIZATION_LEVEL,
-
-    /**
-     * No option value required. Determines the target based on the current
-     * attached context (default)\n
-     * Option type: No option value needed\n
-     * Applies to: compiler and linker
-     */
-    HIP_JIT_TARGET_FROM_HIPCONTEXT,
-
-    /**
-     * Target is chosen based on supplied ::HIPjit_target.  Cannot be
-     * combined with ::HIP_JIT_THREADS_PER_BLOCK.\n
-     * Option type: unsigned int for enumerated type ::HIPjit_target\n
-     * Applies to: compiler and linker
-     */
-    HIP_JIT_TARGET,
-
-    /**
-     * Specifies choice of fallback strategy if matching HIPbin is not found.
-     * Choice is based on supplied ::HIPjit_fallback.  This option cannot be
-     * used with HIPLink* APIs as the linker requires exact matches.\n
-     * Option type: unsigned int for enumerated type ::HIPjit_fallback\n
-     * Applies to: compiler only
-     */
-    HIP_JIT_FALLBACK_STRATEGY,
-
-    /**
-     * Specifies whether to create debug information in output (-g)
-     * (0: false, default)\n
-     * Option type: int\n
-     * Applies to: compiler and linker
-     */
-    HIP_JIT_GENERATE_DEBUG_INFO,
-
-    /**
-     * Generate verbose log messages (0: false, default)\n
-     * Option type: int\n
-     * Applies to: compiler and linker
-     */
-    HIP_JIT_LOG_VERBOSE,
-
-    /**
-     * Generate line number information (-lineinfo) (0: false, default)\n
-     * Option type: int\n
-     * Applies to: compiler only
-     */
-    HIP_JIT_GENERATE_LINE_INFO,
-
-    /**
-     * Specifies whether to enable caching explicitly (-dlcm) \n
-     * Choice is based on supplied ::HIPjit_cacheMode_enum.\n
-     * Option type: unsigned int for enumerated type ::HIPjit_cacheMode_enum\n
-     * Applies to: compiler only
-     */
-    HIP_JIT_CACHE_MODE,
-
-    /**
-     * The below jit options are used for internal purposes only, in this version of HIP
-     */
-    HIP_JIT_NEW_SM3X_OPT,
-    HIP_JIT_FAST_COMPILE,
-
-    /**
-     * Array of device symbol names that will be relocated to the corresponing
-     * host addresses stored in ::HIP_JIT_GLOBAL_SYMBOL_ADDRESSES.\n
-     * Must contain ::HIP_JIT_GLOBAL_SYMBOL_COUNT entries.\n
-     * When loding a device module, driver will relocate all encountered
-     * unresolved symbols to the host addresses.\n
-     * It is only allowed to register symbols that correspond to unresolved
-     * global variables.\n
-     * It is illegal to register the same device symbol at multiple addresses.\n
-     * Option type: const char **\n
-     * Applies to: dynamic linker only
-     */
-    HIP_JIT_GLOBAL_SYMBOL_NAMES,
-
-    /**
-     * Array of host addresses that will be used to relocate corresponding
-     * device symbols stored in ::HIP_JIT_GLOBAL_SYMBOL_NAMES.\n
-     * Must contain ::HIP_JIT_GLOBAL_SYMBOL_COUNT entries.\n
-     * Option type: void **\n
-     * Applies to: dynamic linker only
-     */
-    HIP_JIT_GLOBAL_SYMBOL_ADDRESSES,
-
-    /**
-     * Number of entries in ::HIP_JIT_GLOBAL_SYMBOL_NAMES and
-     * ::HIP_JIT_GLOBAL_SYMBOL_ADDRESSES arrays.\n
-     * Option type: unsigned int\n
-     * Applies to: dynamic linker only
-     */
-    HIP_JIT_GLOBAL_SYMBOL_COUNT,
-
-    HIP_JIT_NUM_OPTIONS
-
-} HIPjit_option;
-
-
-/**
- * Device properties
- */
-typedef enum HIPdevice_attribute_enum {
-    
-    HIP_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0,              /**< Maximum number of threads per block */
-    HIP_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 1,                    /**< Maximum block dimension X */
-    HIP_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 2,                    /**< Maximum block dimension Y */
-    HIP_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 3,                    /**< Maximum block dimension Z */
-    HIP_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 4,                     /**< Maximum grid dimension X */
-    HIP_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = 5,                     /**< Maximum grid dimension Y */
-    HIP_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = 6,                     /**< Maximum grid dimension Z */
-    HIP_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 7,        /**< Maximum shared memory available per block in bytes */
-    HIP_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK = 7,            /**< Deprecated, use HIP_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK */
-    HIP_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN = 7, /**< Maximum optin shared memory per block */
-    HIP_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = 8,              /**< Memory available on device for __constant__ variables in a HIP C kernel in bytes */
-    HIP_DEVICE_ATTRIBUTE_WARP_SIZE = 9,                         /**< Warp size in threads */
-    HIP_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 10,           /**< Maximum number of 32-bit registers available per block */
-    HIP_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK = 10,               /**< Deprecated, use HIP_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK */
-    HIP_DEVICE_ATTRIBUTE_CLOCK_RATE = 11,                        /**< Typical clock frequency in kilohertz */
-    HIP_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE = 12,                 /**< Peak memory clock frequency in kilohertz */
-    HIP_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH = 13,           /**< Global memory bus width in bits */
-    HIP_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 14,              /**< Number of multiprocessors on device */
-    HIP_DEVICE_ATTRIBUTE_COMPUTE_MODE = 15,                      /**< Compute mode (See ::HIPcomputemode for details) */
-    HIP_DEVICE_ATTRIBUTE_L2_CACHE_SIZE = 16,                     /**< Size of L2 cache in bytes */
-    HIP_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 17,    /**< Maximum resident threads per multiprocessor */
-    HIP_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 18,          /**< Major compute capability version number */
-    HIP_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 19,          /**< Minor compute capability version number */
-    HIP_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 20,                /**< Device can possibly execute multiple kernels concurrently */
-    HIP_DEVICE_ATTRIBUTE_PCI_BUS_ID = 21,                        /**< PCI bus ID of the device */
-    HIP_DEVICE_ATTRIBUTE_PCI_DEVICE_ID = 22,                     /**< PCI device ID of the device */
-    HIP_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID = 22,                     /**< PCI domain ID of the device */
-    HIP_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR = 23,  /**< Maximum shared memory available per multiprocessor in bytes */
-    HIP_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD = 24,                    /**< Device is on a multi-GPU board */
-    HIP_DEVICE_ATTRIBUTE_INTEGRATED = 25,                        /**< Device is integrated with host memory */
-    HIP_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH = 26,                /**< Device supports launching cooperative kernels via ::hipLaunchCooperativeKernel */
-    HIP_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH = 27,   /**< Device can participate in cooperative kernels launched via ::hipLaunchCooperativeKernelMultiDevice */
-    HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH = 28,           /**< Maximum 1D texture width */
-    HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH = 29,           /**< Maximum 2D texture width */
-    HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT = 30,          /**< Maximum 2D texture height */
-    HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH = 31,           /**< Maximum 3D texture width */
-    HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT = 32,          /**< Maximum 3D texture height */
-    HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH = 33,           /**< Maximum 3D texture depth */
-    
-    HIP_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 37,                 /**< Alignment requirement for textures */
-    HIP_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT = 38,           /**< Pitch alignment requirement for textures */
-    HIP_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = 39,               /**< Specifies whether there is a run time limit on kernels */
-    HIP_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 40,               /**< Device can map host memory into HIP address space */
-    HIP_DEVICE_ATTRIBUTE_ECC_ENABLED = 41,                       /**< Device has ECC support enabled */
-    
-    HIP_DEVICE_ATTRIBUTE_MANAGED_MEMORY = 47,                    /**< Device can allocate managed memory on this system */
-    HIP_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST = 48, /**< The host can directly access managed memory on the device without migration. */
-    HIP_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS = 49,         /**< Device can coherently access managed memory concurrently with the CPU */
-    HIP_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS = 50,            /**< Device supports coherently accessing pageable memory without calling HIPHostRegister on it */
-    HIP_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES = 51, /**< Device accesses pageable memory via the host's page tables. */
-    HIP_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR = 52,     /**< ::HIP_STREAM_WAIT_VALUE_NOR is supported. */
-    
-    
-    // HIP_DEVICE_ATTRIBUTE_MAX_PITCH = ,                         /**< Maximum pitch in bytes allowed by memory copies */
-    // HIP_DEVICE_ATTRIBUTE_GPU_OVERLAP = ,                       /**< Device can possibly copy memory and execute a kernel concurrently. Deprecated. Use instead HIP_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT. */
-    // 
-    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH = ,   /**< Maximum 2D layered texture width */
-    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT = ,  /**< Maximum 2D layered texture height */
-    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS = ,  /**< Maximum layers in a 2D layered texture */
-    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH = ,     /**< Deprecated, use HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH */
-    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT = ,    /**< Deprecated, use HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT */
-    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES = , /**< Deprecated, use HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS */
-    // HIP_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT =,                 /**< Alignment requirement for surfaces */
-    // HIP_DEVICE_ATTRIBUTE_TCC_DRIVER = ,                        /**< Device is using TCC driver model */
-    // HIP_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT = ,                /**< Number of asynchronous engines */
-    // HIP_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING = ,                /**< Device shares a unified address space with the host */
-    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH = ,   /**< Maximum 1D layered texture width */
-    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS = ,  /**< Maximum layers in a 1D layered texture */
-    // HIP_DEVICE_ATTRIBUTE_CAN_TEX2D_GATHER = ,                  /**< Deprecated, do not use. */
-    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH = ,    /**< Maximum 2D texture width if HIP_ARRAY3D_TEXTURE_GATHER is set */
-    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT = ,   /**< Maximum 2D texture height if HIP_ARRAY3D_TEXTURE_GATHER is set */
-    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE = , /**< Alternate maximum 3D texture width */
-    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE = ,/**< Alternate maximum 3D texture height */
-    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE = , /**< Alternate maximum 3D texture depth */
-    // 
-    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH = ,      /**< Maximum cubemap texture width/height */
-    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH = ,  /**< Maximum cubemap layered texture width/height */
-    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS = , /**< Maximum layers in a cubemap layered texture */
-    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH = ,           /**< Maximum 1D surface width */
-    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH = ,           /**< Maximum 2D surface width */
-    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT = ,          /**< Maximum 2D surface height */
-    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH = ,           /**< Maximum 3D surface width */
-    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT = ,          /**< Maximum 3D surface height */
-    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH = ,           /**< Maximum 3D surface depth */
-    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH = ,   /**< Maximum 1D layered surface width */
-    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS = ,  /**< Maximum layers in a 1D layered surface */
-    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH = ,   /**< Maximum 2D layered surface width */
-    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT = ,  /**< Maximum 2D layered surface height */
-    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS = ,  /**< Maximum layers in a 2D layered surface */
-    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH = ,      /**< Maximum cubemap surface width */
-    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH = ,  /**< Maximum cubemap layered surface width */
-    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS = , /**< Maximum layers in a cubemap layered surface */
-    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH = ,    /**< Maximum 1D linear texture width */
-    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH = ,    /**< Maximum 2D linear texture width */
-    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT = ,   /**< Maximum 2D linear texture height */
-    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH = ,    /**< Maximum 2D linear texture pitch in bytes */
-    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH = , /**< Maximum mipmapped 2D texture width */
-    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT = ,/**< Maximum mipmapped 2D texture height */
-    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH = , /**< Maximum mipmapped 1D texture width */
-    // HIP_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED = ,       /**< Device supports stream priorities */
-    // HIP_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED = ,         /**< Device supports caching globals in L1 */
-    // HIP_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED = ,          /**< Device supports caching locals in L1 */
-    // HIP_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR = ,  /**< Maximum number of 32-bit registers available per multiprocessor */
-    // HIP_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID = ,           /**< Unique id for a group of devices on the same multi-GPU board */
-    // HIP_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED = ,       /**< Link between the device and the host supports native atomic operations (this is a placeholder attribute, and is not supported on any current hardware)*/
-    // HIP_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO = ,  /**< Ratio of single precision performance (in floating-point operations per second) to double precision performance */
-    // HIP_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED = ,      /**< Device supports compute preemption. */
-    // HIP_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM = , /**< Device can access host registered memory at the same virtual address as the CPU */
-    // HIP_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS = ,            /**< ::hipStreamBatchMemOp and related APIs are supported. */
-    // HIP_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS = ,     /**< 64-bit operations are supported in ::hipStreamBatchMemOp and related APIs. */
-    // HIP_DEVICE_ATTRIBUTE_CAN_FLUSH_REMOTE_WRITES = ,           /**< Both the ::HIP_STREAM_WAIT_VALUE_FLUSH flag and the ::HIP_STREAM_MEM_OP_FLUSH_REMOTE_WRITES MemOp are supported on the device. See \ref HIP_MEMOP for additional details. */
-    // HIP_DEVICE_ATTRIBUTE_HOST_REGISTER_SUPPORTED = ,           /**< Device supports host memory registration via ::HIPHostRegister. */
-    // HIP_DEVICE_ATTRIBUTE_MAX
-} HIPdevice_attribute;
-
-/**
- * Function cache configurations
- */
-typedef enum HIPfunc_cache_enum {
-    HIP_FUNC_CACHE_PREFER_NONE    = 0x00, /**< no preference for shared memory or L1 (default) */
-    HIP_FUNC_CACHE_PREFER_SHARED  = 0x01, /**< prefer larger shared memory and smaller L1 cache */
-    HIP_FUNC_CACHE_PREFER_L1      = 0x02, /**< prefer larger L1 cache and smaller shared memory */
-    HIP_FUNC_CACHE_PREFER_EQUAL   = 0x03  /**< prefer equal sized L1 cache and shared memory */
-} HIPfunc_cache;
-
-/**
- * Shared memory configurations
- */
-typedef enum HIPsharedconfig_enum {
-    HIP_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE    = 0x00, /**< set default shared memory bank size */
-    HIP_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE  = 0x01, /**< set shared memory bank width to four bytes */
-    HIP_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE = 0x02  /**< set shared memory bank width to eight bytes */
-} HIPsharedconfig;
-
-/**
- * Function properties
- */
-typedef enum HIPfunction_attribute_enum {
-    /**
-     * The maximum number of threads per block, beyond which a launch of the
-     * function would fail. This number depends on both the function and the
-     * device on which the function is currently loaded.
-     */
-    HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0,
-
-    /**
-     * The size in bytes of statically-allocated shared memory required by
-     * this function. This does not include dynamically-allocated shared
-     * memory requested by the user at runtime.
-     */
-    HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES = 1,
-
-    /**
-     * The size in bytes of user-allocated constant memory required by this
-     * function.
-     */
-    HIP_FUNC_ATTRIBUTE_CONST_SIZE_BYTES = 2,
-
-    /**
-     * The size in bytes of local memory used by each thread of this function.
-     */
-    HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES = 3,
-
-    /**
-     * The number of registers used by each thread of this function.
-     */
-    HIP_FUNC_ATTRIBUTE_NUM_REGS = 4,
-
-    /**
-     * The PTX virtual architecture version for which the function was
-     * compiled. This value is the major PTX version * 10 + the minor PTX
-     * version, so a PTX version 1.3 function would return the value 13.
-     * Note that this may return the undefined value of 0 for cubins
-     * compiled prior to HIP 3.0.
-     */
-    HIP_FUNC_ATTRIBUTE_PTX_VERSION = 5,
-
-    /**
-     * The binary architecture version for which the function was compiled.
-     * This value is the major binary version * 10 + the minor binary version,
-     * so a binary version 1.3 function would return the value 13. Note that
-     * this will return a value of 10 for legacy cubins that do not have a
-     * properly-encoded binary architecture version.
-     */
-    HIP_FUNC_ATTRIBUTE_BINARY_VERSION = 6,
-
-    /**
-     * The attribute to indicate whether the function has been compiled with
-     * user specified option "-Xptxas --dlcm=ca" set .
-     */
-    HIP_FUNC_ATTRIBUTE_CACHE_MODE_CA = 7,
-
-    /**
-     * The maximum size in bytes of dynamically-allocated shared memory that can be used by
-     * this function. If the user-specified dynamic shared memory size is larger than this
-     * value, the launch will fail.
-     * See ::hipFuncSetAttribute
-     */
-    HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES = 8,
-
-    /**
-     * On devices where the L1 cache and shared memory use the same hardware resources,
-     * this sets the shared memory carveout preference, in percent of the total shared memory.
-     * Refer to ::HIP_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR.
-     * This is only a hint, and the driver can choose a different ratio if required to execute the function.
-     * See ::hipFuncSetAttribute
-     */
-    HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT = 9,
-
+// The general idea with HIP is to use it for AMD GPUs, since we use CUDA for NVIDIA GPUs.
+// Therefore, we need to take certain items, such as hipDeviceptr_t, from driver-specific paths like amd_driver_types.h.
+// Keep this in mind in case these constants need to be updated from future SDK versions.
+
+// start: amd_driver_types.h
+
+typedef void* hipDeviceptr_t;
+
+typedef enum hipFunction_attribute {
+    HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK,
+    HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES,
+    HIP_FUNC_ATTRIBUTE_CONST_SIZE_BYTES,
+    HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES,
+    HIP_FUNC_ATTRIBUTE_NUM_REGS,
+    HIP_FUNC_ATTRIBUTE_PTX_VERSION,
+    HIP_FUNC_ATTRIBUTE_BINARY_VERSION,
+    HIP_FUNC_ATTRIBUTE_CACHE_MODE_CA,
+    HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES,
+    HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT,
     HIP_FUNC_ATTRIBUTE_MAX
-} HIPfunction_attribute;
-
-/**
- * Context creation flags
- */
-typedef enum HIPctx_flags_enum {
-    HIP_CTX_SCHED_AUTO          = 0x00, /**< Automatic scheduling */
-    HIP_CTX_SCHED_SPIN          = 0x01, /**< Set spin as default scheduling */
-    HIP_CTX_SCHED_YIELD         = 0x02, /**< Set yield as default scheduling */
-    HIP_CTX_SCHED_BLOCKING_SYNC = 0x04, /**< Set blocking synchronization as default scheduling */
-    HIP_CTX_BLOCKING_SYNC       = 0x04, /**< Set blocking synchronization as default scheduling
-                                         *  \deprecated This flag was deprecated as of HIP 4.0
-                                         *  and was replaced with ::HIP_CTX_SCHED_BLOCKING_SYNC. */
-    HIP_CTX_SCHED_MASK          = 0x07,
-    HIP_CTX_MAP_HOST            = 0x08, /**< Support mapped pinned allocations */
-    HIP_CTX_LMEM_RESIZE_TO_MAX  = 0x10, /**< Keep local memory allocation after launch */
-    HIP_CTX_FLAGS_MASK          = 0x1f
-} HIPctx_flags;
-
-/**
- * Stream creation flags
- */
-typedef enum HIPstream_flags_enum {
-    HIP_STREAM_DEFAULT      = 0x0, /**< Default stream flag */
-    HIP_STREAM_NON_BLOCKING = 0x1  /**< Stream does not synchronize with stream 0 (the NULL stream) */
-} HIPstream_flags;
-
-/**
- * Event creation flags
- */
-typedef enum HIPevent_flags_enum {
-    HIP_EVENT_DEFAULT        = 0x0, /**< Default event flag */
-    HIP_EVENT_BLOCKING_SYNC  = 0x1, /**< Event uses blocking synchronization */
-    HIP_EVENT_DISABLE_TIMING = 0x2, /**< Event will not record timing data */
-    HIP_EVENT_INTERPROCESS   = 0x4  /**< Event is suitable for interprocess use. HIP_EVENT_DISABLE_TIMING must be set */
-} HIPevent_flags;
+}hipFunction_attribute;
 
-typedef enum HIPjitInputType_enum
-{
-    /**
-     * Compiled device-class-specific device code\n
-     * Applicable options: none
-     */
-    HIP_JIT_INPUT_HIPBIN = 0,
+// stop: amd_driver_types.h
 
-    /**
-     * PTX source code\n
-     * Applicable options: PTX compiler options
-     */
-    HIP_JIT_INPUT_PTX,
+// start: hip_runtime_api.h
 
-    /**
-     * Bundle of multiple cubins and/or PTX of some device code\n
-     * Applicable options: PTX compiler options, ::HIP_JIT_FALLBACK_STRATEGY
-     */
-    HIP_JIT_INPUT_FATBINARY,
+typedef int hipDevice_t;
+typedef struct ihipCtx_t* hipCtx_t;
+typedef struct ihipEvent_t* hipEvent_t;
+typedef struct ihipStream_t* hipStream_t;
+typedef struct ihipModule_t* hipModule_t;
+typedef struct ihipModuleSymbol_t* hipFunction_t;
 
-    /**
-     * Host object with embedded device code\n
-     * Applicable options: PTX compiler options, ::HIP_JIT_FALLBACK_STRATEGY
-     */
-    HIP_JIT_INPUT_OBJECT,
+// Ignoring error-code return values from hip APIs is discouraged. On C++17,
+// we can make that yield a warning
+#if __cplusplus >= 201703L
+#define __HIP_NODISCARD [[nodiscard]]
+#else
+#define __HIP_NODISCARD
+#endif
 
-    /**
-     * Archive of host objects with embedded device code\n
-     * Applicable options: PTX compiler options, ::HIP_JIT_FALLBACK_STRATEGY
-     */
-    HIP_JIT_INPUT_LIBRARY,
+typedef enum __HIP_NODISCARD hipError_t {
+    hipSuccess = 0,  ///< Successful completion.
+    hipErrorInvalidValue = 1,  ///< One or more of the parameters passed to the API call is NULL
+                               ///< or not in an acceptable range.
+    hipErrorOutOfMemory = 2,
+    // Deprecated
+    hipErrorMemoryAllocation = 2,  ///< Memory allocation error.
+    hipErrorNotInitialized = 3,
+    // Deprecated
+    hipErrorInitializationError = 3,
+    hipErrorDeinitialized = 4,
+    hipErrorProfilerDisabled = 5,
+    hipErrorProfilerNotInitialized = 6,
+    hipErrorProfilerAlreadyStarted = 7,
+    hipErrorProfilerAlreadyStopped = 8,
+    hipErrorInvalidConfiguration = 9,
+    hipErrorInvalidPitchValue = 12,
+    hipErrorInvalidSymbol = 13,
+    hipErrorInvalidDevicePointer = 17,  ///< Invalid Device Pointer
+    hipErrorInvalidMemcpyDirection = 21,  ///< Invalid memory copy direction
+    hipErrorInsufficientDriver = 35,
+    hipErrorMissingConfiguration = 52,
+    hipErrorPriorLaunchFailure = 53,
+    hipErrorInvalidDeviceFunction = 98,
+    hipErrorNoDevice = 100,  ///< Call to hipGetDeviceCount returned 0 devices
+    hipErrorInvalidDevice = 101,  ///< DeviceID must be in range 0...#compute-devices.
+    hipErrorInvalidImage = 200,
+    hipErrorInvalidContext = 201,  ///< Produced when input context is invalid.
+    hipErrorContextAlreadyCurrent = 202,
+    hipErrorMapFailed = 205,
+    // Deprecated
+    hipErrorMapBufferObjectFailed = 205,  ///< Produced when the IPC memory attach failed from ROCr.
+    hipErrorUnmapFailed = 206,
+    hipErrorArrayIsMapped = 207,
+    hipErrorAlreadyMapped = 208,
+    hipErrorNoBinaryForGpu = 209,
+    hipErrorAlreadyAcquired = 210,
+    hipErrorNotMapped = 211,
+    hipErrorNotMappedAsArray = 212,
+    hipErrorNotMappedAsPointer = 213,
+    hipErrorECCNotCorrectable = 214,
+    hipErrorUnsupportedLimit = 215,
+    hipErrorContextAlreadyInUse = 216,
+    hipErrorPeerAccessUnsupported = 217,
+    hipErrorInvalidKernelFile = 218,  ///< In CUDA DRV, it is CUDA_ERROR_INVALID_PTX
+    hipErrorInvalidGraphicsContext = 219,
+    hipErrorInvalidSource = 300,
+    hipErrorFileNotFound = 301,
+    hipErrorSharedObjectSymbolNotFound = 302,
+    hipErrorSharedObjectInitFailed = 303,
+    hipErrorOperatingSystem = 304,
+    hipErrorInvalidHandle = 400,
+    // Deprecated
+    hipErrorInvalidResourceHandle = 400,  ///< Resource handle (hipEvent_t or hipStream_t) invalid.
+    hipErrorNotFound = 500,
+    hipErrorNotReady = 600,  ///< Indicates that asynchronous operations enqueued earlier are not
+                             ///< ready.  This is not actually an error, but is used to distinguish
+                             ///< from hipSuccess (which indicates completion).  APIs that return
+                             ///< this error include hipEventQuery and hipStreamQuery.
+    hipErrorIllegalAddress = 700,
+    hipErrorLaunchOutOfResources = 701,  ///< Out of resources error.
+    hipErrorLaunchTimeOut = 702,
+    hipErrorPeerAccessAlreadyEnabled =
+        704,  ///< Peer access was already enabled from the current device.
+    hipErrorPeerAccessNotEnabled =
+        705,  ///< Peer access was never enabled from the current device.
+    hipErrorSetOnActiveProcess = 708,
+    hipErrorContextIsDestroyed = 709,
+    hipErrorAssert = 710,  ///< Produced when the kernel calls assert.
+    hipErrorHostMemoryAlreadyRegistered =
+        712,  ///< Produced when trying to lock a page-locked memory.
+    hipErrorHostMemoryNotRegistered =
+        713,  ///< Produced when trying to unlock a non-page-locked memory.
+    hipErrorLaunchFailure =
+        719,  ///< An exception occurred on the device while executing a kernel.
+    hipErrorCooperativeLaunchTooLarge =
+        720,  ///< This error indicates that the number of blocks launched per grid for a kernel
+              ///< that was launched via cooperative launch APIs exceeds the maximum number of
+              ///< allowed blocks for the current device
+    hipErrorNotSupported = 801,  ///< Produced when the hip API is not supported/implemented
+    hipErrorStreamCaptureUnsupported = 900,  ///< The operation is not permitted when the stream
+                                             ///< is capturing.
+    hipErrorStreamCaptureInvalidated = 901,  ///< The current capture sequence on the stream
+                                             ///< has been invalidated due to a previous error.
+    hipErrorStreamCaptureMerge = 902,  ///< The operation would have resulted in a merge of
+                                       ///< two independent capture sequences.
+    hipErrorStreamCaptureUnmatched = 903,  ///< The capture was not initiated in this stream.
+    hipErrorStreamCaptureUnjoined = 904,  ///< The capture sequence contains a fork that was not
+                                          ///< joined to the primary stream.
+    hipErrorStreamCaptureIsolation = 905,  ///< A dependency would have been created which crosses
+                                           ///< the capture sequence boundary. Only implicit
+                                           ///< in-stream ordering dependencies  are allowed
+                                           ///< to cross the boundary
+    hipErrorStreamCaptureImplicit = 906,  ///< The operation would have resulted in a disallowed
+                                          ///< implicit dependency on a current capture sequence
+                                          ///< from hipStreamLegacy.
+    hipErrorCapturedEvent = 907,  ///< The operation is not permitted on an event which was last
+                                  ///< recorded in a capturing stream.
+    hipErrorStreamCaptureWrongThread = 908,  ///< A stream capture sequence not initiated with
+                                             ///< the hipStreamCaptureModeRelaxed argument to
+                                             ///< hipStreamBeginCapture was passed to
+                                             ///< hipStreamEndCapture in a different thread.
+    hipErrorUnknown = 999,  //< Unknown error.
+    // HSA Runtime Error Codes start here.
+    hipErrorRuntimeMemory = 1052,  ///< HSA runtime memory call returned error.  Typically not seen
+                                   ///< in production systems.
+    hipErrorRuntimeOther = 1053,  ///< HSA runtime call other than memory returned error.  Typically
+                                  ///< not seen in production systems.
+    hipErrorTbd  ///< Marker that more error codes are needed.
+} hipError_t;
+
+#undef __HIP_NODISCARD
+
+typedef enum hipDeviceAttribute_t {
+    hipDeviceAttributeCudaCompatibleBegin = 0,          ///< First value of the CUDA-compatible attribute range.
+
+    hipDeviceAttributeEccEnabled = hipDeviceAttributeCudaCompatibleBegin, ///< Whether ECC support is enabled.
+    hipDeviceAttributeAccessPolicyMaxWindowSize,        ///< Cuda only. The maximum size of the window policy in bytes.
+    hipDeviceAttributeAsyncEngineCount,                 ///< Cuda only. Asynchronous engines number.
+    hipDeviceAttributeCanMapHostMemory,                 ///< Whether host memory can be mapped into device address space
+    hipDeviceAttributeCanUseHostPointerForRegisteredMem,///< Cuda only. Device can access host registered memory
+                                                        ///< at the same virtual address as the CPU
+    hipDeviceAttributeClockRate,                        ///< Peak clock frequency in kilohertz.
+    hipDeviceAttributeComputeMode,                      ///< Compute mode that device is currently in.
+    hipDeviceAttributeComputePreemptionSupported,       ///< Cuda only. Device supports Compute Preemption.
+    hipDeviceAttributeConcurrentKernels,                ///< Device can possibly execute multiple kernels concurrently.
+    hipDeviceAttributeConcurrentManagedAccess,          ///< Device can coherently access managed memory concurrently with the CPU
+    hipDeviceAttributeCooperativeLaunch,                ///< Support cooperative launch
+    hipDeviceAttributeCooperativeMultiDeviceLaunch,     ///< Support cooperative launch on multiple devices
+    hipDeviceAttributeDeviceOverlap,                    ///< Cuda only. Device can concurrently copy memory and execute a kernel.
+                                                        ///< Deprecated. Use instead asyncEngineCount.
+    hipDeviceAttributeDirectManagedMemAccessFromHost,   ///< Host can directly access managed memory on
+                                                        ///< the device without migration
+    hipDeviceAttributeGlobalL1CacheSupported,           ///< Cuda only. Device supports caching globals in L1
+    hipDeviceAttributeHostNativeAtomicSupported,        ///< Cuda only. Link between the device and the host supports native atomic operations
+    hipDeviceAttributeIntegrated,                       ///< Device is integrated GPU
+    hipDeviceAttributeIsMultiGpuBoard,                  ///< Multiple GPU devices.
+    hipDeviceAttributeKernelExecTimeout,                ///< Run time limit for kernels executed on the device
+    hipDeviceAttributeL2CacheSize,                      ///< Size of L2 cache in bytes. 0 if the device doesn't have L2 cache.
+    hipDeviceAttributeLocalL1CacheSupported,            ///< caching locals in L1 is supported
+    hipDeviceAttributeLuid,                             ///< Cuda only. 8-byte locally unique identifier. Undefined on TCC and non-Windows platforms
+    hipDeviceAttributeLuidDeviceNodeMask,               ///< Cuda only. Luid device node mask. Undefined on TCC and non-Windows platforms
+    hipDeviceAttributeComputeCapabilityMajor,           ///< Major compute capability version number.
+    hipDeviceAttributeManagedMemory,                    ///< Device supports allocating managed memory on this system
+    hipDeviceAttributeMaxBlocksPerMultiProcessor,       ///< Cuda only. Max block size per multiprocessor. NOTE(review): name suggests max number of blocks - confirm.
+    hipDeviceAttributeMaxBlockDimX,                     ///< Max block size in width.
+    hipDeviceAttributeMaxBlockDimY,                     ///< Max block size in height.
+    hipDeviceAttributeMaxBlockDimZ,                     ///< Max block size in depth.
+    hipDeviceAttributeMaxGridDimX,                      ///< Max grid size in width.
+    hipDeviceAttributeMaxGridDimY,                      ///< Max grid size in height.
+    hipDeviceAttributeMaxGridDimZ,                      ///< Max grid size in depth.
+    hipDeviceAttributeMaxSurface1D,                     ///< Maximum size of 1D surface.
+    hipDeviceAttributeMaxSurface1DLayered,              ///< Cuda only. Maximum dimensions of 1D layered surface.
+    hipDeviceAttributeMaxSurface2D,                     ///< Maximum dimension (width, height) of 2D surface.
+    hipDeviceAttributeMaxSurface2DLayered,              ///< Cuda only. Maximum dimensions of 2D layered surface.
+    hipDeviceAttributeMaxSurface3D,                     ///< Maximum dimension (width, height, depth) of 3D surface.
+    hipDeviceAttributeMaxSurfaceCubemap,                ///< Cuda only. Maximum dimensions of Cubemap surface.
+    hipDeviceAttributeMaxSurfaceCubemapLayered,         ///< Cuda only. Maximum dimension of Cubemap layered surface.
+    hipDeviceAttributeMaxTexture1DWidth,                ///< Maximum size of 1D texture.
+    hipDeviceAttributeMaxTexture1DLayered,              ///< Cuda only. Maximum dimensions of 1D layered texture.
+    hipDeviceAttributeMaxTexture1DLinear,               ///< Maximum number of elements allocatable in a 1D linear texture.
+                                                        ///< Use cudaDeviceGetTexture1DLinearMaxWidth() instead on Cuda.
+    hipDeviceAttributeMaxTexture1DMipmap,               ///< Cuda only. Maximum size of 1D mipmapped texture.
+    hipDeviceAttributeMaxTexture2DWidth,                ///< Maximum dimension width of 2D texture.
+    hipDeviceAttributeMaxTexture2DHeight,               ///< Maximum dimension height of 2D texture.
+    hipDeviceAttributeMaxTexture2DGather,               ///< Cuda only. Maximum dimensions of 2D texture if gather operations are performed.
+    hipDeviceAttributeMaxTexture2DLayered,              ///< Cuda only. Maximum dimensions of 2D layered texture.
+    hipDeviceAttributeMaxTexture2DLinear,               ///< Cuda only. Maximum dimensions (width, height, pitch) of 2D textures bound to pitched memory.
+    hipDeviceAttributeMaxTexture2DMipmap,               ///< Cuda only. Maximum dimensions of 2D mipmapped texture.
+    hipDeviceAttributeMaxTexture3DWidth,                ///< Maximum dimension width of 3D texture.
+    hipDeviceAttributeMaxTexture3DHeight,               ///< Maximum dimension height of 3D texture.
+    hipDeviceAttributeMaxTexture3DDepth,                ///< Maximum dimension depth of 3D texture.
+    hipDeviceAttributeMaxTexture3DAlt,                  ///< Cuda only. Maximum dimensions of alternate 3D texture.
+    hipDeviceAttributeMaxTextureCubemap,                ///< Cuda only. Maximum dimensions of Cubemap texture
+    hipDeviceAttributeMaxTextureCubemapLayered,         ///< Cuda only. Maximum dimensions of Cubemap layered texture.
+    hipDeviceAttributeMaxThreadsDim,                    ///< Maximum dimension of a block
+    hipDeviceAttributeMaxThreadsPerBlock,               ///< Maximum number of threads per block.
+    hipDeviceAttributeMaxThreadsPerMultiProcessor,      ///< Maximum resident threads per multiprocessor.
+    hipDeviceAttributeMaxPitch,                         ///< Maximum pitch in bytes allowed by memory copies
+    hipDeviceAttributeMemoryBusWidth,                   ///< Global memory bus width in bits.
+    hipDeviceAttributeMemoryClockRate,                  ///< Peak memory clock frequency in kilohertz.
+    hipDeviceAttributeComputeCapabilityMinor,           ///< Minor compute capability version number.
+    hipDeviceAttributeMultiGpuBoardGroupID,             ///< Cuda only. Unique ID of device group on the same multi-GPU board
+    hipDeviceAttributeMultiprocessorCount,              ///< Number of multiprocessors on the device.
+    hipDeviceAttributeName,                             ///< Device name.
+    hipDeviceAttributePageableMemoryAccess,             ///< Device supports coherently accessing pageable memory
+                                                        ///< without calling hipHostRegister on it
+    hipDeviceAttributePageableMemoryAccessUsesHostPageTables, ///< Device accesses pageable memory via the host's page tables
+    hipDeviceAttributePciBusId,                         ///< PCI Bus ID.
+    hipDeviceAttributePciDeviceId,                      ///< PCI Device ID.
+    hipDeviceAttributePciDomainID,                      ///< PCI Domain ID.
+    hipDeviceAttributePersistingL2CacheMaxSize,         ///< Cuda11 only. Maximum l2 persisting lines capacity in bytes
+    hipDeviceAttributeMaxRegistersPerBlock,             ///< 32-bit registers available to a thread block. This number is shared
+                                                        ///< by all thread blocks simultaneously resident on a multiprocessor.
+    hipDeviceAttributeMaxRegistersPerMultiprocessor,    ///< 32-bit registers available per block. NOTE(review): text looks swapped with MaxRegistersPerBlock - confirm.
+    hipDeviceAttributeReservedSharedMemPerBlock,        ///< Cuda11 only. Shared memory reserved by CUDA driver per block.
+    hipDeviceAttributeMaxSharedMemoryPerBlock,          ///< Maximum shared memory available per block in bytes.
+    hipDeviceAttributeSharedMemPerBlockOptin,           ///< Cuda only. Maximum shared memory per block usable by special opt in.
+    hipDeviceAttributeSharedMemPerMultiprocessor,       ///< Cuda only. Shared memory available per multiprocessor.
+    hipDeviceAttributeSingleToDoublePrecisionPerfRatio, ///< Cuda only. Performance ratio of single precision to double precision.
+    hipDeviceAttributeStreamPrioritiesSupported,        ///< Cuda only. Whether to support stream priorities.
+    hipDeviceAttributeSurfaceAlignment,                 ///< Cuda only. Alignment requirement for surfaces
+    hipDeviceAttributeTccDriver,                        ///< Cuda only. Whether device is a Tesla device using TCC driver
+    hipDeviceAttributeTextureAlignment,                 ///< Alignment requirement for textures
+    hipDeviceAttributeTexturePitchAlignment,            ///< Pitch alignment requirement for 2D texture references bound to pitched memory.
+    hipDeviceAttributeTotalConstantMemory,              ///< Constant memory size in bytes.
+    hipDeviceAttributeTotalGlobalMem,                   ///< Global memory available on device.
+    hipDeviceAttributeUnifiedAddressing,                ///< Cuda only. A unified address space shared with the host.
+    hipDeviceAttributeUuid,                             ///< Cuda only. Unique ID in 16 bytes.
+    hipDeviceAttributeWarpSize,                         ///< Warp size in threads.
+
+    hipDeviceAttributeCudaCompatibleEnd = 9999,         ///< Last value of the CUDA-compatible attribute range.
+    hipDeviceAttributeAmdSpecificBegin = 10000,         ///< First value of the AMD-specific attribute range.
+
+    hipDeviceAttributeClockInstructionRate = hipDeviceAttributeAmdSpecificBegin,  ///< Frequency in khz of the timer used by the device-side "clock*"
+    hipDeviceAttributeArch,                                     ///< Device architecture
+    hipDeviceAttributeMaxSharedMemoryPerMultiprocessor,         ///< Maximum Shared Memory PerMultiprocessor.
+    hipDeviceAttributeGcnArch,                                  ///< Device gcn architecture
+    hipDeviceAttributeGcnArchName,                              ///< Device gcnArch name in 256 bytes
+    hipDeviceAttributeHdpMemFlushCntl,                          ///< Address of the HDP_MEM_COHERENCY_FLUSH_CNTL register
+    hipDeviceAttributeHdpRegFlushCntl,                          ///< Address of the HDP_REG_COHERENCY_FLUSH_CNTL register
+    hipDeviceAttributeCooperativeMultiDeviceUnmatchedFunc,      ///< Supports cooperative launch on multiple
+                                                                ///< devices with unmatched functions
+    hipDeviceAttributeCooperativeMultiDeviceUnmatchedGridDim,   ///< Supports cooperative launch on multiple
+                                                                ///< devices with unmatched grid dimensions
+    hipDeviceAttributeCooperativeMultiDeviceUnmatchedBlockDim,  ///< Supports cooperative launch on multiple
+                                                                ///< devices with unmatched block dimensions
+    hipDeviceAttributeCooperativeMultiDeviceUnmatchedSharedMem, ///< Supports cooperative launch on multiple
+                                                                ///< devices with unmatched shared memories
+    hipDeviceAttributeIsLargeBar,                               ///< Whether it is LargeBar
+    hipDeviceAttributeAsicRevision,                             ///< Revision of the GPU in this device
+    hipDeviceAttributeCanUseStreamWaitValue,                    ///< '1' if Device supports hipStreamWaitValue32() and
+                                                                ///< hipStreamWaitValue64() , '0' otherwise.
+
+    hipDeviceAttributeAmdSpecificEnd = 19999,                   ///< Last value of the AMD-specific attribute range.
+    hipDeviceAttributeVendorSpecificBegin = 20000,              ///< First value of the vendor-specific attribute range.
+    // Extended attributes for vendors
+} hipDeviceAttribute_t;
+
+//! Flags that can be used with hipStreamCreateWithFlags
+#define hipStreamDefault                                                                           \
+    0x00  ///< Default stream creation flags. These are used with hipStreamCreate().
+#define hipStreamNonBlocking 0x01  ///< Stream does not implicitly synchronize with null stream
+
+
+//! Flags that can be used with hipEventCreateWithFlags:
+#define hipEventDefault 0x0  ///< Default flags
+#define hipEventBlockingSync                                                                       \
+    0x1  ///< Waiting will yield CPU.  Power-friendly and usage-friendly but may increase latency.
+#define hipEventDisableTiming                                                                      \
+    0x2  ///< Disable event's capability to record timing information.  May improve performance.
+#define hipEventInterprocess 0x4  ///< Event can support IPC.  @warning - not supported in HIP.
+#define hipEventReleaseToDevice                                                                    \
+    0x40000000  ///< Use a device-scope release when recording this event.  This flag is useful to
+                ///< obtain more precise timings of commands between events.  The flag is a no-op on
+                ///< CUDA platforms.
+#define hipEventReleaseToSystem                                                                    \
+    0x80000000  ///< Use a system-scope release when recording this event.  This flag is
+                ///< useful to make non-coherent host memory visible to the host.  The flag is a
+                ///< no-op on CUDA platforms.
+
+
+#define hipDeviceScheduleAuto 0x0  ///< Automatically select between Spin and Yield
+#define hipDeviceScheduleSpin                                                                      \
+    0x1  ///< Dedicate a CPU core to spin-wait.  Provides lowest latency, but burns a CPU core and
+         ///< may consume more power.
+#define hipDeviceScheduleYield                                                                     \
+    0x2  ///< Yield the CPU to the operating system when waiting.  May increase latency, but lowers
+         ///< power and is friendlier to other threads in the system.
+#define hipDeviceScheduleBlockingSync 0x4
+#define hipDeviceScheduleMask 0x7  ///< Bitwise OR of the three schedule bits above (0x1 | 0x2 | 0x4).
+#define hipDeviceMapHost 0x8
+#define hipDeviceLmemResizeToMax 0x16  ///< NOTE(review): 0x16 == 0x10 | 0x4 | 0x2, so it overlaps the schedule bits - confirm against upstream HIP.
+
+typedef enum hipJitOption {  // JIT option identifiers; HIP_HIPMODULELOADDATAEX takes an array of these.
+    hipJitOptionMaxRegisters = 0,  // First option; the remaining enumerators count up from here.
+    hipJitOptionThreadsPerBlock,
+    hipJitOptionWallTime,
+    hipJitOptionInfoLogBuffer,
+    hipJitOptionInfoLogBufferSizeBytes,
+    hipJitOptionErrorLogBuffer,
+    hipJitOptionErrorLogBufferSizeBytes,
+    hipJitOptionOptimizationLevel,
+    hipJitOptionTargetFromContext,
+    hipJitOptionTarget,
+    hipJitOptionFallbackStrategy,
+    hipJitOptionGenerateDebugInfo,
+    hipJitOptionLogVerbose,
+    hipJitOptionGenerateLineInfo,
+    hipJitOptionCacheMode,
+    hipJitOptionSm3xOpt,
+    hipJitOptionFastCompile,
+    hipJitOptionNumOptions  // Sentinel, not a real option: evaluates to 17, the number of options above.
+} hipJitOption;
+
+// stop: hip_runtime_api.h
 
-    HIP_JIT_NUM_INPUT_TYPES
-} HIPjitInputType;
 
 #ifdef _WIN32
 #define HIPAPI __stdcall
@@ -1004,66 +353,44 @@ typedef enum HIPjitInputType_enum
 
 #define HIP_API_CALL HIPAPI
 
-typedef HIPresult (HIP_API_CALL *HIP_HIPCTXCREATE)              (HIPcontext *, unsigned int, HIPdevice);
-typedef HIPresult (HIP_API_CALL *HIP_HIPCTXDESTROY)             (HIPcontext);
-typedef HIPresult (HIP_API_CALL *HIP_HIPCTXGETCACHECONFIG)      (HIPfunc_cache *);
-typedef HIPresult (HIP_API_CALL *HIP_HIPCTXGETCURRENT)          (HIPcontext *);
-typedef HIPresult (HIP_API_CALL *HIP_HIPCTXGETSHAREDMEMCONFIG)  (HIPsharedconfig *);
-typedef HIPresult (HIP_API_CALL *HIP_HIPCTXPOPCURRENT)          (HIPcontext *);
-typedef HIPresult (HIP_API_CALL *HIP_HIPCTXPUSHCURRENT)         (HIPcontext);
-typedef HIPresult (HIP_API_CALL *HIP_HIPCTXSETCACHECONFIG)      (HIPfunc_cache);
-typedef HIPresult (HIP_API_CALL *HIP_HIPCTXSETCURRENT)          (HIPcontext);
-typedef HIPresult (HIP_API_CALL *HIP_HIPCTXSETSHAREDMEMCONFIG)  (HIPsharedconfig);
-typedef HIPresult (HIP_API_CALL *HIP_HIPCTXSYNCHRONIZE)         ();
-typedef HIPresult (HIP_API_CALL *HIP_HIPDEVICEGETATTRIBUTE)     (int *, HIPdevice_attribute, HIPdevice);
-typedef HIPresult (HIP_API_CALL *HIP_HIPDEVICEGETCOUNT)         (int *);
-typedef HIPresult (HIP_API_CALL *HIP_HIPDEVICEGET)              (HIPdevice *, int);
-typedef HIPresult (HIP_API_CALL *HIP_HIPDEVICEGETNAME)          (char *, int, HIPdevice);
-typedef HIPresult (HIP_API_CALL *HIP_HIPDEVICETOTALMEM)         (size_t *, HIPdevice);
-typedef HIPresult (HIP_API_CALL *HIP_HIPDRIVERGETVERSION)       (int *);
-typedef HIPresult (HIP_API_CALL *HIP_HIPEVENTCREATE)            (HIPevent *, unsigned int);
-typedef HIPresult (HIP_API_CALL *HIP_HIPEVENTDESTROY)           (HIPevent);
-typedef HIPresult (HIP_API_CALL *HIP_HIPEVENTELAPSEDTIME)       (float *, HIPevent, HIPevent);
-typedef HIPresult (HIP_API_CALL *HIP_HIPEVENTQUERY)             (HIPevent);
-typedef HIPresult (HIP_API_CALL *HIP_HIPEVENTRECORD)            (HIPevent, HIPstream);
-typedef HIPresult (HIP_API_CALL *HIP_HIPEVENTSYNCHRONIZE)       (HIPevent);
-typedef HIPresult (HIP_API_CALL *HIP_HIPFUNCGETATTRIBUTE)       (int *, HIPfunction_attribute, HIPfunction);
-typedef HIPresult (HIP_API_CALL *HIP_HIPFUNCSETATTRIBUTE)       (HIPfunction, HIPfunction_attribute, int);
-typedef HIPresult (HIP_API_CALL *HIP_HIPFUNCSETCACHECONFIG)     (HIPfunction, HIPfunc_cache);
-typedef HIPresult (HIP_API_CALL *HIP_HIPFUNCSETSHAREDMEMCONFIG) (HIPfunction, HIPsharedconfig);
-typedef HIPresult (HIP_API_CALL *HIP_HIPGETERRORNAME)           (HIPresult, const char **);
-typedef HIPresult (HIP_API_CALL *HIP_HIPGETERRORSTRING)         (HIPresult, const char **);
-typedef HIPresult (HIP_API_CALL *HIP_HIPINIT)                   (unsigned int);
-typedef HIPresult (HIP_API_CALL *HIP_HIPLAUNCHKERNEL)           (HIPfunction, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, HIPstream, void **, void **);
-typedef HIPresult (HIP_API_CALL *HIP_HIPMEMALLOC)               (HIPdeviceptr *, size_t);
-typedef HIPresult (HIP_API_CALL *HIP_HIPMEMALLOCHOST)           (void **, size_t);
-typedef HIPresult (HIP_API_CALL *HIP_HIPMEMCPYDTOD)             (HIPdeviceptr, HIPdeviceptr, size_t);
-typedef HIPresult (HIP_API_CALL *HIP_HIPMEMCPYDTODASYNC)        (HIPdeviceptr, HIPdeviceptr, size_t, HIPstream);
-typedef HIPresult (HIP_API_CALL *HIP_HIPMEMCPYDTOH)             (void *, HIPdeviceptr, size_t);
-typedef HIPresult (HIP_API_CALL *HIP_HIPMEMCPYDTOHASYNC)        (void *, HIPdeviceptr, size_t, HIPstream);
-typedef HIPresult (HIP_API_CALL *HIP_HIPMEMCPYHTOD)             (HIPdeviceptr, const void *, size_t);
-typedef HIPresult (HIP_API_CALL *HIP_HIPMEMCPYHTODASYNC)        (HIPdeviceptr, const void *, size_t, HIPstream);
-typedef HIPresult (HIP_API_CALL *HIP_HIPMEMFREE)                (HIPdeviceptr);
-typedef HIPresult (HIP_API_CALL *HIP_HIPMEMFREEHOST)            (void *);
-typedef HIPresult (HIP_API_CALL *HIP_HIPMEMGETINFO)             (size_t *, size_t *);
-typedef HIPresult (HIP_API_CALL *HIP_HIPMEMSETD32)              (HIPdeviceptr, unsigned int, size_t);
-typedef HIPresult (HIP_API_CALL *HIP_HIPMEMSETD8)               (HIPdeviceptr, unsigned char, size_t);
-typedef HIPresult (HIP_API_CALL *HIP_HIPMODULEGETFUNCTION)      (HIPfunction *, HIPmodule, const char *);
-typedef HIPresult (HIP_API_CALL *HIP_HIPMODULEGETGLOBAL)        (HIPdeviceptr *, size_t *, HIPmodule, const char *);
-typedef HIPresult (HIP_API_CALL *HIP_HIPMODULELOAD)             (HIPmodule *, const char *);
-typedef HIPresult (HIP_API_CALL *HIP_HIPMODULELOADDATA)         (HIPmodule *, const void *);
-typedef HIPresult (HIP_API_CALL *HIP_HIPMODULELOADDATAEX)       (HIPmodule *, const void *, unsigned int, HIPjit_option *, void **);
-typedef HIPresult (HIP_API_CALL *HIP_HIPMODULEUNLOAD)           (HIPmodule);
-typedef HIPresult (HIP_API_CALL *HIP_HIPPROFILERSTART)          ();
-typedef HIPresult (HIP_API_CALL *HIP_HIPPROFILERSTOP)           ();
-typedef HIPresult (HIP_API_CALL *HIP_HIPSTREAMCREATE)           (HIPstream *, unsigned int);
-typedef HIPresult (HIP_API_CALL *HIP_HIPSTREAMDESTROY)          (HIPstream);
-typedef HIPresult (HIP_API_CALL *HIP_HIPSTREAMSYNCHRONIZE)      (HIPstream);
-typedef HIPresult (HIP_API_CALL *HIP_HIPSTREAMWAITEVENT)        (HIPstream, HIPevent, unsigned int);
-typedef HIPresult (HIP_API_CALL *HIP_HIPLINKCREATE)             (unsigned int, HIPjit_option *, void **, HIPlinkState *);
-typedef HIPresult (HIP_API_CALL *HIP_HIPLINKADDDATA)            (HIPlinkState, HIPjitInputType, void *, size_t, const char *, unsigned int, HIPjit_option *, void **);
-typedef HIPresult (HIP_API_CALL *HIP_HIPLINKDESTROY)            (HIPlinkState);
-typedef HIPresult (HIP_API_CALL *HIP_HIPLINKCOMPLETE)           (HIPlinkState, void **, size_t *);
+typedef hipError_t (HIP_API_CALL *HIP_HIPCTXCREATE)              (hipCtx_t *, unsigned int, hipDevice_t); // Function-pointer types for the HIP entry points held in hc_hip_lib_t.
+typedef hipError_t (HIP_API_CALL *HIP_HIPCTXDESTROY)             (hipCtx_t);
+typedef hipError_t (HIP_API_CALL *HIP_HIPCTXPOPCURRENT)          (hipCtx_t *);
+typedef hipError_t (HIP_API_CALL *HIP_HIPCTXPUSHCURRENT)         (hipCtx_t);
+typedef hipError_t (HIP_API_CALL *HIP_HIPCTXSETCURRENT)          (hipCtx_t);
+typedef hipError_t (HIP_API_CALL *HIP_HIPCTXSYNCHRONIZE)         (void); // (void), not (): an empty list leaves the parameters unspecified in C.
+typedef hipError_t (HIP_API_CALL *HIP_HIPDEVICEGETATTRIBUTE)     (int *, hipDeviceAttribute_t, hipDevice_t);
+typedef hipError_t (HIP_API_CALL *HIP_HIPDEVICEGETCOUNT)         (int *);
+typedef hipError_t (HIP_API_CALL *HIP_HIPDEVICEGET)              (hipDevice_t *, int);
+typedef hipError_t (HIP_API_CALL *HIP_HIPDEVICEGETNAME)          (char *, int, hipDevice_t);
+typedef hipError_t (HIP_API_CALL *HIP_HIPDEVICETOTALMEM)         (size_t *, hipDevice_t);
+typedef hipError_t (HIP_API_CALL *HIP_HIPDRIVERGETVERSION)       (int *);
+typedef hipError_t (HIP_API_CALL *HIP_HIPEVENTCREATE)            (hipEvent_t *, unsigned int);
+typedef hipError_t (HIP_API_CALL *HIP_HIPEVENTDESTROY)           (hipEvent_t);
+typedef hipError_t (HIP_API_CALL *HIP_HIPEVENTELAPSEDTIME)       (float *, hipEvent_t, hipEvent_t);
+typedef hipError_t (HIP_API_CALL *HIP_HIPEVENTRECORD)            (hipEvent_t, hipStream_t);
+typedef hipError_t (HIP_API_CALL *HIP_HIPEVENTSYNCHRONIZE)       (hipEvent_t);
+typedef hipError_t (HIP_API_CALL *HIP_HIPFUNCGETATTRIBUTE)       (int *, hipFunction_attribute, hipFunction_t);
+typedef hipError_t (HIP_API_CALL *HIP_HIPGETERRORNAME)           (hipError_t, const char **); // NOTE(review): CUDA-driver-style signature; confirm it matches HIP's hipGetErrorName.
+typedef hipError_t (HIP_API_CALL *HIP_HIPGETERRORSTRING)         (hipError_t, const char **); // NOTE(review): CUDA-driver-style signature; confirm it matches HIP's hipGetErrorString.
+typedef hipError_t (HIP_API_CALL *HIP_HIPINIT)                   (unsigned int);
+typedef hipError_t (HIP_API_CALL *HIP_HIPLAUNCHKERNEL)           (hipFunction_t, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, hipStream_t, void **, void **);
+typedef hipError_t (HIP_API_CALL *HIP_HIPMEMALLOC)               (hipDeviceptr_t *, size_t);
+typedef hipError_t (HIP_API_CALL *HIP_HIPMEMFREE)                (hipDeviceptr_t);
+typedef hipError_t (HIP_API_CALL *HIP_HIPMEMGETINFO)             (size_t *, size_t *);
+typedef hipError_t (HIP_API_CALL *HIP_HIPMEMCPYDTOD)             (hipDeviceptr_t, hipDeviceptr_t, size_t);
+typedef hipError_t (HIP_API_CALL *HIP_HIPMEMCPYDTODASYNC)        (hipDeviceptr_t, hipDeviceptr_t, size_t, hipStream_t);
+typedef hipError_t (HIP_API_CALL *HIP_HIPMEMCPYDTOH)             (void *, hipDeviceptr_t, size_t);
+typedef hipError_t (HIP_API_CALL *HIP_HIPMEMCPYDTOHASYNC)        (void *, hipDeviceptr_t, size_t, hipStream_t);
+typedef hipError_t (HIP_API_CALL *HIP_HIPMEMCPYHTOD)             (hipDeviceptr_t, const void *, size_t);
+typedef hipError_t (HIP_API_CALL *HIP_HIPMEMCPYHTODASYNC)        (hipDeviceptr_t, const void *, size_t, hipStream_t);
+typedef hipError_t (HIP_API_CALL *HIP_HIPMODULEGETFUNCTION)      (hipFunction_t *, hipModule_t, const char *);
+typedef hipError_t (HIP_API_CALL *HIP_HIPMODULEGETGLOBAL)        (hipDeviceptr_t *, size_t *, hipModule_t, const char *);
+typedef hipError_t (HIP_API_CALL *HIP_HIPMODULELOADDATAEX)       (hipModule_t *, const void *, unsigned int, hipJitOption *, void **);
+typedef hipError_t (HIP_API_CALL *HIP_HIPMODULEUNLOAD)           (hipModule_t);
+typedef hipError_t (HIP_API_CALL *HIP_HIPSTREAMCREATE)           (hipStream_t *, unsigned int);
+typedef hipError_t (HIP_API_CALL *HIP_HIPSTREAMDESTROY)          (hipStream_t);
+typedef hipError_t (HIP_API_CALL *HIP_HIPSTREAMSYNCHRONIZE)      (hipStream_t);
 
 typedef struct hc_hip_lib
 {
@@ -1071,14 +398,9 @@ typedef struct hc_hip_lib
 
   HIP_HIPCTXCREATE              hipCtxCreate;
   HIP_HIPCTXDESTROY             hipCtxDestroy;
-  HIP_HIPCTXGETCACHECONFIG      hipCtxGetCacheConfig;
-  HIP_HIPCTXGETCURRENT          hipCtxGetCurrent;
-  HIP_HIPCTXGETSHAREDMEMCONFIG  hipCtxGetSharedMemConfig;
   HIP_HIPCTXPOPCURRENT          hipCtxPopCurrent;
   HIP_HIPCTXPUSHCURRENT         hipCtxPushCurrent;
-  HIP_HIPCTXSETCACHECONFIG      hipCtxSetCacheConfig;
   HIP_HIPCTXSETCURRENT          hipCtxSetCurrent;
-  HIP_HIPCTXSETSHAREDMEMCONFIG  hipCtxSetSharedMemConfig;
   HIP_HIPCTXSYNCHRONIZE         hipCtxSynchronize;
   HIP_HIPDEVICEGETATTRIBUTE     hipDeviceGetAttribute;
   HIP_HIPDEVICEGETCOUNT         hipDeviceGetCount;
@@ -1089,46 +411,29 @@ typedef struct hc_hip_lib
   HIP_HIPEVENTCREATE            hipEventCreate;
   HIP_HIPEVENTDESTROY           hipEventDestroy;
   HIP_HIPEVENTELAPSEDTIME       hipEventElapsedTime;
-  HIP_HIPEVENTQUERY             hipEventQuery;
   HIP_HIPEVENTRECORD            hipEventRecord;
   HIP_HIPEVENTSYNCHRONIZE       hipEventSynchronize;
   HIP_HIPFUNCGETATTRIBUTE       hipFuncGetAttribute;
-  HIP_HIPFUNCSETATTRIBUTE       hipFuncSetAttribute;
-  HIP_HIPFUNCSETCACHECONFIG     hipFuncSetCacheConfig;
-  HIP_HIPFUNCSETSHAREDMEMCONFIG hipFuncSetSharedMemConfig;
   HIP_HIPGETERRORNAME           hipGetErrorName;
   HIP_HIPGETERRORSTRING         hipGetErrorString;
   HIP_HIPINIT                   hipInit;
   HIP_HIPLAUNCHKERNEL           hipLaunchKernel;
   HIP_HIPMEMALLOC               hipMemAlloc;
-  HIP_HIPMEMALLOCHOST           hipMemAllocHost;
+  HIP_HIPMEMFREE                hipMemFree;
+  HIP_HIPMEMGETINFO             hipMemGetInfo;
   HIP_HIPMEMCPYDTOD             hipMemcpyDtoD;
   HIP_HIPMEMCPYDTODASYNC        hipMemcpyDtoDAsync;
   HIP_HIPMEMCPYDTOH             hipMemcpyDtoH;
   HIP_HIPMEMCPYDTOHASYNC        hipMemcpyDtoHAsync;
   HIP_HIPMEMCPYHTOD             hipMemcpyHtoD;
   HIP_HIPMEMCPYHTODASYNC        hipMemcpyHtoDAsync;
-  HIP_HIPMEMFREE                hipMemFree;
-  HIP_HIPMEMFREEHOST            hipMemFreeHost;
-  HIP_HIPMEMGETINFO             hipMemGetInfo;
-  HIP_HIPMEMSETD32              hipMemsetD32;
-  HIP_HIPMEMSETD8               hipMemsetD8;
   HIP_HIPMODULEGETFUNCTION      hipModuleGetFunction;
   HIP_HIPMODULEGETGLOBAL        hipModuleGetGlobal;
-  HIP_HIPMODULELOAD             hipModuleLoad;
-  HIP_HIPMODULELOADDATA         hipModuleLoadData;
   HIP_HIPMODULELOADDATAEX       hipModuleLoadDataEx;
   HIP_HIPMODULEUNLOAD           hipModuleUnload;
-  HIP_HIPPROFILERSTART          hipProfilerStart;
-  HIP_HIPPROFILERSTOP           hipProfilerStop;
   HIP_HIPSTREAMCREATE           hipStreamCreate;
   HIP_HIPSTREAMDESTROY          hipStreamDestroy;
   HIP_HIPSTREAMSYNCHRONIZE      hipStreamSynchronize;
-  HIP_HIPSTREAMWAITEVENT        hipStreamWaitEvent;
-  HIP_HIPLINKCREATE             hipLinkCreate;
-  HIP_HIPLINKADDDATA            hipLinkAddData;
-  HIP_HIPLINKDESTROY            hipLinkDestroy;
-  HIP_HIPLINKCOMPLETE           hipLinkComplete;
 
 } hc_hip_lib_t;
 
diff --git a/include/ext_hiprtc.h b/include/ext_hiprtc.h
index cd1be6c4b..347239c38 100644
--- a/include/ext_hiprtc.h
+++ b/include/ext_hiprtc.h
@@ -6,41 +6,26 @@
 #ifndef _EXT_HIPRTC_H
 #define _EXT_HIPRTC_H
 
-/**
- * from hip_runtime.h (/opt/rocm/hip/include/hip/amd_detail/hiprtc.h)
- */
+// start: amd_detail/hiprtc.h
 
-/**
- * \ingroup error
- * \brief   The enumerated type hiprtcResult defines API call result codes.
- *          HIPRTC API functions return hiprtcResult to indicate the call
- *          result.
- */
-typedef enum {
-  HIPRTC_SUCCESS = 0,
-  HIPRTC_ERROR_OUT_OF_MEMORY = 1,
-  HIPRTC_ERROR_PROGRAM_CREATION_FAILURE = 2,
-  HIPRTC_ERROR_INVALID_INPUT = 3,
-  HIPRTC_ERROR_INVALID_PROGRAM = 4,
-  HIPRTC_ERROR_INVALID_OPTION = 5,
-  HIPRTC_ERROR_COMPILATION = 6,
-  HIPRTC_ERROR_BUILTIN_OPERATION_FAILURE = 7,
-  HIPRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION = 8,
-  HIPRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION = 9,
-  HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID = 10,
-  HIPRTC_ERROR_INTERNAL_ERROR = 11
+typedef enum hiprtcResult {
+    HIPRTC_SUCCESS = 0,
+    HIPRTC_ERROR_OUT_OF_MEMORY = 1,
+    HIPRTC_ERROR_PROGRAM_CREATION_FAILURE = 2,
+    HIPRTC_ERROR_INVALID_INPUT = 3,
+    HIPRTC_ERROR_INVALID_PROGRAM = 4,
+    HIPRTC_ERROR_INVALID_OPTION = 5,
+    HIPRTC_ERROR_COMPILATION = 6,
+    HIPRTC_ERROR_BUILTIN_OPERATION_FAILURE = 7,
+    HIPRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION = 8,
+    HIPRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION = 9,
+    HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID = 10,
+    HIPRTC_ERROR_INTERNAL_ERROR = 11
 } hiprtcResult;
 
-/**
- * \ingroup compilation
- * \brief   hiprtcProgram is the unit of compilation, and an opaque handle for
- *          a program.
- *
- * To compile a CUDA program string, an instance of hiprtcProgram must be
- * created first with ::hiprtcCreateProgram, then compiled with
- * ::hiprtcCompileProgram.
- */
-typedef struct _hiprtcProgram *hiprtcProgram;
+typedef struct _hiprtcProgram* hiprtcProgram;
+
+// stop: amd_detail/hiprtc.h
 
 #ifdef _WIN32
 #define HIPRTCAPI __stdcall
@@ -54,13 +39,12 @@ typedef hiprtcResult  (HIPRTC_API_CALL *HIPRTC_HIPRTCADDNAMEEXPRESSION)  (hiprtc
 typedef hiprtcResult  (HIPRTC_API_CALL *HIPRTC_HIPRTCCOMPILEPROGRAM)     (hiprtcProgram, int, const char * const *);
 typedef hiprtcResult  (HIPRTC_API_CALL *HIPRTC_HIPRTCCREATEPROGRAM)      (hiprtcProgram *, const char *, const char *, int, const char * const *, const char * const *);
 typedef hiprtcResult  (HIPRTC_API_CALL *HIPRTC_HIPRTCDESTROYPROGRAM)     (hiprtcProgram *);
+typedef hiprtcResult  (HIPRTC_API_CALL *HIPRTC_HIPRTCGETCODE)            (hiprtcProgram, char *);
+typedef hiprtcResult  (HIPRTC_API_CALL *HIPRTC_HIPRTCGETCODESIZE)        (hiprtcProgram, size_t *);
 typedef hiprtcResult  (HIPRTC_API_CALL *HIPRTC_HIPRTCGETLOWEREDNAME)     (hiprtcProgram, const char * const, const char **);
-typedef hiprtcResult  (HIPRTC_API_CALL *HIPRTC_HIPRTCGETPTX)             (hiprtcProgram, char *);
-typedef hiprtcResult  (HIPRTC_API_CALL *HIPRTC_HIPRTCGETPTXSIZE)         (hiprtcProgram, size_t *);
 typedef hiprtcResult  (HIPRTC_API_CALL *HIPRTC_HIPRTCGETPROGRAMLOG)      (hiprtcProgram, char *);
 typedef hiprtcResult  (HIPRTC_API_CALL *HIPRTC_HIPRTCGETPROGRAMLOGSIZE)  (hiprtcProgram, size_t *);
-typedef const char * (HIPRTC_API_CALL *HIPRTC_HIPRTCGETERRORSTRING)      (hiprtcResult);
-typedef hiprtcResult  (HIPRTC_API_CALL *HIPRTC_HIPRTCVERSION)            (int *, int *);
+typedef const char *  (HIPRTC_API_CALL *HIPRTC_HIPRTCGETERRORSTRING)     (hiprtcResult);
 
 typedef struct hc_hiprtc_lib
 {
@@ -70,13 +54,12 @@ typedef struct hc_hiprtc_lib
   HIPRTC_HIPRTCCOMPILEPROGRAM     hiprtcCompileProgram;
   HIPRTC_HIPRTCCREATEPROGRAM      hiprtcCreateProgram;
   HIPRTC_HIPRTCDESTROYPROGRAM     hiprtcDestroyProgram;
+  HIPRTC_HIPRTCGETCODE            hiprtcGetCode;
+  HIPRTC_HIPRTCGETCODESIZE        hiprtcGetCodeSize;
   HIPRTC_HIPRTCGETLOWEREDNAME     hiprtcGetLoweredName;
-  HIPRTC_HIPRTCGETPTX             hiprtcGetCode;
-  HIPRTC_HIPRTCGETPTXSIZE         hiprtcGetCodeSize;
   HIPRTC_HIPRTCGETPROGRAMLOG      hiprtcGetProgramLog;
   HIPRTC_HIPRTCGETPROGRAMLOGSIZE  hiprtcGetProgramLogSize;
   HIPRTC_HIPRTCGETERRORSTRING     hiprtcGetErrorString;
-  HIPRTC_HIPRTCVERSION            hiprtcVersion;
 
 } hc_hiprtc_lib_t;
 
diff --git a/include/types.h b/include/types.h
index ed22a95ee..efc56439b 100644
--- a/include/types.h
+++ b/include/types.h
@@ -1502,80 +1502,80 @@ typedef struct hc_device_param
 
   int               hip_warp_size;
 
-  HIPdevice         hip_device;
-  HIPcontext        hip_context;
-  HIPstream         hip_stream;
-
-  HIPevent          hip_event1;
-  HIPevent          hip_event2;
-
-  HIPmodule         hip_module;
-  HIPmodule         hip_module_shared;
-  HIPmodule         hip_module_mp;
-  HIPmodule         hip_module_amp;
-
-  HIPfunction       hip_function1;
-  HIPfunction       hip_function12;
-  HIPfunction       hip_function2p;
-  HIPfunction       hip_function2;
-  HIPfunction       hip_function2e;
-  HIPfunction       hip_function23;
-  HIPfunction       hip_function3;
-  HIPfunction       hip_function4;
-  HIPfunction       hip_function_init2;
-  HIPfunction       hip_function_loop2p;
-  HIPfunction       hip_function_loop2;
-  HIPfunction       hip_function_mp;
-  HIPfunction       hip_function_mp_l;
-  HIPfunction       hip_function_mp_r;
-  HIPfunction       hip_function_amp;
-  HIPfunction       hip_function_tm;
-  HIPfunction       hip_function_memset;
-  HIPfunction       hip_function_bzero;
-  HIPfunction       hip_function_atinit;
-  HIPfunction       hip_function_utf8toutf16le;
-  HIPfunction       hip_function_decompress;
-  HIPfunction       hip_function_aux1;
-  HIPfunction       hip_function_aux2;
-  HIPfunction       hip_function_aux3;
-  HIPfunction       hip_function_aux4;
-
-  HIPdeviceptr      hip_d_pws_buf;
-  HIPdeviceptr      hip_d_pws_amp_buf;
-  HIPdeviceptr      hip_d_pws_comp_buf;
-  HIPdeviceptr      hip_d_pws_idx;
-  HIPdeviceptr      hip_d_rules;
-  HIPdeviceptr      hip_d_rules_c;
-  HIPdeviceptr      hip_d_combs;
-  HIPdeviceptr      hip_d_combs_c;
-  HIPdeviceptr      hip_d_bfs;
-  HIPdeviceptr      hip_d_bfs_c;
-  HIPdeviceptr      hip_d_tm_c;
-  HIPdeviceptr      hip_d_bitmap_s1_a;
-  HIPdeviceptr      hip_d_bitmap_s1_b;
-  HIPdeviceptr      hip_d_bitmap_s1_c;
-  HIPdeviceptr      hip_d_bitmap_s1_d;
-  HIPdeviceptr      hip_d_bitmap_s2_a;
-  HIPdeviceptr      hip_d_bitmap_s2_b;
-  HIPdeviceptr      hip_d_bitmap_s2_c;
-  HIPdeviceptr      hip_d_bitmap_s2_d;
-  HIPdeviceptr      hip_d_plain_bufs;
-  HIPdeviceptr      hip_d_digests_buf;
-  HIPdeviceptr      hip_d_digests_shown;
-  HIPdeviceptr      hip_d_salt_bufs;
-  HIPdeviceptr      hip_d_esalt_bufs;
-  HIPdeviceptr      hip_d_tmps;
-  HIPdeviceptr      hip_d_hooks;
-  HIPdeviceptr      hip_d_result;
-  HIPdeviceptr      hip_d_extra0_buf;
-  HIPdeviceptr      hip_d_extra1_buf;
-  HIPdeviceptr      hip_d_extra2_buf;
-  HIPdeviceptr      hip_d_extra3_buf;
-  HIPdeviceptr      hip_d_root_css_buf;
-  HIPdeviceptr      hip_d_markov_css_buf;
-  HIPdeviceptr      hip_d_st_digests_buf;
-  HIPdeviceptr      hip_d_st_salts_buf;
-  HIPdeviceptr      hip_d_st_esalts_buf;
+  hipDevice_t       hip_device;
+  hipCtx_t          hip_context;
+  hipStream_t       hip_stream;
+
+  hipEvent_t        hip_event1;
+  hipEvent_t        hip_event2;
+
+  hipModule_t       hip_module;
+  hipModule_t       hip_module_shared;
+  hipModule_t       hip_module_mp;
+  hipModule_t       hip_module_amp;
+
+  hipFunction_t     hip_function1;
+  hipFunction_t     hip_function12;
+  hipFunction_t     hip_function2p;
+  hipFunction_t     hip_function2;
+  hipFunction_t     hip_function2e;
+  hipFunction_t     hip_function23;
+  hipFunction_t     hip_function3;
+  hipFunction_t     hip_function4;
+  hipFunction_t     hip_function_init2;
+  hipFunction_t     hip_function_loop2p;
+  hipFunction_t     hip_function_loop2;
+  hipFunction_t     hip_function_mp;
+  hipFunction_t     hip_function_mp_l;
+  hipFunction_t     hip_function_mp_r;
+  hipFunction_t     hip_function_amp;
+  hipFunction_t     hip_function_tm;
+  hipFunction_t     hip_function_memset;
+  hipFunction_t     hip_function_bzero;
+  hipFunction_t     hip_function_atinit;
+  hipFunction_t     hip_function_utf8toutf16le;
+  hipFunction_t     hip_function_decompress;
+  hipFunction_t     hip_function_aux1;
+  hipFunction_t     hip_function_aux2;
+  hipFunction_t     hip_function_aux3;
+  hipFunction_t     hip_function_aux4;
+
+  hipDeviceptr_t    hip_d_pws_buf;
+  hipDeviceptr_t    hip_d_pws_amp_buf;
+  hipDeviceptr_t    hip_d_pws_comp_buf;
+  hipDeviceptr_t    hip_d_pws_idx;
+  hipDeviceptr_t    hip_d_rules;
+  hipDeviceptr_t    hip_d_rules_c;
+  hipDeviceptr_t    hip_d_combs;
+  hipDeviceptr_t    hip_d_combs_c;
+  hipDeviceptr_t    hip_d_bfs;
+  hipDeviceptr_t    hip_d_bfs_c;
+  hipDeviceptr_t    hip_d_tm_c;
+  hipDeviceptr_t    hip_d_bitmap_s1_a;
+  hipDeviceptr_t    hip_d_bitmap_s1_b;
+  hipDeviceptr_t    hip_d_bitmap_s1_c;
+  hipDeviceptr_t    hip_d_bitmap_s1_d;
+  hipDeviceptr_t    hip_d_bitmap_s2_a;
+  hipDeviceptr_t    hip_d_bitmap_s2_b;
+  hipDeviceptr_t    hip_d_bitmap_s2_c;
+  hipDeviceptr_t    hip_d_bitmap_s2_d;
+  hipDeviceptr_t    hip_d_plain_bufs;
+  hipDeviceptr_t    hip_d_digests_buf;
+  hipDeviceptr_t    hip_d_digests_shown;
+  hipDeviceptr_t    hip_d_salt_bufs;
+  hipDeviceptr_t    hip_d_esalt_bufs;
+  hipDeviceptr_t    hip_d_tmps;
+  hipDeviceptr_t    hip_d_hooks;
+  hipDeviceptr_t    hip_d_result;
+  hipDeviceptr_t    hip_d_extra0_buf;
+  hipDeviceptr_t    hip_d_extra1_buf;
+  hipDeviceptr_t    hip_d_extra2_buf;
+  hipDeviceptr_t    hip_d_extra3_buf;
+  hipDeviceptr_t    hip_d_root_css_buf;
+  hipDeviceptr_t    hip_d_markov_css_buf;
+  hipDeviceptr_t    hip_d_st_digests_buf;
+  hipDeviceptr_t    hip_d_st_salts_buf;
+  hipDeviceptr_t    hip_d_st_esalts_buf;
 
   // API: opencl
 
@@ -1726,8 +1726,7 @@ typedef struct backend_ctx
   int                 rc_hip_init;
   int                 rc_hiprtc_init;
 
-  int                 hiprtc_driver_version;
-  int                 hip_driver_version;
+  int                 hip_driverVersion;
 
   // opencl
 
diff --git a/src/backend.c b/src/backend.c
index 31d6bade1..2982b7820 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -980,11 +980,11 @@ int hiprtc_init (hashcat_ctx_t *hashcat_ctx)
   memset (hiprtc, 0, sizeof (HIPRTC_PTR));
 
   #if   defined (_WIN)
-  hiprtc->lib = hc_dlopen ("fixme.dll");
+  hiprtc->lib = hc_dlopen ("amdhip64.dll");
   #elif defined (__APPLE__)
   hiprtc->lib = hc_dlopen ("fixme.dylib");
   #elif defined (__CYGWIN__)
-  hiprtc->lib = hc_dlopen ("fixme.dll");
+  hiprtc->lib = hc_dlopen ("amdhip64.dll");
   #else
   hiprtc->lib = hc_dlopen ("libamdhip64.so");
 
@@ -998,12 +998,11 @@ int hiprtc_init (hashcat_ctx_t *hashcat_ctx)
   HC_LOAD_FUNC (hiprtc, hiprtcCreateProgram,      HIPRTC_HIPRTCCREATEPROGRAM,     HIPRTC, 1);
   HC_LOAD_FUNC (hiprtc, hiprtcDestroyProgram,     HIPRTC_HIPRTCDESTROYPROGRAM,    HIPRTC, 1);
   HC_LOAD_FUNC (hiprtc, hiprtcGetLoweredName,     HIPRTC_HIPRTCGETLOWEREDNAME,    HIPRTC, 1);
-  HC_LOAD_FUNC (hiprtc, hiprtcGetCode,            HIPRTC_HIPRTCGETPTX,            HIPRTC, 1);
-  HC_LOAD_FUNC (hiprtc, hiprtcGetCodeSize,        HIPRTC_HIPRTCGETPTXSIZE,        HIPRTC, 1);
+  HC_LOAD_FUNC (hiprtc, hiprtcGetCode,            HIPRTC_HIPRTCGETCODE,           HIPRTC, 1);
+  HC_LOAD_FUNC (hiprtc, hiprtcGetCodeSize,        HIPRTC_HIPRTCGETCODESIZE,       HIPRTC, 1);
   HC_LOAD_FUNC (hiprtc, hiprtcGetProgramLog,      HIPRTC_HIPRTCGETPROGRAMLOG,     HIPRTC, 1);
   HC_LOAD_FUNC (hiprtc, hiprtcGetProgramLogSize,  HIPRTC_HIPRTCGETPROGRAMLOGSIZE, HIPRTC, 1);
   HC_LOAD_FUNC (hiprtc, hiprtcGetErrorString,     HIPRTC_HIPRTCGETERRORSTRING,    HIPRTC, 1);
-  HC_LOAD_FUNC (hiprtc, hiprtcVersion,            HIPRTC_HIPRTCVERSION,           HIPRTC, 1);
 
   return 0;
 }
@@ -1069,11 +1068,6 @@ int hc_hiprtcCompileProgram (hashcat_ctx_t *hashcat_ctx, hiprtcProgram prog, int
 
   HIPRTC_PTR *hiprtc = (HIPRTC_PTR *) backend_ctx->hiprtc;
 
-  #if 0
-  for(int i =0; i< numOptions; i++)
-    printf("Option_%d = %s\n", i, options[i]);
-  #endif
-
   const hiprtcResult HIPRTC_err = hiprtc->hiprtcCompileProgram (prog, numOptions, options);
 
   if (HIPRTC_err != HIPRTC_SUCCESS)
@@ -1122,13 +1116,13 @@ int hc_hiprtcGetProgramLog (hashcat_ctx_t *hashcat_ctx, hiprtcProgram prog, char
   return 0;
 }
 
-int hc_hiprtcGetCodeSize (hashcat_ctx_t *hashcat_ctx, hiprtcProgram prog, size_t *ptxSizeRet)
+int hc_hiprtcGetCodeSize (hashcat_ctx_t *hashcat_ctx, hiprtcProgram prog, size_t *codeSizeRet)
 {
   backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   HIPRTC_PTR *hiprtc = (HIPRTC_PTR *) backend_ctx->hiprtc;
 
-  const hiprtcResult HIPRTC_err = hiprtc->hiprtcGetCodeSize (prog, ptxSizeRet);
+  const hiprtcResult HIPRTC_err = hiprtc->hiprtcGetCodeSize (prog, codeSizeRet);
 
   if (HIPRTC_err != HIPRTC_SUCCESS)
   {
@@ -1140,13 +1134,13 @@ int hc_hiprtcGetCodeSize (hashcat_ctx_t *hashcat_ctx, hiprtcProgram prog, size_t
   return 0;
 }
 
-int hc_hiprtcGetCode (hashcat_ctx_t *hashcat_ctx, hiprtcProgram prog, char *ptx)
+int hc_hiprtcGetCode (hashcat_ctx_t *hashcat_ctx, hiprtcProgram prog, char *code)
 {
   backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   HIPRTC_PTR *hiprtc = (HIPRTC_PTR *) backend_ctx->hiprtc;
 
-  const hiprtcResult HIPRTC_err = hiprtc->hiprtcGetCode (prog, ptx);
+  const hiprtcResult HIPRTC_err = hiprtc->hiprtcGetCode (prog, code);
 
   if (HIPRTC_err != HIPRTC_SUCCESS)
   {
@@ -1158,24 +1152,6 @@ int hc_hiprtcGetCode (hashcat_ctx_t *hashcat_ctx, hiprtcProgram prog, char *ptx)
   return 0;
 }
 
-int hc_hiprtcVersion (hashcat_ctx_t *hashcat_ctx, int *major, int *minor)
-{
-  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
-
-  HIPRTC_PTR *hiprtc = (HIPRTC_PTR *) backend_ctx->hiprtc;
-
-  const hiprtcResult HIPRTC_err = hiprtc->hiprtcVersion (major, minor);
-
-  if (HIPRTC_err != HIPRTC_SUCCESS)
-  {
-    event_log_error (hashcat_ctx, "hiprtcVersion(): %s", hiprtc->hiprtcGetErrorString (HIPRTC_err));
-
-    return -1;
-  }
-
-  return 0;
-}
-
 // CUDA
 
 int cuda_init (hashcat_ctx_t *hashcat_ctx)
@@ -2478,11 +2454,11 @@ int hip_init (hashcat_ctx_t *hashcat_ctx)
   memset (hip, 0, sizeof (HIP_PTR));
 
   #if   defined (_WIN)
-  hip->lib = hc_dlopen ("fixme.dll");
+  hip->lib = hc_dlopen ("amdhip64.dll");
   #elif defined (__APPLE__)
   hip->lib = hc_dlopen ("fixme.dylib");
   #elif defined (__CYGWIN__)
-  hip->lib = hc_dlopen ("fixme.dll");
+  hip->lib = hc_dlopen ("amdhip64.dll");
   #else
   hip->lib = hc_dlopen ("libamdhip64.so");
 
@@ -2516,67 +2492,42 @@ int hip_init (hashcat_ctx_t *hashcat_ctx)
 
   HC_LOAD_FUNC_HIP (hip, hipCtxCreate,              hipCtxCreate,               HIP_HIPCTXCREATE,               HIP, 1);
   HC_LOAD_FUNC_HIP (hip, hipCtxDestroy,             hipCtxDestroy,              HIP_HIPCTXDESTROY,              HIP, 1);
-  //HC_LOAD_FUNC_HIP (hip, hipCtxGetCacheConfig,      hipCtxGetCacheConfig,       HIP_HIPCTXGETCACHECONFIG,       HIP, 1);
-  //HC_LOAD_FUNC_HIP (hip, hipCtxGetCurrent,          hipCtxGetCurrent,           HIP_HIPCTXGETCURRENT,           HIP, 1);
-  //HC_LOAD_FUNC_HIP (hip, hipCtxGetSharedMemConfig,  hipCtxGetSharedMemConfig,   HIP_HIPCTXGETSHAREDMEMCONFIG,   HIP, 1);
   HC_LOAD_FUNC_HIP (hip, hipCtxPopCurrent,          hipCtxPopCurrent,           HIP_HIPCTXPOPCURRENT,           HIP, 1);
   HC_LOAD_FUNC_HIP (hip, hipCtxPushCurrent,         hipCtxPushCurrent,          HIP_HIPCTXPUSHCURRENT,          HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipCtxSetCacheConfig,      hipCtxSetCacheConfig,       HIP_HIPCTXSETCACHECONFIG,       HIP, 1);
   HC_LOAD_FUNC_HIP (hip, hipCtxSetCurrent,          hipCtxSetCurrent,           HIP_HIPCTXSETCURRENT,           HIP, 1);
-  //HC_LOAD_FUNC_HIP (hip, hipCtxSetSharedMemConfig,  hipCtxSetSharedMemConfig,   HIP_HIPCTXSETSHAREDMEMCONFIG,   HIP, 1);
   HC_LOAD_FUNC_HIP (hip, hipCtxSynchronize,         hipCtxSynchronize,          HIP_HIPCTXSYNCHRONIZE,          HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipDeviceGet,              hipDeviceGet,               HIP_HIPDEVICEGET,               HIP, 1);
   HC_LOAD_FUNC_HIP (hip, hipDeviceGetAttribute,     hipDeviceGetAttribute,      HIP_HIPDEVICEGETATTRIBUTE,      HIP, 1);
   HC_LOAD_FUNC_HIP (hip, hipDeviceGetCount,         hipGetDeviceCount,          HIP_HIPDEVICEGETCOUNT,          HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipDeviceGet,              hipDeviceGet,               HIP_HIPDEVICEGET,               HIP, 1);
   HC_LOAD_FUNC_HIP (hip, hipDeviceGetName,          hipDeviceGetName,           HIP_HIPDEVICEGETNAME,           HIP, 1);
   HC_LOAD_FUNC_HIP (hip, hipDeviceTotalMem,         hipDeviceTotalMem,          HIP_HIPDEVICETOTALMEM,          HIP, 1);
   HC_LOAD_FUNC_HIP (hip, hipDriverGetVersion,       hipDriverGetVersion,        HIP_HIPDRIVERGETVERSION,        HIP, 1);
   HC_LOAD_FUNC_HIP (hip, hipEventCreate,            hipEventCreateWithFlags,    HIP_HIPEVENTCREATE,             HIP, 1);
   HC_LOAD_FUNC_HIP (hip, hipEventDestroy,           hipEventDestroy,            HIP_HIPEVENTDESTROY,            HIP, 1);
   HC_LOAD_FUNC_HIP (hip, hipEventElapsedTime,       hipEventElapsedTime,        HIP_HIPEVENTELAPSEDTIME,        HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipEventQuery,             hipEventQuery,              HIP_HIPEVENTQUERY,              HIP, 1);
   HC_LOAD_FUNC_HIP (hip, hipEventRecord,            hipEventRecord,             HIP_HIPEVENTRECORD,             HIP, 1);
   HC_LOAD_FUNC_HIP (hip, hipEventSynchronize,       hipEventSynchronize,        HIP_HIPEVENTSYNCHRONIZE,        HIP, 1);
   HC_LOAD_FUNC_HIP (hip, hipFuncGetAttribute,       hipFuncGetAttribute,        HIP_HIPFUNCGETATTRIBUTE,        HIP, 1);
-  //HC_LOAD_FUNC_HIP (hip, hipFuncSetAttribute,       hipFuncSetAttribute,        HIP_HIPFUNCSETATTRIBUTE,        HIP, 1);
-  //HC_LOAD_FUNC_HIP (hip, hipFuncSetCacheConfig,     hipFuncSetCacheConfig,      HIP_HIPFUNCSETCACHECONFIG,      HIP, 1);
-  //HC_LOAD_FUNC_HIP (hip, hipFuncSetSharedMemConfig, hipFuncSetSharedMemConfig,  HIP_HIPFUNCSETSHAREDMEMCONFIG,  HIP, 1);
-  //HC_LOAD_FUNC_HIP (hip, hipGetErrorName,           hipGetErrorName,            HIP_HIPGETERRORNAME,            HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipGetErrorName,           hipGetErrorName,            HIP_HIPGETERRORNAME,            HIP, 1);
   HC_LOAD_FUNC_HIP (hip, hipGetErrorString,         hipGetErrorString,          HIP_HIPGETERRORSTRING,          HIP, 1);
   HC_LOAD_FUNC_HIP (hip, hipInit,                   hipInit,                    HIP_HIPINIT,                    HIP, 1);
   HC_LOAD_FUNC_HIP (hip, hipLaunchKernel,           hipModuleLaunchKernel,      HIP_HIPLAUNCHKERNEL,            HIP, 1);
   HC_LOAD_FUNC_HIP (hip, hipMemAlloc,               hipMalloc,                  HIP_HIPMEMALLOC,                HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipMemAllocHost,           hipMemAllocHost,            HIP_HIPMEMALLOCHOST,            HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipMemFree,                hipFree,                    HIP_HIPMEMFREE,                 HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipMemGetInfo,             hipMemGetInfo,              HIP_HIPMEMGETINFO,              HIP, 1);
   HC_LOAD_FUNC_HIP (hip, hipMemcpyDtoD,             hipMemcpyDtoD,              HIP_HIPMEMCPYDTOD,              HIP, 1);
   HC_LOAD_FUNC_HIP (hip, hipMemcpyDtoDAsync,        hipMemcpyDtoDAsync,         HIP_HIPMEMCPYDTODASYNC,         HIP, 1);
   HC_LOAD_FUNC_HIP (hip, hipMemcpyDtoH,             hipMemcpyDtoH,              HIP_HIPMEMCPYDTOH,              HIP, 1);
   HC_LOAD_FUNC_HIP (hip, hipMemcpyDtoHAsync,        hipMemcpyDtoHAsync,         HIP_HIPMEMCPYDTOHASYNC,         HIP, 1);
   HC_LOAD_FUNC_HIP (hip, hipMemcpyHtoD,             hipMemcpyHtoD,              HIP_HIPMEMCPYHTOD,              HIP, 1);
   HC_LOAD_FUNC_HIP (hip, hipMemcpyHtoDAsync,        hipMemcpyHtoDAsync,         HIP_HIPMEMCPYHTODASYNC,         HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipMemFree,                hipFree,                    HIP_HIPMEMFREE,                 HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipMemFreeHost,            hipFreeHost,                HIP_HIPMEMFREEHOST,             HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipMemGetInfo,             hipMemGetInfo,              HIP_HIPMEMGETINFO,              HIP, 1);
-  //HC_LOAD_FUNC_HIP (hip, hipMemsetD32,              hipMemsetD32,               HIP_HIPMEMSETD32,               HIP, 1);
-  //HC_LOAD_FUNC_HIP (hip, hipMemsetD8,               hipMemsetD8,                HIP_HIPMEMSETD8,                HIP, 1);
   HC_LOAD_FUNC_HIP (hip, hipModuleGetFunction,      hipModuleGetFunction,       HIP_HIPMODULEGETFUNCTION,       HIP, 1);
   HC_LOAD_FUNC_HIP (hip, hipModuleGetGlobal,        hipModuleGetGlobal,         HIP_HIPMODULEGETGLOBAL,         HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipModuleLoad,             hipModuleLoad,              HIP_HIPMODULELOAD,              HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipModuleLoadData,         hipModuleLoadData,          HIP_HIPMODULELOADDATA,          HIP, 1);
   HC_LOAD_FUNC_HIP (hip, hipModuleLoadDataEx,       hipModuleLoadDataEx,        HIP_HIPMODULELOADDATAEX,        HIP, 1);
   HC_LOAD_FUNC_HIP (hip, hipModuleUnload,           hipModuleUnload,            HIP_HIPMODULEUNLOAD,            HIP, 1);
-  //HC_LOAD_FUNC_HIP (hip, hipProfilerStart,          hipProfilerStart,           HIP_HIPPROFILERSTART,           HIP, 1);
-  //HC_LOAD_FUNC_HIP (hip, hipProfilerStop,           hipProfilerStop,            HIP_HIPPROFILERSTOP,            HIP, 1);
   HC_LOAD_FUNC_HIP (hip, hipStreamCreate,           hipStreamCreate,            HIP_HIPSTREAMCREATE,            HIP, 1);
   HC_LOAD_FUNC_HIP (hip, hipStreamDestroy,          hipStreamDestroy,           HIP_HIPSTREAMDESTROY,           HIP, 1);
   HC_LOAD_FUNC_HIP (hip, hipStreamSynchronize,      hipStreamSynchronize,       HIP_HIPSTREAMSYNCHRONIZE,       HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipStreamWaitEvent,        hipStreamWaitEvent,         HIP_HIPSTREAMWAITEVENT,         HIP, 1);
-  //TODO HIP?
-  #if defined (WITH_CUBINX)
-  HC_LOAD_FUNC_HIP (hip, hipLinkCreate,             hipLinkCreate,              HIP_HIPLINKCREATE,              HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipLinkAddData,            hipLinkAddData,             HIP_HIPLINKADDDATA,             HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipLinkDestroy,            hipLinkDestroy,             HIP_HIPLINKDESTROY,             HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipLinkComplete,           hipLinkComplete,            HIP_HIPLINKCOMPLETE,            HIP, 1);
-  #endif
 
   return 0;
 }
@@ -2600,208 +2551,19 @@ void hip_close (hashcat_ctx_t *hashcat_ctx)
   }
 }
 
-int hc_hipInit (hashcat_ctx_t *hashcat_ctx, unsigned int Flags)
+int hc_hipCtxCreate (hashcat_ctx_t *hashcat_ctx, hipCtx_t *pctx, unsigned int flags, hipDevice_t dev)
 {
   backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
 
-  const HIPresult HIP_err = hip->hipInit (Flags);
+  const hipError_t HIP_err = hip->hipCtxCreate (pctx, flags, dev);
 
-  if (HIP_err != HIP_SUCCESS)
+  if (HIP_err != hipSuccess)
   {
     const char *pStr = NULL;
 
-    if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS)
-    {
-      event_log_error (hashcat_ctx, "hipInit(): %s", pStr);
-    }
-    else
-    {
-      event_log_error (hashcat_ctx, "hipInit(): %d", HIP_err);
-    }
-
-    return -1;
-  }
-
-  return 0;
-}
-
-int hc_hipDeviceGetAttribute (hashcat_ctx_t *hashcat_ctx, int *pi, HIPdevice_attribute attrib, HIPdevice dev)
-{
-  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
-
-  HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
-
-  const HIPresult HIP_err = hip->hipDeviceGetAttribute (pi, attrib, dev);
-
-  if (HIP_err != HIP_SUCCESS)
-  {
-    const char *pStr = NULL;
-
-    if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS)
-    {
-      event_log_error (hashcat_ctx, "hipDeviceGetAttribute(): %s", pStr);
-    }
-    else
-    {
-      event_log_error (hashcat_ctx, "hipDeviceGetAttribute(): %d", HIP_err);
-    }
-
-    return -1;
-  }
-
-  return 0;
-}
-
-int hc_hipDeviceGetCount (hashcat_ctx_t *hashcat_ctx, int *count)
-{
-  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
-
-  HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
-
-  const HIPresult HIP_err = hip->hipDeviceGetCount (count);
-
-  if (HIP_err != HIP_SUCCESS)
-  {
-    const char *pStr = NULL;
-
-    if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS)
-    {
-      event_log_error (hashcat_ctx, "hipDeviceGetCount(): %s", pStr);
-    }
-    else
-    {
-      event_log_error (hashcat_ctx, "hipDeviceGetCount(): %d", HIP_err);
-    }
-
-    return -1;
-  }
-
-  return 0;
-}
-
-int hc_hipDeviceGet (hashcat_ctx_t *hashcat_ctx, HIPdevice* device, int ordinal)
-{
-  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
-
-  HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
-
-  const HIPresult HIP_err = hip->hipDeviceGet (device, ordinal);
-
-  if (HIP_err != HIP_SUCCESS)
-  {
-    const char *pStr = NULL;
-
-    if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS)
-    {
-      event_log_error (hashcat_ctx, "hipDeviceGet(): %s", pStr);
-    }
-    else
-    {
-      event_log_error (hashcat_ctx, "hipDeviceGet(): %d", HIP_err);
-    }
-
-    return -1;
-  }
-
-  return 0;
-}
-
-int hc_hipDeviceGetName (hashcat_ctx_t *hashcat_ctx, char *name, int len, HIPdevice dev)
-{
-  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
-
-  HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
-
-  const HIPresult HIP_err = hip->hipDeviceGetName (name, len, dev);
-
-  if (HIP_err != HIP_SUCCESS)
-  {
-    const char *pStr = NULL;
-
-    if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS)
-    {
-      event_log_error (hashcat_ctx, "hipDeviceGetName(): %s", pStr);
-    }
-    else
-    {
-      event_log_error (hashcat_ctx, "hipDeviceGetName(): %d", HIP_err);
-    }
-
-    return -1;
-  }
-
-  return 0;
-}
-
-int hc_hipDeviceTotalMem (hashcat_ctx_t *hashcat_ctx, size_t *bytes, HIPdevice dev)
-{
-  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
-
-  HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
-
-  const HIPresult HIP_err = hip->hipDeviceTotalMem (bytes, dev);
-
-  if (HIP_err != HIP_SUCCESS)
-  {
-    const char *pStr = NULL;
-
-    if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS)
-    {
-      event_log_error (hashcat_ctx, "hipDeviceTotalMem(): %s", pStr);
-    }
-    else
-    {
-      event_log_error (hashcat_ctx, "hipDeviceTotalMem(): %d", HIP_err);
-    }
-
-    return -1;
-  }
-
-  return 0;
-}
-
-int hc_hipDriverGetVersion (hashcat_ctx_t *hashcat_ctx, int *driverVersion)
-{
-  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
-
-  HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
-
-  const HIPresult HIP_err = hip->hipDriverGetVersion (driverVersion);
-
-  if (HIP_err != HIP_SUCCESS)
-  {
-    const char *pStr = NULL;
-
-    if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS)
-    {
-      event_log_error (hashcat_ctx, "hipDriverGetVersion(): %s", pStr);
-    }
-    else
-    {
-      event_log_error (hashcat_ctx, "hipDriverGetVersion(): %d", HIP_err);
-    }
-
-    return -1;
-  }
-
-  return 0;
-}
-
-int hc_hipCtxCreate (hashcat_ctx_t *hashcat_ctx, HIPcontext *pctx, unsigned int flags, HIPdevice dev)
-{
-  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
-
-  HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
-
-  const HIPresult HIP_err = hip->hipCtxCreate (pctx, flags, dev);
-
-  if (HIP_err != HIP_SUCCESS)
-  {
-    const char *pStr = NULL;
-
-    if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS)
+    if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
     {
       event_log_error (hashcat_ctx, "hipCtxCreate(): %s", pStr);
     }
@@ -2816,19 +2578,19 @@ int hc_hipCtxCreate (hashcat_ctx_t *hashcat_ctx, HIPcontext *pctx, unsigned int
   return 0;
 }
 
-int hc_hipCtxDestroy (hashcat_ctx_t *hashcat_ctx, HIPcontext ctx)
+int hc_hipCtxDestroy (hashcat_ctx_t *hashcat_ctx, hipCtx_t ctx)
 {
   backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
 
-  const HIPresult HIP_err = hip->hipCtxDestroy (ctx);
+  const hipError_t HIP_err = hip->hipCtxDestroy (ctx);
 
-  if (HIP_err != HIP_SUCCESS)
+  if (HIP_err != hipSuccess)
   {
     const char *pStr = NULL;
 
-    if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS)
+    if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
     {
       event_log_error (hashcat_ctx, "hipCtxDestroy(): %s", pStr);
     }
@@ -2843,25 +2605,25 @@ int hc_hipCtxDestroy (hashcat_ctx_t *hashcat_ctx, HIPcontext ctx)
   return 0;
 }
 
-int hc_hipModuleLoadDataEx (hashcat_ctx_t *hashcat_ctx, HIPmodule *module, const void *image, unsigned int numOptions, HIPjit_option *options, void **optionValues)
+int hc_hipCtxPopCurrent (hashcat_ctx_t *hashcat_ctx, hipCtx_t *pctx)
 {
   backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
 
-  const HIPresult HIP_err = hip->hipModuleLoadDataEx (module, image, numOptions, options, optionValues);
+  const hipError_t HIP_err = hip->hipCtxPopCurrent (pctx);
 
-  if (HIP_err != HIP_SUCCESS)
+  if (HIP_err != hipSuccess)
   {
     const char *pStr = NULL;
 
-    if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS)
+    if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
     {
-      event_log_error (hashcat_ctx, "hipModuleLoadDataEx(): %s", pStr);
+      event_log_error (hashcat_ctx, "hipCtxPopCurrent(): %s", pStr);
     }
     else
     {
-      event_log_error (hashcat_ctx, "hipModuleLoadDataEx(): %d", HIP_err);
+      event_log_error (hashcat_ctx, "hipCtxPopCurrent(): %d", HIP_err);
     }
 
     return -1;
@@ -2870,25 +2632,25 @@ int hc_hipModuleLoadDataEx (hashcat_ctx_t *hashcat_ctx, HIPmodule *module, const
   return 0;
 }
 
-int hc_hipModuleUnload (hashcat_ctx_t *hashcat_ctx, HIPmodule hmod)
+int hc_hipCtxPushCurrent (hashcat_ctx_t *hashcat_ctx, hipCtx_t ctx)
 {
   backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
 
-  const HIPresult HIP_err = hip->hipModuleUnload (hmod);
+  const hipError_t HIP_err = hip->hipCtxPushCurrent (ctx);
 
-  if (HIP_err != HIP_SUCCESS)
+  if (HIP_err != hipSuccess)
   {
     const char *pStr = NULL;
 
-    if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS)
+    if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
     {
-      event_log_error (hashcat_ctx, "hipModuleUnload(): %s", pStr);
+      event_log_error (hashcat_ctx, "hipCtxPushCurrent(): %s", pStr);
     }
     else
     {
-      event_log_error (hashcat_ctx, "hipModuleUnload(): %d", HIP_err);
+      event_log_error (hashcat_ctx, "hipCtxPushCurrent(): %d", HIP_err);
     }
 
     return -1;
@@ -2897,19 +2659,19 @@ int hc_hipModuleUnload (hashcat_ctx_t *hashcat_ctx, HIPmodule hmod)
   return 0;
 }
 
-int hc_hipCtxSetCurrent (hashcat_ctx_t *hashcat_ctx, HIPcontext ctx)
+int hc_hipCtxSetCurrent (hashcat_ctx_t *hashcat_ctx, hipCtx_t ctx)
 {
   backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
 
-  const HIPresult HIP_err = hip->hipCtxSetCurrent (ctx);
+  const hipError_t HIP_err = hip->hipCtxSetCurrent (ctx);
 
-  if (HIP_err != HIP_SUCCESS)
+  if (HIP_err != hipSuccess)
   {
     const char *pStr = NULL;
 
-    if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS)
+    if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
     {
       event_log_error (hashcat_ctx, "hipCtxSetCurrent(): %s", pStr);
     }
@@ -2924,25 +2686,25 @@ int hc_hipCtxSetCurrent (hashcat_ctx_t *hashcat_ctx, HIPcontext ctx)
   return 0;
 }
 
-int hc_hipMemAlloc (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr *dptr, size_t bytesize)
+int hc_hipCtxSynchronize (hashcat_ctx_t *hashcat_ctx)
 {
   backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
 
-  const HIPresult HIP_err = hip->hipMemAlloc (dptr, bytesize);
+  const hipError_t HIP_err = hip->hipCtxSynchronize ();
 
-  if (HIP_err != HIP_SUCCESS)
+  if (HIP_err != hipSuccess)
   {
     const char *pStr = NULL;
 
-    if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS)
+    if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
     {
-      event_log_error (hashcat_ctx, "hipMemAlloc(): %s", pStr);
+      event_log_error (hashcat_ctx, "hipCtxSynchronize(): %s", pStr);
     }
     else
     {
-      event_log_error (hashcat_ctx, "hipMemAlloc(): %d", HIP_err);
+      event_log_error (hashcat_ctx, "hipCtxSynchronize(): %d", HIP_err);
     }
 
     return -1;
@@ -2951,25 +2713,25 @@ int hc_hipMemAlloc (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr *dptr, size_t bytes
   return 0;
 }
 
-int hc_hipMemFree (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dptr)
+int hc_hipDeviceGet (hashcat_ctx_t *hashcat_ctx, hipDevice_t* device, int ordinal)
 {
   backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
 
-  const HIPresult HIP_err = hip->hipMemFree (dptr);
+  const hipError_t HIP_err = hip->hipDeviceGet (device, ordinal);
 
-  if (HIP_err != HIP_SUCCESS)
+  if (HIP_err != hipSuccess)
   {
     const char *pStr = NULL;
 
-    if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS)
+    if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
     {
-      event_log_error (hashcat_ctx, "hipMemFree(): %s", pStr);
+      event_log_error (hashcat_ctx, "hipDeviceGet(): %s", pStr);
     }
     else
     {
-      event_log_error (hashcat_ctx, "hipMemFree(): %d", HIP_err);
+      event_log_error (hashcat_ctx, "hipDeviceGet(): %d", HIP_err);
     }
 
     return -1;
@@ -2978,25 +2740,25 @@ int hc_hipMemFree (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dptr)
   return 0;
 }
 
-int hc_hipMemcpyDtoH (hashcat_ctx_t *hashcat_ctx, void *dstHost, HIPdeviceptr srcDevice, size_t ByteCount)
+int hc_hipDeviceGetAttribute (hashcat_ctx_t *hashcat_ctx, int *pi, hipDeviceAttribute_t attrib, hipDevice_t dev)
 {
   backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
 
-  const HIPresult HIP_err = hip->hipMemcpyDtoH (dstHost, srcDevice, ByteCount);
+  const hipError_t HIP_err = hip->hipDeviceGetAttribute (pi, attrib, dev);
 
-  if (HIP_err != HIP_SUCCESS)
+  if (HIP_err != hipSuccess)
   {
     const char *pStr = NULL;
 
-    if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS)
+    if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
     {
-      event_log_error (hashcat_ctx, "hipMemcpyDtoH(): %s", pStr);
+      event_log_error (hashcat_ctx, "hipDeviceGetAttribute(): %s", pStr);
     }
     else
     {
-      event_log_error (hashcat_ctx, "hipMemcpyDtoH(): %d", HIP_err);
+      event_log_error (hashcat_ctx, "hipDeviceGetAttribute(): %d", HIP_err);
     }
 
     return -1;
@@ -3005,25 +2767,25 @@ int hc_hipMemcpyDtoH (hashcat_ctx_t *hashcat_ctx, void *dstHost, HIPdeviceptr sr
   return 0;
 }
 
-int hc_hipMemcpyDtoHAsync (hashcat_ctx_t *hashcat_ctx, void *dstHost, HIPdeviceptr srcDevice, size_t ByteCount, HIPstream hStream)
+int hc_hipDeviceGetCount (hashcat_ctx_t *hashcat_ctx, int *count)
 {
   backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
 
-  const HIPresult HIP_err = hip->hipMemcpyDtoHAsync (dstHost, srcDevice, ByteCount, hStream);
+  const hipError_t HIP_err = hip->hipDeviceGetCount (count);
 
-  if (HIP_err != HIP_SUCCESS)
+  if (HIP_err != hipSuccess)
   {
     const char *pStr = NULL;
 
-    if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS)
+    if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
     {
-      event_log_error (hashcat_ctx, "hipMemcpyDtoHAsync(): %s", pStr);
+      event_log_error (hashcat_ctx, "hipDeviceGetCount(): %s", pStr);
     }
     else
     {
-      event_log_error (hashcat_ctx, "hipMemcpyDtoHAsync(): %d", HIP_err);
+      event_log_error (hashcat_ctx, "hipDeviceGetCount(): %d", HIP_err);
     }
 
     return -1;
@@ -3032,25 +2794,25 @@ int hc_hipMemcpyDtoHAsync (hashcat_ctx_t *hashcat_ctx, void *dstHost, HIPdevicep
   return 0;
 }
 
-int hc_hipMemcpyDtoD (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dstDevice, HIPdeviceptr srcDevice, size_t ByteCount)
+int hc_hipDeviceGetName (hashcat_ctx_t *hashcat_ctx, char *name, int len, hipDevice_t dev)
 {
   backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
 
-  const HIPresult HIP_err = hip->hipMemcpyDtoD (dstDevice, srcDevice, ByteCount);
+  const hipError_t HIP_err = hip->hipDeviceGetName (name, len, dev);
 
-  if (HIP_err != HIP_SUCCESS)
+  if (HIP_err != hipSuccess)
   {
     const char *pStr = NULL;
 
-    if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS)
+    if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
     {
-      event_log_error (hashcat_ctx, "hipMemcpyDtoD(): %s", pStr);
+      event_log_error (hashcat_ctx, "hipDeviceGetName(): %s", pStr);
     }
     else
     {
-      event_log_error (hashcat_ctx, "hipMemcpyDtoD(): %d", HIP_err);
+      event_log_error (hashcat_ctx, "hipDeviceGetName(): %d", HIP_err);
     }
 
     return -1;
@@ -3059,25 +2821,25 @@ int hc_hipMemcpyDtoD (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dstDevice, HIPdev
   return 0;
 }
 
-int hc_hipMemcpyDtoDAsync (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dstDevice, HIPdeviceptr srcDevice, size_t ByteCount, HIPstream hStream)
+int hc_hipDeviceTotalMem (hashcat_ctx_t *hashcat_ctx, size_t *bytes, hipDevice_t dev)
 {
   backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
 
-  const HIPresult HIP_err = hip->hipMemcpyDtoDAsync (dstDevice, srcDevice, ByteCount, hStream);
+  const hipError_t HIP_err = hip->hipDeviceTotalMem (bytes, dev);
 
-  if (HIP_err != HIP_SUCCESS)
+  if (HIP_err != hipSuccess)
   {
     const char *pStr = NULL;
 
-    if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS)
+    if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
     {
-      event_log_error (hashcat_ctx, "hipMemcpyDtoDAsync(): %s", pStr);
+      event_log_error (hashcat_ctx, "hipDeviceTotalMem(): %s", pStr);
     }
     else
     {
-      event_log_error (hashcat_ctx, "hipMemcpyDtoDAsync(): %d", HIP_err);
+      event_log_error (hashcat_ctx, "hipDeviceTotalMem(): %d", HIP_err);
     }
 
     return -1;
@@ -3086,25 +2848,25 @@ int hc_hipMemcpyDtoDAsync (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dstDevice, H
   return 0;
 }
 
-int hc_hipMemcpyHtoD (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dstDevice, const void *srcHost, size_t ByteCount)
+int hc_hipDriverGetVersion (hashcat_ctx_t *hashcat_ctx, int *driverVersion)
 {
   backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
 
-  const HIPresult HIP_err = hip->hipMemcpyHtoD (dstDevice, srcHost, ByteCount);
+  const hipError_t HIP_err = hip->hipDriverGetVersion (driverVersion);
 
-  if (HIP_err != HIP_SUCCESS)
+  if (HIP_err != hipSuccess)
   {
     const char *pStr = NULL;
 
-    if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS)
+    if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
     {
-      event_log_error (hashcat_ctx, "hipMemcpyHtoD(): %s", pStr);
+      event_log_error (hashcat_ctx, "hipDriverGetVersion(): %s", pStr);
     }
     else
     {
-      event_log_error (hashcat_ctx, "hipMemcpyHtoD(): %d", HIP_err);
+      event_log_error (hashcat_ctx, "hipDriverGetVersion(): %d", HIP_err);
     }
 
     return -1;
@@ -3113,25 +2875,25 @@ int hc_hipMemcpyHtoD (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dstDevice, const
   return 0;
 }
 
-int hc_hipMemcpyHtoDAsync (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dstDevice, const void *srcHost, size_t ByteCount, HIPstream hStream)
+int hc_hipEventCreate (hashcat_ctx_t *hashcat_ctx, hipEvent_t *phEvent, unsigned int Flags)
 {
   backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
 
-  const HIPresult HIP_err = hip->hipMemcpyHtoDAsync (dstDevice, srcHost, ByteCount, hStream);
+  const hipError_t HIP_err = hip->hipEventCreate (phEvent, Flags);
 
-  if (HIP_err != HIP_SUCCESS)
+  if (HIP_err != hipSuccess)
   {
     const char *pStr = NULL;
 
-    if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS)
+    if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
     {
-      event_log_error (hashcat_ctx, "hipMemcpyHtoDAsync(): %s", pStr);
+      event_log_error (hashcat_ctx, "hipEventCreate(): %s", pStr);
     }
     else
     {
-      event_log_error (hashcat_ctx, "hipMemcpyHtoDAsync(): %d", HIP_err);
+      event_log_error (hashcat_ctx, "hipEventCreate(): %d", HIP_err);
     }
 
     return -1;
@@ -3140,25 +2902,25 @@ int hc_hipMemcpyHtoDAsync (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dstDevice, c
   return 0;
 }
 
-int hc_hipModuleGetFunction (hashcat_ctx_t *hashcat_ctx, HIPfunction *hfunc, HIPmodule hmod, const char *name)
+int hc_hipEventDestroy (hashcat_ctx_t *hashcat_ctx, hipEvent_t hEvent)
 {
   backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
 
-  const HIPresult HIP_err = hip->hipModuleGetFunction (hfunc, hmod, name);
+  const hipError_t HIP_err = hip->hipEventDestroy (hEvent);
 
-  if (HIP_err != HIP_SUCCESS)
+  if (HIP_err != hipSuccess)
   {
     const char *pStr = NULL;
 
-    if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS)
+    if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
     {
-      event_log_error (hashcat_ctx, "hipModuleGetFunction(): %s", pStr);
+      event_log_error (hashcat_ctx, "hipEventDestroy(): %s", pStr);
     }
     else
     {
-      event_log_error (hashcat_ctx, "hipModuleGetFunction(): %d", HIP_err);
+      event_log_error (hashcat_ctx, "hipEventDestroy(): %d", HIP_err);
     }
 
     return -1;
@@ -3167,25 +2929,25 @@ int hc_hipModuleGetFunction (hashcat_ctx_t *hashcat_ctx, HIPfunction *hfunc, HIP
   return 0;
 }
 
-int hc_hipModuleGetGlobal (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr *dptr, size_t *bytes, HIPmodule hmod, const char *name)
+int hc_hipEventElapsedTime (hashcat_ctx_t *hashcat_ctx, float *pMilliseconds, hipEvent_t hStart, hipEvent_t hEnd)
 {
   backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
 
-  const HIPresult HIP_err = hip->hipModuleGetGlobal (dptr, bytes, hmod, name);
+  const hipError_t HIP_err = hip->hipEventElapsedTime (pMilliseconds, hStart, hEnd);
 
-  if (HIP_err != HIP_SUCCESS)
+  if (HIP_err != hipSuccess)
   {
     const char *pStr = NULL;
 
-    if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS)
+    if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
     {
-      event_log_error (hashcat_ctx, "hipModuleGetGlobal(): %s", pStr);
+      event_log_error (hashcat_ctx, "hipEventElapsedTime(): %s", pStr);
     }
     else
     {
-      event_log_error (hashcat_ctx, "hipModuleGetGlobal(): %d", HIP_err);
+      event_log_error (hashcat_ctx, "hipEventElapsedTime(): %d", HIP_err);
     }
 
     return -1;
@@ -3194,25 +2956,25 @@ int hc_hipModuleGetGlobal (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr *dptr, size_
   return 0;
 }
 
-int hc_hipMemGetInfo (hashcat_ctx_t *hashcat_ctx, size_t *free, size_t *total)
+int hc_hipEventRecord (hashcat_ctx_t *hashcat_ctx, hipEvent_t hEvent, hipStream_t hStream)
 {
   backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
 
-  const HIPresult HIP_err = hip->hipMemGetInfo (free, total);
+  const hipError_t HIP_err = hip->hipEventRecord (hEvent, hStream);
 
-  if (HIP_err != HIP_SUCCESS)
+  if (HIP_err != hipSuccess)
   {
     const char *pStr = NULL;
 
-    if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS)
+    if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
     {
-      event_log_error (hashcat_ctx, "hipMemGetInfo(): %s", pStr);
+      event_log_error (hashcat_ctx, "hipEventRecord(): %s", pStr);
     }
     else
     {
-      event_log_error (hashcat_ctx, "hipMemGetInfo(): %d", HIP_err);
+      event_log_error (hashcat_ctx, "hipEventRecord(): %d", HIP_err);
     }
 
     return -1;
@@ -3221,25 +2983,25 @@ int hc_hipMemGetInfo (hashcat_ctx_t *hashcat_ctx, size_t *free, size_t *total)
   return 0;
 }
 
-int hc_hipFuncGetAttribute (hashcat_ctx_t *hashcat_ctx, int *pi, HIPfunction_attribute attrib, HIPfunction hfunc)
+int hc_hipEventSynchronize (hashcat_ctx_t *hashcat_ctx, hipEvent_t hEvent)
 {
   backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
 
-  const HIPresult HIP_err = hip->hipFuncGetAttribute (pi, attrib, hfunc);
+  const hipError_t HIP_err = hip->hipEventSynchronize (hEvent);
 
-  if (HIP_err != HIP_SUCCESS)
+  if (HIP_err != hipSuccess)
   {
     const char *pStr = NULL;
 
-    if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS)
+    if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
     {
-      event_log_error (hashcat_ctx, "hipFuncGetAttribute(): %s", pStr);
+      event_log_error (hashcat_ctx, "hipEventSynchronize(): %s", pStr);
     }
     else
     {
-      event_log_error (hashcat_ctx, "hipFuncGetAttribute(): %d", HIP_err);
+      event_log_error (hashcat_ctx, "hipEventSynchronize(): %d", HIP_err);
     }
 
     return -1;
@@ -3248,29 +3010,25 @@ int hc_hipFuncGetAttribute (hashcat_ctx_t *hashcat_ctx, int *pi, HIPfunction_att
   return 0;
 }
 
-/*
-
-// ATTENTION, this one maps to cudaFuncSetAttribute not cuFuncSetAttribute !!!
-
-int hc_hipFuncSetAttribute (hashcat_ctx_t *hashcat_ctx, HIPfunction hfunc, HIPfunction_attribute attrib, int value)
+int hc_hipFuncGetAttribute (hashcat_ctx_t *hashcat_ctx, int *pi, hipFunction_attribute attrib, hipFunction_t hfunc)
 {
   backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
 
-  const HIPresult HIP_err = hip->hipFuncSetAttribute (hfunc, attrib, value);
+  const hipError_t HIP_err = hip->hipFuncGetAttribute (pi, attrib, hfunc);
 
-  if (HIP_err != HIP_SUCCESS)
+  if (HIP_err != hipSuccess)
   {
     const char *pStr = NULL;
 
-    if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS)
+    if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
     {
-      event_log_error (hashcat_ctx, "hipFuncSetAttribute(): %s", pStr);
+      event_log_error (hashcat_ctx, "hipFuncGetAttribute(): %s", pStr);
     }
     else
     {
-      event_log_error (hashcat_ctx, "hipFuncSetAttribute(): %d", HIP_err);
+      event_log_error (hashcat_ctx, "hipFuncGetAttribute(): %d", HIP_err);
     }
 
     return -1;
@@ -3278,27 +3036,26 @@ int hc_hipFuncSetAttribute (hashcat_ctx_t *hashcat_ctx, HIPfunction hfunc, HIPfu
 
   return 0;
 }
-*/
 
-int hc_hipStreamCreate (hashcat_ctx_t *hashcat_ctx, HIPstream *phStream, unsigned int Flags)
+int hc_hipLaunchKernel (hashcat_ctx_t *hashcat_ctx, hipFunction_t f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, hipStream_t hStream, void **kernelParams, void **extra)
 {
   backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
 
-  const HIPresult HIP_err = hip->hipStreamCreate (phStream, Flags);
+  const hipError_t HIP_err = hip->hipLaunchKernel (f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams, extra);
 
-  if (HIP_err != HIP_SUCCESS)
+  if (HIP_err != hipSuccess)
   {
     const char *pStr = NULL;
 
-    if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS)
+    if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
     {
-      event_log_error (hashcat_ctx, "hipStreamCreate(): %s", pStr);
+      event_log_error (hashcat_ctx, "hipLaunchKernel(): %s", pStr);
     }
     else
     {
-      event_log_error (hashcat_ctx, "hipStreamCreate(): %d", HIP_err);
+      event_log_error (hashcat_ctx, "hipLaunchKernel(): %d", HIP_err);
     }
 
     return -1;
@@ -3307,25 +3064,25 @@ int hc_hipStreamCreate (hashcat_ctx_t *hashcat_ctx, HIPstream *phStream, unsigne
   return 0;
 }
 
-int hc_hipStreamDestroy (hashcat_ctx_t *hashcat_ctx, HIPstream hStream)
+int hc_hipInit (hashcat_ctx_t *hashcat_ctx, unsigned int Flags)
 {
   backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
 
-  const HIPresult HIP_err = hip->hipStreamDestroy (hStream);
+  const hipError_t HIP_err = hip->hipInit (Flags);
 
-  if (HIP_err != HIP_SUCCESS)
+  if (HIP_err != hipSuccess)
   {
     const char *pStr = NULL;
 
-    if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS)
+    if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
     {
-      event_log_error (hashcat_ctx, "hipStreamDestroy(): %s", pStr);
+      event_log_error (hashcat_ctx, "hipInit(): %s", pStr);
     }
     else
     {
-      event_log_error (hashcat_ctx, "hipStreamDestroy(): %d", HIP_err);
+      event_log_error (hashcat_ctx, "hipInit(): %d", HIP_err);
     }
 
     return -1;
@@ -3334,25 +3091,25 @@ int hc_hipStreamDestroy (hashcat_ctx_t *hashcat_ctx, HIPstream hStream)
   return 0;
 }
 
-int hc_hipStreamSynchronize (hashcat_ctx_t *hashcat_ctx, HIPstream hStream)
+int hc_hipMemAlloc (hashcat_ctx_t *hashcat_ctx, hipDeviceptr_t *dptr, size_t bytesize)
 {
   backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
 
-  const HIPresult HIP_err = hip->hipStreamSynchronize (hStream);
+  const hipError_t HIP_err = hip->hipMemAlloc (dptr, bytesize);
 
-  if (HIP_err != HIP_SUCCESS)
+  if (HIP_err != hipSuccess)
   {
     const char *pStr = NULL;
 
-    if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS)
+    if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
     {
-      event_log_error (hashcat_ctx, "hipStreamSynchronize(): %s", pStr);
+      event_log_error (hashcat_ctx, "hipMemAlloc(): %s", pStr);
     }
     else
     {
-      event_log_error (hashcat_ctx, "hipStreamSynchronize(): %d", HIP_err);
+      event_log_error (hashcat_ctx, "hipMemAlloc(): %d", HIP_err);
     }
 
     return -1;
@@ -3361,25 +3118,25 @@ int hc_hipStreamSynchronize (hashcat_ctx_t *hashcat_ctx, HIPstream hStream)
   return 0;
 }
 
-int hc_hipLaunchKernel (hashcat_ctx_t *hashcat_ctx, HIPfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, HIPstream hStream, void **kernelParams, void **extra)
+int hc_hipMemFree (hashcat_ctx_t *hashcat_ctx, hipDeviceptr_t dptr)
 {
   backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
 
-  const HIPresult HIP_err = hip->hipLaunchKernel (f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams, extra);
+  const hipError_t HIP_err = hip->hipMemFree (dptr);
 
-  if (HIP_err != HIP_SUCCESS)
+  if (HIP_err != hipSuccess)
   {
     const char *pStr = NULL;
 
-    if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS)
+    if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
     {
-      event_log_error (hashcat_ctx, "hipLaunchKernel(): %s", pStr);
+      event_log_error (hashcat_ctx, "hipMemFree(): %s", pStr);
     }
     else
     {
-      event_log_error (hashcat_ctx, "hipLaunchKernel(): %d", HIP_err);
+      event_log_error (hashcat_ctx, "hipMemFree(): %d", HIP_err);
     }
 
     return -1;
@@ -3388,25 +3145,25 @@ int hc_hipLaunchKernel (hashcat_ctx_t *hashcat_ctx, HIPfunction f, unsigned int
   return 0;
 }
 
-int hc_hipCtxSynchronize (hashcat_ctx_t *hashcat_ctx)
+int hc_hipMemGetInfo (hashcat_ctx_t *hashcat_ctx, size_t *free, size_t *total)
 {
   backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
 
-  const HIPresult HIP_err = hip->hipCtxSynchronize ();
+  const hipError_t HIP_err = hip->hipMemGetInfo (free, total);
 
-  if (HIP_err != HIP_SUCCESS)
+  if (HIP_err != hipSuccess)
   {
     const char *pStr = NULL;
 
-    if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS)
+    if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
     {
-      event_log_error (hashcat_ctx, "hipCtxSynchronize(): %s", pStr);
+      event_log_error (hashcat_ctx, "hipMemGetInfo(): %s", pStr);
     }
     else
     {
-      event_log_error (hashcat_ctx, "hipCtxSynchronize(): %d", HIP_err);
+      event_log_error (hashcat_ctx, "hipMemGetInfo(): %d", HIP_err);
     }
 
     return -1;
@@ -3415,25 +3172,25 @@ int hc_hipCtxSynchronize (hashcat_ctx_t *hashcat_ctx)
   return 0;
 }
 
-int hc_hipEventCreate (hashcat_ctx_t *hashcat_ctx, HIPevent *phEvent, unsigned int Flags)
+int hc_hipMemcpyDtoH (hashcat_ctx_t *hashcat_ctx, void *dstHost, hipDeviceptr_t srcDevice, size_t ByteCount)
 {
   backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
 
-  const HIPresult HIP_err = hip->hipEventCreate (phEvent, Flags);
+  const hipError_t HIP_err = hip->hipMemcpyDtoH (dstHost, srcDevice, ByteCount);
 
-  if (HIP_err != HIP_SUCCESS)
+  if (HIP_err != hipSuccess)
   {
     const char *pStr = NULL;
 
-    if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS)
+    if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
     {
-      event_log_error (hashcat_ctx, "hipEventCreate(): %s", pStr);
+      event_log_error (hashcat_ctx, "hipMemcpyDtoH(): %s", pStr);
     }
     else
     {
-      event_log_error (hashcat_ctx, "hipEventCreate(): %d", HIP_err);
+      event_log_error (hashcat_ctx, "hipMemcpyDtoH(): %d", HIP_err);
     }
 
     return -1;
@@ -3442,25 +3199,25 @@ int hc_hipEventCreate (hashcat_ctx_t *hashcat_ctx, HIPevent *phEvent, unsigned i
   return 0;
 }
 
-int hc_hipEventDestroy (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent)
+int hc_hipMemcpyDtoHAsync (hashcat_ctx_t *hashcat_ctx, void *dstHost, hipDeviceptr_t srcDevice, size_t ByteCount, hipStream_t hStream)
 {
   backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
 
-  const HIPresult HIP_err = hip->hipEventDestroy (hEvent);
+  const hipError_t HIP_err = hip->hipMemcpyDtoHAsync (dstHost, srcDevice, ByteCount, hStream);
 
-  if (HIP_err != HIP_SUCCESS)
+  if (HIP_err != hipSuccess)
   {
     const char *pStr = NULL;
 
-    if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS)
+    if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
     {
-      event_log_error (hashcat_ctx, "hipEventDestroy(): %s", pStr);
+      event_log_error (hashcat_ctx, "hipMemcpyDtoHAsync(): %s", pStr);
     }
     else
     {
-      event_log_error (hashcat_ctx, "hipEventDestroy(): %d", HIP_err);
+      event_log_error (hashcat_ctx, "hipMemcpyDtoHAsync(): %d", HIP_err);
     }
 
     return -1;
@@ -3469,25 +3226,25 @@ int hc_hipEventDestroy (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent)
   return 0;
 }
 
-int hc_hipEventElapsedTime (hashcat_ctx_t *hashcat_ctx, float *pMilliseconds, HIPevent hStart, HIPevent hEnd)
+int hc_hipMemcpyDtoD (hashcat_ctx_t *hashcat_ctx, hipDeviceptr_t dstDevice, hipDeviceptr_t srcDevice, size_t ByteCount)
 {
   backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
 
-  const HIPresult HIP_err = hip->hipEventElapsedTime (pMilliseconds, hStart, hEnd);
+  const hipError_t HIP_err = hip->hipMemcpyDtoD (dstDevice, srcDevice, ByteCount);
 
-  if (HIP_err != HIP_SUCCESS)
+  if (HIP_err != hipSuccess)
   {
     const char *pStr = NULL;
 
-    if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS)
+    if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
     {
-      event_log_error (hashcat_ctx, "hipEventElapsedTime(): %s", pStr);
+      event_log_error (hashcat_ctx, "hipMemcpyDtoD(): %s", pStr);
     }
     else
     {
-      event_log_error (hashcat_ctx, "hipEventElapsedTime(): %d", HIP_err);
+      event_log_error (hashcat_ctx, "hipMemcpyDtoD(): %d", HIP_err);
     }
 
     return -1;
@@ -3496,25 +3253,25 @@ int hc_hipEventElapsedTime (hashcat_ctx_t *hashcat_ctx, float *pMilliseconds, HI
   return 0;
 }
 
-int hc_hipEventQuery (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent)
+int hc_hipMemcpyDtoDAsync (hashcat_ctx_t *hashcat_ctx, hipDeviceptr_t dstDevice, hipDeviceptr_t srcDevice, size_t ByteCount, hipStream_t hStream)
 {
   backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
 
-  const HIPresult HIP_err = hip->hipEventQuery (hEvent);
+  const hipError_t HIP_err = hip->hipMemcpyDtoDAsync (dstDevice, srcDevice, ByteCount, hStream);
 
-  if (HIP_err != HIP_SUCCESS)
+  if (HIP_err != hipSuccess)
   {
     const char *pStr = NULL;
 
-    if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS)
+    if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
     {
-      event_log_error (hashcat_ctx, "hipEventQuery(): %s", pStr);
+      event_log_error (hashcat_ctx, "hipMemcpyDtoDAsync(): %s", pStr);
     }
     else
     {
-      event_log_error (hashcat_ctx, "hipEventQuery(): %d", HIP_err);
+      event_log_error (hashcat_ctx, "hipMemcpyDtoDAsync(): %d", HIP_err);
     }
 
     return -1;
@@ -3523,25 +3280,25 @@ int hc_hipEventQuery (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent)
   return 0;
 }
 
-int hc_hipEventRecord (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent, HIPstream hStream)
+int hc_hipMemcpyHtoD (hashcat_ctx_t *hashcat_ctx, hipDeviceptr_t dstDevice, const void *srcHost, size_t ByteCount)
 {
   backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
 
-  const HIPresult HIP_err = hip->hipEventRecord (hEvent, hStream);
+  const hipError_t HIP_err = hip->hipMemcpyHtoD (dstDevice, srcHost, ByteCount);
 
-  if (HIP_err != HIP_SUCCESS)
+  if (HIP_err != hipSuccess)
   {
     const char *pStr = NULL;
 
-    if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS)
+    if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
     {
-      event_log_error (hashcat_ctx, "hipEventRecord(): %s", pStr);
+      event_log_error (hashcat_ctx, "hipMemcpyHtoD(): %s", pStr);
     }
     else
     {
-      event_log_error (hashcat_ctx, "hipEventRecord(): %d", HIP_err);
+      event_log_error (hashcat_ctx, "hipMemcpyHtoD(): %d", HIP_err);
     }
 
     return -1;
@@ -3550,25 +3307,25 @@ int hc_hipEventRecord (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent, HIPstream hS
   return 0;
 }
 
-int hc_hipEventSynchronize (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent)
+int hc_hipMemcpyHtoDAsync (hashcat_ctx_t *hashcat_ctx, hipDeviceptr_t dstDevice, const void *srcHost, size_t ByteCount, hipStream_t hStream)
 {
   backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
 
-  const HIPresult HIP_err = hip->hipEventSynchronize (hEvent);
+  const hipError_t HIP_err = hip->hipMemcpyHtoDAsync (dstDevice, srcHost, ByteCount, hStream);
 
-  if (HIP_err != HIP_SUCCESS)
+  if (HIP_err != hipSuccess)
   {
     const char *pStr = NULL;
 
-    if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS)
+    if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
     {
-      event_log_error (hashcat_ctx, "hipEventSynchronize(): %s", pStr);
+      event_log_error (hashcat_ctx, "hipMemcpyHtoDAsync(): %s", pStr);
     }
     else
     {
-      event_log_error (hashcat_ctx, "hipEventSynchronize(): %d", HIP_err);
+      event_log_error (hashcat_ctx, "hipMemcpyHtoDAsync(): %d", HIP_err);
     }
 
     return -1;
@@ -3577,25 +3334,25 @@ int hc_hipEventSynchronize (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent)
   return 0;
 }
 
-int hc_hipCtxSetCacheConfig (hashcat_ctx_t *hashcat_ctx, HIPfunc_cache config)
+int hc_hipModuleGetFunction (hashcat_ctx_t *hashcat_ctx, hipFunction_t *hfunc, hipModule_t hmod, const char *name)
 {
   backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
 
-  const HIPresult HIP_err = hip->hipCtxSetCacheConfig (config);
+  const hipError_t HIP_err = hip->hipModuleGetFunction (hfunc, hmod, name);
 
-  if (HIP_err != HIP_SUCCESS)
+  if (HIP_err != hipSuccess)
   {
     const char *pStr = NULL;
 
-    if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS)
+    if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
     {
-      event_log_error (hashcat_ctx, "hipCtxSetCacheConfig(): %s", pStr);
+      event_log_error (hashcat_ctx, "hipModuleGetFunction(): %s", pStr);
     }
     else
     {
-      event_log_error (hashcat_ctx, "hipCtxSetCacheConfig(): %d", HIP_err);
+      event_log_error (hashcat_ctx, "hipModuleGetFunction(): %d", HIP_err);
     }
 
     return -1;
@@ -3604,25 +3361,25 @@ int hc_hipCtxSetCacheConfig (hashcat_ctx_t *hashcat_ctx, HIPfunc_cache config)
   return 0;
 }
 
-int hc_hipCtxPushCurrent (hashcat_ctx_t *hashcat_ctx, HIPcontext ctx)
+int hc_hipModuleGetGlobal (hashcat_ctx_t *hashcat_ctx, hipDeviceptr_t *dptr, size_t *bytes, hipModule_t hmod, const char *name)
 {
   backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
 
-  const HIPresult HIP_err = hip->hipCtxPushCurrent (ctx);
+  const hipError_t HIP_err = hip->hipModuleGetGlobal (dptr, bytes, hmod, name);
 
-  if (HIP_err != HIP_SUCCESS)
+  if (HIP_err != hipSuccess)
   {
     const char *pStr = NULL;
 
-    if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS)
+    if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
     {
-      event_log_error (hashcat_ctx, "hipCtxPushCurrent(): %s", pStr);
+      event_log_error (hashcat_ctx, "hipModuleGetGlobal(): %s", pStr);
     }
     else
     {
-      event_log_error (hashcat_ctx, "hipCtxPushCurrent(): %d", HIP_err);
+      event_log_error (hashcat_ctx, "hipModuleGetGlobal(): %d", HIP_err);
     }
 
     return -1;
@@ -3631,25 +3388,25 @@ int hc_hipCtxPushCurrent (hashcat_ctx_t *hashcat_ctx, HIPcontext ctx)
   return 0;
 }
 
-int hc_hipCtxPopCurrent (hashcat_ctx_t *hashcat_ctx, HIPcontext *pctx)
+int hc_hipModuleLoadDataEx (hashcat_ctx_t *hashcat_ctx, hipModule_t *module, const void *image, unsigned int numOptions, hipJitOption *options, void **optionValues)
 {
   backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
 
-  const HIPresult HIP_err = hip->hipCtxPopCurrent (pctx);
+  const hipError_t HIP_err = hip->hipModuleLoadDataEx (module, image, numOptions, options, optionValues);
 
-  if (HIP_err != HIP_SUCCESS)
+  if (HIP_err != hipSuccess)
   {
     const char *pStr = NULL;
 
-    if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS)
+    if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
     {
-      event_log_error (hashcat_ctx, "hipCtxPopCurrent(): %s", pStr);
+      event_log_error (hashcat_ctx, "hipModuleLoadDataEx(): %s", pStr);
     }
     else
     {
-      event_log_error (hashcat_ctx, "hipCtxPopCurrent(): %d", HIP_err);
+      event_log_error (hashcat_ctx, "hipModuleLoadDataEx(): %d", HIP_err);
     }
 
     return -1;
@@ -3658,25 +3415,25 @@ int hc_hipCtxPopCurrent (hashcat_ctx_t *hashcat_ctx, HIPcontext *pctx)
   return 0;
 }
 
-int hc_hipLinkCreate (hashcat_ctx_t *hashcat_ctx, unsigned int numOptions, HIPjit_option *options, void **optionValues, HIPlinkState *stateOut)
+int hc_hipModuleUnload (hashcat_ctx_t *hashcat_ctx, hipModule_t hmod)
 {
   backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
 
-  const HIPresult HIP_err = hip->hipLinkCreate (numOptions, options, optionValues, stateOut);
+  const hipError_t HIP_err = hip->hipModuleUnload (hmod);
 
-  if (HIP_err != HIP_SUCCESS)
+  if (HIP_err != hipSuccess)
   {
     const char *pStr = NULL;
 
-    if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS)
+    if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
     {
-      event_log_error (hashcat_ctx, "hipLinkCreate(): %s", pStr);
+      event_log_error (hashcat_ctx, "hipModuleUnload(): %s", pStr);
     }
     else
     {
-      event_log_error (hashcat_ctx, "hipLinkCreate(): %d", HIP_err);
+      event_log_error (hashcat_ctx, "hipModuleUnload(): %d", HIP_err);
     }
 
     return -1;
@@ -3685,25 +3442,25 @@ int hc_hipLinkCreate (hashcat_ctx_t *hashcat_ctx, unsigned int numOptions, HIPji
   return 0;
 }
 
-int hc_hipLinkAddData (hashcat_ctx_t *hashcat_ctx, HIPlinkState state, HIPjitInputType type, void *data, size_t size, const char *name, unsigned int numOptions, HIPjit_option *options, void **optionValues)
+int hc_hipStreamCreate (hashcat_ctx_t *hashcat_ctx, hipStream_t *phStream, unsigned int Flags)
 {
   backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
 
-  const HIPresult HIP_err = hip->hipLinkAddData (state, type, data, size, name, numOptions, options, optionValues);
+  const hipError_t HIP_err = hip->hipStreamCreate (phStream, Flags);
 
-  if (HIP_err != HIP_SUCCESS)
+  if (HIP_err != hipSuccess)
   {
     const char *pStr = NULL;
 
-    if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS)
+    if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
     {
-      event_log_error (hashcat_ctx, "hipLinkAddData(): %s", pStr);
+      event_log_error (hashcat_ctx, "hipStreamCreate(): %s", pStr);
     }
     else
     {
-      event_log_error (hashcat_ctx, "hipLinkAddData(): %d", HIP_err);
+      event_log_error (hashcat_ctx, "hipStreamCreate(): %d", HIP_err);
     }
 
     return -1;
@@ -3712,25 +3469,25 @@ int hc_hipLinkAddData (hashcat_ctx_t *hashcat_ctx, HIPlinkState state, HIPjitInp
   return 0;
 }
 
-int hc_hipLinkDestroy (hashcat_ctx_t *hashcat_ctx, HIPlinkState state)
+int hc_hipStreamDestroy (hashcat_ctx_t *hashcat_ctx, hipStream_t hStream)
 {
   backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
 
-  const HIPresult HIP_err = hip->hipLinkDestroy (state);
+  const hipError_t HIP_err = hip->hipStreamDestroy (hStream);
 
-  if (HIP_err != HIP_SUCCESS)
+  if (HIP_err != hipSuccess)
   {
     const char *pStr = NULL;
 
-    if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS)
+    if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
     {
-      event_log_error (hashcat_ctx, "hipLinkDestroy(): %s", pStr);
+      event_log_error (hashcat_ctx, "hipStreamDestroy(): %s", pStr);
     }
     else
     {
-      event_log_error (hashcat_ctx, "hipLinkDestroy(): %d", HIP_err);
+      event_log_error (hashcat_ctx, "hipStreamDestroy(): %d", HIP_err);
     }
 
     return -1;
@@ -3739,25 +3496,25 @@ int hc_hipLinkDestroy (hashcat_ctx_t *hashcat_ctx, HIPlinkState state)
   return 0;
 }
 
-int hc_hipLinkComplete (hashcat_ctx_t *hashcat_ctx, HIPlinkState state, void **hipbinOut, size_t *sizeOut)
+int hc_hipStreamSynchronize (hashcat_ctx_t *hashcat_ctx, hipStream_t hStream)
 {
   backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
 
-  const HIPresult HIP_err = hip->hipLinkComplete (state, hipbinOut, sizeOut);
+  const hipError_t HIP_err = hip->hipStreamSynchronize (hStream);
 
-  if (HIP_err != HIP_SUCCESS)
+  if (HIP_err != hipSuccess)
   {
     const char *pStr = NULL;
 
-    if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS)
+    if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
     {
-      event_log_error (hashcat_ctx, "hipLinkComplete(): %s", pStr);
+      event_log_error (hashcat_ctx, "hipStreamSynchronize(): %s", pStr);
     }
     else
     {
-      event_log_error (hashcat_ctx, "hipLinkComplete(): %d", HIP_err);
+      event_log_error (hashcat_ctx, "hipStreamSynchronize(): %d", HIP_err);
     }
 
     return -1;
@@ -5245,7 +5002,7 @@ int run_cuda_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device
   return 0;
 }
 
-int run_hip_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, HIPdeviceptr buf, const u64 num)
+int run_hip_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, hipDeviceptr_t buf, const u64 num)
 {
   u64 num_elements = num;
 
@@ -5256,7 +5013,7 @@ int run_hip_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device
 
   num_elements = CEILDIV (num_elements, kernel_threads);
 
-  HIPfunction function = device_param->hip_function_atinit;
+  hipFunction_t function = device_param->hip_function_atinit;
 
   if (hc_hipLaunchKernel (hashcat_ctx, function, num_elements, 1, 1, kernel_threads, 1, 1, 0, device_param->hip_stream, device_param->kernel_params_atinit, NULL) == -1) return -1;
 
@@ -5265,7 +5022,7 @@ int run_hip_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device
   return 0;
 }
 
-int run_hip_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, HIPdeviceptr buf, const u64 num)
+int run_hip_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, hipDeviceptr_t buf, const u64 num)
 {
   u64 num_elements = num;
 
@@ -5276,7 +5033,7 @@ int run_hip_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param_t
 
   num_elements = CEILDIV (num_elements, kernel_threads);
 
-  HIPfunction function = device_param->hip_function_utf8toutf16le;
+  hipFunction_t function = device_param->hip_function_utf8toutf16le;
 
   if (hc_hipLaunchKernel (hashcat_ctx, function, num_elements, 1, 1, kernel_threads, 1, 1, 0, device_param->hip_stream, device_param->kernel_params_utf8toutf16le, NULL) == -1) return -1;
 
@@ -5285,7 +5042,7 @@ int run_hip_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param_t
   return 0;
 }
 
-int run_hip_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, HIPdeviceptr buf, const u32 value, const u64 size)
+int run_hip_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, hipDeviceptr_t buf, const u32 value, const u64 size)
 {
   const u64 num16d = size / 16;
   const u64 num16m = size % 16;
@@ -5301,7 +5058,7 @@ int run_hip_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device
 
     u64 num_elements = CEILDIV (num16d, kernel_threads);
 
-    HIPfunction function = device_param->hip_function_memset;
+    hipFunction_t function = device_param->hip_function_memset;
 
     if (hc_hipLaunchKernel (hashcat_ctx, function, num_elements, 1, 1, kernel_threads, 1, 1, 0, device_param->hip_stream, device_param->kernel_params_memset, NULL) == -1) return -1;
   }
@@ -5321,7 +5078,7 @@ int run_hip_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device
   return 0;
 }
 
-int run_hip_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, HIPdeviceptr buf, const u64 size)
+int run_hip_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, hipDeviceptr_t buf, const u64 size)
 {
   const u64 num16d = size / 16;
   const u64 num16m = size % 16;
@@ -5336,7 +5093,7 @@ int run_hip_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_
 
     u64 num_elements = CEILDIV(num16d, kernel_threads);
 
-    HIPfunction function = device_param->hip_function_bzero;
+    hipFunction_t function = device_param->hip_function_bzero;
 
     if (hc_hipLaunchKernel (hashcat_ctx, function, num_elements, 1, 1, kernel_threads, 1, 1, 0, device_param->hip_stream, device_param->kernel_params_bzero, NULL) == -1) return -1;
   }
@@ -5686,7 +5443,7 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
 
   if (device_param->is_hip == true)
   {
-    HIPfunction hip_function = NULL;
+    hipFunction_t hip_function = NULL;
 
     if (device_param->is_hip == true)
     {
@@ -6014,7 +5771,7 @@ int run_kernel_mp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
 
   if (device_param->is_hip == true)
   {
-    HIPfunction hip_function = NULL;
+    hipFunction_t hip_function = NULL;
 
     void **hip_args = NULL;
 
@@ -6107,7 +5864,7 @@ int run_kernel_tm (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
 
   if (device_param->is_hip == true)
   {
-    HIPfunction hip_function = device_param->hip_function_tm;
+    hipFunction_t hip_function = device_param->hip_function_tm;
 
     if (hc_hipLaunchKernel (hashcat_ctx, hip_function, num_elements / kernel_threads, 1, 1, kernel_threads, 1, 1, 0, device_param->hip_stream, device_param->kernel_params_tm, NULL) == -1) return -1;
 
@@ -6154,7 +5911,7 @@ int run_kernel_amp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
   {
     num_elements = CEILDIV (num_elements, kernel_threads);
 
-    HIPfunction hip_function = device_param->hip_function_amp;
+    hipFunction_t hip_function = device_param->hip_function_amp;
 
     if (hc_hipLaunchKernel (hashcat_ctx, hip_function, num_elements, 1, 1, kernel_threads, 1, 1, 0, device_param->hip_stream, device_param->kernel_params_amp, NULL) == -1) return -1;
 
@@ -6205,7 +5962,7 @@ int run_kernel_decompress (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device
   {
     num_elements = CEILDIV (num_elements, kernel_threads);
 
-    HIPfunction hip_function = device_param->hip_function_decompress;
+    hipFunction_t hip_function = device_param->hip_function_decompress;
 
     if (hc_hipLaunchKernel (hashcat_ctx, hip_function, num_elements, 1, 1, kernel_threads, 1, 1, 0, device_param->hip_stream, device_param->kernel_params_decompress, NULL) == -1) return -1;
 
@@ -7407,38 +7164,17 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx)
 
     if ((rc_hip_init == 0) && (rc_hiprtc_init == 0))
     {
-      // hiprtc version
-
-      int hiprtc_major = 0;
-      int hiprtc_minor = 0;
-
-      if (hc_hiprtcVersion (hashcat_ctx, &hiprtc_major, &hiprtc_minor) == -1) return -1;
-
-      int hiprtc_driver_version = (hiprtc_major * 1000) + (hiprtc_minor * 10);
-
-      backend_ctx->hiprtc_driver_version = hiprtc_driver_version;
-
-      if (hiprtc_driver_version < 9000)
-      {
-        event_log_error (hashcat_ctx, "Outdated AMD HIPRTC driver version '%d' detected!", hiprtc_driver_version);
-
-        event_log_warning (hashcat_ctx, "See hashcat.net for officially supported AMD HIP versions.");
-        event_log_warning (hashcat_ctx, NULL);
-
-        return -1;
-      }
-
       // hip version
 
-      int hip_driver_version = 10000;
+      int hip_driverVersion;
 
-      //if (hc_hipDriverGetVersion (hashcat_ctx, &hip_driver_version) == -1) return -1;
+      if (hc_hipDriverGetVersion (hashcat_ctx, &hip_driverVersion) == -1) return -1;
 
-      backend_ctx->hip_driver_version = hip_driver_version;
+      backend_ctx->hip_driverVersion = hip_driverVersion;
 
-      if (hip_driver_version < 9000)
+      if (hip_driverVersion < 404)
       {
-        event_log_error (hashcat_ctx, "Outdated AMD HIP driver version '%d' detected!", hip_driver_version);
+        event_log_error (hashcat_ctx, "Outdated AMD HIP driver version '%d' detected!", hip_driverVersion);
 
         event_log_warning (hashcat_ctx, "See hashcat.net for officially supported AMD HIP versions.");
         event_log_warning (hashcat_ctx, NULL);
@@ -8289,7 +8025,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
       backend_ctx->backend_device_from_hip[hip_devices_idx] = backend_devices_idx;
 
-      HIPdevice hip_device;
+      hipDevice_t hip_device;
 
       if (hc_hipDeviceGet (hashcat_ctx, &hip_device, hip_devices_idx) == -1)
       {
@@ -8328,7 +8064,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
       int device_processors = 0;
 
-      if (hc_hipDeviceGetAttribute (hashcat_ctx, &device_processors, HIP_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, hip_device) == -1)
+      if (hc_hipDeviceGetAttribute (hashcat_ctx, &device_processors, hipDeviceAttributeMultiprocessorCount, hip_device) == -1)
       {
         device_param->skipped = true;
         continue;
@@ -8356,7 +8092,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
       int hip_warp_size = 0;
 
-      if (hc_hipDeviceGetAttribute (hashcat_ctx, &hip_warp_size, HIP_DEVICE_ATTRIBUTE_WARP_SIZE, hip_device) == -1)
+      if (hc_hipDeviceGetAttribute (hashcat_ctx, &hip_warp_size, hipDeviceAttributeWarpSize, hip_device) == -1)
       {
         device_param->skipped = true;
         continue;
@@ -8369,13 +8105,13 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
       int sm_major = 0;
       int sm_minor = 0;
 
-      if (hc_hipDeviceGetAttribute (hashcat_ctx, &sm_major, HIP_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, hip_device) == -1)
+      if (hc_hipDeviceGetAttribute (hashcat_ctx, &sm_major, hipDeviceAttributeComputeCapabilityMajor, hip_device) == -1)
       {
         device_param->skipped = true;
         continue;
       }
 
-      if (hc_hipDeviceGetAttribute (hashcat_ctx, &sm_minor, HIP_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, hip_device) == -1)
+      if (hc_hipDeviceGetAttribute (hashcat_ctx, &sm_minor, hipDeviceAttributeComputeCapabilityMinor, hip_device) == -1)
       {
         device_param->skipped = true;
         continue;
@@ -8388,7 +8124,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
       int device_maxworkgroup_size = 0;
 
-      if (hc_hipDeviceGetAttribute (hashcat_ctx, &device_maxworkgroup_size, HIP_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, hip_device) == -1)
+      if (hc_hipDeviceGetAttribute (hashcat_ctx, &device_maxworkgroup_size, hipDeviceAttributeMaxThreadsPerBlock, hip_device) == -1)
       {
         device_param->skipped = true;
         continue;
@@ -8400,7 +8136,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
       int device_maxclock_frequency = 0;
 
-      if (hc_hipDeviceGetAttribute (hashcat_ctx, &device_maxclock_frequency, HIP_DEVICE_ATTRIBUTE_CLOCK_RATE, hip_device) == -1)
+      if (hc_hipDeviceGetAttribute (hashcat_ctx, &device_maxclock_frequency, hipDeviceAttributeClockRate, hip_device) == -1)
       {
         device_param->skipped = true;
         continue;
@@ -8414,19 +8150,20 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
       int pci_bus_id_nv     = 0;
       int pci_slot_id_nv    = 0;
 
-      if (hc_hipDeviceGetAttribute (hashcat_ctx, &pci_domain_id_nv, HIP_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID, hip_device) == -1)
-      {
-        device_param->skipped = true;
-        continue;
-      }
+      // Not supported by HIP
+      //if (hc_hipDeviceGetAttribute (hashcat_ctx, &pci_domain_id_nv, hipDeviceAttributePciDomainID, hip_device) == -1)
+      //{
+      //  device_param->skipped = true;
+      //  continue;
+      //}
 
-      if (hc_hipDeviceGetAttribute (hashcat_ctx, &pci_bus_id_nv, HIP_DEVICE_ATTRIBUTE_PCI_BUS_ID, hip_device) == -1)
+      if (hc_hipDeviceGetAttribute (hashcat_ctx, &pci_bus_id_nv, hipDeviceAttributePciBusId, hip_device) == -1)
       {
         device_param->skipped = true;
         continue;
       }
 
-      if (hc_hipDeviceGetAttribute (hashcat_ctx, &pci_slot_id_nv, HIP_DEVICE_ATTRIBUTE_PCI_DEVICE_ID, hip_device) == -1)
+      if (hc_hipDeviceGetAttribute (hashcat_ctx, &pci_slot_id_nv, hipDeviceAttributePciDeviceId, hip_device) == -1)
       {
         device_param->skipped = true;
         continue;
@@ -8434,6 +8171,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
       device_param->pcie_domain   = (u8) (pci_domain_id_nv);
       device_param->pcie_bus      = (u8) (pci_bus_id_nv);
+
       device_param->pcie_device   = (u8) (pci_slot_id_nv >> 3);
       device_param->pcie_function = (u8) (pci_slot_id_nv & 7);
 
@@ -8441,7 +8179,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
       int kernel_exec_timeout = 0;
 
-      if (hc_hipDeviceGetAttribute (hashcat_ctx, &kernel_exec_timeout, HIP_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT, hip_device) == -1)
+      if (hc_hipDeviceGetAttribute (hashcat_ctx, &kernel_exec_timeout, hipDeviceAttributeKernelExecTimeout, hip_device) == -1)
       {
         device_param->skipped = true;
         continue;
@@ -8453,7 +8191,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
       int warp_size = 0;
 
-      if (hc_hipDeviceGetAttribute (hashcat_ctx, &warp_size, HIP_DEVICE_ATTRIBUTE_WARP_SIZE, hip_device) == -1)
+      if (hc_hipDeviceGetAttribute (hashcat_ctx, &warp_size, hipDeviceAttributeWarpSize, hip_device) == -1)
       {
         device_param->skipped = true;
         continue;
@@ -8465,7 +8203,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
       int max_shared_memory_per_block = 0;
 
-      if (hc_hipDeviceGetAttribute (hashcat_ctx, &max_shared_memory_per_block, HIP_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN, hip_device) == -1)
+      if (hc_hipDeviceGetAttribute (hashcat_ctx, &max_shared_memory_per_block, hipDeviceAttributeMaxSharedMemoryPerBlock, hip_device) == -1)
       {
         device_param->skipped = true;
         continue;
@@ -8484,7 +8222,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
       int device_max_constant_buffer_size = 0;
 
-      if (hc_hipDeviceGetAttribute (hashcat_ctx, &device_max_constant_buffer_size, HIP_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY, hip_device) == -1)
+      if (hc_hipDeviceGetAttribute (hashcat_ctx, &device_max_constant_buffer_size, hipDeviceAttributeTotalConstantMemory, hip_device) == -1)
       {
         device_param->skipped = true;
         continue;
@@ -8577,13 +8315,6 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
       // instruction set
 
-      // bcrypt optimization?
-      //const int rc_cuCtxSetCacheConfig = hc_hipCtxSetCacheConfig (hashcat_ctx, HIP_FUNC_CACHE_PREFER_SHARED);
-      //
-      //if (rc_cuCtxSetCacheConfig == -1) return -1;
-
-      // const int sm = (device_param->sm_major * 10) + device_param->sm_minor;
-
       device_param->has_add   = false;
       device_param->has_addc  = false;
       device_param->has_sub   = false;
@@ -8595,9 +8326,9 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
       // device_available_mem
 
-      HIPcontext hip_context;
+      hipCtx_t hip_context;
 
-      if (hc_hipCtxCreate (hashcat_ctx, &hip_context, HIP_CTX_SCHED_BLOCKING_SYNC, device_param->hip_device) == -1)
+      if (hc_hipCtxCreate (hashcat_ctx, &hip_context, hipDeviceScheduleBlockingSync, device_param->hip_device) == -1)
       {
         device_param->skipped = true;
         continue;
@@ -10280,7 +10011,7 @@ static int get_cuda_kernel_local_mem_size (hashcat_ctx_t *hashcat_ctx, CUfunctio
   return 0;
 }
 
-static int get_hip_kernel_wgs (hashcat_ctx_t *hashcat_ctx, HIPfunction function, u32 *result)
+static int get_hip_kernel_wgs (hashcat_ctx_t *hashcat_ctx, hipFunction_t function, u32 *result)
 {
   int max_threads_per_block;
 
@@ -10291,7 +10022,7 @@ static int get_hip_kernel_wgs (hashcat_ctx_t *hashcat_ctx, HIPfunction function,
   return 0;
 }
 
-static int get_hip_kernel_local_mem_size (hashcat_ctx_t *hashcat_ctx, HIPfunction function, u64 *result)
+static int get_hip_kernel_local_mem_size (hashcat_ctx_t *hashcat_ctx, hipFunction_t function, u64 *result)
 {
   int shared_size_bytes;
 
@@ -10410,7 +10141,7 @@ static u32 get_kernel_threads (const hc_device_param_t *device_param)
   return kernel_threads;
 }
 
-static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const char *kernel_name, char *source_file, char *cached_file, const char *build_options_buf, const bool cache_disable, cl_program *opencl_program, CUmodule *cuda_module, HIPmodule *hip_module)
+static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const char *kernel_name, char *source_file, char *cached_file, const char *build_options_buf, const bool cache_disable, cl_program *opencl_program, CUmodule *cuda_module, hipModule_t *hip_module)
 {
   const hashconfig_t    *hashconfig    = hashcat_ctx->hashconfig;
   const folder_config_t *folder_config = hashcat_ctx->folder_config;
@@ -10698,18 +10429,12 @@ static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_p
 
       char **hiprtc_options = (char **) hccalloc (7 + strlen (build_options_buf) + 1, sizeof (char *)); // ...
 
-      //hiprtc_options[0] = "--restrict";
-      //hiprtc_options[1] = "--device-as-default-execution-space";
-      //hiprtc_options[2] = "--gpu-architecture";
-
       hc_asprintf (&hiprtc_options[0], "--gpu-max-threads-per-block=%d", (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : device_param->kernel_preferred_wgs_multiple);
 
-      //hiprtc_options[0] = "--gpu-max-threads-per-block=64";
       hiprtc_options[1] = "-nocudainc";
       hiprtc_options[2] = "-nocudalib";
       hiprtc_options[3] = "";
       hiprtc_options[4] = "";
-
       hiprtc_options[5] = "-I";
       hiprtc_options[6] = folder_config->cpath_real;
 
@@ -10772,137 +10497,27 @@ static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_p
 
       int mod_cnt = 6;
 
-      HIPjit_option mod_opts[7];
-      void *mod_vals[7];
+      hipJitOption mod_opts[6];
+      void *mod_vals[6];
 
-      mod_opts[0] = HIP_JIT_TARGET_FROM_HIPCONTEXT;
+      mod_opts[0] = hipJitOptionTargetFromContext;
       mod_vals[0] = (void *) 0;
 
-      mod_opts[1] = HIP_JIT_LOG_VERBOSE;
+      mod_opts[1] = hipJitOptionLogVerbose;
       mod_vals[1] = (void *) 1;
 
-      mod_opts[2] = HIP_JIT_INFO_LOG_BUFFER;
+      mod_opts[2] = hipJitOptionInfoLogBuffer;
       mod_vals[2] = (void *) mod_info_log;
 
-      mod_opts[3] = HIP_JIT_INFO_LOG_BUFFER_SIZE_BYTES;
+      mod_opts[3] = hipJitOptionInfoLogBufferSizeBytes;
       mod_vals[3] = (void *) LOG_SIZE;
 
-      mod_opts[4] = HIP_JIT_ERROR_LOG_BUFFER;
+      mod_opts[4] = hipJitOptionErrorLogBuffer;
       mod_vals[4] = (void *) mod_error_log;
 
-      mod_opts[5] = HIP_JIT_ERROR_LOG_BUFFER_SIZE_BYTES;
+      mod_opts[5] = hipJitOptionErrorLogBufferSizeBytes;
       mod_vals[5] = (void *) LOG_SIZE;
 
-      if (hashconfig->opti_type & OPTI_TYPE_REGISTER_LIMIT)
-      {
-        mod_opts[6] = HIP_JIT_MAX_REGISTERS;
-        mod_vals[6] = (void *) 128;
-
-        mod_cnt++;
-      }
-
-      #if defined (WITH_HIPBIN)
-
-      char *jit_info_log  = (char *) hcmalloc (LOG_SIZE + 1);
-      char *jit_error_log = (char *) hcmalloc (LOG_SIZE + 1);
-
-      int jit_cnt = 6;
-
-      HIPjit_option jit_opts[7];
-      void *jit_vals[7];
-
-      jit_opts[0] = HIP_JIT_TARGET_FROM_HIPCONTEXT;
-      jit_vals[0] = (void *) 0;
-
-      jit_opts[1] = HIP_JIT_LOG_VERBOSE;
-      jit_vals[1] = (void *) 1;
-
-      jit_opts[2] = HIP_JIT_INFO_LOG_BUFFER;
-      jit_vals[2] = (void *) jit_info_log;
-
-      jit_opts[3] = HIP_JIT_INFO_LOG_BUFFER_SIZE_BYTES;
-      jit_vals[3] = (void *) LOG_SIZE;
-
-      jit_opts[4] = HIP_JIT_ERROR_LOG_BUFFER;
-      jit_vals[4] = (void *) jit_error_log;
-
-      jit_opts[5] = HIP_JIT_ERROR_LOG_BUFFER_SIZE_BYTES;
-      jit_vals[5] = (void *) LOG_SIZE;
-
-      if (hashconfig->opti_type & OPTI_TYPE_REGISTER_LIMIT)
-      {
-        jit_opts[6] = HIP_JIT_MAX_REGISTERS;
-        jit_vals[6] = (void *) 128;
-
-        jit_cnt++;
-      }
-
-      HIPlinkState state;
-
-      if (hc_cuLinkCreate (hashcat_ctx, jit_cnt, jit_opts, jit_vals, &state) == -1)
-      {
-        event_log_error (hashcat_ctx, "* Device #%u: Kernel %s link failed. Error Log:", device_param->device_id + 1, source_file);
-        event_log_error (hashcat_ctx, "%s", jit_error_log);
-        event_log_error (hashcat_ctx, NULL);
-
-        return false;
-      }
-
-      if (hc_cuLinkAddData (hashcat_ctx, state, HIP_JIT_INPUT_CODE, binary, binary_size, kernel_name, 0, NULL, NULL) == -1)
-      {
-        event_log_error (hashcat_ctx, "* Device #%u: Kernel %s link failed. Error Log:", device_param->device_id + 1, source_file);
-        event_log_error (hashcat_ctx, "%s", jit_error_log);
-        event_log_error (hashcat_ctx, NULL);
-
-        return false;
-      }
-
-      void *cubin = NULL;
-
-      size_t cubin_size = 0;
-
-      if (hc_cuLinkComplete (hashcat_ctx, state, &cubin, &cubin_size) == -1)
-      {
-        event_log_error (hashcat_ctx, "* Device #%u: Kernel %s link failed. Error Log:", device_param->device_id + 1, source_file);
-        event_log_error (hashcat_ctx, "%s", jit_error_log);
-        event_log_error (hashcat_ctx, NULL);
-
-        return false;
-      }
-
-      #if defined (DEBUG)
-      event_log_info (hashcat_ctx, "* Device #%u: Kernel %s link successful. Info Log:", device_param->device_id + 1, source_file);
-      event_log_info (hashcat_ctx, "%s", jit_info_log);
-      event_log_info (hashcat_ctx, NULL);
-      #endif
-
-      if (hc_cuModuleLoadDataEx (hashcat_ctx, hip_module, cubin, mod_cnt, mod_opts, mod_vals) == -1)
-      {
-        event_log_error (hashcat_ctx, "* Device #%u: Kernel %s load failed. Error Log:", device_param->device_id + 1, source_file);
-        event_log_error (hashcat_ctx, "%s", mod_error_log);
-        event_log_error (hashcat_ctx, NULL);
-
-        return false;
-      }
-
-      #if defined (DEBUG)
-      event_log_info (hashcat_ctx, "* Device #%u: Kernel %s load successful. Info Log:", device_param->device_id + 1, source_file);
-      event_log_info (hashcat_ctx, "%s", mod_info_log);
-      event_log_info (hashcat_ctx, NULL);
-      #endif
-
-      if (cache_disable == false)
-      {
-        if (write_kernel_binary (hashcat_ctx, cached_file, cubin, cubin_size) == false) return false;
-      }
-
-      if (hc_hipLinkDestroy (hashcat_ctx, state) == -1) return false;
-
-      hcfree (jit_info_log);
-      hcfree (jit_error_log);
-
-      #else
-
       if (hc_hipModuleLoadDataEx (hashcat_ctx, hip_module, binary, mod_cnt, mod_opts, mod_vals) == -1)
       {
         event_log_error (hashcat_ctx, "* Device #%u: Kernel %s load failed. Error Log:", device_param->device_id + 1, source_file);
@@ -10923,8 +10538,6 @@ static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_p
         if (write_kernel_binary (hashcat_ctx, cached_file, binary, binary_size) == false) return false;
       }
 
-      #endif
-
       hcfree (mod_info_log);
       hcfree (mod_error_log);
 
@@ -11072,35 +10685,27 @@ static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_p
 
       int mod_cnt = 6;
 
-      HIPjit_option mod_opts[7];
-      void *mod_vals[7];
+      hipJitOption mod_opts[6];
+      void *mod_vals[6];
 
-      mod_opts[0] = HIP_JIT_TARGET_FROM_HIPCONTEXT;
+      mod_opts[0] = hipJitOptionTargetFromContext;
       mod_vals[0] = (void *) 0;
 
-      mod_opts[1] = HIP_JIT_LOG_VERBOSE;
+      mod_opts[1] = hipJitOptionLogVerbose;
       mod_vals[1] = (void *) 1;
 
-      mod_opts[2] = HIP_JIT_INFO_LOG_BUFFER;
+      mod_opts[2] = hipJitOptionInfoLogBuffer;
       mod_vals[2] = (void *) mod_info_log;
 
-      mod_opts[3] = HIP_JIT_INFO_LOG_BUFFER_SIZE_BYTES;
+      mod_opts[3] = hipJitOptionInfoLogBufferSizeBytes;
       mod_vals[3] = (void *) LOG_SIZE;
 
-      mod_opts[4] = HIP_JIT_ERROR_LOG_BUFFER;
+      mod_opts[4] = hipJitOptionErrorLogBuffer;
       mod_vals[4] = (void *) mod_error_log;
 
-      mod_opts[5] = HIP_JIT_ERROR_LOG_BUFFER_SIZE_BYTES;
+      mod_opts[5] = hipJitOptionErrorLogBufferSizeBytes;
       mod_vals[5] = (void *) LOG_SIZE;
 
-      if (hashconfig->opti_type & OPTI_TYPE_REGISTER_LIMIT)
-      {
-        mod_opts[6] = HIP_JIT_MAX_REGISTERS;
-        mod_vals[6] = (void *) 128;
-
-        mod_cnt++;
-      }
-
       if (hc_hipModuleLoadDataEx (hashcat_ctx, hip_module, kernel_sources[0], mod_cnt, mod_opts, mod_vals) == -1)
       {
         event_log_error (hashcat_ctx, "* Device #%u: Kernel %s load failed. Error Log:", device_param->device_id + 1, source_file);
@@ -11491,7 +11096,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
     if (device_param->is_hip == true)
     {
-      if (hc_hipCtxCreate (hashcat_ctx, &device_param->hip_context, HIP_CTX_SCHED_BLOCKING_SYNC, device_param->hip_device) == -1)
+      if (hc_hipCtxCreate (hashcat_ctx, &device_param->hip_context, hipDeviceScheduleBlockingSync, device_param->hip_device) == -1)
       {
         device_param->skipped = true;
         continue;
@@ -11555,7 +11160,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
     if (device_param->is_hip == true)
     {
-      if (hc_hipStreamCreate (hashcat_ctx, &device_param->hip_stream, HIP_STREAM_DEFAULT) == -1)
+      if (hc_hipStreamCreate (hashcat_ctx, &device_param->hip_stream, hipStreamDefault) == -1)
       {
         device_param->skipped = true;
         continue;
@@ -11587,13 +11192,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
     if (device_param->is_hip == true)
     {
-      if (hc_hipEventCreate (hashcat_ctx, &device_param->hip_event1, HIP_EVENT_BLOCKING_SYNC) == -1)
+      if (hc_hipEventCreate (hashcat_ctx, &device_param->hip_event1, hipEventBlockingSync) == -1)
       {
         device_param->skipped = true;
         continue;
       }
 
-      if (hc_hipEventCreate (hashcat_ctx, &device_param->hip_event2, HIP_EVENT_BLOCKING_SYNC) == -1)
+      if (hc_hipEventCreate (hashcat_ctx, &device_param->hip_event2, hipEventBlockingSync) == -1)
       {
         device_param->skipped = true;
         continue;
@@ -11781,7 +11386,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
     const size_t dnclen_amp_mp = snprintf (device_name_chksum_amp_mp, HCBUFSIZ_TINY, "%d-%d-%d-%d-%u-%s-%s-%s-%u",
       backend_ctx->comptime,
       backend_ctx->cuda_driver_version,
-      backend_ctx->hip_driver_version,
+      backend_ctx->hip_driverVersion,
       device_param->is_opencl,
       device_param->opencl_platform_vendor_id,
       device_param->device_name,
@@ -12109,7 +11714,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       const size_t dnclen = snprintf (device_name_chksum, HCBUFSIZ_TINY, "%d-%d-%d-%d-%u-%s-%s-%s-%d-%u-%u-%u-%s",
         backend_ctx->comptime,
         backend_ctx->cuda_driver_version,
-        backend_ctx->hip_driver_version,
+        backend_ctx->hip_driverVersion,
         device_param->is_opencl,
         device_param->opencl_platform_vendor_id,
         device_param->device_name,
diff --git a/src/terminal.c b/src/terminal.c
index 167b8b821..397eace9f 100644
--- a/src/terminal.c
+++ b/src/terminal.c
@@ -818,9 +818,9 @@ void backend_info (hashcat_ctx_t *hashcat_ctx)
     event_log_info (hashcat_ctx, NULL);
 
     int hip_devices_cnt    = backend_ctx->hip_devices_cnt;
-    int hip_driver_version = backend_ctx->hip_driver_version;
+    int hip_driverVersion  = backend_ctx->hip_driverVersion;
 
-    event_log_info (hashcat_ctx, "HIP.Version.: %d.%d", hip_driver_version / 1000, (hip_driver_version % 100) / 10);
+    event_log_info (hashcat_ctx, "HIP.Version.: %d.%d", hip_driverVersion / 100, hip_driverVersion % 10);
     event_log_info (hashcat_ctx, NULL);
 
     for (int hip_devices_idx = 0; hip_devices_idx < hip_devices_cnt; hip_devices_idx++)
@@ -1014,10 +1014,10 @@ void backend_info_compact (hashcat_ctx_t *hashcat_ctx)
 
   if (backend_ctx->hip)
   {
-    int hip_devices_cnt    = backend_ctx->hip_devices_cnt;
-    int hip_driver_version = backend_ctx->hip_driver_version;
+    int hip_devices_cnt   = backend_ctx->hip_devices_cnt;
+    int hip_driverVersion = backend_ctx->hip_driverVersion;
 
-    const size_t len = event_log_info (hashcat_ctx, "HIP API (HIP %d.%d)", hip_driver_version / 1000, (hip_driver_version % 100) / 10);
+    const size_t len = event_log_info (hashcat_ctx, "HIP API (HIP %d.%d)", hip_driverVersion / 100, hip_driverVersion % 10);
 
     char line[HCBUFSIZ_TINY] = { 0 };
 

From 3f6c5a0042c3c8cd7e1c95708761b1998ad7279f Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Fri, 23 Jul 2021 21:09:55 +0200
Subject: [PATCH 05/24] Update module_unstable_warning() for -m 172xx on HIP

---
 OpenCL/inc_platform.cl     | 13 +++++++------
 OpenCL/inc_types.h         |  2 +-
 OpenCL/inc_vendor.h        |  4 ----
 OpenCL/inc_zip_inflate.cl  |  4 +---
 src/modules/module_17200.c |  5 -----
 src/modules/module_17220.c |  5 -----
 src/modules/module_17225.c |  5 -----
 7 files changed, 9 insertions(+), 29 deletions(-)

diff --git a/OpenCL/inc_platform.cl b/OpenCL/inc_platform.cl
index 47cfc9a84..8ccb034aa 100644
--- a/OpenCL/inc_platform.cl
+++ b/OpenCL/inc_platform.cl
@@ -208,19 +208,20 @@ DECLSPEC u32 hc_atomic_or (GLOBAL_AS u32 *p, volatile const u32 val)
   return atomicOr (p, val);
 }
 
-DECLSPEC size_t get_global_id  (const u32 dimindx)
+DECLSPEC size_t get_global_id  (const u32 dimindx __attribute__((unused)))
 {
-  return (__ockl_get_group_id (dimindx) * __ockl_get_local_size (dimindx)) + __ockl_get_local_id (dimindx);
+  return (blockIdx.x * blockDim.x) + threadIdx.x; // x dimension only; dimindx is ignored
 }
 
-DECLSPEC size_t get_local_id (const u32 dimindx)
+DECLSPEC size_t get_local_id (const u32 dimindx __attribute__((unused)))
 {
-  return __ockl_get_local_id (dimindx);
+  return threadIdx.x; // x dimension only; dimindx is ignored
 }
 
-DECLSPEC size_t get_local_size (const u32 dimindx)
+DECLSPEC size_t get_local_size (const u32 dimindx __attribute__((unused)))
 {
-  return __ockl_get_local_size (dimindx);
+  // TODO: verify that blockDim.x is the right replacement for __ockl_get_local_size (dimindx); dimindx is ignored here
+  return blockDim.x;
 }
 
 DECLSPEC u32x rotl32 (const u32x a, const int n)
diff --git a/OpenCL/inc_types.h b/OpenCL/inc_types.h
index 4caf595de..295caabc0 100644
--- a/OpenCL/inc_types.h
+++ b/OpenCL/inc_types.h
@@ -833,7 +833,7 @@ typedef __device_builtin__ struct u64x u64x;
 typedef VTYPE(uchar,  VECT_SIZE) u8x;
 typedef VTYPE(ushort, VECT_SIZE) u16x;
 typedef VTYPE(uint,   VECT_SIZE) u32x;
-typedef VTYPE(ulong,  VECT_SIZE) u64x;
+typedef VTYPE(ullong, VECT_SIZE) u64x;
 
 #define make_u8x  (u8x)
 #define make_u16x (u16x)
diff --git a/OpenCL/inc_vendor.h b/OpenCL/inc_vendor.h
index d44ac87c9..bbd7e23d8 100644
--- a/OpenCL/inc_vendor.h
+++ b/OpenCL/inc_vendor.h
@@ -32,10 +32,6 @@
 #define LOCAL_AS
 #define KERNEL_FQ   extern "C" __global__
 #elif defined IS_HIP
-#define __device__   __attribute__((device))
-#define __constant__ __attribute__((constant))
-#define __shared__   __attribute__((shared))
-#define __global__   __attribute__((global))
 #define CONSTANT_VK __constant__
 #define CONSTANT_AS
 #define GLOBAL_AS
diff --git a/OpenCL/inc_zip_inflate.cl b/OpenCL/inc_zip_inflate.cl
index d05f6a792..fcc31e76b 100644
--- a/OpenCL/inc_zip_inflate.cl
+++ b/OpenCL/inc_zip_inflate.cl
@@ -73,18 +73,16 @@ enum{
     MZ_VERSION_ERROR = -6,
     MZ_PARAM_ERROR = -10000
 };
-typedef unsigned long mz_ulong;
+typedef ullong mz_ulong;
 
 #ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES
 typedef unsigned char Byte;
 typedef unsigned int uInt;
-typedef mz_ulong uLong;
 typedef Byte Bytef;
 typedef uInt uIntf;
 typedef char charf;
 typedef int intf;
 typedef void *voidpf;
-typedef uLong uLongf;
 typedef void *voidp;
 typedef void *const voidpc;
 #define Z_NULL 0
diff --git a/src/modules/module_17200.c b/src/modules/module_17200.c
index 44798110d..fa510ba27 100644
--- a/src/modules/module_17200.c
+++ b/src/modules/module_17200.c
@@ -170,11 +170,6 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE
   // it leads to CL_KERNEL_WORK_GROUP_SIZE to return 0 and later we will divide with 0
   // workaround would be to rewrite kernel to use global memory
 
-  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
-  {
-    return true;
-  }
-
   if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)
   {
     return true;
diff --git a/src/modules/module_17220.c b/src/modules/module_17220.c
index 9028040d9..8f1beaf1a 100644
--- a/src/modules/module_17220.c
+++ b/src/modules/module_17220.c
@@ -170,11 +170,6 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE
   // it leads to CL_KERNEL_WORK_GROUP_SIZE to return 0 and later we will divide with 0
   // workaround would be to rewrite kernel to use global memory
 
-  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
-  {
-    return true;
-  }
-
   if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)
   {
     return true;
diff --git a/src/modules/module_17225.c b/src/modules/module_17225.c
index 75c376c9e..3b3291d5f 100644
--- a/src/modules/module_17225.c
+++ b/src/modules/module_17225.c
@@ -170,11 +170,6 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE
   // it leads to CL_KERNEL_WORK_GROUP_SIZE to return 0 and later we will divide with 0
   // workaround would be to rewrite kernel to use global memory
 
-  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
-  {
-    return true;
-  }
-
   if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)
   {
     return true;

From 7c8d7f783191b9892fe3854790687b8bba5607a6 Mon Sep 17 00:00:00 2001
From: Gabriele Gristina <matrix@users.noreply.github.com>
Date: Sat, 24 Jul 2021 17:03:50 +0200
Subject: [PATCH 06/24] Added hash-mode: SNMPv3 HMAC-MD5-96/HMAC-SHA1-96

---
 OpenCL/m25000-pure.cl        | 577 +++++++++++++++++++++++++++++++++++
 docs/changes.txt             |   1 +
 docs/readme.txt              |   1 +
 src/modules/module_25000.c   | 333 ++++++++++++++++++++
 tools/test_modules/m25000.pm | 116 +++++++
 5 files changed, 1028 insertions(+)
 create mode 100644 OpenCL/m25000-pure.cl
 create mode 100644 src/modules/module_25000.c
 create mode 100644 tools/test_modules/m25000.pm

diff --git a/OpenCL/m25000-pure.cl b/OpenCL/m25000-pure.cl
new file mode 100644
index 000000000..33c1dcc8f
--- /dev/null
+++ b/OpenCL/m25000-pure.cl
@@ -0,0 +1,577 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+//#define NEW_SIMD_CODE
+
+#ifdef KERNEL_STATIC
+#include "inc_vendor.h"
+#include "inc_types.h"
+#include "inc_platform.cl"
+#include "inc_common.cl"
+#include "inc_simd.cl"
+#include "inc_hash_md5.cl"
+#include "inc_hash_sha1.cl"
+#endif
+
+#define COMPARE_S "inc_comp_single.cl"
+#define COMPARE_M "inc_comp_multi.cl"
+
+#define SNMPV3_SALT_MAX             1500
+#define SNMPV3_ENGINEID_MAX         32
+#define SNMPV3_MSG_AUTH_PARAMS_MAX  12
+#define SNMPV3_ROUNDS               1048576 // same value as the literal 1048576 used for the length padding in m25000_comp
+#define SNMPV3_MAX_PW_LENGTH        64 // NOTE(review): not referenced in this file; SNMPV3_TMP_ELEMS below is sized for 256 bytes
+
+#define SNMPV3_TMP_ELEMS            4096 // 4096 = (256 (max pw length) * 64) / sizeof (u32)
+#define SNMPV3_HASH_ELEMS_MD5       4 // 4 MD5 state words
+#define SNMPV3_HASH_ELEMS_SHA1      8 // 8 = 5 SHA1 state words, padded to 8
+
+#define SNMPV3_MAX_SALT_ELEMS       512 // 512 * 4 = 2048 > 1500, also has to be multiple of 64
+#define SNMPV3_MAX_ENGINE_ELEMS     16  // 16 * 4 = 64 > 32, also has to be multiple of 64
+#define SNMPV3_MAX_PNUM_ELEMS       4   // 4 * 4 = 16 > 9
+
+typedef struct hmac_md5_tmp
+{
+  u32 tmp_md5[SNMPV3_TMP_ELEMS];  // password repeated 64 times, words as assembled by m25000_init
+  u32 tmp_sha1[SNMPV3_TMP_ELEMS]; // same words, each passed through hc_swap32_S by m25000_init
+
+  u32 h_md5[SNMPV3_HASH_ELEMS_MD5];   // running MD5 state, carried between m25000_init/_loop/_comp
+  u32 h_sha1[SNMPV3_HASH_ELEMS_SHA1]; // running SHA1 state; only elements 0..4 are read or written
+
+} hmac_md5_tmp_t;
+
+typedef struct snmpv3
+{
+  u32 salt_buf[SNMPV3_MAX_SALT_ELEMS]; // message fed to the HMAC in m25000_comp
+  u32 salt_len;                        // length passed along with salt_buf to the HMAC update
+
+  u32 engineID_buf[SNMPV3_MAX_ENGINE_ELEMS]; // hashed between two copies of the password digest in m25000_comp
+  u32 engineID_len;                          // length passed along with engineID_buf to the hash update
+
+  u32 packet_number[SNMPV3_MAX_PNUM_ELEMS]; // not referenced by the kernels in this file
+
+} snmpv3_t;
+
+KERNEL_FQ void m25000_init (KERN_ATTR_TMPS_ESALT (hmac_md5_tmp_t, snmpv3_t))
+{
+  /**
+   * init: expand the candidate password into tmps[gid] and seed the MD5/SHA1 states
+   */
+
+  const u64 gid = get_global_id (0);
+
+  if (gid >= gid_max) return;
+
+  /**
+   * base: copy the password words into a zeroed private buffer
+   */
+
+  const u32 pw_len = pws[gid].pw_len;
+
+  u32 w[64] = { 0 }; // 64 * 4 = 256 bytes
+
+  for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
+  {
+    w[idx] = pws[gid].i[idx];
+  }
+
+  u8 *src_ptr = (u8 *) w;
+
+  // password repeated 64 times; tmp_md5 gets the words as assembled, tmp_sha1 gets them through hc_swap32_S
+
+  u32 dst_buf[16]; // 64-byte staging block, fully rewritten before every flush
+
+  u8 *dst_ptr = (u8 *) dst_buf;
+
+  int tmp_idx = 0; // byte position in the expanded (64x repeated) password
+
+  for (int i = 0; i < 64; i++)
+  {
+    for (int j = 0; j < pw_len; j++)
+    {
+      const int dst_idx = tmp_idx & 63; // byte position inside the staging block
+
+      dst_ptr[dst_idx] = src_ptr[j];
+
+      // flush the staging block to global memory each time 64 bytes have been collected
+
+      if (dst_idx == 63)
+      {
+        const int tmp_idx4 = (tmp_idx - 63) / 4; // u32 index of the first word of this block
+
+        // md5
+
+        tmps[gid].tmp_md5[tmp_idx4 +  0] = dst_buf[ 0];
+        tmps[gid].tmp_md5[tmp_idx4 +  1] = dst_buf[ 1];
+        tmps[gid].tmp_md5[tmp_idx4 +  2] = dst_buf[ 2];
+        tmps[gid].tmp_md5[tmp_idx4 +  3] = dst_buf[ 3];
+        tmps[gid].tmp_md5[tmp_idx4 +  4] = dst_buf[ 4];
+        tmps[gid].tmp_md5[tmp_idx4 +  5] = dst_buf[ 5];
+        tmps[gid].tmp_md5[tmp_idx4 +  6] = dst_buf[ 6];
+        tmps[gid].tmp_md5[tmp_idx4 +  7] = dst_buf[ 7];
+        tmps[gid].tmp_md5[tmp_idx4 +  8] = dst_buf[ 8];
+        tmps[gid].tmp_md5[tmp_idx4 +  9] = dst_buf[ 9];
+        tmps[gid].tmp_md5[tmp_idx4 + 10] = dst_buf[10];
+        tmps[gid].tmp_md5[tmp_idx4 + 11] = dst_buf[11];
+        tmps[gid].tmp_md5[tmp_idx4 + 12] = dst_buf[12];
+        tmps[gid].tmp_md5[tmp_idx4 + 13] = dst_buf[13];
+        tmps[gid].tmp_md5[tmp_idx4 + 14] = dst_buf[14];
+        tmps[gid].tmp_md5[tmp_idx4 + 15] = dst_buf[15];
+
+        // sha1
+
+        tmps[gid].tmp_sha1[tmp_idx4 +  0] = hc_swap32_S (dst_buf[ 0]);
+        tmps[gid].tmp_sha1[tmp_idx4 +  1] = hc_swap32_S (dst_buf[ 1]);
+        tmps[gid].tmp_sha1[tmp_idx4 +  2] = hc_swap32_S (dst_buf[ 2]);
+        tmps[gid].tmp_sha1[tmp_idx4 +  3] = hc_swap32_S (dst_buf[ 3]);
+        tmps[gid].tmp_sha1[tmp_idx4 +  4] = hc_swap32_S (dst_buf[ 4]);
+        tmps[gid].tmp_sha1[tmp_idx4 +  5] = hc_swap32_S (dst_buf[ 5]);
+        tmps[gid].tmp_sha1[tmp_idx4 +  6] = hc_swap32_S (dst_buf[ 6]);
+        tmps[gid].tmp_sha1[tmp_idx4 +  7] = hc_swap32_S (dst_buf[ 7]);
+        tmps[gid].tmp_sha1[tmp_idx4 +  8] = hc_swap32_S (dst_buf[ 8]);
+        tmps[gid].tmp_sha1[tmp_idx4 +  9] = hc_swap32_S (dst_buf[ 9]);
+        tmps[gid].tmp_sha1[tmp_idx4 + 10] = hc_swap32_S (dst_buf[10]);
+        tmps[gid].tmp_sha1[tmp_idx4 + 11] = hc_swap32_S (dst_buf[11]);
+        tmps[gid].tmp_sha1[tmp_idx4 + 12] = hc_swap32_S (dst_buf[12]);
+        tmps[gid].tmp_sha1[tmp_idx4 + 13] = hc_swap32_S (dst_buf[13]);
+        tmps[gid].tmp_sha1[tmp_idx4 + 14] = hc_swap32_S (dst_buf[14]);
+        tmps[gid].tmp_sha1[tmp_idx4 + 15] = hc_swap32_S (dst_buf[15]);
+     }
+
+      tmp_idx++;
+    }
+  }
+
+  // initial md5 state
+
+  tmps[gid].h_md5[0] = MD5M_A;
+  tmps[gid].h_md5[1] = MD5M_B;
+  tmps[gid].h_md5[2] = MD5M_C;
+  tmps[gid].h_md5[3] = MD5M_D;
+
+  // initial sha1 state (h_sha1[5..7] are left untouched)
+
+  tmps[gid].h_sha1[0] = SHA1M_A;
+  tmps[gid].h_sha1[1] = SHA1M_B;
+  tmps[gid].h_sha1[2] = SHA1M_C;
+  tmps[gid].h_sha1[3] = SHA1M_D;
+  tmps[gid].h_sha1[4] = SHA1M_E;
+}
+
+KERNEL_FQ void m25000_loop (KERN_ATTR_TMPS_ESALT (hmac_md5_tmp_t, snmpv3_t))
+{
+  /**
+   * loop: feed loop_cnt bytes of the expanded password, 64 bytes per step, into md5_transform and sha1_transform
+   */
+
+  const u64 gid = get_global_id (0);
+
+  if (gid >= gid_max) return;
+
+  u32 h_md5[4];
+
+  h_md5[0] = tmps[gid].h_md5[0];
+  h_md5[1] = tmps[gid].h_md5[1];
+  h_md5[2] = tmps[gid].h_md5[2];
+  h_md5[3] = tmps[gid].h_md5[3];
+
+  u32 h_sha1[5];
+
+  h_sha1[0] = tmps[gid].h_sha1[0];
+  h_sha1[1] = tmps[gid].h_sha1[1];
+  h_sha1[2] = tmps[gid].h_sha1[2];
+  h_sha1[3] = tmps[gid].h_sha1[3];
+  h_sha1[4] = tmps[gid].h_sha1[4];
+
+  const u32 pw_len = pws[gid].pw_len;
+
+  const int pw_len64 = pw_len * 64; // NOTE(review): pw_len == 0 makes the "% pw_len64" below a modulo by zero - confirm empty candidates cannot reach this kernel
+
+  #define SNMPV3_TMP_ELEMS_OPT 1024 // 1024 = (64 max pw length * 64) / sizeof (u32)
+                                    // for pw length >= 64 we use global memory reads
+
+  u32 tmp_md5[SNMPV3_TMP_ELEMS_OPT];
+  u32 tmp_sha1[SNMPV3_TMP_ELEMS_OPT];
+
+  if (pw_len < 64)
+  {
+    for (int i = 0; i < pw_len64 / 4; i++) // copy the expanded password from tmps[gid] into the private arrays
+    {
+      tmp_md5[i] = tmps[gid].tmp_md5[i];
+      tmp_sha1[i] = tmps[gid].tmp_sha1[i];
+    }
+
+    for (int i = 0, j = loop_pos; i < loop_cnt; i += 64, j += 64)
+    {
+      const int idx = (j % pw_len64) / 4; // word offset of the 64-byte block at position j, wrapping around the expanded password
+
+      u32 w0[4];
+      u32 w1[4];
+      u32 w2[4];
+      u32 w3[4];
+
+      // md5
+
+      w0[0] = tmp_md5[idx +  0];
+      w0[1] = tmp_md5[idx +  1];
+      w0[2] = tmp_md5[idx +  2];
+      w0[3] = tmp_md5[idx +  3];
+      w1[0] = tmp_md5[idx +  4];
+      w1[1] = tmp_md5[idx +  5];
+      w1[2] = tmp_md5[idx +  6];
+      w1[3] = tmp_md5[idx +  7];
+      w2[0] = tmp_md5[idx +  8];
+      w2[1] = tmp_md5[idx +  9];
+      w2[2] = tmp_md5[idx + 10];
+      w2[3] = tmp_md5[idx + 11];
+      w3[0] = tmp_md5[idx + 12];
+      w3[1] = tmp_md5[idx + 13];
+      w3[2] = tmp_md5[idx + 14];
+      w3[3] = tmp_md5[idx + 15];
+
+      md5_transform (w0, w1, w2, w3, h_md5);
+
+      // sha1
+
+      w0[0] = tmp_sha1[idx +  0];
+      w0[1] = tmp_sha1[idx +  1];
+      w0[2] = tmp_sha1[idx +  2];
+      w0[3] = tmp_sha1[idx +  3];
+      w1[0] = tmp_sha1[idx +  4];
+      w1[1] = tmp_sha1[idx +  5];
+      w1[2] = tmp_sha1[idx +  6];
+      w1[3] = tmp_sha1[idx +  7];
+      w2[0] = tmp_sha1[idx +  8];
+      w2[1] = tmp_sha1[idx +  9];
+      w2[2] = tmp_sha1[idx + 10];
+      w2[3] = tmp_sha1[idx + 11];
+      w3[0] = tmp_sha1[idx + 12];
+      w3[1] = tmp_sha1[idx + 13];
+      w3[2] = tmp_sha1[idx + 14];
+      w3[3] = tmp_sha1[idx + 15];
+
+      sha1_transform (w0, w1, w2, w3, h_sha1);
+    }
+  }
+  else
+  {
+    for (int i = 0, j = loop_pos; i < loop_cnt; i += 64, j += 64)
+    {
+      const int idx = (j % pw_len64) / 4; // same block offset as above, but the words are read straight from tmps[gid]
+
+      u32 w0[4];
+      u32 w1[4];
+      u32 w2[4];
+      u32 w3[4];
+
+      // md5
+
+      w0[0] = tmps[gid].tmp_md5[idx +  0];
+      w0[1] = tmps[gid].tmp_md5[idx +  1];
+      w0[2] = tmps[gid].tmp_md5[idx +  2];
+      w0[3] = tmps[gid].tmp_md5[idx +  3];
+      w1[0] = tmps[gid].tmp_md5[idx +  4];
+      w1[1] = tmps[gid].tmp_md5[idx +  5];
+      w1[2] = tmps[gid].tmp_md5[idx +  6];
+      w1[3] = tmps[gid].tmp_md5[idx +  7];
+      w2[0] = tmps[gid].tmp_md5[idx +  8];
+      w2[1] = tmps[gid].tmp_md5[idx +  9];
+      w2[2] = tmps[gid].tmp_md5[idx + 10];
+      w2[3] = tmps[gid].tmp_md5[idx + 11];
+      w3[0] = tmps[gid].tmp_md5[idx + 12];
+      w3[1] = tmps[gid].tmp_md5[idx + 13];
+      w3[2] = tmps[gid].tmp_md5[idx + 14];
+      w3[3] = tmps[gid].tmp_md5[idx + 15];
+
+      md5_transform (w0, w1, w2, w3, h_md5);
+
+      // sha1
+
+      w0[0] = tmps[gid].tmp_sha1[idx +  0];
+      w0[1] = tmps[gid].tmp_sha1[idx +  1];
+      w0[2] = tmps[gid].tmp_sha1[idx +  2];
+      w0[3] = tmps[gid].tmp_sha1[idx +  3];
+      w1[0] = tmps[gid].tmp_sha1[idx +  4];
+      w1[1] = tmps[gid].tmp_sha1[idx +  5];
+      w1[2] = tmps[gid].tmp_sha1[idx +  6];
+      w1[3] = tmps[gid].tmp_sha1[idx +  7];
+      w2[0] = tmps[gid].tmp_sha1[idx +  8];
+      w2[1] = tmps[gid].tmp_sha1[idx +  9];
+      w2[2] = tmps[gid].tmp_sha1[idx + 10];
+      w2[3] = tmps[gid].tmp_sha1[idx + 11];
+      w3[0] = tmps[gid].tmp_sha1[idx + 12];
+      w3[1] = tmps[gid].tmp_sha1[idx + 13];
+      w3[2] = tmps[gid].tmp_sha1[idx + 14];
+      w3[3] = tmps[gid].tmp_sha1[idx + 15];
+
+      sha1_transform (w0, w1, w2, w3, h_sha1);
+    }
+  }
+
+  tmps[gid].h_md5[0] = h_md5[0]; // write both running states back for the next loop call / m25000_comp
+  tmps[gid].h_md5[1] = h_md5[1];
+  tmps[gid].h_md5[2] = h_md5[2];
+  tmps[gid].h_md5[3] = h_md5[3];
+
+  tmps[gid].h_sha1[0] = h_sha1[0];
+  tmps[gid].h_sha1[1] = h_sha1[1];
+  tmps[gid].h_sha1[2] = h_sha1[2];
+  tmps[gid].h_sha1[3] = h_sha1[3];
+  tmps[gid].h_sha1[4] = h_sha1[4];
+}
+
+KERNEL_FQ void m25000_comp (KERN_ATTR_TMPS_ESALT (hmac_md5_tmp_t, snmpv3_t))
+{
+  /**
+   * comp: close the password hashes, mix in the engineID, then compare HMAC-MD5 and HMAC-SHA1 of the salt with the target
+   */
+
+  const u64 gid = get_global_id (0);
+
+  if (gid >= gid_max) return;
+
+  u32 w0[4];
+  u32 w1[4];
+  u32 w2[4];
+  u32 w3[4];
+
+  // md5: closing block for the password stream (0x80 marker, length field 1048576 * 8)
+
+  w0[0] = 0x00000080;
+  w0[1] = 0;
+  w0[2] = 0;
+  w0[3] = 0;
+  w1[0] = 0;
+  w1[1] = 0;
+  w1[2] = 0;
+  w1[3] = 0;
+  w2[0] = 0;
+  w2[1] = 0;
+  w2[2] = 0;
+  w2[3] = 0;
+  w3[0] = 0;
+  w3[1] = 0;
+  w3[2] = 1048576 * 8;
+  w3[3] = 0;
+
+  u32 h_md5[4];
+
+  h_md5[0] = tmps[gid].h_md5[0];
+  h_md5[1] = tmps[gid].h_md5[1];
+  h_md5[2] = tmps[gid].h_md5[2];
+  h_md5[3] = tmps[gid].h_md5[3];
+
+  md5_transform (w0, w1, w2, w3, h_md5);
+
+  // sha1: closing block for the password stream (marker and length field in the swapped layout)
+
+  w0[0] = 0x80000000;
+  w0[1] = 0;
+  w0[2] = 0;
+  w0[3] = 0;
+  w1[0] = 0;
+  w1[1] = 0;
+  w1[2] = 0;
+  w1[3] = 0;
+  w2[0] = 0;
+  w2[1] = 0;
+  w2[2] = 0;
+  w2[3] = 0;
+  w3[0] = 0;
+  w3[1] = 0;
+  w3[2] = 0;
+  w3[3] = 1048576 * 8;
+
+  u32 h_sha1[5];
+
+  h_sha1[0] = tmps[gid].h_sha1[0];
+  h_sha1[1] = tmps[gid].h_sha1[1];
+  h_sha1[2] = tmps[gid].h_sha1[2];
+  h_sha1[3] = tmps[gid].h_sha1[3];
+  h_sha1[4] = tmps[gid].h_sha1[4];
+
+  sha1_transform (w0, w1, w2, w3, h_sha1);
+
+  md5_ctx_t md5_ctx;   // fresh contexts: they hash password digest + engineID + password digest
+  sha1_ctx_t sha1_ctx;
+
+  md5_init (&md5_ctx);
+  sha1_init (&sha1_ctx);
+
+  u32 w[16];
+
+  // md5: first copy of the password digest (16 bytes)
+
+  w[ 0] = h_md5[0];
+  w[ 1] = h_md5[1];
+  w[ 2] = h_md5[2];
+  w[ 3] = h_md5[3];
+  w[ 4] = 0;
+  w[ 5] = 0;
+  w[ 6] = 0;
+  w[ 7] = 0;
+  w[ 8] = 0;
+  w[ 9] = 0;
+  w[10] = 0;
+  w[11] = 0;
+  w[12] = 0;
+  w[13] = 0;
+  w[14] = 0;
+  w[15] = 0;
+
+  md5_update (&md5_ctx, w, 16);
+
+  // sha1: first copy of the password digest (20 bytes)
+
+  w[ 0] = h_sha1[0];
+  w[ 1] = h_sha1[1];
+  w[ 2] = h_sha1[2];
+  w[ 3] = h_sha1[3];
+  w[ 4] = h_sha1[4];
+  w[ 5] = 0;
+  w[ 6] = 0;
+  w[ 7] = 0;
+  w[ 8] = 0;
+  w[ 9] = 0;
+  w[10] = 0;
+  w[11] = 0;
+  w[12] = 0;
+  w[13] = 0;
+  w[14] = 0;
+  w[15] = 0;
+
+  sha1_update (&sha1_ctx, w, 20);
+
+  // engineID, read from the esalt of the current digest
+
+  md5_update_global (&md5_ctx, esalt_bufs[DIGESTS_OFFSET].engineID_buf, esalt_bufs[DIGESTS_OFFSET].engineID_len);
+
+  sha1_update_global_swap (&sha1_ctx, esalt_bufs[DIGESTS_OFFSET].engineID_buf, esalt_bufs[DIGESTS_OFFSET].engineID_len);
+
+  // md5: second copy of the password digest
+
+  w[ 0] = h_md5[0];
+  w[ 1] = h_md5[1];
+  w[ 2] = h_md5[2];
+  w[ 3] = h_md5[3];
+  w[ 4] = 0;
+  w[ 5] = 0;
+  w[ 6] = 0;
+  w[ 7] = 0;
+  w[ 8] = 0;
+  w[ 9] = 0;
+  w[10] = 0;
+  w[11] = 0;
+  w[12] = 0;
+  w[13] = 0;
+  w[14] = 0;
+  w[15] = 0;
+
+  md5_update (&md5_ctx, w, 16);
+
+  // sha1: second copy of the password digest
+
+  w[ 0] = h_sha1[0];
+  w[ 1] = h_sha1[1];
+  w[ 2] = h_sha1[2];
+  w[ 3] = h_sha1[3];
+  w[ 4] = h_sha1[4];
+  w[ 5] = 0;
+  w[ 6] = 0;
+  w[ 7] = 0;
+  w[ 8] = 0;
+  w[ 9] = 0;
+  w[10] = 0;
+  w[11] = 0;
+  w[12] = 0;
+  w[13] = 0;
+  w[14] = 0;
+  w[15] = 0;
+
+  sha1_update (&sha1_ctx, w, 20);
+
+  md5_final (&md5_ctx);
+  sha1_final (&sha1_ctx);
+
+  // md5: the digest finalized above becomes the 16-byte HMAC key, the salt is the HMAC message
+
+  w[ 0] = md5_ctx.h[0];
+  w[ 1] = md5_ctx.h[1];
+  w[ 2] = md5_ctx.h[2];
+  w[ 3] = md5_ctx.h[3];
+  w[ 4] = 0;
+  w[ 5] = 0;
+  w[ 6] = 0;
+  w[ 7] = 0;
+  w[ 8] = 0;
+  w[ 9] = 0;
+  w[10] = 0;
+  w[11] = 0;
+  w[12] = 0;
+  w[13] = 0;
+  w[14] = 0;
+  w[15] = 0;
+
+  md5_hmac_ctx_t md5_hmac_ctx;
+
+  md5_hmac_init (&md5_hmac_ctx, w, 16);
+
+  md5_hmac_update_global (&md5_hmac_ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, esalt_bufs[DIGESTS_OFFSET].salt_len);
+
+  md5_hmac_final (&md5_hmac_ctx);
+
+  {
+    const u32 r0 = hc_swap32_S (md5_hmac_ctx.opad.h[DGST_R0]); // md5 words are swapped before the compare, sha1 words below are not
+    const u32 r1 = hc_swap32_S (md5_hmac_ctx.opad.h[DGST_R1]);
+    const u32 r2 = hc_swap32_S (md5_hmac_ctx.opad.h[DGST_R2]);
+    const u32 r3 = 0; // only three HMAC words are compared, the fourth is fixed to 0
+
+    #define il_pos 0
+
+    #ifdef KERNEL_STATIC
+    #include COMPARE_M
+    #endif
+  }
+
+  // sha1: the digest finalized above becomes the 20-byte HMAC key, the salt is the HMAC message
+
+  w[ 0] = sha1_ctx.h[0];
+  w[ 1] = sha1_ctx.h[1];
+  w[ 2] = sha1_ctx.h[2];
+  w[ 3] = sha1_ctx.h[3];
+  w[ 4] = sha1_ctx.h[4];
+  w[ 5] = 0;
+  w[ 6] = 0;
+  w[ 7] = 0;
+  w[ 8] = 0;
+  w[ 9] = 0;
+  w[10] = 0;
+  w[11] = 0;
+  w[12] = 0;
+  w[13] = 0;
+  w[14] = 0;
+  w[15] = 0;
+
+  sha1_hmac_ctx_t sha1_hmac_ctx;
+
+  sha1_hmac_init (&sha1_hmac_ctx, w, 20);
+
+  sha1_hmac_update_global_swap (&sha1_hmac_ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, esalt_bufs[DIGESTS_OFFSET].salt_len);
+
+  sha1_hmac_final (&sha1_hmac_ctx);
+
+  {
+    const u32 r0 = sha1_hmac_ctx.opad.h[DGST_R0];
+    const u32 r1 = sha1_hmac_ctx.opad.h[DGST_R1];
+    const u32 r2 = sha1_hmac_ctx.opad.h[DGST_R2];
+    const u32 r3 = 0; // only three HMAC words are compared, the fourth is fixed to 0
+
+    #define il_pos 0
+
+    #ifdef KERNEL_STATIC
+    #include COMPARE_M
+    #endif
+  }
+}
diff --git a/docs/changes.txt b/docs/changes.txt
index 7e0aab1a2..e45671cd7 100644
--- a/docs/changes.txt
+++ b/docs/changes.txt
@@ -39,6 +39,7 @@
 
 - Added hash-mode: SNMPv3 HMAC-SHA1-96
 - Added hash-mode: SNMPv3 HMAC-MD5-96
+- Added hash-mode: SNMPv3 HMAC-MD5-96/HMAC-SHA1-96
 
 * changes v6.2.2 -> v6.2.3
 
diff --git a/docs/readme.txt b/docs/readme.txt
index cff519531..ca10a4644 100644
--- a/docs/readme.txt
+++ b/docs/readme.txt
@@ -156,6 +156,7 @@ NVIDIA GPUs require "NVIDIA Driver" (440.64 or later) and "CUDA Toolkit" (9.0 or
 - IKE-PSK MD5
 - IKE-PSK SHA1
 - SNMPv3 HMAC-MD5-96
+- SNMPv3 HMAC-MD5-96/HMAC-SHA1-96
 - SNMPv3 HMAC-SHA1-96
 - WPA-EAPOL-PBKDF2
 - WPA-EAPOL-PMK
diff --git a/src/modules/module_25000.c b/src/modules/module_25000.c
new file mode 100644
index 000000000..5baa18663
--- /dev/null
+++ b/src/modules/module_25000.c
@@ -0,0 +1,333 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+#include "common.h"
+#include "types.h"
+#include "modules.h"
+#include "bitops.h"
+#include "convert.h"
+#include "shared.h"
+#include "memory.h"
+#include "emu_inc_hash_md5.h"
+
+static const u32   ATTACK_EXEC    = ATTACK_EXEC_OUTSIDE_KERNEL;
+static const u32   DGST_POS0      = 0;
+static const u32   DGST_POS1      = 1;
+static const u32   DGST_POS2      = 2;
+static const u32   DGST_POS3      = 3;
+static const u32   DGST_SIZE      = DGST_SIZE_4_4; // 4_3: module_hash_decode fills three digest words and zeroes the fourth
+static const u32   HASH_CATEGORY  = HASH_CATEGORY_NETWORK_PROTOCOL;
+static const char *HASH_NAME      = "SNMPv3 HMAC-MD5-96/HMAC-SHA1-96";
+static const u64   KERN_TYPE      = 25000; // the kernels added alongside this module are named m25000_init/_loop/_comp
+static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE;
+static const u64   OPTS_TYPE      = OPTS_TYPE_PT_GENERATE_LE;
+static const u32   SALT_TYPE      = SALT_TYPE_EMBEDDED;
+static const char *ST_PASS        = "hashcat";
+static const char *ST_HASH        = "$SNMPv3$0$66763052$13981919518623358902340156831753173612320956749283824166083320737667668557830898783481876963136410266762758410322896320705075044221495960812100760230106803899899467077793703068392752686845035561487927252457444567685389901239388468830507087105054207914325254376053788152029716918450770264047103676562621965276752797029332926039166807829108367446173251908238116020942421323633620301312478670302264165059728208402342845743839533979473825394866704960428648622730299023225638967097578710279784722583947877561544154219162080289188160001741612377820114739093961409809862173307722539556954826052612794054060797358016549602977742745078911393042420821004243620362464971828700104979572910001640083882586179153483503492341163054930853321963503411228241996417991605003371264529827508426941919673592574025732354318435733211018917539824570724324796232199960952117561108106623865308577977944499366806697863259301760429786001824121720055893438673268643594146796410437039466462606490272723136671298529920486664067752007564122205089571790718437001200506203464426405927405102300269665189637001279369690218157456566218400534722049383049029139069701182053729830585217732347396312967325628046845068493719801191260136945971516486442056102815519090214442808707545803919529217103430588641187558031052830941742920355893755319896626873275796534820394248837050567688575113833311009595128372820474678989203565094681918285106102363272728922586582037066265522397748326630668375500179630717875844561081542915676557961288028298248995547031274515608973804660067065502484039882958958452781062725550260382637592283691962996228392332833626159043179186189904614052189303508782635840692436969244901198720814518$79f7b1$57e964c7cb117647004cf132";
+
+u32         module_attack_exec    (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ATTACK_EXEC;     } // each getter below returns one static constant defined above
+u32         module_dgst_pos0      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS0;       }
+u32         module_dgst_pos1      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS1;       }
+u32         module_dgst_pos2      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS2;       }
+u32         module_dgst_pos3      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS3;       }
+u32         module_dgst_size      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_SIZE;       }
+u32         module_hash_category  (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return HASH_CATEGORY;   }
+const char *module_hash_name      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return HASH_NAME;       }
+u64         module_kern_type      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return KERN_TYPE;       }
+u32         module_opti_type      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return OPTI_TYPE;       }
+u64         module_opts_type      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return OPTS_TYPE;       }
+u32         module_salt_type      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return SALT_TYPE;       }
+const char *module_st_hash        (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_HASH;         }
+const char *module_st_pass        (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_PASS;         }
+
+static const char *SIGNATURE_SNMPV3 = "$SNMPv3$0$"; // 10-character prefix verified as token 0 in module_hash_decode
+
+#define SNMPV3_SALT_MAX             1500
+#define SNMPV3_ENGINEID_MAX         32
+#define SNMPV3_MSG_AUTH_PARAMS_MAX  12
+#define SNMPV3_ROUNDS               1048576 // stored as salt->salt_iter by module_hash_decode
+#define SNMPV3_MAX_PW_LENGTH        64
+
+#define SNMPV3_TMP_ELEMS            4096 // 4096 = (256 (max pw length) * 64) / sizeof (u32)
+#define SNMPV3_HASH_ELEMS_MD5       4
+#define SNMPV3_HASH_ELEMS_SHA1      8 // 8 = aligned 5
+
+#define SNMPV3_MAX_SALT_ELEMS       512 // 512 * 4 = 2048 > 1500, also has to be multiple of 64
+#define SNMPV3_MAX_ENGINE_ELEMS     16  // 16 * 4 = 64 > 32, also has to be multiple of 64
+#define SNMPV3_MAX_PNUM_ELEMS       4   // 4 * 4 = 16 > 9
+
+typedef struct hmac_md5_tmp // same field layout as the struct of this name in OpenCL/m25000-pure.cl
+{
+  u32 tmp_md5[SNMPV3_TMP_ELEMS];
+  u32 tmp_sha1[SNMPV3_TMP_ELEMS];
+  u32 h_md5[SNMPV3_HASH_ELEMS_MD5];
+  u32 h_sha1[SNMPV3_HASH_ELEMS_SHA1];
+
+} hmac_md5_tmp_t; // module_tmp_size reports sizeof of this type
+
+typedef struct snmpv3 // same field layout as the struct of this name in OpenCL/m25000-pure.cl
+{
+  u32 salt_buf[SNMPV3_MAX_SALT_ELEMS]; // hex-decoded salt token
+  u32 salt_len;                        // number of bytes hex_decode wrote into salt_buf
+
+  u32 engineID_buf[SNMPV3_MAX_ENGINE_ELEMS]; // hex-decoded engineID token
+  u32 engineID_len;                          // number of bytes hex_decode wrote into engineID_buf
+
+  u32 packet_number[SNMPV3_MAX_PNUM_ELEMS]; // packet number token kept as a zero-padded string for module_hash_encode
+
+} snmpv3_t;
+
+u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  // the esalt size is the size of one snmpv3_t
+
+  return (u64) sizeof (snmpv3_t);
+}
+
+u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  // the tmp size is the size of one hmac_md5_tmp_t
+
+  return (u64) sizeof (hmac_md5_tmp_t);
+}
+
+u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  // The iteration count is pinned so that the loop count is always a multiple of 64.
+  // 2048 md5_transform/sha1_transform calls per kernel run are typically enough to
+  // overcome the PCIe bottleneck.
+
+  const u32 transforms_per_run = 2048;
+  return transforms_per_run * 64;
+}
+
+u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  // same value as module_kernel_loops_min: the loop count is fixed
+
+  return 2048 * 64;
+}
+
+int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len)
+{
+  // Parses "$SNMPv3$0$<packet number>$<salt hex>$<engineID hex>$<digest hex>" into digest_buf (three words),
+  // salt (MD5 of the decoded salt bytes) and esalt_buf (snmpv3_t). Returns PARSER_OK or the tokenizer error.
+  u32      *digest = (u32 *) digest_buf;
+  snmpv3_t *snmpv3 = (snmpv3_t *) esalt_buf;
+  token_t   token;
+
+  token.token_cnt  = 5;
+  token.signatures_cnt    = 1;
+  token.signatures_buf[0] = SIGNATURE_SNMPV3;
+
+  token.len[0]     = 10;
+  token.attr[0]    = TOKEN_ATTR_FIXED_LENGTH
+                   | TOKEN_ATTR_VERIFY_SIGNATURE;
+
+  // packet number
+  token.len_min[1] = 1;
+  token.len_max[1] = 8;
+  token.sep[1]     = '$';
+  token.attr[1]    = TOKEN_ATTR_VERIFY_LENGTH
+                   | TOKEN_ATTR_VERIFY_DIGIT;
+  // salt
+  token.len_min[2] = 12 * 2;
+  token.len_max[2] = SNMPV3_SALT_MAX * 2;
+  token.sep[2]     = '$';
+  token.attr[2]    = TOKEN_ATTR_VERIFY_LENGTH
+                   | TOKEN_ATTR_VERIFY_HEX;
+
+  // engineid: hex_decode()d below, so it has to be verified as hex like the salt and the digest
+  token.len_min[3] = 5; // NOTE(review): 5 is odd, hex input needs an even number of characters - confirm the intended minimum
+  token.len_max[3] = SNMPV3_ENGINEID_MAX;
+  token.sep[3]     = '$';
+  token.attr[3]    = TOKEN_ATTR_VERIFY_LENGTH
+                   | TOKEN_ATTR_VERIFY_HEX;
+  // digest
+  token.len_min[4] = SNMPV3_MSG_AUTH_PARAMS_MAX * 2;
+  token.len_max[4] = SNMPV3_MSG_AUTH_PARAMS_MAX * 2;
+  token.sep[4]     = '$';
+  token.attr[4]    = TOKEN_ATTR_VERIFY_LENGTH
+                   | TOKEN_ATTR_VERIFY_HEX;
+
+  const int rc_tokenizer = input_tokenizer ((const u8 *) line_buf, line_len, &token);
+
+  if (rc_tokenizer != PARSER_OK) return (rc_tokenizer);
+
+  // packet number: kept as text; at most 8 characters go into the zeroed 16-byte field, so it stays NUL-terminated
+
+  const u8 *packet_number_pos = token.buf[1];
+  const int packet_number_len = token.len[1];
+
+  memset (snmpv3->packet_number, 0, sizeof (snmpv3->packet_number));
+
+  strncpy ((char *) snmpv3->packet_number, (const char *) packet_number_pos, packet_number_len);
+
+  // salt
+
+  const u8 *salt_pos = token.buf[2];
+  const int salt_len = token.len[2];
+
+  u8 *salt_ptr = (u8 *) snmpv3->salt_buf;
+
+  snmpv3->salt_len = hex_decode (salt_pos, salt_len, salt_ptr);
+
+  salt->salt_iter = SNMPV3_ROUNDS;
+
+  // handle unique salts detection
+
+  md5_ctx_t md5_ctx;
+
+  md5_init   (&md5_ctx);
+  md5_update (&md5_ctx, snmpv3->salt_buf, snmpv3->salt_len);
+  md5_final  (&md5_ctx);
+
+  // store md5(snmpv3->salt_buf) in salt_buf
+
+  salt->salt_len = 16;
+
+  memcpy (salt->salt_buf, md5_ctx.h, salt->salt_len);
+
+  // engineid
+
+  const u8 *engineID_pos = token.buf[3];
+  const int engineID_len = token.len[3];
+
+  u8 *engineID_ptr = (u8 *) snmpv3->engineID_buf;
+
+  snmpv3->engineID_len = hex_decode (engineID_pos, engineID_len, engineID_ptr);
+
+  // digest: 24 hex characters = three u32 words, the fourth word is zeroed
+
+  const u8 *hash_pos = token.buf[4];
+
+  digest[0] = hex_to_u32 (hash_pos +  0);
+  digest[1] = hex_to_u32 (hash_pos +  8);
+  digest[2] = hex_to_u32 (hash_pos + 16);
+  digest[3] = 0;
+
+  // prefer sha1 due to speed: store the words byte-swapped, module_hash_encode swaps them back
+
+  digest[0] = byte_swap_32 (digest[0]);
+  digest[1] = byte_swap_32 (digest[1]);
+  digest[2] = byte_swap_32 (digest[2]);
+
+  return (PARSER_OK);
+}
+
+int module_hash_encode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const void *digest_buf, MAYBE_UNUSED const salt_t *salt, MAYBE_UNUSED const void *esalt_buf, MAYBE_UNUSED const void *hook_salt_buf, MAYBE_UNUSED const hashinfo_t *hash_info, char *line_buf, MAYBE_UNUSED const int line_size)
+{
+  // emits "<signature><packet number>$<salt hex>$<engineID hex>$<digest hex>" and returns its length
+
+  snmpv3_t *esalt = (snmpv3_t *) esalt_buf;
+
+  const u32 *words = (const u32 *) digest_buf;
+
+  u8 *dst = (u8 *) line_buf;
+
+  // signature and packet number (the packet number is kept as text in the esalt)
+
+  int pos = snprintf (line_buf, line_size, "%s%s$", SIGNATURE_SNMPV3, (char *) esalt->packet_number);
+
+  // salt, then engine id, each one followed by the '$' separator
+
+  pos += hex_encode ((u8 *) esalt->salt_buf, esalt->salt_len, dst + pos);
+
+  dst[pos++] = '$';
+
+  pos += hex_encode ((u8 *) esalt->engineID_buf, esalt->engineID_len, dst + pos);
+
+  dst[pos++] = '$';
+
+  // 96 bit digest: three words, byte-swapped the same way the decoder swaps them
+
+  for (int i = 0; i < 3; i++)
+  {
+    u32_to_hex (byte_swap_32 (words[i]), dst + pos);
+
+    pos += 8;
+  }
+
+  // terminate the line; the terminator is not part of the returned length
+  dst[pos] = 0;
+
+  return pos;
+}
+
+void module_init (module_ctx_t *module_ctx)
+{
+  module_ctx->module_context_size             = MODULE_CONTEXT_SIZE_CURRENT;
+  module_ctx->module_interface_version        = MODULE_INTERFACE_VERSION_CURRENT;
+  // every hook is assigned explicitly: MODULE_DEFAULT where this module has no override, the same-named function otherwise
+  module_ctx->module_attack_exec              = module_attack_exec;
+  module_ctx->module_benchmark_esalt          = MODULE_DEFAULT;
+  module_ctx->module_benchmark_hook_salt      = MODULE_DEFAULT;
+  module_ctx->module_benchmark_mask           = MODULE_DEFAULT;
+  module_ctx->module_benchmark_salt           = MODULE_DEFAULT;
+  module_ctx->module_build_plain_postprocess  = MODULE_DEFAULT;
+  module_ctx->module_deep_comp_kernel         = MODULE_DEFAULT;
+  module_ctx->module_dgst_pos0                = module_dgst_pos0;
+  module_ctx->module_dgst_pos1                = module_dgst_pos1;
+  module_ctx->module_dgst_pos2                = module_dgst_pos2;
+  module_ctx->module_dgst_pos3                = module_dgst_pos3;
+  module_ctx->module_dgst_size                = module_dgst_size;
+  module_ctx->module_dictstat_disable         = MODULE_DEFAULT;
+  module_ctx->module_esalt_size               = module_esalt_size;
+  module_ctx->module_extra_buffer_size        = MODULE_DEFAULT;
+  module_ctx->module_extra_tmp_size           = MODULE_DEFAULT;
+  module_ctx->module_forced_outfile_format    = MODULE_DEFAULT;
+  module_ctx->module_hash_binary_count        = MODULE_DEFAULT;
+  module_ctx->module_hash_binary_parse        = MODULE_DEFAULT;
+  module_ctx->module_hash_binary_save         = MODULE_DEFAULT;
+  module_ctx->module_hash_decode_potfile      = MODULE_DEFAULT;
+  module_ctx->module_hash_decode_zero_hash    = MODULE_DEFAULT;
+  module_ctx->module_hash_decode              = module_hash_decode;
+  module_ctx->module_hash_encode_status       = MODULE_DEFAULT;
+  module_ctx->module_hash_encode_potfile      = MODULE_DEFAULT;
+  module_ctx->module_hash_encode              = module_hash_encode;
+  module_ctx->module_hash_init_selftest       = MODULE_DEFAULT;
+  module_ctx->module_hash_mode                = MODULE_DEFAULT;
+  module_ctx->module_hash_category            = module_hash_category;
+  module_ctx->module_hash_name                = module_hash_name;
+  module_ctx->module_hashes_count_min         = MODULE_DEFAULT;
+  module_ctx->module_hashes_count_max         = MODULE_DEFAULT;
+  module_ctx->module_hlfmt_disable            = MODULE_DEFAULT;
+  module_ctx->module_hook_extra_param_size    = MODULE_DEFAULT;
+  module_ctx->module_hook_extra_param_init    = MODULE_DEFAULT;
+  module_ctx->module_hook_extra_param_term    = MODULE_DEFAULT;
+  module_ctx->module_hook12                   = MODULE_DEFAULT;
+  module_ctx->module_hook23                   = MODULE_DEFAULT;
+  module_ctx->module_hook_salt_size           = MODULE_DEFAULT;
+  module_ctx->module_hook_size                = MODULE_DEFAULT;
+  module_ctx->module_jit_build_options        = MODULE_DEFAULT;
+  module_ctx->module_jit_cache_disable        = MODULE_DEFAULT;
+  module_ctx->module_kernel_accel_max         = MODULE_DEFAULT;
+  module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
+  module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
+  module_ctx->module_kernel_loops_min         = module_kernel_loops_min;
+  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
+  module_ctx->module_kern_type                = module_kern_type;
+  module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
+  module_ctx->module_opti_type                = module_opti_type;
+  module_ctx->module_opts_type                = module_opts_type;
+  module_ctx->module_outfile_check_disable    = MODULE_DEFAULT;
+  module_ctx->module_outfile_check_nocomp     = MODULE_DEFAULT;
+  module_ctx->module_potfile_custom_check     = MODULE_DEFAULT;
+  module_ctx->module_potfile_disable          = MODULE_DEFAULT;
+  module_ctx->module_potfile_keep_all_hashes  = MODULE_DEFAULT;
+  module_ctx->module_pwdump_column            = MODULE_DEFAULT;
+  module_ctx->module_pw_max                   = MODULE_DEFAULT;
+  module_ctx->module_pw_min                   = MODULE_DEFAULT;
+  module_ctx->module_salt_max                 = MODULE_DEFAULT;
+  module_ctx->module_salt_min                 = MODULE_DEFAULT;
+  module_ctx->module_salt_type                = module_salt_type;
+  module_ctx->module_separator                = MODULE_DEFAULT;
+  module_ctx->module_st_hash                  = module_st_hash;
+  module_ctx->module_st_pass                  = module_st_pass;
+  module_ctx->module_tmp_size                 = module_tmp_size;
+  module_ctx->module_unstable_warning         = MODULE_DEFAULT;
+  module_ctx->module_warmup_disable           = MODULE_DEFAULT;
+}
diff --git a/tools/test_modules/m25000.pm b/tools/test_modules/m25000.pm
new file mode 100644
index 000000000..889de9611
--- /dev/null
+++ b/tools/test_modules/m25000.pm
@@ -0,0 +1,116 @@
+#!/usr/bin/env perl
+
+##
+## Author......: See docs/credits.txt
+## License.....: MIT
+##
+
+use strict;
+use warnings;
+
+use Digest::MD5 qw (md5 md5_hex);
+use Digest::SHA qw (sha1 sha1_hex);
+use Digest::HMAC qw (hmac hmac_hex);
+
+sub module_constraints { [[1, 256], [24, 3000], [-1, -1], [-1, -1], [-1, -1]] } # [min, max] pairs; presumably word length first and salt length (hex digits) second, -1 = unused -- TODO confirm against the test framework
+
+sub module_generate_hash
+{
+  my $word = shift;                              # password candidate
+  my $salt = shift;                              # salt as a hex string
+  my $pkt_num = shift // int(rand(99999999));    # packet number, random when not supplied
+  my $engineID = shift // random_hex_string(6);  # engine id as a hex string, random when not supplied
+  my $mode = shift // int(rand(2)) + 1;          # 1 = MD5, 2 = SHA1; rand(2) so that both can be picked
+
+  # the salt is a hex string: append one digit if its length is odd
+
+  if (length($salt) %2 == 1)
+  {
+    $salt = $salt . "8";
+  }
+
+  my $string1 = $word x 1048576; # password repeated, cut to exactly 1048576 bytes below
+
+  $string1 = substr ($string1, 0, 1048576);
+
+  my $digest1 = '';
+
+  if ($mode eq 2)
+  {
+    $digest1 = sha1_hex ($string1);
+  }
+  elsif ($mode eq 1)
+  {
+    $digest1 = md5_hex ($string1);
+  }
+
+  my $buf = join '', $digest1, $engineID, $digest1; # hex of: digest || engineID || digest
+
+  my $digest = '';
+
+  if ($mode eq 2)
+  {
+    my $digest2 = sha1(pack("H*", $buf));
+
+    $digest = hmac_hex (pack("H*", $salt), $digest2, \&sha1);
+  }
+  elsif ($mode eq 1)
+  {
+    my $digest2 = md5(pack("H*", $buf));
+
+    $digest = hmac_hex (pack("H*", $salt), $digest2, \&md5);
+  }
+
+  $digest = substr ($digest, 0, 24); # keep the first 24 hex digits (96 bit)
+
+  my $hash = sprintf ("\$SNMPv3\$0\$%s\$%s\$%s\$%s", $pkt_num, $salt, $engineID, $digest);
+
+  return $hash;
+}
+
+sub module_verify_hash
+{
+  my ($line) = @_;
+
+  # a cracked line is "<hash>:<plain>", split at the first ':'
+  # (anything that cannot be parsed yields an empty return)
+
+  my $colon = index ($line, ':');
+
+  return if $colon < 0;
+
+  my $hash = substr ($line, 0, $colon);
+  my $word = substr ($line, $colon + 1);
+
+  return if length ($word) == 0;
+  return if substr ($hash, 0, 10) ne '$SNMPv3$0$';
+
+  my (undef, $signature, $version, $pkt_num, $salt, $engineID, $digest) = split '\$', $hash;
+
+  # every field of the hash line has to be present
+
+  for my $field ($signature, $version, $pkt_num, $salt, $engineID, $digest)
+  {
+    return unless defined $field;
+  }
+
+  my $word_packed = pack_if_HEX_notation ($word);
+
+  # the line does not name the algorithm, so the hash is rebuilt with both of them
+
+  my $new_hash_md5  = module_generate_hash ($word_packed, $salt, $pkt_num, $engineID, 1);
+  my $new_hash_sha1 = module_generate_hash ($word_packed, $salt, $pkt_num, $engineID, 2);
+
+  # only the md5 digest is compared: everything else is answered with the sha1 hash
+
+  my (undef, undef, undef, undef, undef, undef, $digest_md5) = split '\$', $new_hash_md5;
+
+  if ($digest ne $digest_md5)
+  {
+    return ($new_hash_sha1, $word);
+  }
+
+  return ($new_hash_md5, $word);
+}
+
+1;

From 7683ca19a3d7898f2fd2090b50bb9583e86bdadc Mon Sep 17 00:00:00 2001
From: Gabriele Gristina <matrix@users.noreply.github.com>
Date: Sat, 24 Jul 2021 18:36:10 +0200
Subject: [PATCH 07/24] Added hash-mode: SNMPv3 HMAC-SHA224-128

---
 OpenCL/m26700-pure.cl        | 369 +++++++++++++++++++++++++++++++++++
 docs/changes.txt             |   1 +
 docs/readme.txt              |   1 +
 src/modules/module_26700.c   | 329 +++++++++++++++++++++++++++++++
 tools/test_modules/m26700.pm |  79 ++++++++
 5 files changed, 779 insertions(+)
 create mode 100644 OpenCL/m26700-pure.cl
 create mode 100644 src/modules/module_26700.c
 create mode 100644 tools/test_modules/m26700.pm

diff --git a/OpenCL/m26700-pure.cl b/OpenCL/m26700-pure.cl
new file mode 100644
index 000000000..6c8f029b3
--- /dev/null
+++ b/OpenCL/m26700-pure.cl
@@ -0,0 +1,369 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+//#define NEW_SIMD_CODE
+
+#ifdef KERNEL_STATIC
+#include "inc_vendor.h"
+#include "inc_types.h"
+#include "inc_platform.cl"
+#include "inc_common.cl"
+#include "inc_simd.cl"
+#include "inc_hash_sha224.cl"
+#endif
+
+#define COMPARE_S "inc_comp_single.cl"
+#define COMPARE_M "inc_comp_multi.cl"
+
+#define SNMPV3_SALT_MAX             1500
+#define SNMPV3_ENGINEID_MAX         32
+#define SNMPV3_MSG_AUTH_PARAMS_MAX  16
+#define SNMPV3_ROUNDS               1048576
+#define SNMPV3_MAX_PW_LENGTH        64
+
+#define SNMPV3_TMP_ELEMS            4096 // 4096 = (256 (max pw length) * 64) / sizeof (u32)
+#define SNMPV3_HASH_ELEMS           8
+
+#define SNMPV3_MAX_SALT_ELEMS       512 // 512 * 4 = 2048 > 1500, also has to be multiple of 64
+#define SNMPV3_MAX_ENGINE_ELEMS     16  // 16 * 4 = 64 > 32, also has to be multiple of 64
+#define SNMPV3_MAX_PNUM_ELEMS       4   // 4 * 4 = 16 > 9
+
+typedef struct hmac_sha224_tmp // per-candidate scratch used by the m26700 _init, _loop and _comp kernels
+{
+  u32 tmp[SNMPV3_TMP_ELEMS];  // password repeated 64 times as byte-swapped words (written by _init, read by _loop)
+  u32 h[SNMPV3_HASH_ELEMS];   // running SHA-224 state carried from one kernel call to the next
+
+} hmac_sha224_tmp_t;
+
+typedef struct snmpv3 // esalt; the same layout is declared in src/modules/module_26700.c
+{
+  u32 salt_buf[SNMPV3_MAX_SALT_ELEMS]; // message fed to the HMAC in m26700_comp
+  u32 salt_len;                        // length passed together with salt_buf
+
+  u32 engineID_buf[SNMPV3_MAX_ENGINE_ELEMS]; // hashed between the two digest copies in m26700_comp
+  u32 engineID_len;                          // length passed together with engineID_buf
+
+  u32 packet_number[SNMPV3_MAX_PNUM_ELEMS]; // not read by the kernels in this file
+
+} snmpv3_t;
+
+KERNEL_FQ void m26700_init (KERN_ATTR_TMPS_ESALT (hmac_sha224_tmp_t, snmpv3_t))
+{
+  /**
+   * one work-item per password candidate
+   */
+
+  const u64 gid = get_global_id (0);
+
+  if (gid >= gid_max) return;
+
+  /**
+   * copy the candidate's words into w[]; words past the password stay zero
+   */
+
+  const u32 pw_len = pws[gid].pw_len;
+
+  u32 w[64] = { 0 };
+
+  for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
+  {
+    w[idx] = pws[gid].i[idx];
+  }
+
+  u8 *src_ptr = (u8 *) w;
+
+  // the password repeated 64 times (pw_len * 64 bytes), words byte-swapped on store
+
+  u32 dst_buf[16];
+
+  u8 *dst_ptr = (u8 *) dst_buf;
+
+  int tmp_idx = 0;
+
+  for (int i = 0; i < 64; i++)
+  {
+    for (int j = 0; j < pw_len; j++)
+    {
+      const int dst_idx = tmp_idx & 63;
+
+      dst_ptr[dst_idx] = src_ptr[j];
+
+      // write the 64 byte staging buffer to global memory every time it has been filled
+
+      if (dst_idx == 63)
+      {
+        const int tmp_idx4 = (tmp_idx - 63) / 4;
+
+        tmps[gid].tmp[tmp_idx4 +  0] = hc_swap32_S (dst_buf[ 0]);
+        tmps[gid].tmp[tmp_idx4 +  1] = hc_swap32_S (dst_buf[ 1]);
+        tmps[gid].tmp[tmp_idx4 +  2] = hc_swap32_S (dst_buf[ 2]);
+        tmps[gid].tmp[tmp_idx4 +  3] = hc_swap32_S (dst_buf[ 3]);
+        tmps[gid].tmp[tmp_idx4 +  4] = hc_swap32_S (dst_buf[ 4]);
+        tmps[gid].tmp[tmp_idx4 +  5] = hc_swap32_S (dst_buf[ 5]);
+        tmps[gid].tmp[tmp_idx4 +  6] = hc_swap32_S (dst_buf[ 6]);
+        tmps[gid].tmp[tmp_idx4 +  7] = hc_swap32_S (dst_buf[ 7]);
+        tmps[gid].tmp[tmp_idx4 +  8] = hc_swap32_S (dst_buf[ 8]);
+        tmps[gid].tmp[tmp_idx4 +  9] = hc_swap32_S (dst_buf[ 9]);
+        tmps[gid].tmp[tmp_idx4 + 10] = hc_swap32_S (dst_buf[10]);
+        tmps[gid].tmp[tmp_idx4 + 11] = hc_swap32_S (dst_buf[11]);
+        tmps[gid].tmp[tmp_idx4 + 12] = hc_swap32_S (dst_buf[12]);
+        tmps[gid].tmp[tmp_idx4 + 13] = hc_swap32_S (dst_buf[13]);
+        tmps[gid].tmp[tmp_idx4 + 14] = hc_swap32_S (dst_buf[14]);
+        tmps[gid].tmp[tmp_idx4 + 15] = hc_swap32_S (dst_buf[15]);
+      }
+
+      tmp_idx++;
+    }
+  }
+
+  // seed the running hash state with the SHA224M_* constants
+
+  tmps[gid].h[0] = SHA224M_A;
+  tmps[gid].h[1] = SHA224M_B;
+  tmps[gid].h[2] = SHA224M_C;
+  tmps[gid].h[3] = SHA224M_D;
+  tmps[gid].h[4] = SHA224M_E;
+  tmps[gid].h[5] = SHA224M_F;
+  tmps[gid].h[6] = SHA224M_G;
+  tmps[gid].h[7] = SHA224M_H;
+}
+
+KERNEL_FQ void m26700_loop (KERN_ATTR_TMPS_ESALT (hmac_sha224_tmp_t, snmpv3_t))
+{
+  /**
+   * one work-item per password candidate; continues from the hash state saved in tmps[gid].h
+   */
+
+  const u64 gid = get_global_id (0);
+
+  if (gid >= gid_max) return;
+
+  u32 h[8];
+
+  h[0] = tmps[gid].h[0];
+  h[1] = tmps[gid].h[1];
+  h[2] = tmps[gid].h[2];
+  h[3] = tmps[gid].h[3];
+  h[4] = tmps[gid].h[4];
+  h[5] = tmps[gid].h[5];
+  h[6] = tmps[gid].h[6];
+  h[7] = tmps[gid].h[7];
+
+  const u32 pw_len = pws[gid].pw_len;
+
+  const int pw_len64 = pw_len * 64; // NOTE(review): 0 for an empty password, which makes the modulo below divide by zero -- confirm empty candidates never reach this kernel
+
+  #define SNMPV3_TMP_ELEMS_OPT 1024 // 1024 = (64 max pw length * 64) / sizeof (u32)
+                                    // for pw length > 64 we use global memory reads
+
+  u32 tmp[SNMPV3_TMP_ELEMS_OPT];
+
+  if (pw_len < 64)
+  {
+    for (int i = 0; i < pw_len64 / 4; i++)
+    {
+      tmp[i] = tmps[gid].tmp[i];
+    }
+
+    for (int i = 0, j = loop_pos; i < loop_cnt; i += 64, j += 64)
+    {
+      const int idx = (j % pw_len64) / 4; // the pattern is pw_len64 bytes long, so a modulo finds the next 64 byte block (assumes loop_pos is a multiple of 64)
+
+      u32 w0[4];
+      u32 w1[4];
+      u32 w2[4];
+      u32 w3[4];
+
+      w0[0] = tmp[idx +  0];
+      w0[1] = tmp[idx +  1];
+      w0[2] = tmp[idx +  2];
+      w0[3] = tmp[idx +  3];
+      w1[0] = tmp[idx +  4];
+      w1[1] = tmp[idx +  5];
+      w1[2] = tmp[idx +  6];
+      w1[3] = tmp[idx +  7];
+      w2[0] = tmp[idx +  8];
+      w2[1] = tmp[idx +  9];
+      w2[2] = tmp[idx + 10];
+      w2[3] = tmp[idx + 11];
+      w3[0] = tmp[idx + 12];
+      w3[1] = tmp[idx + 13];
+      w3[2] = tmp[idx + 14];
+      w3[3] = tmp[idx + 15];
+
+      sha224_transform (w0, w1, w2, w3, h);
+    }
+  }
+  else
+  {
+    for (int i = 0, j = loop_pos; i < loop_cnt; i += 64, j += 64)
+    {
+      const int idx = (j % pw_len64) / 4; // same block lookup as above, but reading tmps[gid].tmp directly instead of the tmp[] copy
+
+      u32 w0[4];
+      u32 w1[4];
+      u32 w2[4];
+      u32 w3[4];
+
+      w0[0] = tmps[gid].tmp[idx +  0];
+      w0[1] = tmps[gid].tmp[idx +  1];
+      w0[2] = tmps[gid].tmp[idx +  2];
+      w0[3] = tmps[gid].tmp[idx +  3];
+      w1[0] = tmps[gid].tmp[idx +  4];
+      w1[1] = tmps[gid].tmp[idx +  5];
+      w1[2] = tmps[gid].tmp[idx +  6];
+      w1[3] = tmps[gid].tmp[idx +  7];
+      w2[0] = tmps[gid].tmp[idx +  8];
+      w2[1] = tmps[gid].tmp[idx +  9];
+      w2[2] = tmps[gid].tmp[idx + 10];
+      w2[3] = tmps[gid].tmp[idx + 11];
+      w3[0] = tmps[gid].tmp[idx + 12];
+      w3[1] = tmps[gid].tmp[idx + 13];
+      w3[2] = tmps[gid].tmp[idx + 14];
+      w3[3] = tmps[gid].tmp[idx + 15];
+
+      sha224_transform (w0, w1, w2, w3, h);
+    }
+  }
+
+  tmps[gid].h[0] = h[0];
+  tmps[gid].h[1] = h[1];
+  tmps[gid].h[2] = h[2];
+  tmps[gid].h[3] = h[3];
+  tmps[gid].h[4] = h[4];
+  tmps[gid].h[5] = h[5];
+  tmps[gid].h[6] = h[6];
+  tmps[gid].h[7] = h[7];
+}
+
+KERNEL_FQ void m26700_comp (KERN_ATTR_TMPS_ESALT (hmac_sha224_tmp_t, snmpv3_t))
+{
+  /**
+   * one work-item per password candidate
+   */
+
+  const u64 gid = get_global_id (0);
+
+  if (gid >= gid_max) return;
+
+  u32 w0[4];
+  u32 w1[4];
+  u32 w2[4];
+  u32 w3[4];
+  // closing block of the 1048576 byte stream: 0x80 marker, zeros, then the length in bits
+  w0[0] = 0x80000000;
+  w0[1] = 0;
+  w0[2] = 0;
+  w0[3] = 0;
+  w1[0] = 0;
+  w1[1] = 0;
+  w1[2] = 0;
+  w1[3] = 0;
+  w2[0] = 0;
+  w2[1] = 0;
+  w2[2] = 0;
+  w2[3] = 0;
+  w3[0] = 0;
+  w3[1] = 0;
+  w3[2] = 0;
+  w3[3] = 1048576 * 8;
+
+  u32 h[8];
+
+  h[0] = tmps[gid].h[0];
+  h[1] = tmps[gid].h[1];
+  h[2] = tmps[gid].h[2];
+  h[3] = tmps[gid].h[3];
+  h[4] = tmps[gid].h[4];
+  h[5] = tmps[gid].h[5];
+  h[6] = tmps[gid].h[6];
+  h[7] = tmps[gid].h[7];
+
+  sha224_transform (w0, w1, w2, w3, h);
+
+  sha224_ctx_t ctx;
+
+  sha224_init (&ctx);
+  // first copy of the 28 byte stream digest (h[0..6])
+  u32 w[16];
+
+  w[ 0] = h[0];
+  w[ 1] = h[1];
+  w[ 2] = h[2];
+  w[ 3] = h[3];
+  w[ 4] = h[4];
+  w[ 5] = h[5];
+  w[ 6] = h[6];
+  w[ 7] = 0;
+  w[ 8] = 0;
+  w[ 9] = 0;
+  w[10] = 0;
+  w[11] = 0;
+  w[12] = 0;
+  w[13] = 0;
+  w[14] = 0;
+  w[15] = 0;
+
+  sha224_update (&ctx, w, 28);
+
+  sha224_update_global_swap (&ctx, esalt_bufs[DIGESTS_OFFSET].engineID_buf, esalt_bufs[DIGESTS_OFFSET].engineID_len);
+  // second copy of the stream digest, hashed after the engine id
+  w[ 0] = h[0];
+  w[ 1] = h[1];
+  w[ 2] = h[2];
+  w[ 3] = h[3];
+  w[ 4] = h[4];
+  w[ 5] = h[5];
+  w[ 6] = h[6];
+  w[ 7] = 0;
+  w[ 8] = 0;
+  w[ 9] = 0;
+  w[10] = 0;
+  w[11] = 0;
+  w[12] = 0;
+  w[13] = 0;
+  w[14] = 0;
+  w[15] = 0;
+
+  sha224_update (&ctx, w, 28);
+
+  sha224_final (&ctx);
+  // the resulting 28 byte digest is used as the HMAC key
+  w[ 0] = ctx.h[0];
+  w[ 1] = ctx.h[1];
+  w[ 2] = ctx.h[2];
+  w[ 3] = ctx.h[3];
+  w[ 4] = ctx.h[4];
+  w[ 5] = ctx.h[5];
+  w[ 6] = ctx.h[6];
+  w[ 7] = 0;
+  w[ 8] = 0;
+  w[ 9] = 0;
+  w[10] = 0;
+  w[11] = 0;
+  w[12] = 0;
+  w[13] = 0;
+  w[14] = 0;
+  w[15] = 0;
+
+  sha224_hmac_ctx_t hmac_ctx;
+
+  sha224_hmac_init (&hmac_ctx, w, 28);
+  // the message being authenticated is the salt buffer of the esalt
+  sha224_hmac_update_global_swap (&hmac_ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, esalt_bufs[DIGESTS_OFFSET].salt_len);
+
+  sha224_hmac_final (&hmac_ctx);
+
+  const u32 r0 = hmac_ctx.opad.h[DGST_R0];
+  const u32 r1 = hmac_ctx.opad.h[DGST_R1];
+  const u32 r2 = hmac_ctx.opad.h[DGST_R2];
+  const u32 r3 = hmac_ctx.opad.h[DGST_R3];
+
+  #define il_pos 0
+
+  #ifdef KERNEL_STATIC
+  #include COMPARE_M
+  #endif
+}
diff --git a/docs/changes.txt b/docs/changes.txt
index 7e0aab1a2..0de6f097a 100644
--- a/docs/changes.txt
+++ b/docs/changes.txt
@@ -39,6 +39,7 @@
 
 - Added hash-mode: SNMPv3 HMAC-SHA1-96
 - Added hash-mode: SNMPv3 HMAC-MD5-96
+- Added hash-mode: SNMPv3 HMAC-SHA224-128
 
 * changes v6.2.2 -> v6.2.3
 
diff --git a/docs/readme.txt b/docs/readme.txt
index cff519531..b5aad87aa 100644
--- a/docs/readme.txt
+++ b/docs/readme.txt
@@ -157,6 +157,7 @@ NVIDIA GPUs require "NVIDIA Driver" (440.64 or later) and "CUDA Toolkit" (9.0 or
 - IKE-PSK SHA1
 - SNMPv3 HMAC-MD5-96
 - SNMPv3 HMAC-SHA1-96
+- SNMPv3 HMAC-SHA224-128
 - WPA-EAPOL-PBKDF2
 - WPA-EAPOL-PMK
 - WPA-PBKDF2-PMKID+EAPOL
diff --git a/src/modules/module_26700.c b/src/modules/module_26700.c
new file mode 100644
index 000000000..15da3a60d
--- /dev/null
+++ b/src/modules/module_26700.c
@@ -0,0 +1,329 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+#include "common.h"
+#include "types.h"
+#include "modules.h"
+#include "bitops.h"
+#include "convert.h"
+#include "shared.h"
+#include "memory.h"
+#include "emu_inc_hash_sha1.h"
+
+static const u32   ATTACK_EXEC    = ATTACK_EXEC_OUTSIDE_KERNEL;
+static const u32   DGST_POS0      = 0;
+static const u32   DGST_POS1      = 1;
+static const u32   DGST_POS2      = 2;
+static const u32   DGST_POS3      = 3;
+static const u32   DGST_SIZE      = DGST_SIZE_4_4; // module_hash_decode stores four u32 digest words
+static const u32   HASH_CATEGORY  = HASH_CATEGORY_NETWORK_PROTOCOL;
+static const char *HASH_NAME      = "SNMPv3 HMAC-SHA224-128";
+static const u64   KERN_TYPE      = 26700; // same number as in the m26700_* kernel names of OpenCL/m26700-pure.cl
+static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE;
+static const u64   OPTS_TYPE      = OPTS_TYPE_PT_GENERATE_LE;
+static const u32   SALT_TYPE      = SALT_TYPE_EMBEDDED;
+static const char *ST_PASS        = "hashcat"; // presumably the plaintext belonging to ST_HASH -- TODO confirm
+static const char *ST_HASH        = "$SNMPv3$3$93139992$221741464175523704413635982825760096118979556553098267101930601704853783146704303603164898517490303649758413279881023268227639264274559738208032094697403441579675568418814746064423158072029964334558571907882883041105245436623239742039483870313304031171307046174561938247029298397351679655253476035738973220651902635644891207741346383906360172060617958001549207150418505701978225626879116088671275359841611906258964723020692629233701447389366763685772212471681367034365005843875040967496437639996409692554570118676609568987002911124689769902674963799843406930141309408517459025165858554235820857416473466773963181853809212740450911140184957236422993171860303971025966646341351680880393147830452957802708608458538439866404321876100995381875117293904251031322241811475664324823327065168205689694742596451920374170034310748505203093091474865128628752667403895211365282260392475024320221767588855410235114859725219681974195474606697679001625416351117081484601569226697700302476076379$1759ce$cb8436f8e5b49d52a60d0ee076a79a97";
+
+u32         module_attack_exec    (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ATTACK_EXEC;     } // each accessor in this table returns the file-scope constant of the matching name and ignores its arguments
+u32         module_dgst_pos0      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS0;       }
+u32         module_dgst_pos1      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS1;       }
+u32         module_dgst_pos2      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS2;       }
+u32         module_dgst_pos3      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS3;       }
+u32         module_dgst_size      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_SIZE;       }
+u32         module_hash_category  (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return HASH_CATEGORY;   }
+const char *module_hash_name      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return HASH_NAME;       }
+u64         module_kern_type      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return KERN_TYPE;       }
+u32         module_opti_type      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return OPTI_TYPE;       }
+u64         module_opts_type      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return OPTS_TYPE;       }
+u32         module_salt_type      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return SALT_TYPE;       }
+const char *module_st_hash        (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_HASH;         }
+const char *module_st_pass        (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_PASS;         }
+
+static const char *SIGNATURE_SNMPV3 = "$SNMPv3$3$";
+
+#define SNMPV3_SALT_MAX             1500
+#define SNMPV3_ENGINEID_MAX         32
+#define SNMPV3_MSG_AUTH_PARAMS_MAX  16
+#define SNMPV3_ROUNDS               1048576
+#define SNMPV3_MAX_PW_LENGTH        64
+
+#define SNMPV3_TMP_ELEMS            4096 // 4096 = (256 (max pw length) * 64) / sizeof (u32)
+#define SNMPV3_HASH_ELEMS           8
+
+#define SNMPV3_MAX_SALT_ELEMS       512 // 512 * 4 = 2048 > 1500, also has to be multiple of 64
+#define SNMPV3_MAX_ENGINE_ELEMS     16  // 16 * 4 = 64 > 32, also has to be multiple of 64
+#define SNMPV3_MAX_PNUM_ELEMS       4   // 4 * 4 = 16 > 9
+
+typedef struct hmac_sha224_tmp // only its size is used on the host (module_tmp_size); the same layout is declared in OpenCL/m26700-pure.cl
+{
+  u32 tmp[SNMPV3_TMP_ELEMS];  // filled by the m26700_init kernel with the repeated password
+  u32 h[SNMPV3_HASH_ELEMS];   // SHA-224 state the kernels carry between calls
+
+} hmac_sha224_tmp_t;
+
+typedef struct snmpv3 // esalt; the same layout is declared in OpenCL/m26700-pure.cl
+{
+  u32 salt_buf[SNMPV3_MAX_SALT_ELEMS]; // hex-decoded salt token of the hash line
+  u32 salt_len;                        // value returned by hex_decode for the salt
+
+  u32 engineID_buf[SNMPV3_MAX_ENGINE_ELEMS]; // hex-decoded engine id token of the hash line
+  u32 engineID_len;                          // value returned by hex_decode for the engine id
+
+  u32 packet_number[SNMPV3_MAX_PNUM_ELEMS]; // packet number digits kept as zero-padded text
+
+} snmpv3_t;
+
+u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  // the esalt of this mode is a single snmpv3_t
+
+  return (u64) sizeof (snmpv3_t);
+}
+
+u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  // the per-candidate scratch space of this mode is a single hmac_sha224_tmp_t
+
+  return (u64) sizeof (hmac_sha224_tmp_t);
+}
+
+u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  // the iteration count is fixed so that the loop count is always a multiple of 64
+  // (module_kernel_loops_max returns the same value);
+  // 2048 calls to sha224_transform per kernel launch are typically enough
+  // to overcome the pcie bottleneck
+
+  return 2048 * 64;
+}
+
+u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  // upper bound of the loop count: 2048 blocks of 64 bytes
+
+  return 2048 * 64;
+}
+
+int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len)
+{
+  u32 *digest = (u32 *) digest_buf;
+
+  snmpv3_t *snmpv3 = (snmpv3_t *) esalt_buf;
+  // expected line: $SNMPv3$3$<packet number>$<salt hex>$<engineID hex>$<digest hex>; returns PARSER_OK or the tokenizer error
+  token_t token;
+
+  token.token_cnt  = 5;
+  token.signatures_cnt    = 1;
+  token.signatures_buf[0] = SIGNATURE_SNMPV3;
+
+  token.len[0]     = 10;
+  token.attr[0]    = TOKEN_ATTR_FIXED_LENGTH
+                   | TOKEN_ATTR_VERIFY_SIGNATURE;
+
+  // packet number (1 to 8 decimal digits)
+  token.len_min[1] = 1;
+  token.len_max[1] = 8;
+  token.sep[1]     = '$';
+  token.attr[1]    = TOKEN_ATTR_VERIFY_LENGTH
+                   | TOKEN_ATTR_VERIFY_DIGIT;
+  // salt (hex, 16 to SNMPV3_SALT_MAX bytes)
+  token.len_min[2] = 16 * 2;
+  token.len_max[2] = SNMPV3_SALT_MAX * 2;
+  token.sep[2]     = '$';
+  token.attr[2]    = TOKEN_ATTR_VERIFY_LENGTH
+                   | TOKEN_ATTR_VERIFY_HEX;
+
+  // engineid (it is run through hex_decode below, so it has to be verified as hex as well)
+  token.len_min[3] = 5;
+  token.len_max[3] = SNMPV3_ENGINEID_MAX;
+  token.sep[3]     = '$';
+  token.attr[3]    = TOKEN_ATTR_VERIFY_LENGTH
+                   | TOKEN_ATTR_VERIFY_HEX;
+  // digest (hex, exactly SNMPV3_MSG_AUTH_PARAMS_MAX bytes)
+  token.len_min[4] = SNMPV3_MSG_AUTH_PARAMS_MAX * 2;
+  token.len_max[4] = SNMPV3_MSG_AUTH_PARAMS_MAX * 2;
+  token.sep[4]     = '$';
+  token.attr[4]    = TOKEN_ATTR_VERIFY_LENGTH
+                   | TOKEN_ATTR_VERIFY_HEX;
+
+  const int rc_tokenizer = input_tokenizer ((const u8 *) line_buf, line_len, &token);
+
+  if (rc_tokenizer != PARSER_OK) return (rc_tokenizer);
+
+  // packet number: stored as zero-padded text so that module_hash_encode can print it again
+
+  const u8 *packet_number_pos = token.buf[1];
+  const int packet_number_len = token.len[1];
+
+  memset (snmpv3->packet_number, 0, sizeof (snmpv3->packet_number));
+
+  strncpy ((char *) snmpv3->packet_number, (char *) packet_number_pos, packet_number_len);
+
+  // salt: hex-decoded into the esalt
+
+  const u8 *salt_pos = token.buf[2];
+  const int salt_len = token.len[2];
+
+  u8 *salt_ptr = (u8 *) snmpv3->salt_buf;
+
+  snmpv3->salt_len = hex_decode (salt_pos, salt_len, salt_ptr);
+
+  salt->salt_iter = SNMPV3_ROUNDS;
+
+  // handle unique salts detection: the generic salt only receives a sha1 of the decoded salt
+
+  sha1_ctx_t sha1_ctx;
+
+  sha1_init   (&sha1_ctx);
+  sha1_update (&sha1_ctx, snmpv3->salt_buf, snmpv3->salt_len);
+  sha1_final  (&sha1_ctx);
+
+  // store sha1(snmpv3->salt_buf) in salt_buf
+
+  salt->salt_len = 20;
+
+  memcpy (salt->salt_buf, sha1_ctx.h, salt->salt_len);
+
+  // engineid: hex-decoded into the esalt
+
+  const u8 *engineID_pos = token.buf[3];
+  const int engineID_len = token.len[3];
+
+  u8 *engineID_ptr = (u8 *) snmpv3->engineID_buf;
+
+  snmpv3->engineID_len = hex_decode (engineID_pos, engineID_len, engineID_ptr);
+
+  // digest: four words, byte-swapped after parsing (module_hash_encode swaps them back)
+
+  const u8 *hash_pos = token.buf[4];
+
+  digest[0] = hex_to_u32 (hash_pos +  0);
+  digest[1] = hex_to_u32 (hash_pos +  8);
+  digest[2] = hex_to_u32 (hash_pos + 16);
+  digest[3] = hex_to_u32 (hash_pos + 24);
+
+  digest[0] = byte_swap_32 (digest[0]);
+  digest[1] = byte_swap_32 (digest[1]);
+  digest[2] = byte_swap_32 (digest[2]);
+  digest[3] = byte_swap_32 (digest[3]);
+
+  return (PARSER_OK);
+}
+
+int module_hash_encode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const void *digest_buf, MAYBE_UNUSED const salt_t *salt, MAYBE_UNUSED const void *esalt_buf, MAYBE_UNUSED const void *hook_salt_buf, MAYBE_UNUSED const hashinfo_t *hash_info, char *line_buf, MAYBE_UNUSED const int line_size)
+{
+  // emits "<signature><packet number>$<salt hex>$<engineID hex>$<digest hex>" and returns its length
+
+  snmpv3_t *esalt = (snmpv3_t *) esalt_buf;
+
+  const u32 *words = (const u32 *) digest_buf;
+
+  u8 *dst = (u8 *) line_buf;
+
+  // signature and packet number (the packet number is kept as text in the esalt)
+
+  int pos = snprintf (line_buf, line_size, "%s%s$", SIGNATURE_SNMPV3, (char *) esalt->packet_number);
+
+  // salt, then engine id, each one followed by the '$' separator
+
+  pos += hex_encode ((u8 *) esalt->salt_buf, esalt->salt_len, dst + pos);
+
+  dst[pos++] = '$';
+
+  pos += hex_encode ((u8 *) esalt->engineID_buf, esalt->engineID_len, dst + pos);
+
+  dst[pos++] = '$';
+
+  // 128 bit digest: four words, byte-swapped the same way module_hash_decode swaps them
+
+  for (int i = 0; i < 4; i++)
+  {
+    u32_to_hex (byte_swap_32 (words[i]), dst + pos);
+
+    pos += 8;
+  }
+
+  // terminate the line; the terminator is not part of the returned length
+  dst[pos] = 0;
+
+  return pos;
+}
+
+void module_init (module_ctx_t *module_ctx)
+{
+  module_ctx->module_context_size             = MODULE_CONTEXT_SIZE_CURRENT;
+  module_ctx->module_interface_version        = MODULE_INTERFACE_VERSION_CURRENT;
+  // Fills the hook table of module_ctx: each hook is either bound to the same-named symbol or set to MODULE_DEFAULT.
+  module_ctx->module_attack_exec              = module_attack_exec;
+  module_ctx->module_benchmark_esalt          = MODULE_DEFAULT;
+  module_ctx->module_benchmark_hook_salt      = MODULE_DEFAULT;
+  module_ctx->module_benchmark_mask           = MODULE_DEFAULT;
+  module_ctx->module_benchmark_salt           = MODULE_DEFAULT;
+  module_ctx->module_build_plain_postprocess  = MODULE_DEFAULT;
+  module_ctx->module_deep_comp_kernel         = MODULE_DEFAULT;
+  module_ctx->module_dgst_pos0                = module_dgst_pos0;
+  module_ctx->module_dgst_pos1                = module_dgst_pos1;
+  module_ctx->module_dgst_pos2                = module_dgst_pos2;
+  module_ctx->module_dgst_pos3                = module_dgst_pos3;
+  module_ctx->module_dgst_size                = module_dgst_size;
+  module_ctx->module_dictstat_disable         = MODULE_DEFAULT;
+  module_ctx->module_esalt_size               = module_esalt_size;
+  module_ctx->module_extra_buffer_size        = MODULE_DEFAULT;
+  module_ctx->module_extra_tmp_size           = MODULE_DEFAULT;
+  module_ctx->module_forced_outfile_format    = MODULE_DEFAULT;
+  module_ctx->module_hash_binary_count        = MODULE_DEFAULT;
+  module_ctx->module_hash_binary_parse        = MODULE_DEFAULT;
+  module_ctx->module_hash_binary_save         = MODULE_DEFAULT;
+  module_ctx->module_hash_decode_potfile      = MODULE_DEFAULT;
+  module_ctx->module_hash_decode_zero_hash    = MODULE_DEFAULT;
+  module_ctx->module_hash_decode              = module_hash_decode;
+  module_ctx->module_hash_encode_status       = MODULE_DEFAULT;
+  module_ctx->module_hash_encode_potfile      = MODULE_DEFAULT;
+  module_ctx->module_hash_encode              = module_hash_encode;
+  module_ctx->module_hash_init_selftest       = MODULE_DEFAULT;
+  module_ctx->module_hash_mode                = MODULE_DEFAULT;
+  module_ctx->module_hash_category            = module_hash_category;
+  module_ctx->module_hash_name                = module_hash_name;
+  module_ctx->module_hashes_count_min         = MODULE_DEFAULT;
+  module_ctx->module_hashes_count_max         = MODULE_DEFAULT;
+  module_ctx->module_hlfmt_disable            = MODULE_DEFAULT;
+  module_ctx->module_hook_extra_param_size    = MODULE_DEFAULT;
+  module_ctx->module_hook_extra_param_init    = MODULE_DEFAULT;
+  module_ctx->module_hook_extra_param_term    = MODULE_DEFAULT;
+  module_ctx->module_hook12                   = MODULE_DEFAULT;
+  module_ctx->module_hook23                   = MODULE_DEFAULT;
+  module_ctx->module_hook_salt_size           = MODULE_DEFAULT;
+  module_ctx->module_hook_size                = MODULE_DEFAULT;
+  module_ctx->module_jit_build_options        = MODULE_DEFAULT;
+  module_ctx->module_jit_cache_disable        = MODULE_DEFAULT;
+  module_ctx->module_kernel_accel_max         = MODULE_DEFAULT;
+  module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
+  module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
+  module_ctx->module_kernel_loops_min         = module_kernel_loops_min;
+  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
+  module_ctx->module_kern_type                = module_kern_type;
+  module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
+  module_ctx->module_opti_type                = module_opti_type;
+  module_ctx->module_opts_type                = module_opts_type;
+  module_ctx->module_outfile_check_disable    = MODULE_DEFAULT;
+  module_ctx->module_outfile_check_nocomp     = MODULE_DEFAULT;
+  module_ctx->module_potfile_custom_check     = MODULE_DEFAULT;
+  module_ctx->module_potfile_disable          = MODULE_DEFAULT;
+  module_ctx->module_potfile_keep_all_hashes  = MODULE_DEFAULT;
+  module_ctx->module_pwdump_column            = MODULE_DEFAULT;
+  module_ctx->module_pw_max                   = MODULE_DEFAULT;
+  module_ctx->module_pw_min                   = MODULE_DEFAULT;
+  module_ctx->module_salt_max                 = MODULE_DEFAULT;
+  module_ctx->module_salt_min                 = MODULE_DEFAULT;
+  module_ctx->module_salt_type                = module_salt_type;
+  module_ctx->module_separator                = MODULE_DEFAULT;
+  module_ctx->module_st_hash                  = module_st_hash;
+  module_ctx->module_st_pass                  = module_st_pass;
+  module_ctx->module_tmp_size                 = module_tmp_size;
+  module_ctx->module_unstable_warning         = MODULE_DEFAULT;
+  module_ctx->module_warmup_disable           = MODULE_DEFAULT;
+}
diff --git a/tools/test_modules/m26700.pm b/tools/test_modules/m26700.pm
new file mode 100644
index 000000000..4d77179f1
--- /dev/null
+++ b/tools/test_modules/m26700.pm
@@ -0,0 +1,79 @@
+#!/usr/bin/env perl
+
+##
+## Author......: See docs/credits.txt
+## License.....: MIT
+##
+
+use strict;
+use warnings;
+
+use Digest::SHA qw (sha224 sha224_hex);
+use Digest::HMAC qw (hmac hmac_hex);
+
+sub module_constraints { [[1, 256], [32, 3000], [-1, -1], [-1, -1], [-1, -1]] } # returns a ref to five two-element arrays; presumably [min, max] length limits (word first, salt second) read by the test driver, with -1 meaning "not used" -- TODO confirm
+
+sub module_generate_hash
+{
+  my $word = shift;
+  my $salt = shift;
+  my $pkt_num = shift // int(rand(99999999));
+  my $engineID = shift // random_hex_string(6);
+  # Returns the line "$SNMPv3$3$<pkt_num>$<salt>$<engineID>$<digest>" for $word; pkt_num and engineID are randomized when not passed in.
+  # The salt is unpacked as hex below, so an odd-length salt gets one extra nibble appended.
+
+  if (length($salt) %2 == 1)
+  {
+    $salt = $salt . "8";
+  }
+  # Stretch the word to exactly 1048576 chars. Repeat it only as often as needed: "x 1048576" built a 256 MiB temporary for a 256-char word.
+  my $string1 = $word x (int (1048576 / (length ($word) || 1)) + 1);
+
+  $string1 = substr ($string1, 0, 1048576);
+
+  my $sha224_digest1 = sha224_hex ($string1);
+
+  my $buf = join '', $sha224_digest1, $engineID, $sha224_digest1;
+
+  my $sha224_digest2 = sha224(pack("H*", $buf));
+
+  my $digest = hmac_hex (pack("H*", $salt), $sha224_digest2, \&sha224);
+  # Only the first 32 hex chars of the HMAC go into the hash line.
+  $digest = substr ($digest, 0, 32);
+
+  my $hash = sprintf ("\$SNMPv3\$3\$%s\$%s\$%s\$%s", $pkt_num, $salt, $engineID, $digest);
+
+  return $hash;
+}
+
+sub module_verify_hash
+{
+  my $line = shift;
+  # Splits "hash:word" at the first ':', regenerates the hash from the word and returns (regenerated hash, word); bare return on malformed input.
+  my $idx = index ($line, ':');
+
+  return unless $idx >= 0;
+
+  my $hash = substr ($line, 0, $idx);
+  my $word = substr ($line, $idx + 1);
+
+  return unless length ($word) > 0;
+  return unless substr ($hash, 0, 10) eq '$SNMPv3$3$';
+  # The hash starts with '$', so split yields an empty first field; the undef placeholder discards it.
+  my (undef, $signature, $version, $pkt_num, $salt, $engineID, $digest) = split '\$', $hash;
+
+  return unless defined $signature;
+  return unless defined $version;
+  return unless defined $pkt_num;
+  return unless defined $salt;
+  return unless defined $engineID;
+  return unless defined $digest;
+
+  my $word_packed = pack_if_HEX_notation ($word);
+
+  my $new_hash = module_generate_hash ($word_packed, $salt, $pkt_num, $engineID);
+
+  return ($new_hash, $word);
+}
+
+1;

From 44dbedd21a990bcd8221698fd5421b2899e9944c Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Sun, 25 Jul 2021 10:32:58 +0200
Subject: [PATCH 08/24] Use improved native threads derivation for RC4 based
 hash-modes

---
 src/modules/module_07500.c | 28 +++-------------------------
 src/modules/module_13100.c | 28 +++-------------------------
 src/modules/module_18200.c | 28 +++-------------------------
 src/modules/module_25400.c | 28 +++-------------------------
 4 files changed, 12 insertions(+), 100 deletions(-)

diff --git a/src/modules/module_07500.c b/src/modules/module_07500.c
index 1681fb4a8..7cb6e04e4 100644
--- a/src/modules/module_07500.c
+++ b/src/modules/module_07500.c
@@ -65,35 +65,13 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
   }
   else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
   {
-    if (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK)
+    if (device_param->device_local_mem_size < 49152)
     {
-      native_threads = 8;
-    }
-    else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)
-    {
-      if (device_param->device_local_mem_size < 49152)
-      {
-        native_threads = 32;
-      }
-      else
-      {
-        native_threads = 64;
-      }
-    }
-    else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
-    {
-      if (device_param->device_local_mem_size < 49152)
-      {
-        native_threads = 32;
-      }
-      else
-      {
-        native_threads = 64;
-      }
+      native_threads = MIN (device_param->kernel_preferred_wgs_multiple, 32); // We can't just set 32, because Intel GPU need 8
     }
     else
     {
-      native_threads = 32;
+      native_threads = device_param->kernel_preferred_wgs_multiple;
     }
   }
 
diff --git a/src/modules/module_13100.c b/src/modules/module_13100.c
index fac5cb24c..2b4e0e846 100644
--- a/src/modules/module_13100.c
+++ b/src/modules/module_13100.c
@@ -64,35 +64,13 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
   }
   else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
   {
-    if (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK)
+    if (device_param->device_local_mem_size < 49152)
     {
-      native_threads = 8;
-    }
-    else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)
-    {
-      if (device_param->device_local_mem_size < 49152)
-      {
-        native_threads = 32;
-      }
-      else
-      {
-        native_threads = 64;
-      }
-    }
-    else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
-    {
-      if (device_param->device_local_mem_size < 49152)
-      {
-        native_threads = 32;
-      }
-      else
-      {
-        native_threads = 64;
-      }
+      native_threads = MIN (device_param->kernel_preferred_wgs_multiple, 32); // We can't just set 32, because Intel GPU need 8
     }
     else
     {
-      native_threads = 32;
+      native_threads = device_param->kernel_preferred_wgs_multiple;
     }
   }
 
diff --git a/src/modules/module_18200.c b/src/modules/module_18200.c
index e6596306b..f4d32695e 100644
--- a/src/modules/module_18200.c
+++ b/src/modules/module_18200.c
@@ -64,35 +64,13 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
   }
   else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
   {
-    if (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK)
+    if (device_param->device_local_mem_size < 49152)
     {
-      native_threads = 8;
-    }
-    else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)
-    {
-      if (device_param->device_local_mem_size < 49152)
-      {
-        native_threads = 32;
-      }
-      else
-      {
-        native_threads = 64;
-      }
-    }
-    else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
-    {
-      if (device_param->device_local_mem_size < 49152)
-      {
-        native_threads = 32;
-      }
-      else
-      {
-        native_threads = 64;
-      }
+      native_threads = MIN (device_param->kernel_preferred_wgs_multiple, 32); // We can't just set 32, because Intel GPU need 8
     }
     else
     {
-      native_threads = 32;
+      native_threads = device_param->kernel_preferred_wgs_multiple;
     }
   }
 
diff --git a/src/modules/module_25400.c b/src/modules/module_25400.c
index 341837786..6325bf737 100644
--- a/src/modules/module_25400.c
+++ b/src/modules/module_25400.c
@@ -103,35 +103,13 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
   }
   else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
   {
-    if (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK)
+    if (device_param->device_local_mem_size < 49152)
     {
-      native_threads = 8;
-    }
-    else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)
-    {
-      if (device_param->device_local_mem_size < 49152)
-      {
-        native_threads = 32;
-      }
-      else
-      {
-        native_threads = 64;
-      }
-    }
-    else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
-    {
-      if (device_param->device_local_mem_size < 49152)
-      {
-        native_threads = 32;
-      }
-      else
-      {
-        native_threads = 64;
-      }
+      native_threads = MIN (device_param->kernel_preferred_wgs_multiple, 32); // We can't just set 32, because Intel GPU need 8
     }
     else
     {
-      native_threads = 32;
+      native_threads = device_param->kernel_preferred_wgs_multiple;
     }
   }
 

From 83badbeaf1f4660ca2beb08c91b8a5b788c3e874 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Sun, 25 Jul 2021 10:56:29 +0200
Subject: [PATCH 09/24] Backport native threads strategy from Kerberos to
 Office and PDF

---
 src/modules/module_09700.c | 14 +++-----------
 src/modules/module_09710.c | 14 +++-----------
 src/modules/module_09720.c | 14 +++-----------
 src/modules/module_09800.c | 14 +++-----------
 src/modules/module_09810.c | 14 +++-----------
 src/modules/module_09820.c | 14 +++-----------
 src/modules/module_10400.c | 14 +++-----------
 src/modules/module_10410.c | 14 +++-----------
 src/modules/module_10420.c | 14 +++-----------
 src/modules/module_10500.c | 14 +++-----------
 10 files changed, 30 insertions(+), 110 deletions(-)

diff --git a/src/modules/module_09700.c b/src/modules/module_09700.c
index cad911186..626f65f8f 100644
--- a/src/modules/module_09700.c
+++ b/src/modules/module_09700.c
@@ -69,21 +69,13 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
   }
   else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
   {
-    if (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK)
+    if (device_param->device_local_mem_size < 49152)
     {
-      native_threads = 8;
-    }
-    else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)
-    {
-      native_threads = 64;
-    }
-    else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
-    {
-      native_threads = 64;
+      native_threads = MIN (device_param->kernel_preferred_wgs_multiple, 32); // We can't just set 32, because Intel GPU need 8
     }
     else
     {
-      native_threads = 32;
+      native_threads = device_param->kernel_preferred_wgs_multiple;
     }
   }
 
diff --git a/src/modules/module_09710.c b/src/modules/module_09710.c
index 033f77ea0..aa63f6905 100644
--- a/src/modules/module_09710.c
+++ b/src/modules/module_09710.c
@@ -69,21 +69,13 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
   }
   else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
   {
-    if (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK)
+    if (device_param->device_local_mem_size < 49152)
     {
-      native_threads = 8;
-    }
-    else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)
-    {
-      native_threads = 64;
-    }
-    else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
-    {
-      native_threads = 64;
+      native_threads = MIN (device_param->kernel_preferred_wgs_multiple, 32); // We can't just set 32, because Intel GPU need 8
     }
     else
     {
-      native_threads = 32;
+      native_threads = device_param->kernel_preferred_wgs_multiple;
     }
   }
 
diff --git a/src/modules/module_09720.c b/src/modules/module_09720.c
index 04e99201f..e4be9f8b0 100644
--- a/src/modules/module_09720.c
+++ b/src/modules/module_09720.c
@@ -70,21 +70,13 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
   }
   else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
   {
-    if (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK)
+    if (device_param->device_local_mem_size < 49152)
     {
-      native_threads = 8;
-    }
-    else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)
-    {
-      native_threads = 64;
-    }
-    else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
-    {
-      native_threads = 64;
+      native_threads = MIN (device_param->kernel_preferred_wgs_multiple, 32); // We can't just set 32, because Intel GPU need 8
     }
     else
     {
-      native_threads = 32;
+      native_threads = device_param->kernel_preferred_wgs_multiple;
     }
   }
 
diff --git a/src/modules/module_09800.c b/src/modules/module_09800.c
index 2eb7fab05..357ecb80b 100644
--- a/src/modules/module_09800.c
+++ b/src/modules/module_09800.c
@@ -71,21 +71,13 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
   }
   else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
   {
-    if (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK)
+    if (device_param->device_local_mem_size < 49152)
     {
-      native_threads = 8;
-    }
-    else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)
-    {
-      native_threads = 64;
-    }
-    else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
-    {
-      native_threads = 64;
+      native_threads = MIN (device_param->kernel_preferred_wgs_multiple, 32); // We can't just set 32, because Intel GPU need 8
     }
     else
     {
-      native_threads = 32;
+      native_threads = device_param->kernel_preferred_wgs_multiple;
     }
   }
 
diff --git a/src/modules/module_09810.c b/src/modules/module_09810.c
index 2a1074b2c..9d3edd8d2 100644
--- a/src/modules/module_09810.c
+++ b/src/modules/module_09810.c
@@ -70,21 +70,13 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
   }
   else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
   {
-    if (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK)
+    if (device_param->device_local_mem_size < 49152)
     {
-      native_threads = 8;
-    }
-    else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)
-    {
-      native_threads = 64;
-    }
-    else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
-    {
-      native_threads = 64;
+      native_threads = MIN (device_param->kernel_preferred_wgs_multiple, 32); // We can't just set 32, because Intel GPU need 8
     }
     else
     {
-      native_threads = 32;
+      native_threads = device_param->kernel_preferred_wgs_multiple;
     }
   }
 
diff --git a/src/modules/module_09820.c b/src/modules/module_09820.c
index ea3dfe22b..9997992e4 100644
--- a/src/modules/module_09820.c
+++ b/src/modules/module_09820.c
@@ -72,21 +72,13 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
   }
   else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
   {
-    if (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK)
+    if (device_param->device_local_mem_size < 49152)
     {
-      native_threads = 8;
-    }
-    else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)
-    {
-      native_threads = 64;
-    }
-    else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
-    {
-      native_threads = 64;
+      native_threads = MIN (device_param->kernel_preferred_wgs_multiple, 32); // We can't just set 32, because Intel GPU need 8
     }
     else
     {
-      native_threads = 32;
+      native_threads = device_param->kernel_preferred_wgs_multiple;
     }
   }
 
diff --git a/src/modules/module_10400.c b/src/modules/module_10400.c
index 77416f5ce..30c95e68a 100644
--- a/src/modules/module_10400.c
+++ b/src/modules/module_10400.c
@@ -76,21 +76,13 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
   }
   else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
   {
-    if (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK)
+    if (device_param->device_local_mem_size < 49152)
     {
-      native_threads = 8;
-    }
-    else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)
-    {
-      native_threads = 64;
-    }
-    else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
-    {
-      native_threads = 64;
+      native_threads = MIN (device_param->kernel_preferred_wgs_multiple, 32); // We can't just set 32, because Intel GPU need 8
     }
     else
     {
-      native_threads = 32;
+      native_threads = device_param->kernel_preferred_wgs_multiple;
     }
   }
 
diff --git a/src/modules/module_10410.c b/src/modules/module_10410.c
index b2c98363f..25be1feb4 100644
--- a/src/modules/module_10410.c
+++ b/src/modules/module_10410.c
@@ -77,21 +77,13 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
   }
   else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
   {
-    if (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK)
+    if (device_param->device_local_mem_size < 49152)
     {
-      native_threads = 8;
-    }
-    else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)
-    {
-      native_threads = 64;
-    }
-    else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
-    {
-      native_threads = 64;
+      native_threads = MIN (device_param->kernel_preferred_wgs_multiple, 32); // We can't just set 32, because Intel GPU need 8
     }
     else
     {
-      native_threads = 32;
+      native_threads = device_param->kernel_preferred_wgs_multiple;
     }
   }
 
diff --git a/src/modules/module_10420.c b/src/modules/module_10420.c
index 23e537bf5..eecf97f2c 100644
--- a/src/modules/module_10420.c
+++ b/src/modules/module_10420.c
@@ -76,21 +76,13 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
   }
   else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
   {
-    if (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK)
+    if (device_param->device_local_mem_size < 49152)
     {
-      native_threads = 8;
-    }
-    else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)
-    {
-      native_threads = 64;
-    }
-    else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
-    {
-      native_threads = 64;
+      native_threads = MIN (device_param->kernel_preferred_wgs_multiple, 32); // We can't just set 32, because Intel GPU need 8
     }
     else
     {
-      native_threads = 32;
+      native_threads = device_param->kernel_preferred_wgs_multiple;
     }
   }
 
diff --git a/src/modules/module_10500.c b/src/modules/module_10500.c
index 80a8478ef..d261e4458 100644
--- a/src/modules/module_10500.c
+++ b/src/modules/module_10500.c
@@ -100,21 +100,13 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
   }
   else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
   {
-    if (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK)
+    if (device_param->device_local_mem_size < 49152)
     {
-      native_threads = 8;
-    }
-    else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)
-    {
-      native_threads = 64;
-    }
-    else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
-    {
-      native_threads = 64;
+      native_threads = MIN (device_param->kernel_preferred_wgs_multiple, 32); // We can't just set 32, because Intel GPU need 8
     }
     else
     {
-      native_threads = 32;
+      native_threads = device_param->kernel_preferred_wgs_multiple;
     }
   }
 

From b53691c8f570f5882aca89b71a441931e4751d15 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Mon, 26 Jul 2021 07:48:56 +0200
Subject: [PATCH 10/24] ADL: Updated support for AMD Display Library to 14.0,
 updated datatypes and added support for OverDrive 7 and 8 based GPUs

---
 include/ext_ADL.h | 697 +++++++++++++++++++++++++++++++++++-----------
 include/types.h   |   2 -
 src/ext_ADL.c     |  59 ++--
 src/hwmon.c       | 203 ++++++++++----
 4 files changed, 728 insertions(+), 233 deletions(-)

diff --git a/include/ext_ADL.h b/include/ext_ADL.h
index fd8438c8e..238453b7c 100644
--- a/include/ext_ADL.h
+++ b/include/ext_ADL.h
@@ -13,228 +13,602 @@
 #include <windows.h>
 #endif // _WIN
 
+// Values taken from display-library-14.0.zip
+
 /**
- * Declarations from adl_sdk.h and subheaders
+ * Declarations from adl_defines.h
  */
 
-#define ADL_OK                                  0
-#define ADL_ERR                                -1
-#define ADL_ERR_NOT_SUPPORTED                  -8
+/// ADL function completed successfully
+#define ADL_OK                    0
+/// Generic Error. Most likely one or more of the Escape calls to the driver failed!
+#define ADL_ERR                    -1
+
+/// Function  not supported by the driver
+#define ADL_ERR_NOT_SUPPORTED            -8
+
+/// Defines the maximum string length
+#define ADL_MAX_PATH                                    256
 
-#define ADL_MAX_PATH                            256
+//values for ADLFanSpeedValue.iSpeedType
+#define ADL_DL_FANCTRL_SPEED_TYPE_PERCENT    1
+#define ADL_DL_FANCTRL_SPEED_TYPE_RPM        2
 
-#define ADL_DL_FANCTRL_SPEED_TYPE_PERCENT       1
-#define ADL_DL_FANCTRL_FLAG_USER_DEFINED_SPEED  1
+//values for ADLFanSpeedValue.iFlags
+#define ADL_DL_FANCTRL_FLAG_USER_DEFINED_SPEED   1
+
+//Define Performance Metrics Log max sensors number
+#define ADL_PMLOG_MAX_SENSORS  256
+
+/**
+ * Declarations from adl_structures.h
+ */
 
+/////////////////////////////////////////////////////////////////////////////////////////////
+///\brief Structure containing information about the graphics adapter.
+///
+/// This structure is used to store various information about the graphics adapter.  This
+/// information can be returned to the user. Alternatively, it can be used to access various driver calls to set
+/// or fetch various settings upon the user's request.
+/// \nosubgrouping
+////////////////////////////////////////////////////////////////////////////////////////////
 typedef struct AdapterInfo
 {
-  int  iSize;
-  int  iAdapterIndex;
-  char strUDID[ADL_MAX_PATH];
-  int  iBusNumber;
-  int  iDeviceNumber;
-  int  iFunctionNumber;
-  int  iVendorID;
-  char strAdapterName[ADL_MAX_PATH];
-  char strDisplayName[ADL_MAX_PATH];
-  int  iPresent;
-
-  #if defined (_WIN32) || defined (_WIN64) || defined (__CYGWIN__)
-  int  iExist;
-  char strDriverPath[ADL_MAX_PATH];
-  char strDriverPathExt[ADL_MAX_PATH];
-  char strPNPString[ADL_MAX_PATH];
-  int  iOSDisplayIndex;
-  #endif /* (_WIN32) || (_WIN64) || (__CYGWIN__) */
-
-  #if defined (__linux__)
-  int  iXScreenNum;
-  int  iDrvIndex;
-  char strXScreenConfigName[ADL_MAX_PATH];
-  #endif /* (__linux__) */
+/// \ALL_STRUCT_MEM
+
+/// Size of the structure.
+    int iSize;
+/// The ADL index handle. One GPU may be associated with one or two index handles
+    int iAdapterIndex;
+/// The unique device ID associated with this adapter.
+    char strUDID[ADL_MAX_PATH];
+/// The BUS number associated with this adapter.
+    int iBusNumber;
+/// The driver number associated with this adapter.
+    int iDeviceNumber;
+/// The function number.
+    int iFunctionNumber;
+/// The vendor ID associated with this adapter.
+    int iVendorID;
+/// Adapter name.
+    char strAdapterName[ADL_MAX_PATH];
+/// Display name. For example, "\\\\Display0" for Windows or ":0:0" for Linux.
+    char strDisplayName[ADL_MAX_PATH];
+/// Present or not; 1 if present and 0 if not present. If the logical adapter is present, the display name such as \\\\.\\Display1 can be found from OS
+    int iPresent;
+// The SDK guards below are widened with __CYGWIN__ and __linux__, as in the previous header, so the platform members (and the struct layout) do not vanish on those builds.
+#if defined (_WIN32) || defined (_WIN64) || defined (__CYGWIN__)
+/// \WIN_STRUCT_MEM
+
+/// Exist or not; 1 is exist and 0 is not present.
+    int iExist;
+/// Driver registry path.
+    char strDriverPath[ADL_MAX_PATH];
+/// Driver registry path Ext for.
+    char strDriverPathExt[ADL_MAX_PATH];
+/// PNP string from Windows.
+    char strPNPString[ADL_MAX_PATH];
+/// It is generated from EnumDisplayDevices.
+    int iOSDisplayIndex;
+#endif /* (_WIN32) || (_WIN64) || (__CYGWIN__) */
+
+#if defined (LINUX) || defined (__linux__)
+/// \LNX_STRUCT_MEM
+
+/// Internal X screen number from GPUMapInfo (DEPRECATED use XScreenInfo)
+    int iXScreenNum;
+/// Internal driver index from GPUMapInfo
+    int iDrvIndex;
+/// \deprecated Internal x config file screen identifier name. Use XScreenInfo instead.
+    char strXScreenConfigName[ADL_MAX_PATH];
+
+#endif /* (LINUX) || (__linux__) */
 } AdapterInfo, *LPAdapterInfo;
 
+/////////////////////////////////////////////////////////////////////////////////////////////
+///\brief Structure containing information about thermal controller.
+///
+/// This structure is used to store information about thermal controller.
+/// This structure is used by ADL_PM_ThermalDevices_Enum.
+/// \nosubgrouping
+////////////////////////////////////////////////////////////////////////////////////////////
 typedef struct ADLThermalControllerInfo
 {
+/// Must be set to the size of the structure
   int iSize;
+/// Possible values: \ref ADL_DL_THERMAL_DOMAIN_OTHER or \ref ADL_DL_THERMAL_DOMAIN_GPU.
   int iThermalDomain;
+///    GPU 0, 1, etc.
   int iDomainIndex;
+/// Possible values: \ref ADL_DL_THERMAL_FLAG_INTERRUPT or \ref ADL_DL_THERMAL_FLAG_FANCONTROL
   int iFlags;
 } ADLThermalControllerInfo;
 
+/////////////////////////////////////////////////////////////////////////////////////////////
+///\brief Structure containing information about thermal controller temperature.
+///
+/// This structure is used to store information about thermal controller temperature.
+/// This structure is used by the ADL_PM_Temperature_Get() function.
+/// \nosubgrouping
+////////////////////////////////////////////////////////////////////////////////////////////
 typedef struct ADLTemperature
 {
+/// Must be set to the size of the structure
   int iSize;
+/// Temperature in millidegrees Celsius.
   int iTemperature;
 } ADLTemperature;
 
+/////////////////////////////////////////////////////////////////////////////////////////////
+///\brief Structure containing information about thermal controller fan speed.
+///
+/// This structure is used to store information about thermal controller fan speed.
+/// This structure is used by the ADL_PM_FanSpeedInfo_Get() function.
+/// \nosubgrouping
+////////////////////////////////////////////////////////////////////////////////////////////
 typedef struct ADLFanSpeedInfo
 {
+/// Must be set to the size of the structure
   int iSize;
+/// \ref define_fanctrl
   int iFlags;
+/// Minimum possible fan speed value in percents.
   int iMinPercent;
+/// Maximum possible fan speed value in percents.
   int iMaxPercent;
+/// Minimum possible fan speed value in RPM.
   int iMinRPM;
+/// Maximum possible fan speed value in RPM.
   int iMaxRPM;
 } ADLFanSpeedInfo;
 
+/////////////////////////////////////////////////////////////////////////////////////////////
+///\brief Structure containing information about fan speed reported by thermal controller.
+///
+/// This structure is used to store information about fan speed reported by thermal controller.
+/// This structure is used by the ADL_Overdrive5_FanSpeed_Get() and ADL_Overdrive5_FanSpeed_Set() functions.
+/// \nosubgrouping
+////////////////////////////////////////////////////////////////////////////////////////////
 typedef struct ADLFanSpeedValue
 {
+/// Must be set to the size of the structure
   int iSize;
+/// Possible values: \ref ADL_DL_FANCTRL_SPEED_TYPE_PERCENT or \ref ADL_DL_FANCTRL_SPEED_TYPE_RPM
   int iSpeedType;
+/// Fan speed value
   int iFanSpeed;
+/// The only flag for now is: \ref ADL_DL_FANCTRL_FLAG_USER_DEFINED_SPEED
   int iFlags;
 } ADLFanSpeedValue;
 
+/////////////////////////////////////////////////////////////////////////////////////////////
+///\brief Structure containing information about the display device.
+///
+/// This structure is used to store display device information
+/// such as display index, type, name, connection status, mapped adapter and controller indexes,
+/// whether or not multiple VPUs are supported, local display connections or not (through Lasso), etc.
+/// This information can be returned to the user. Alternatively, it can be used to access various driver calls to set
+/// or fetch various display device related settings upon the user's request.
+/// \nosubgrouping
+////////////////////////////////////////////////////////////////////////////////////////////
 typedef struct ADLDisplayID
 {
-  int iDisplayLogicalIndex;
-  int iDisplayPhysicalIndex;
-  int iDisplayLogicalAdapterIndex;
-  int iDisplayPhysicalAdapterIndex;
+/// The logical display index belonging to this adapter.
+    int iDisplayLogicalIndex;
+
+///\brief The physical display index.
+/// For example, display index 2 from adapter 2 can be used by current adapter 1.\n
+/// So current adapter may enumerate this adapter as logical display 7 but the physical display
+/// index is still 2.
+    int iDisplayPhysicalIndex;
+
+/// The persistent logical adapter index for the display.
+    int iDisplayLogicalAdapterIndex;
+
+///\brief The persistent physical adapter index for the display.
+/// It can be the current adapter or a non-local adapter. \n
+/// If this adapter index is different than the current adapter,
+/// the Display Non Local flag is set inside DisplayInfoValue.
+    int iDisplayPhysicalAdapterIndex;
 } ADLDisplayID, *LPADLDisplayID;
 
+/////////////////////////////////////////////////////////////////////////////////////////////
+///\brief Structure containing information about the display device.
+///
+/// This structure is used to store various information about the display device.  This
+/// information can be returned to the user, or used to access various driver calls to set
+/// or fetch various display-device-related settings upon the user's request
+/// \nosubgrouping
+////////////////////////////////////////////////////////////////////////////////////////////
 typedef struct ADLDisplayInfo
 {
-  ADLDisplayID displayID;
-  int  iDisplayControllerIndex;
-  char strDisplayName[ADL_MAX_PATH];
-  char strDisplayManufacturerName[ADL_MAX_PATH];
-  int  iDisplayType;
-  int  iDisplayOutputType;
-  int  iDisplayConnector;
-  int  iDisplayInfoMask;
-  int  iDisplayInfoValue;
+/// The DisplayID structure
+    ADLDisplayID displayID;
+
+///\deprecated The controller index to which the display is mapped.\n Will not be used in the future\n
+    int  iDisplayControllerIndex;
+
+/// The display's EDID name.
+    char strDisplayName[ADL_MAX_PATH];
+
+/// The display's manufacturer name.
+    char strDisplayManufacturerName[ADL_MAX_PATH];
+
+/// The Display type. For example: CRT, TV, CV, DFP.
+    int  iDisplayType;
+
+/// The display output type. For example: HDMI, SVIDEO, COMPONENT VIDEO.
+    int  iDisplayOutputType;
+
+/// The connector type for the device.
+    int  iDisplayConnector;
+
+///\brief The bit mask identifies the number of bits ADLDisplayInfo is currently using. \n
+/// It will be the sum of all the bit definitions in ADL_DISPLAY_DISPLAYINFO_xxx.
+    int  iDisplayInfoMask;
+
+/// The bit mask identifies the display status. \ref define_displayinfomask
+    int  iDisplayInfoValue;
 } ADLDisplayInfo, *LPADLDisplayInfo;
 
+/////////////////////////////////////////////////////////////////////////////////////////////
+/// \brief Structure containing information about the BIOS.
+///
+/// This structure is used to store various information about the BIOS.  This
+/// information can be returned to the user.
+/// \nosubgrouping
+////////////////////////////////////////////////////////////////////////////////////////////
 typedef struct ADLBiosInfo
 {
-  char strPartNumber[ADL_MAX_PATH];
-  char strVersion[ADL_MAX_PATH];
-  char strDate[ADL_MAX_PATH];
+    char strPartNumber[ADL_MAX_PATH];    ///< Part number.
+    char strVersion[ADL_MAX_PATH];        ///< Version number.
+    char strDate[ADL_MAX_PATH];        ///< BIOS date in yyyy/mm/dd hh:mm format.
 } ADLBiosInfo, *LPADLBiosInfo;
 
+/////////////////////////////////////////////////////////////////////////////////////////////
+///\brief Structure containing information about current power management related activity.
+///
+/// This structure is used to store information about current power management related activity.
+/// This structure (Overdrive 5 interfaces) is used by the ADL_PM_CurrentActivity_Get() function.
+/// \nosubgrouping
+////////////////////////////////////////////////////////////////////////////////////////////
 typedef struct ADLPMActivity
 {
-  int iSize;
-  int iEngineClock;
-  int iMemoryClock;
-  int iVddc;
-  int iActivityPercent;
-  int iCurrentPerformanceLevel;
-  int iCurrentBusSpeed;
-  int iCurrentBusLanes;
-  int iMaximumBusLanes;
-  int iReserved;
+/// Must be set to the size of the structure
+    int iSize;
+/// Current engine clock.
+    int iEngineClock;
+/// Current memory clock.
+    int iMemoryClock;
+/// Current core voltage.
+    int iVddc;
+/// GPU utilization.
+    int iActivityPercent;
+/// Performance level index.
+    int iCurrentPerformanceLevel;
+/// Current PCIE bus speed.
+    int iCurrentBusSpeed;
+/// Number of PCIE bus lanes.
+    int iCurrentBusLanes;
+/// Maximum number of PCIE bus lanes.
+    int iMaximumBusLanes;
+/// Reserved for future purposes.
+    int iReserved;
 } ADLPMActivity;
 
+////////////////////////////////////////////////////////////////////////////////////////////
+///\brief Structure containing the range of Overdrive parameter.
+///
+/// This structure is used to store information about the range of Overdrive parameter.
+/// This structure is used by ADLODParameters.
+/// \nosubgrouping
+////////////////////////////////////////////////////////////////////////////////////////////
 typedef struct ADLODParameterRange
 {
+/// Minimum parameter value.
   int iMin;
+/// Maximum parameter value.
   int iMax;
+/// Parameter step value.
   int iStep;
 } ADLODParameterRange;
 
+/////////////////////////////////////////////////////////////////////////////////////////////
+///\brief Structure containing information about Overdrive parameters.
+///
+/// This structure is used to store information about Overdrive parameters.
+/// This structure is used by the ADL_Overdrive5_ODParameters_Get() function.
+/// \nosubgrouping
+////////////////////////////////////////////////////////////////////////////////////////////
 typedef struct ADLODParameters
 {
+/// Must be set to the size of the structure
   int iSize;
+/// Number of standard performance states.
   int iNumberOfPerformanceLevels;
+/// Indicates whether the GPU is capable of measuring its activity.
   int iActivityReportingSupported;
+/// Indicates whether the GPU supports discrete performance levels or performance range.
   int iDiscretePerformanceLevels;
+/// Reserved for future use.
   int iReserved;
+/// Engine clock range.
   ADLODParameterRange sEngineClock;
+/// Memory clock range.
   ADLODParameterRange sMemoryClock;
+/// Core voltage range.
   ADLODParameterRange sVddc;
 } ADLODParameters;
 
-typedef struct ADLODPerformanceLevel
+/////////////////////////////////////////////////////////////////////////////////////////////
+///\brief Structure containing information about Overdrive 6 fan speed information
+///
+/// This structure is used to store information about Overdrive 6 fan speed information
+/// \nosubgrouping
+////////////////////////////////////////////////////////////////////////////////////////////
+typedef struct _ADLOD6FanSpeedInfo
 {
-  int iEngineClock;
-  int iMemoryClock;
-  int iVddc;
-} ADLODPerformanceLevel;
+    /// Contains a bitmap of the valid fan speed type flags.  Possible values: \ref ADL_OD6_FANSPEED_TYPE_PERCENT, \ref ADL_OD6_FANSPEED_TYPE_RPM, \ref ADL_OD6_FANSPEED_USER_DEFINED
+    int     iSpeedType;
+    /// Contains current fan speed in percent (if valid flag exists in iSpeedType)
+    int     iFanSpeedPercent;
+    /// Contains current fan speed in RPM (if valid flag exists in iSpeedType)
+    int        iFanSpeedRPM;
+
+    /// Value for future extension
+    int     iExtValue;
+    /// Mask for future extension
+    int     iExtMask;
 
-/*
- * Attention: we had to change this struct due to an out-of-bound problem mentioned here:
- * https://github.com/hashcat/hashcat/issues/244
- * the change: ADLODPerformanceLevel aLevels [1] -> ADLODPerformanceLevel aLevels [2]
- */
+} ADLOD6FanSpeedInfo;
 
-typedef struct ADLODPerformanceLevels
+/////////////////////////////////////////////////////////////////////////////////////////////
+///\brief Structure containing information about Overdrive 6 fan speed value
+///
+/// This structure is used to store information about Overdrive 6 fan speed value
+/// \nosubgrouping
+////////////////////////////////////////////////////////////////////////////////////////////
+typedef struct _ADLOD6FanSpeedValue
 {
-  int iSize;
-  int iReserved;
-  ADLODPerformanceLevel aLevels [2];
-} ADLODPerformanceLevels;
+    /// Indicates the units of the fan speed.  Possible values: \ref ADL_OD6_FANSPEED_TYPE_PERCENT, \ref ADL_OD6_FANSPEED_TYPE_RPM
+    int     iSpeedType;
+    /// Fan speed value (units as indicated above)
+    int     iFanSpeed;
 
-typedef struct ADLOD6FanSpeedInfo
-{
-  int iSpeedType;
-  int iFanSpeedPercent;
-  int iFanSpeedRPM;
-  int iExtValue;
-  int iExtMask;
-} ADLOD6FanSpeedInfo;
+    /// Value for future extension
+    int     iExtValue;
+    /// Mask for future extension
+    int     iExtMask;
 
-typedef struct ADLOD6FanSpeedValue
-{
-  int iSpeedType;
-  int iFanSpeed;
-  int iExtValue;
-  int iExtMask;
 } ADLOD6FanSpeedValue;
 
-typedef struct ADLOD6CurrentStatus
+/////////////////////////////////////////////////////////////////////////////////////////////
+///\brief Structure containing information about current Overdrive 6 performance status.
+///
+/// This structure is used to store information about current Overdrive 6 performance status.
+/// \nosubgrouping
+////////////////////////////////////////////////////////////////////////////////////////////
+typedef struct _ADLOD6CurrentStatus
 {
-  int iEngineClock;
-  int iMemoryClock;
-  int iActivityPercent;
-  int iCurrentPerformanceLevel;
-  int iCurrentBusSpeed;
-  int iCurrentBusLanes;
-  int iMaximumBusLanes;
-  int iExtValue;
-  int iExtMask;
+    /// Current engine clock in 10 KHz.
+    int     iEngineClock;
+    /// Current memory clock in 10 KHz.
+    int     iMemoryClock;
+    /// Current GPU activity in percent.  This
+    /// indicates how "busy" the GPU is.
+    int     iActivityPercent;
+    /// Not used.  Reserved for future use.
+    int     iCurrentPerformanceLevel;
+    /// Current PCI-E bus speed
+    int     iCurrentBusSpeed;
+    /// Current PCI-E bus # of lanes
+    int     iCurrentBusLanes;
+    /// Maximum possible PCI-E bus # of lanes
+    int     iMaximumBusLanes;
+
+    /// Value for future extension
+    int     iExtValue;
+    /// Mask for future extension
+    int     iExtMask;
+
 } ADLOD6CurrentStatus;
 
-typedef struct ADLOD6ParameterRange
+/////////////////////////////////////////////////////////////////////////////////////////////
+///\brief Structure containing information about Overdrive 6 clock range
+///
+/// This structure is used to store information about Overdrive 6 clock range
+/// \nosubgrouping
+////////////////////////////////////////////////////////////////////////////////////////////
+typedef struct _ADLOD6ParameterRange
 {
-  int iMin;
-  int iMax;
-  int iStep;
+    /// The starting value of the clock range
+    int     iMin;
+    /// The ending value of the clock range
+    int     iMax;
+    /// The minimum increment between clock values
+    int     iStep;
+
 } ADLOD6ParameterRange;
 
-typedef struct ADLOD6Capabilities
+/////////////////////////////////////////////////////////////////////////////////////////////
+///\brief Structure containing information about Overdrive 6 capabilities
+///
+/// This structure is used to store information about Overdrive 6 capabilities
+/// \nosubgrouping
+////////////////////////////////////////////////////////////////////////////////////////////
+typedef struct _ADLOD6Capabilities
 {
-  int iCapabilities;
-  int iSupportedStates;
-  int iNumberOfPerformanceLevels;
-  ADLOD6ParameterRange sEngineClockRange;
-  ADLOD6ParameterRange sMemoryClockRange;
-  int iExtValue;
-  int iExtMask;
+    /// Contains a bitmap of the OD6 capability flags.  Possible values: \ref ADL_OD6_CAPABILITY_SCLK_CUSTOMIZATION,
+    /// \ref ADL_OD6_CAPABILITY_MCLK_CUSTOMIZATION, \ref ADL_OD6_CAPABILITY_GPU_ACTIVITY_MONITOR
+    int     iCapabilities;
+    /// Contains a bitmap indicating the power states
+    /// supported by OD6.  Currently only the performance state
+    /// is supported. Possible Values: \ref ADL_OD6_SUPPORTEDSTATE_PERFORMANCE
+    int     iSupportedStates;
+    /// Number of levels. OD6 will always use 2 levels, which describe
+    /// the minimum to maximum clock ranges.
+    /// The 1st level indicates the minimum clocks, and the 2nd level
+    /// indicates the maximum clocks.
+    int     iNumberOfPerformanceLevels;
+    /// Contains the hard limits of the sclk range.  Overdrive
+    /// clocks cannot be set outside this range.
+    ADLOD6ParameterRange     sEngineClockRange;
+    /// Contains the hard limits of the mclk range.  Overdrive
+    /// clocks cannot be set outside this range.
+    ADLOD6ParameterRange     sMemoryClockRange;
+
+    /// Value for future extension
+    int     iExtValue;
+    /// Mask for future extension
+    int     iExtMask;
+
 } ADLOD6Capabilities;
 
-typedef struct ADLOD6PerformanceLevel
+/////////////////////////////////////////////////////////////////////////////////////////////
+///\brief Structure containing information about Overdrive level.
+///
+/// This structure is used to store information about Overdrive level.
+/// This structure is used by ADLODPerformanceLevels.
+/// \nosubgrouping
+////////////////////////////////////////////////////////////////////////////////////////////
+typedef struct ADLODPerformanceLevel
 {
+/// Engine clock.
   int iEngineClock;
+/// Memory clock.
   int iMemoryClock;
-} ADLOD6PerformanceLevel;
+/// Core voltage.
+  int iVddc;
+} ADLODPerformanceLevel;
 
-/*
- * Attention: we had to change this struct due to an out-of-bound problem mentioned here:
- * https://github.com/hashcat/hashcat/issues/244
- * the change: ADLOD6PerformanceLevel aLevels [1] -> ADLOD6PerformanceLevel aLevels [2]
- */
+/////////////////////////////////////////////////////////////////////////////////////////////
+///\brief Structure containing information about Overdrive 6 clock values.
+///
+/// This structure is used to store information about Overdrive 6 clock values.
+/// \nosubgrouping
+////////////////////////////////////////////////////////////////////////////////////////////
+typedef struct _ADLOD6PerformanceLevel
+{
+    /// Engine (core) clock.
+    int iEngineClock;
+    /// Memory clock.
+    int iMemoryClock;
+
+} ADLOD6PerformanceLevel;
 
-typedef struct ADLOD6StateInfo
+/////////////////////////////////////////////////////////////////////////////////////////////
+///\brief Structure containing information about Overdrive 6 clocks.
+///
+/// This structure is used to store information about Overdrive 6 clocks.  This is a
+/// variable-sized structure.  iNumberOfPerformanceLevels indicates how many elements
+/// are contained in the aLevels array.
+/// \nosubgrouping
+////////////////////////////////////////////////////////////////////////////////////////////
+typedef struct _ADLOD6StateInfo
 {
-  int iNumberOfPerformanceLevels;
-  int iExtValue;
-  int iExtMask;
-  ADLOD6PerformanceLevel aLevels [2];
+    /// Number of levels.  OD6 uses clock ranges instead of discrete performance levels.
+    /// iNumberOfPerformanceLevels is always 2.  The 1st level indicates the minimum clocks
+    /// in the range.  The 2nd level indicates the maximum clocks in the range.
+    int     iNumberOfPerformanceLevels;
+
+    /// Value for future extension
+    int     iExtValue;
+    /// Mask for future extension
+    int     iExtMask;
+
+    /// Variable-sized array of levels.
+    /// The number of elements in the array is specified by iNumberOfPerformanceLevels.
+    ADLOD6PerformanceLevel aLevels [1];
+
 } ADLOD6StateInfo;
 
+/////////////////////////////////////////////////////////////////////////////////////////////
+///\brief Structure containing information about Overdrive performance levels.
+///
+/// This structure is used to store information about Overdrive performance levels.
+/// This structure is used by the ADL_Overdrive5_ODPerformanceLevels_Get() and ADL_Overdrive5_ODPerformanceLevels_Set() functions.
+/// \nosubgrouping
+////////////////////////////////////////////////////////////////////////////////////////////
+typedef struct ADLODPerformanceLevels
+{
+/// Must be set to sizeof( \ref ADLODPerformanceLevels ) + sizeof( \ref ADLODPerformanceLevel ) * (ADLODParameters.iNumberOfPerformanceLevels - 1)
+  int iSize;
+  int iReserved;
+/// Array of performance state descriptors. Must have ADLODParameters.iNumberOfPerformanceLevels elements.
+  ADLODPerformanceLevel aLevels [1];
+} ADLODPerformanceLevels;
+
+/////////////////////////////////////////////////////////////////////////////////////////////
+///\brief Structure containing information about Performance Metrics data
+///
+/// This structure is used to store information about Performance Metrics data output
+/// \nosubgrouping
+////////////////////////////////////////////////////////////////////////////////////////////
+typedef struct _ADLSingleSensorData
+{
+    int supported;
+    int  value;
+} ADLSingleSensorData;
+
+typedef struct _ADLPMLogDataOutput
+{
+    int size;
+    ADLSingleSensorData sensors[ADL_PMLOG_MAX_SENSORS];
+}ADLPMLogDataOutput;
+
+typedef enum _ADLSensorType
+{
+	SENSOR_MAXTYPES = 0,
+	PMLOG_CLK_GFXCLK = 1,
+	PMLOG_CLK_MEMCLK = 2,
+	PMLOG_CLK_SOCCLK = 3,
+	PMLOG_CLK_UVDCLK1 = 4,
+	PMLOG_CLK_UVDCLK2 = 5,
+	PMLOG_CLK_VCECLK = 6,
+	PMLOG_CLK_VCNCLK = 7,
+	PMLOG_TEMPERATURE_EDGE = 8,
+	PMLOG_TEMPERATURE_MEM = 9,
+	PMLOG_TEMPERATURE_VRVDDC = 10,
+	PMLOG_TEMPERATURE_VRMVDD = 11,
+	PMLOG_TEMPERATURE_LIQUID = 12,
+	PMLOG_TEMPERATURE_PLX = 13,
+	PMLOG_FAN_RPM = 14,
+	PMLOG_FAN_PERCENTAGE = 15,
+	PMLOG_SOC_VOLTAGE = 16,
+	PMLOG_SOC_POWER = 17,
+	PMLOG_SOC_CURRENT = 18,
+	PMLOG_INFO_ACTIVITY_GFX = 19,
+	PMLOG_INFO_ACTIVITY_MEM = 20,
+	PMLOG_GFX_VOLTAGE = 21,
+	PMLOG_MEM_VOLTAGE = 22,
+	PMLOG_ASIC_POWER = 23,
+	PMLOG_TEMPERATURE_VRSOC = 24,
+	PMLOG_TEMPERATURE_VRMVDD0 = 25,
+	PMLOG_TEMPERATURE_VRMVDD1 = 26,
+	PMLOG_TEMPERATURE_HOTSPOT = 27,
+        PMLOG_TEMPERATURE_GFX = 28,
+        PMLOG_TEMPERATURE_SOC = 29,
+        PMLOG_GFX_POWER = 30,
+        PMLOG_GFX_CURRENT = 31,
+        PMLOG_TEMPERATURE_CPU = 32,
+        PMLOG_CPU_POWER = 33,
+        PMLOG_CLK_CPUCLK = 34,
+        PMLOG_THROTTLER_STATUS = 35,
+        PMLOG_CLK_VCN1CLK1 = 36,
+        PMLOG_CLK_VCN1CLK2 = 37,
+        PMLOG_SMART_POWERSHIFT_CPU = 38,
+        PMLOG_SMART_POWERSHIFT_DGPU = 39,
+	PMLOG_MAX_SENSORS_REAL
+} ADLSensorType;
+
+/// \brief Handle to ADL client context.
+///
+///  ADL clients obtain context handle from initial call to \ref ADL2_Main_Control_Create.
+///  Clients have to pass the handle to each subsequent ADL call and finally destroy
+///  the context with call to \ref ADL2_Main_Control_Destroy
+/// \nosubgrouping
+typedef void *ADL_CONTEXT_HANDLE;
+
 #if defined (__MSC_VER)
 #define ADL_API_CALL __cdecl
 #elif defined (_WIN32) || defined (__WIN32__)
@@ -251,62 +625,53 @@ typedef void* (ADL_API_CALL *ADL_MAIN_MALLOC_CALLBACK )( int );
 
 typedef int HM_ADAPTER_ADL;
 
-typedef struct struct_ADLOD6MemClockState
-{
-  ADLOD6StateInfo state;
-  ADLOD6PerformanceLevel level;
-
-} ADLOD6MemClockState;
-
-typedef int (ADL_API_CALL *ADL_MAIN_CONTROL_DESTROY) (void);
-typedef int (ADL_API_CALL *ADL_MAIN_CONTROL_CREATE) (ADL_MAIN_MALLOC_CALLBACK, int);
-typedef int (ADL_API_CALL *ADL_ADAPTER_NUMBEROFADAPTERS_GET) (int *);
-typedef int (ADL_API_CALL *ADL_ADAPTER_ADAPTERINFO_GET) (LPAdapterInfo, int);
-typedef int (ADL_API_CALL *ADL_DISPLAY_DISPLAYINFO_GET) (int, int *, ADLDisplayInfo **, int);
-typedef int (ADL_API_CALL *ADL_OVERDRIVE5_TEMPERATURE_GET) (int, int, ADLTemperature *);
-typedef int (ADL_API_CALL *ADL_OVERDRIVE6_TEMPERATURE_GET) (int, int *);
-typedef int (ADL_API_CALL *ADL_OVERDRIVE5_CURRENTACTIVITY_GET) (int, ADLPMActivity *);
-typedef int (ADL_API_CALL *ADL_OVERDRIVE5_THERMALDEVICES_ENUM) (int, int, ADLThermalControllerInfo *);
-typedef int (ADL_API_CALL *ADL_ADAPTER_ID_GET) (int, int *);
-typedef int (ADL_API_CALL *ADL_ADAPTER_VIDEOBIOSINFO_GET) (int, ADLBiosInfo *);
-typedef int (ADL_API_CALL *ADL_OVERDRIVE5_FANSPEEDINFO_GET) (int, int, ADLFanSpeedInfo *);
-typedef int (ADL_API_CALL *ADL_OVERDRIVE5_FANSPEED_GET) (int, int, ADLFanSpeedValue *);
-typedef int (ADL_API_CALL *ADL_OVERDRIVE6_FANSPEED_GET) (int, ADLOD6FanSpeedInfo *);
-typedef int (ADL_API_CALL *ADL_OVERDRIVE5_ODPARAMETERS_GET) (int, ADLODParameters *);
-typedef int (ADL_API_CALL *ADL_OVERDRIVE5_ODPERFORMANCELEVELS_GET) (int, int, ADLODPerformanceLevels *);
-typedef int (ADL_API_CALL *ADL_ADAPTER_ACTIVE_GET) (int, int *);
-typedef int (ADL_API_CALL *ADL_OVERDRIVE_CAPS) (int, int *, int *, int *);
-typedef int (ADL_API_CALL *ADL_OVERDRIVE6_CURRENTSTATUS_GET) (int, ADLOD6CurrentStatus *);
-typedef int (ADL_API_CALL *ADL_OVERDRIVE6_STATEINFO_GET) (int, int, ADLOD6MemClockState *);
-typedef int (ADL_API_CALL *ADL_OVERDRIVE6_CAPABILITIES_GET) (int, ADLOD6Capabilities *);
-typedef int (ADL_API_CALL *ADL_OVERDRIVE6_TARGETTEMPERATUREDATA_GET) (int, int *, int *);
-typedef int (ADL_API_CALL *ADL_OVERDRIVE6_TARGETTEMPERATURERANGEINFO_GET) (int, ADLOD6ParameterRange *);
+typedef int (ADL_API_CALL *ADL_ADAPTER_ACTIVE_GET ) ( int, int* );
+typedef int (ADL_API_CALL *ADL_ADAPTER_ADAPTERINFO_GET ) ( LPAdapterInfo, int );
+typedef int (ADL_API_CALL *ADL_ADAPTER_NUMBEROFADAPTERS_GET ) ( int* );
+typedef int (ADL_API_CALL *ADL_DISPLAY_DISPLAYINFO_GET ) ( int, int *, ADLDisplayInfo **, int );
+typedef int (ADL_API_CALL *ADL_MAIN_CONTROL_CREATE )(ADL_MAIN_MALLOC_CALLBACK, int );
+typedef int (ADL_API_CALL *ADL_MAIN_CONTROL_DESTROY )(void); // (void): explicit no-argument prototype; "()" would leave the arguments unspecified in C
+typedef int (ADL_API_CALL *ADL_OVERDRIVE5_CURRENTACTIVITY_GET ) (int iAdapterIndex, ADLPMActivity *lpActivity);
+typedef int (ADL_API_CALL *ADL_OVERDRIVE5_FANSPEEDINFO_GET ) (int iAdapterIndex, int iThermalControllerIndex, ADLFanSpeedInfo *lpFanSpeedInfo);
+typedef int (ADL_API_CALL *ADL_OVERDRIVE5_FANSPEED_GET ) (int iAdapterIndex, int iThermalControllerIndex, ADLFanSpeedValue *lpFanSpeedValue);
+typedef int (ADL_API_CALL *ADL_OVERDRIVE5_ODPARAMETERS_GET ) (int iAdapterIndex, ADLODParameters *lpOdParameters);
+typedef int (ADL_API_CALL *ADL_OVERDRIVE5_ODPERFORMANCELEVELS_GET ) (int iAdapterIndex, int iDefault, ADLODPerformanceLevels *lpOdPerformanceLevels);
+typedef int (ADL_API_CALL *ADL_OVERDRIVE5_TEMPERATURE_GET ) (int iAdapterIndex, int iThermalControllerIndex, ADLTemperature *lpTemperature);
+typedef int (ADL_API_CALL *ADL_OVERDRIVE5_THERMALDEVICES_ENUM ) (int iAdapterIndex, int iThermalControllerIndex, ADLThermalControllerInfo *lpThermalControllerInfo);
+typedef int (ADL_API_CALL *ADL_OVERDRIVE6_CAPABILITIES_GET ) (int iAdapterIndex, ADLOD6Capabilities *lpODCapabilities);
+typedef int (ADL_API_CALL *ADL_OVERDRIVE6_CURRENTSTATUS_GET )(int iAdapterIndex, ADLOD6CurrentStatus *lpCurrentStatus);
+typedef int (ADL_API_CALL *ADL_OVERDRIVE6_FANSPEED_GET )(int iAdapterIndex, ADLOD6FanSpeedInfo *lpFanSpeedInfo);
+typedef int (ADL_API_CALL *ADL_OVERDRIVE6_STATEINFO_GET )(int iAdapterIndex, int iStateType, ADLOD6StateInfo *lpStateInfo);
+typedef int (ADL_API_CALL *ADL_OVERDRIVE6_TEMPERATURE_GET )(int iAdapterIndex, int *lpTemperature);
+typedef int (ADL_API_CALL *ADL_OVERDRIVE_CAPS ) (int iAdapterIndex, int *iSupported, int *iEnabled, int *iVersion);
+typedef int (ADL_API_CALL *ADL2_OVERDRIVE_CAPS) (ADL_CONTEXT_HANDLE context, int iAdapterIndex, int * iSupported, int * iEnabled, int * iVersion);
+typedef int (ADL_API_CALL *ADL2_NEW_QUERYPMLOGDATA_GET) (ADL_CONTEXT_HANDLE, int, ADLPMLogDataOutput*);
 
 typedef struct hm_adl_lib
 {
   hc_dynlib_t lib;
 
-  ADL_MAIN_CONTROL_DESTROY ADL_Main_Control_Destroy;
-  ADL_MAIN_CONTROL_CREATE ADL_Main_Control_Create;
-  ADL_ADAPTER_NUMBEROFADAPTERS_GET ADL_Adapter_NumberOfAdapters_Get;
+  ADL_ADAPTER_ACTIVE_GET ADL_Adapter_Active_Get;
   ADL_ADAPTER_ADAPTERINFO_GET ADL_Adapter_AdapterInfo_Get;
+  ADL_ADAPTER_NUMBEROFADAPTERS_GET ADL_Adapter_NumberOfAdapters_Get;
   ADL_DISPLAY_DISPLAYINFO_GET ADL_Display_DisplayInfo_Get;
-  ADL_ADAPTER_ID_GET ADL_Adapter_ID_Get;
-  ADL_ADAPTER_VIDEOBIOSINFO_GET ADL_Adapter_VideoBiosInfo_Get;
-  ADL_OVERDRIVE5_THERMALDEVICES_ENUM ADL_Overdrive5_ThermalDevices_Enum;
-  ADL_OVERDRIVE5_TEMPERATURE_GET ADL_Overdrive5_Temperature_Get;
-  ADL_OVERDRIVE6_TEMPERATURE_GET ADL_Overdrive6_Temperature_Get;
+  ADL_MAIN_CONTROL_CREATE ADL_Main_Control_Create;
+  ADL_MAIN_CONTROL_DESTROY ADL_Main_Control_Destroy;
   ADL_OVERDRIVE5_CURRENTACTIVITY_GET ADL_Overdrive5_CurrentActivity_Get;
   ADL_OVERDRIVE5_FANSPEEDINFO_GET ADL_Overdrive5_FanSpeedInfo_Get;
   ADL_OVERDRIVE5_FANSPEED_GET ADL_Overdrive5_FanSpeed_Get;
-  ADL_OVERDRIVE6_FANSPEED_GET ADL_Overdrive6_FanSpeed_Get;
-  ADL_ADAPTER_ACTIVE_GET ADL_Adapter_Active_Get;
-  ADL_OVERDRIVE_CAPS ADL_Overdrive_Caps;
+  ADL_OVERDRIVE5_ODPARAMETERS_GET ADL_Overdrive5_ODParameters_Get;
+  ADL_OVERDRIVE5_ODPERFORMANCELEVELS_GET ADL_Overdrive5_ODPerformanceLevels_Get;
+  ADL_OVERDRIVE5_TEMPERATURE_GET ADL_Overdrive5_Temperature_Get;
+  ADL_OVERDRIVE5_THERMALDEVICES_ENUM ADL_Overdrive5_ThermalDevices_Enum;
   ADL_OVERDRIVE6_CAPABILITIES_GET ADL_Overdrive6_Capabilities_Get;
-  ADL_OVERDRIVE6_STATEINFO_GET  ADL_Overdrive6_StateInfo_Get;
   ADL_OVERDRIVE6_CURRENTSTATUS_GET ADL_Overdrive6_CurrentStatus_Get;
-  ADL_OVERDRIVE6_TARGETTEMPERATUREDATA_GET ADL_Overdrive6_TargetTemperatureData_Get;
-  ADL_OVERDRIVE6_TARGETTEMPERATURERANGEINFO_GET ADL_Overdrive6_TargetTemperatureRangeInfo_Get;
+  ADL_OVERDRIVE6_FANSPEED_GET ADL_Overdrive6_FanSpeed_Get;
+  ADL_OVERDRIVE6_STATEINFO_GET  ADL_Overdrive6_StateInfo_Get;
+  ADL_OVERDRIVE6_TEMPERATURE_GET ADL_Overdrive6_Temperature_Get;
+  ADL_OVERDRIVE_CAPS ADL_Overdrive_Caps;
+  ADL2_OVERDRIVE_CAPS ADL2_Overdrive_Caps;
+  ADL2_NEW_QUERYPMLOGDATA_GET ADL2_New_QueryPMLogData_Get;
 
 } hm_adl_lib_t;
 
@@ -326,6 +691,8 @@ int hm_ADL_Overdrive_CurrentActivity_Get (void *hashcat_ctx, int iAdapterIndex,
 int hm_ADL_Overdrive5_FanSpeed_Get (void *hashcat_ctx, int iAdapterIndex, int iThermalControllerIndex, ADLFanSpeedValue *lpFanSpeedValue);
 int hm_ADL_Overdrive6_FanSpeed_Get (void *hashcat_ctx, int iAdapterIndex, ADLOD6FanSpeedInfo *lpFanSpeedInfo);
 int hm_ADL_Overdrive_Caps (void *hashcat_ctx, int iAdapterIndex, int *od_supported, int *od_enabled, int *od_version);
-int hm_ADL_Overdrive6_TargetTemperatureData_Get (void *hashcat_ctx, int iAdapterIndex, int *cur_temp, int *default_temp);
+int hm_ADL2_Overdrive_Caps (void *hashcat_ctx, int iAdapterIndex, int *od_supported, int *od_enabled, int *od_version);
+int hm_ADL2_New_QueryPMLogData_Get (void *hashcat_ctx, int iAdapterIndex, ADLPMLogDataOutput *lpDataOutput);
+
 
 #endif // _EXT_ADL_H
diff --git a/include/types.h b/include/types.h
index 9b58e6c6a..19743ad6e 100644
--- a/include/types.h
+++ b/include/types.h
@@ -1799,8 +1799,6 @@ typedef struct hwmon_ctx
 
   hm_attrs_t *hm_device;
 
-  ADLOD6MemClockState *od_clock_mem_status;
-
 } hwmon_ctx_t;
 
 #if defined (__APPLE__)
diff --git a/src/ext_ADL.c b/src/ext_ADL.c
index e54420713..08ba55e1e 100644
--- a/src/ext_ADL.c
+++ b/src/ext_ADL.c
@@ -50,27 +50,27 @@ int adl_init (void *hashcat_ctx)
     return -1;
   }
 
-  HC_LOAD_FUNC(adl, ADL_Main_Control_Destroy, ADL_MAIN_CONTROL_DESTROY, ADL, 0);
-  HC_LOAD_FUNC(adl, ADL_Main_Control_Create, ADL_MAIN_CONTROL_CREATE, ADL, 0);
-  HC_LOAD_FUNC(adl, ADL_Adapter_NumberOfAdapters_Get, ADL_ADAPTER_NUMBEROFADAPTERS_GET, ADL, 0);
+  HC_LOAD_FUNC(adl, ADL_Adapter_Active_Get, ADL_ADAPTER_ACTIVE_GET, ADL, 0);
   HC_LOAD_FUNC(adl, ADL_Adapter_AdapterInfo_Get, ADL_ADAPTER_ADAPTERINFO_GET, ADL, 0);
+  HC_LOAD_FUNC(adl, ADL_Adapter_NumberOfAdapters_Get, ADL_ADAPTER_NUMBEROFADAPTERS_GET, ADL, 0);
   HC_LOAD_FUNC(adl, ADL_Display_DisplayInfo_Get, ADL_DISPLAY_DISPLAYINFO_GET, ADL, 0);
-  HC_LOAD_FUNC(adl, ADL_Adapter_ID_Get, ADL_ADAPTER_ID_GET, ADL, 0);
-  HC_LOAD_FUNC(adl, ADL_Adapter_VideoBiosInfo_Get, ADL_ADAPTER_VIDEOBIOSINFO_GET, ADL, 0);
-  HC_LOAD_FUNC(adl, ADL_Overdrive5_ThermalDevices_Enum, ADL_OVERDRIVE5_THERMALDEVICES_ENUM, ADL, 0);
-  HC_LOAD_FUNC(adl, ADL_Overdrive5_Temperature_Get, ADL_OVERDRIVE5_TEMPERATURE_GET, ADL, 0);
-  HC_LOAD_FUNC(adl, ADL_Overdrive6_Temperature_Get, ADL_OVERDRIVE6_TEMPERATURE_GET, ADL, 0);
+  HC_LOAD_FUNC(adl, ADL_Main_Control_Create, ADL_MAIN_CONTROL_CREATE, ADL, 0);
+  HC_LOAD_FUNC(adl, ADL_Main_Control_Destroy, ADL_MAIN_CONTROL_DESTROY, ADL, 0);
   HC_LOAD_FUNC(adl, ADL_Overdrive5_CurrentActivity_Get, ADL_OVERDRIVE5_CURRENTACTIVITY_GET, ADL, 0);
   HC_LOAD_FUNC(adl, ADL_Overdrive5_FanSpeedInfo_Get, ADL_OVERDRIVE5_FANSPEEDINFO_GET, ADL, 0);
   HC_LOAD_FUNC(adl, ADL_Overdrive5_FanSpeed_Get, ADL_OVERDRIVE5_FANSPEED_GET, ADL, 0);
-  HC_LOAD_FUNC(adl, ADL_Overdrive6_FanSpeed_Get, ADL_OVERDRIVE6_FANSPEED_GET, ADL, 0);
-  HC_LOAD_FUNC(adl, ADL_Adapter_Active_Get, ADL_ADAPTER_ACTIVE_GET, ADL, 0);
-  HC_LOAD_FUNC(adl, ADL_Overdrive_Caps, ADL_OVERDRIVE_CAPS, ADL, 0);
+  HC_LOAD_FUNC(adl, ADL_Overdrive5_ODParameters_Get, ADL_OVERDRIVE5_ODPARAMETERS_GET, ADL, 0);
+  HC_LOAD_FUNC(adl, ADL_Overdrive5_ODPerformanceLevels_Get, ADL_OVERDRIVE5_ODPERFORMANCELEVELS_GET, ADL, 0);
+  HC_LOAD_FUNC(adl, ADL_Overdrive5_Temperature_Get, ADL_OVERDRIVE5_TEMPERATURE_GET, ADL, 0);
+  HC_LOAD_FUNC(adl, ADL_Overdrive5_ThermalDevices_Enum, ADL_OVERDRIVE5_THERMALDEVICES_ENUM, ADL, 0);
   HC_LOAD_FUNC(adl, ADL_Overdrive6_Capabilities_Get, ADL_OVERDRIVE6_CAPABILITIES_GET, ADL, 0);
-  HC_LOAD_FUNC(adl, ADL_Overdrive6_StateInfo_Get, ADL_OVERDRIVE6_STATEINFO_GET, ADL, 0);
   HC_LOAD_FUNC(adl, ADL_Overdrive6_CurrentStatus_Get, ADL_OVERDRIVE6_CURRENTSTATUS_GET, ADL, 0);
-  HC_LOAD_FUNC(adl, ADL_Overdrive6_TargetTemperatureData_Get, ADL_OVERDRIVE6_TARGETTEMPERATUREDATA_GET, ADL, 0);
-  HC_LOAD_FUNC(adl, ADL_Overdrive6_TargetTemperatureRangeInfo_Get, ADL_OVERDRIVE6_TARGETTEMPERATURERANGEINFO_GET, ADL, 0);
+  HC_LOAD_FUNC(adl, ADL_Overdrive6_FanSpeed_Get, ADL_OVERDRIVE6_FANSPEED_GET, ADL, 0);
+  HC_LOAD_FUNC(adl, ADL_Overdrive6_StateInfo_Get, ADL_OVERDRIVE6_STATEINFO_GET, ADL, 0);
+  HC_LOAD_FUNC(adl, ADL_Overdrive6_Temperature_Get, ADL_OVERDRIVE6_TEMPERATURE_GET, ADL, 0);
+  HC_LOAD_FUNC(adl, ADL_Overdrive_Caps, ADL_OVERDRIVE_CAPS, ADL, 0);
+  HC_LOAD_FUNC(adl, ADL2_Overdrive_Caps, ADL2_OVERDRIVE_CAPS, ADL, 1);
+  HC_LOAD_FUNC(adl, ADL2_New_QueryPMLogData_Get, ADL2_NEW_QUERYPMLOGDATA_GET, ADL, 1);
 
   return 0;
 }
@@ -270,17 +270,42 @@ int hm_ADL_Overdrive_Caps (void *hashcat_ctx, int iAdapterIndex, int *od_support
   return 0;
 }
 
-int hm_ADL_Overdrive6_TargetTemperatureData_Get (void *hashcat_ctx, int iAdapterIndex, int *cur_temp, int *default_temp)
+int hm_ADL2_Overdrive_Caps (void *hashcat_ctx, int iAdapterIndex, int *iSupported, int *iEnabled, int *iVersion)
+{
+  hwmon_ctx_t *hwmon_ctx = ((hashcat_ctx_t *) hashcat_ctx)->hwmon_ctx;
+
+  ADL_PTR *adl = (ADL_PTR *) hwmon_ctx->hm_adl;
+
+  // ADL2_Overdrive_Caps may be NULL here; fall back to the ADL1 call (unverified whether that is the right thing to do)
+
+  if (adl->ADL2_Overdrive_Caps == NULL)
+  {
+    return hm_ADL_Overdrive_Caps (hashcat_ctx, iAdapterIndex, iSupported, iEnabled, iVersion);
+  }
+
+  const int ADL_rc = adl->ADL2_Overdrive_Caps (NULL, iAdapterIndex, iSupported, iEnabled, iVersion);
+
+  if (ADL_rc != ADL_OK)
+  {
+    event_log_error (hashcat_ctx, "ADL2_Overdrive_Caps(): %d", ADL_rc);
+
+    return -1;
+  }
+
+  return 0;
+}
+
+int hm_ADL2_New_QueryPMLogData_Get (void *hashcat_ctx, int iAdapterIndex, ADLPMLogDataOutput *lpDataOutput)
 {
   hwmon_ctx_t *hwmon_ctx = ((hashcat_ctx_t *) hashcat_ctx)->hwmon_ctx;
 
   ADL_PTR *adl = (ADL_PTR *) hwmon_ctx->hm_adl;
 
-  const int ADL_rc = adl->ADL_Overdrive6_TargetTemperatureData_Get (iAdapterIndex, cur_temp, default_temp);
+  const int ADL_rc = adl->ADL2_New_QueryPMLogData_Get (NULL, iAdapterIndex, lpDataOutput);
 
   if (ADL_rc != ADL_OK)
   {
-    event_log_error (hashcat_ctx, "ADL_Overdrive6_TargetTemperatureData_Get(): %d", ADL_rc);
+    event_log_error (hashcat_ctx, "ADL2_New_QueryPMLogData_Get(): %d", ADL_rc);
 
     return -1;
   }
diff --git a/src/hwmon.c b/src/hwmon.c
index 374056d07..ee5a001e8 100644
--- a/src/hwmon.c
+++ b/src/hwmon.c
@@ -109,19 +109,7 @@ int hm_get_threshold_slowdown_with_devices_idx (hashcat_ctx_t *hashcat_ctx, cons
           }
           else if (hwmon_ctx->hm_device[backend_device_idx].od_version == 6)
           {
-            int CurrentValue = 0;
-            int DefaultValue = 0;
 
-            if (hm_ADL_Overdrive6_TargetTemperatureData_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, &CurrentValue, &DefaultValue) == -1)
-            {
-              hwmon_ctx->hm_device[backend_device_idx].threshold_slowdown_get_supported = false;
-
-              return -1;
-            }
-
-            // the return value has never been tested since hm_ADL_Overdrive6_TargetTemperatureData_Get() never worked on any system. expect problems.
-
-            return DefaultValue;
           }
         }
       }
@@ -346,6 +334,22 @@ int hm_get_temperature_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int b
 
             return Temperature / 1000;
           }
+
+          if (hwmon_ctx->hm_device[backend_device_idx].od_version == 8)
+          {
+            ADLPMLogDataOutput odlpDataOutput;
+
+            memset (&odlpDataOutput, 0, sizeof (ADLPMLogDataOutput));
+
+            if (hm_ADL2_New_QueryPMLogData_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, &odlpDataOutput) == -1)
+            {
+              hwmon_ctx->hm_device[backend_device_idx].temperature_get_supported = false;
+
+              return -1;
+            }
+
+            return odlpDataOutput.sensors[PMLOG_TEMPERATURE_EDGE].value;
+          }
         }
 
         if (hwmon_ctx->hm_sysfs_amdgpu)
@@ -431,8 +435,37 @@ int hm_get_fanpolicy_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int bac
 
           if (hwmon_ctx->hm_device[backend_device_idx].od_version == 6)
           {
+            ADLOD6FanSpeedInfo lpFanSpeedInfo;
+
+            memset (&lpFanSpeedInfo, 0, sizeof (lpFanSpeedInfo));
+
+            if (hm_ADL_Overdrive6_FanSpeed_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, &lpFanSpeedInfo) == -1)
+            {
+              hwmon_ctx->hm_device[backend_device_idx].fanpolicy_get_supported = false;
+              hwmon_ctx->hm_device[backend_device_idx].fanspeed_get_supported  = false;
+
+              return -1;
+            }
+
             return 1;
           }
+
+          if (hwmon_ctx->hm_device[backend_device_idx].od_version == 8)
+          {
+            ADLPMLogDataOutput odlpDataOutput;
+
+            memset (&odlpDataOutput, 0, sizeof (ADLPMLogDataOutput));
+
+            if (hm_ADL2_New_QueryPMLogData_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, &odlpDataOutput) == -1)
+            {
+              hwmon_ctx->hm_device[backend_device_idx].fanpolicy_get_supported = false;
+              hwmon_ctx->hm_device[backend_device_idx].fanspeed_get_supported  = false;
+
+              return -1;
+            }
+
+            return odlpDataOutput.sensors[PMLOG_FAN_PERCENTAGE].supported;
+          }
         }
 
         if (hwmon_ctx->hm_sysfs_amdgpu)
@@ -542,6 +575,22 @@ int hm_get_fanspeed_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int back
 
             return faninfo.iFanSpeedPercent;
           }
+
+          if (hwmon_ctx->hm_device[backend_device_idx].od_version == 8)
+          {
+            ADLPMLogDataOutput odlpDataOutput;
+
+            memset (&odlpDataOutput, 0, sizeof (ADLPMLogDataOutput));
+
+            if (hm_ADL2_New_QueryPMLogData_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, &odlpDataOutput) == -1)
+            {
+              hwmon_ctx->hm_device[backend_device_idx].fanspeed_get_supported = false;
+
+              return -1;
+            }
+
+            return odlpDataOutput.sensors[PMLOG_FAN_PERCENTAGE].value;
+          }
         }
 
         if (hwmon_ctx->hm_sysfs_amdgpu)
@@ -617,18 +666,23 @@ int hm_get_buslanes_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int back
       {
         if (hwmon_ctx->hm_adl)
         {
-          ADLPMActivity PMActivity;
+          if (hwmon_ctx->hm_device[backend_device_idx].od_version == 5)
+          {
+            ADLPMActivity PMActivity;
 
-          PMActivity.iSize = sizeof (ADLPMActivity);
+            PMActivity.iSize = sizeof (ADLPMActivity);
 
-          if (hm_ADL_Overdrive_CurrentActivity_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, &PMActivity) == -1)
-          {
-            hwmon_ctx->hm_device[backend_device_idx].buslanes_get_supported = false;
+            if (hm_ADL_Overdrive_CurrentActivity_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, &PMActivity) == -1)
+            {
+              hwmon_ctx->hm_device[backend_device_idx].buslanes_get_supported = false;
 
-            return -1;
+              return -1;
+            }
+
+            return PMActivity.iCurrentBusLanes;
           }
 
-          return PMActivity.iCurrentBusLanes;
+          // NO OD8: no bus lane query is implemented for od_version 8
         }
 
         if (hwmon_ctx->hm_sysfs_amdgpu)
@@ -704,18 +758,37 @@ int hm_get_utilization_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int b
       {
         if (hwmon_ctx->hm_adl)
         {
-          ADLPMActivity PMActivity;
+          if (hwmon_ctx->hm_device[backend_device_idx].od_version == 5)
+          {
+            ADLPMActivity PMActivity;
 
-          PMActivity.iSize = sizeof (ADLPMActivity);
+            PMActivity.iSize = sizeof (ADLPMActivity);
 
-          if (hm_ADL_Overdrive_CurrentActivity_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, &PMActivity) == -1)
-          {
-            hwmon_ctx->hm_device[backend_device_idx].utilization_get_supported = false;
+            if (hm_ADL_Overdrive_CurrentActivity_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, &PMActivity) == -1)
+            {
+              hwmon_ctx->hm_device[backend_device_idx].utilization_get_supported = false;
 
-            return -1;
+              return -1;
+            }
+
+            return PMActivity.iActivityPercent;
           }
 
-          return PMActivity.iActivityPercent;
+          if (hwmon_ctx->hm_device[backend_device_idx].od_version == 8)
+          {
+            ADLPMLogDataOutput odlpDataOutput;
+
+            memset (&odlpDataOutput, 0, sizeof (ADLPMLogDataOutput));
+
+            if (hm_ADL2_New_QueryPMLogData_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, &odlpDataOutput) == -1)
+            {
+              hwmon_ctx->hm_device[backend_device_idx].utilization_get_supported = false;
+
+              return -1;
+            }
+
+            return odlpDataOutput.sensors[PMLOG_INFO_ACTIVITY_GFX].value;
+          }
         }
 
         if (hwmon_ctx->hm_sysfs_amdgpu)
@@ -808,18 +881,37 @@ int hm_get_memoryspeed_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int b
       {
         if (hwmon_ctx->hm_adl)
         {
-          ADLPMActivity PMActivity;
+          if (hwmon_ctx->hm_device[backend_device_idx].od_version == 5)
+          {
+            ADLPMActivity PMActivity;
 
-          PMActivity.iSize = sizeof (ADLPMActivity);
+            PMActivity.iSize = sizeof (ADLPMActivity);
 
-          if (hm_ADL_Overdrive_CurrentActivity_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, &PMActivity) == -1)
-          {
-            hwmon_ctx->hm_device[backend_device_idx].memoryspeed_get_supported = false;
+            if (hm_ADL_Overdrive_CurrentActivity_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, &PMActivity) == -1)
+            {
+              hwmon_ctx->hm_device[backend_device_idx].memoryspeed_get_supported = false;
 
-            return -1;
+              return -1;
+            }
+
+            return PMActivity.iMemoryClock / 100;
           }
 
-          return PMActivity.iMemoryClock / 100;
+          if (hwmon_ctx->hm_device[backend_device_idx].od_version == 8)
+          {
+            ADLPMLogDataOutput odlpDataOutput;
+
+            memset (&odlpDataOutput, 0, sizeof (ADLPMLogDataOutput));
+
+            if (hm_ADL2_New_QueryPMLogData_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, &odlpDataOutput) == -1)
+            {
+              hwmon_ctx->hm_device[backend_device_idx].memoryspeed_get_supported = false;
+
+              return -1;
+            }
+
+            return odlpDataOutput.sensors[PMLOG_CLK_MEMCLK].value;
+          }
         }
 
         if (hwmon_ctx->hm_sysfs_amdgpu)
@@ -895,18 +987,37 @@ int hm_get_corespeed_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int bac
       {
         if (hwmon_ctx->hm_adl)
         {
-          ADLPMActivity PMActivity;
+          if (hwmon_ctx->hm_device[backend_device_idx].od_version == 5)
+          {
+            ADLPMActivity PMActivity;
 
-          PMActivity.iSize = sizeof (ADLPMActivity);
+            PMActivity.iSize = sizeof (ADLPMActivity);
 
-          if (hm_ADL_Overdrive_CurrentActivity_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, &PMActivity) == -1)
-          {
-            hwmon_ctx->hm_device[backend_device_idx].corespeed_get_supported = false;
+            if (hm_ADL_Overdrive_CurrentActivity_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, &PMActivity) == -1)
+            {
+              hwmon_ctx->hm_device[backend_device_idx].corespeed_get_supported = false;
 
-            return -1;
+              return -1;
+            }
+
+            return PMActivity.iEngineClock / 100;
           }
 
-          return PMActivity.iEngineClock / 100;
+          if (hwmon_ctx->hm_device[backend_device_idx].od_version == 8)
+          {
+            ADLPMLogDataOutput odlpDataOutput;
+
+            memset (&odlpDataOutput, 0, sizeof (ADLPMLogDataOutput));
+
+            if (hm_ADL2_New_QueryPMLogData_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, &odlpDataOutput) == -1)
+            {
+              hwmon_ctx->hm_device[backend_device_idx].corespeed_get_supported = false;
+
+              return -1;
+            }
+
+            return odlpDataOutput.sensors[PMLOG_CLK_GFXCLK].value;
+          }
         }
 
         if (hwmon_ctx->hm_sysfs_amdgpu)
@@ -1400,7 +1511,9 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
               int od_enabled   = 0;
               int od_version   = 0;
 
-              hm_ADL_Overdrive_Caps (hashcat_ctx, lpAdapterInfo[i].iAdapterIndex, &od_supported, &od_enabled, &od_version);
+              hm_ADL2_Overdrive_Caps (hashcat_ctx, lpAdapterInfo[i].iAdapterIndex, &od_supported, &od_enabled, &od_version);
+
+              if (od_version < 8) od_version = 5;
 
               hm_adapters_adl[device_id].od_version = od_version;
 
@@ -1534,12 +1647,6 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
 
   hwmon_ctx->enabled = true;
 
-  /**
-   * save buffer required for later restores
-   */
-
-  hwmon_ctx->od_clock_mem_status = (ADLOD6MemClockState *) hccalloc (backend_ctx->backend_devices_cnt, sizeof (ADLOD6MemClockState));
-
   /**
    * HM devices: copy
    */
@@ -1795,8 +1902,6 @@ void hwmon_ctx_destroy (hashcat_ctx_t *hashcat_ctx)
 
   // free memory
 
-  hcfree (hwmon_ctx->od_clock_mem_status);
-
   hcfree (hwmon_ctx->hm_device);
 
   memset (hwmon_ctx, 0, sizeof (hwmon_ctx_t));

From 1e3bd2c8a0c8ad213d088b98248174d322477f76 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Mon, 26 Jul 2021 07:59:12 +0200
Subject: [PATCH 11/24] AMD GPUs: Add inline assembly code for
 md5crypt/sha256crypt, PDF 1.7, 7-Zip, RAR3, Samsung Android and Windows Phone
 8+

---
 OpenCL/m00500-optimized.cl    | 30 +++++++++++---
 OpenCL/m01500_a3-pure.cl      | 24 +++++------
 OpenCL/m01600-optimized.cl    | 30 +++++++++++---
 OpenCL/m05800-optimized.cl    | 10 ++++-
 OpenCL/m06300-optimized.cl    | 30 +++++++++++---
 OpenCL/m07400-optimized.cl    | 77 ++++++++++++++++++++++++++++-------
 OpenCL/m10700-optimized.cl    | 25 +++++++++---
 OpenCL/m11600-pure.cl         | 11 ++++-
 OpenCL/m12500-pure.cl         | 11 ++++-
 OpenCL/m13800_a0-optimized.cl | 11 ++++-
 OpenCL/m13800_a1-optimized.cl | 11 ++++-
 OpenCL/m13800_a3-optimized.cl | 11 ++++-
 OpenCL/m23700-pure.cl         | 11 ++++-
 OpenCL/m23800-pure.cl         | 11 ++++-
 docs/changes.txt              |  2 +
 src/modules/module_01500.c    |  6 ++-
 16 files changed, 244 insertions(+), 67 deletions(-)

diff --git a/OpenCL/m00500-optimized.cl b/OpenCL/m00500-optimized.cl
index 19f7153ff..38a361b96 100644
--- a/OpenCL/m00500-optimized.cl
+++ b/OpenCL/m00500-optimized.cl
@@ -32,7 +32,7 @@ DECLSPEC void memcat16 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, cons
   u32 tmp3;
   u32 tmp4;
 
-  #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
   u32 in0 = append[0];
   u32 in1 = append[1];
   u32 in2 = append[2];
@@ -45,12 +45,18 @@ DECLSPEC void memcat16 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, cons
   tmp4 = hc_bytealign (in3,   0, offset);
   #endif
 
-  #ifdef IS_NV
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
   const int offset_mod_4 = offset & 3;
 
   const int offset_minus_4 = 4 - offset_mod_4;
 
+  #if defined IS_NV
   const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
+  #endif
+
+  #if (defined IS_AMD || defined IS_HIP)
+  const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
+  #endif
 
   u32 in0 = append[0];
   u32 in1 = append[1];
@@ -139,7 +145,7 @@ DECLSPEC void memcat16_x80 (u32 *block0, u32 *block1, u32 *block2, u32 *block3,
   u32 tmp3;
   u32 tmp4;
 
-  #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
   u32 in0 = append[0];
   u32 in1 = append[1];
   u32 in2 = append[2];
@@ -153,12 +159,18 @@ DECLSPEC void memcat16_x80 (u32 *block0, u32 *block1, u32 *block2, u32 *block3,
   tmp4 = hc_bytealign (in3, in4, offset);
   #endif
 
-  #ifdef IS_NV
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
   const int offset_mod_4 = offset & 3;
 
   const int offset_minus_4 = 4 - offset_mod_4;
 
+  #if defined IS_NV
   const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
+  #endif
+
+  #if (defined IS_AMD || defined IS_HIP)
+  const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
+  #endif
 
   u32 in0 = append[0];
   u32 in1 = append[1];
@@ -246,7 +258,7 @@ DECLSPEC void memcat8 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, const
   u32 tmp1;
   u32 tmp2;
 
-  #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
   u32 in0 = append[0];
   u32 in1 = append[1];
 
@@ -255,12 +267,18 @@ DECLSPEC void memcat8 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, const
   tmp2 = hc_bytealign (in1,   0, offset);
   #endif
 
-  #ifdef IS_NV
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
   const int offset_mod_4 = offset & 3;
 
   const int offset_minus_4 = 4 - offset_mod_4;
 
+  #if defined IS_NV
   const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
+  #endif
+
+  #if (defined IS_AMD || defined IS_HIP)
+  const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
+  #endif
 
   u32 in0 = append[0];
   u32 in1 = append[1];
diff --git a/OpenCL/m01500_a3-pure.cl b/OpenCL/m01500_a3-pure.cl
index c2c4245e1..7a5adf017 100644
--- a/OpenCL/m01500_a3-pure.cl
+++ b/OpenCL/m01500_a3-pure.cl
@@ -1664,18 +1664,18 @@ DECLSPEC void DESCrypt (const u32 SALT, const u32 K00, const u32 K01, const u32
 
 DECLSPEC void DESCrypt (const u32 SALT, const u32 K00, const u32 K01, const u32 K02, const u32 K03, const u32 K04, const u32 K05, const u32 K06, const u32 K07, const u32 K08, const u32 K09, const u32 K10, const u32 K11, const u32 K12, const u32 K13, const u32 K14, const u32 K15, const u32 K16, const u32 K17, const u32 K18, const u32 K19, const u32 K20, const u32 K21, const u32 K22, const u32 K23, const u32 K24, const u32 K25, const u32 K26, const u32 K27, const u32 K28, const u32 K29, const u32 K30, const u32 K31, const u32 K32, const u32 K33, const u32 K34, const u32 K35, const u32 K36, const u32 K37, const u32 K38, const u32 K39, const u32 K40, const u32 K41, const u32 K42, const u32 K43, const u32 K44, const u32 K45, const u32 K46, const u32 K47, const u32 K48, const u32 K49, const u32 K50, const u32 K51, const u32 K52, const u32 K53, const u32 K54, const u32 K55, u32 *D00, u32 *D01, u32 *D02, u32 *D03, u32 *D04, u32 *D05, u32 *D06, u32 *D07, u32 *D08, u32 *D09, u32 *D10, u32 *D11, u32 *D12, u32 *D13, u32 *D14, u32 *D15, u32 *D16, u32 *D17, u32 *D18, u32 *D19, u32 *D20, u32 *D21, u32 *D22, u32 *D23, u32 *D24, u32 *D25, u32 *D26, u32 *D27, u32 *D28, u32 *D29, u32 *D30, u32 *D31, u32 *D32, u32 *D33, u32 *D34, u32 *D35, u32 *D36, u32 *D37, u32 *D38, u32 *D39, u32 *D40, u32 *D41, u32 *D42, u32 *D43, u32 *D44, u32 *D45, u32 *D46, u32 *D47, u32 *D48, u32 *D49, u32 *D50, u32 *D51, u32 *D52, u32 *D53, u32 *D54, u32 *D55, u32 *D56, u32 *D57, u32 *D58, u32 *D59, u32 *D60, u32 *D61, u32 *D62, u32 *D63)
 {
-  const u32 s001 = (0x001 & SALT) ? 0xffffffff : 0;
-  const u32 s002 = (0x002 & SALT) ? 0xffffffff : 0;
-  const u32 s004 = (0x004 & SALT) ? 0xffffffff : 0;
-  const u32 s008 = (0x008 & SALT) ? 0xffffffff : 0;
-  const u32 s010 = (0x010 & SALT) ? 0xffffffff : 0;
-  const u32 s020 = (0x020 & SALT) ? 0xffffffff : 0;
-  const u32 s040 = (0x040 & SALT) ? 0xffffffff : 0;
-  const u32 s080 = (0x080 & SALT) ? 0xffffffff : 0;
-  const u32 s100 = (0x100 & SALT) ? 0xffffffff : 0;
-  const u32 s200 = (0x200 & SALT) ? 0xffffffff : 0;
-  const u32 s400 = (0x400 & SALT) ? 0xffffffff : 0;
-  const u32 s800 = (0x800 & SALT) ? 0xffffffff : 0;
+  const u32 s001 = (0x001 & SALT) ? 1 : 0;
+  const u32 s002 = (0x002 & SALT) ? 1 : 0;
+  const u32 s004 = (0x004 & SALT) ? 1 : 0;
+  const u32 s008 = (0x008 & SALT) ? 1 : 0;
+  const u32 s010 = (0x010 & SALT) ? 1 : 0;
+  const u32 s020 = (0x020 & SALT) ? 1 : 0;
+  const u32 s040 = (0x040 & SALT) ? 1 : 0;
+  const u32 s080 = (0x080 & SALT) ? 1 : 0;
+  const u32 s100 = (0x100 & SALT) ? 1 : 0;
+  const u32 s200 = (0x200 & SALT) ? 1 : 0;
+  const u32 s400 = (0x400 & SALT) ? 1 : 0;
+  const u32 s800 = (0x800 & SALT) ? 1 : 0;
 
   KXX_DECL u32 k00, k01, k02, k03, k04, k05;
   KXX_DECL u32 k06, k07, k08, k09, k10, k11;
diff --git a/OpenCL/m01600-optimized.cl b/OpenCL/m01600-optimized.cl
index cfaad44cc..62194e973 100644
--- a/OpenCL/m01600-optimized.cl
+++ b/OpenCL/m01600-optimized.cl
@@ -31,7 +31,7 @@ DECLSPEC void memcat16 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, cons
   u32 tmp3;
   u32 tmp4;
 
-  #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
   u32 in0 = append[0];
   u32 in1 = append[1];
   u32 in2 = append[2];
@@ -44,12 +44,18 @@ DECLSPEC void memcat16 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, cons
   tmp4 = hc_bytealign (in3,   0, offset);
   #endif
 
-  #ifdef IS_NV
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
   const int offset_mod_4 = offset & 3;
 
   const int offset_minus_4 = 4 - offset_mod_4;
 
+  #if defined IS_NV
   const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
+  #endif
+
+  #if (defined IS_AMD || defined IS_HIP)
+  const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
+  #endif
 
   u32 in0 = append[0];
   u32 in1 = append[1];
@@ -138,7 +144,7 @@ DECLSPEC void memcat16_x80 (u32 *block0, u32 *block1, u32 *block2, u32 *block3,
   u32 tmp3;
   u32 tmp4;
 
-  #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
   u32 in0 = append[0];
   u32 in1 = append[1];
   u32 in2 = append[2];
@@ -152,12 +158,18 @@ DECLSPEC void memcat16_x80 (u32 *block0, u32 *block1, u32 *block2, u32 *block3,
   tmp4 = hc_bytealign (in3, in4, offset);
   #endif
 
-  #ifdef IS_NV
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
   const int offset_mod_4 = offset & 3;
 
   const int offset_minus_4 = 4 - offset_mod_4;
 
+  #if defined IS_NV
   const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
+  #endif
+
+  #if (defined IS_AMD || defined IS_HIP)
+  const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
+  #endif
 
   u32 in0 = append[0];
   u32 in1 = append[1];
@@ -245,7 +257,7 @@ DECLSPEC void memcat8 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, const
   u32 tmp1;
   u32 tmp2;
 
-  #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
   u32 in0 = append[0];
   u32 in1 = append[1];
 
@@ -254,12 +266,18 @@ DECLSPEC void memcat8 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, const
   tmp2 = hc_bytealign (in1,   0, offset);
   #endif
 
-  #ifdef IS_NV
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
   const int offset_mod_4 = offset & 3;
 
   const int offset_minus_4 = 4 - offset_mod_4;
 
+  #if defined IS_NV
   const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
+  #endif
+
+  #if (defined IS_AMD || defined IS_HIP)
+  const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
+  #endif
 
   u32 in0 = append[0];
   u32 in1 = append[1];
diff --git a/OpenCL/m05800-optimized.cl b/OpenCL/m05800-optimized.cl
index 38099159f..9f8f5a3cc 100644
--- a/OpenCL/m05800-optimized.cl
+++ b/OpenCL/m05800-optimized.cl
@@ -2119,7 +2119,7 @@ DECLSPEC void append_salt (u32 *w0, u32 *w1, u32 *w2, const u32 *append, const u
   u32 tmp4;
   u32 tmp5;
 
-  #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
   u32 in0 = append[0];
   u32 in1 = append[1];
   u32 in2 = append[2];
@@ -2134,12 +2134,18 @@ DECLSPEC void append_salt (u32 *w0, u32 *w1, u32 *w2, const u32 *append, const u
   tmp5 = hc_bytealign (in4,   0, offset);
   #endif
 
-  #ifdef IS_NV
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
   const int offset_mod_4 = offset & 3;
 
   const int offset_minus_4 = 4 - offset_mod_4;
 
+  #if defined IS_NV
   const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
+  #endif
+
+  #if (defined IS_AMD || defined IS_HIP)
+  const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
+  #endif
 
   u32 in0 = append[0];
   u32 in1 = append[1];
diff --git a/OpenCL/m06300-optimized.cl b/OpenCL/m06300-optimized.cl
index b7c9ddddd..f242259da 100644
--- a/OpenCL/m06300-optimized.cl
+++ b/OpenCL/m06300-optimized.cl
@@ -28,7 +28,7 @@ DECLSPEC void memcat16 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, cons
   u32 tmp3;
   u32 tmp4;
 
-  #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
   u32 in0 = append[0];
   u32 in1 = append[1];
   u32 in2 = append[2];
@@ -41,12 +41,18 @@ DECLSPEC void memcat16 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, cons
   tmp4 = hc_bytealign (in3,   0, offset);
   #endif
 
-  #ifdef IS_NV
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
   const int offset_mod_4 = offset & 3;
 
   const int offset_minus_4 = 4 - offset_mod_4;
 
+  #if defined IS_NV
   const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
+  #endif
+
+  #if (defined IS_AMD || defined IS_HIP)
+  const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
+  #endif
 
   u32 in0 = append[0];
   u32 in1 = append[1];
@@ -135,7 +141,7 @@ DECLSPEC void memcat16_x80 (u32 *block0, u32 *block1, u32 *block2, u32 *block3,
   u32 tmp3;
   u32 tmp4;
 
-  #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
   u32 in0 = append[0];
   u32 in1 = append[1];
   u32 in2 = append[2];
@@ -149,12 +155,18 @@ DECLSPEC void memcat16_x80 (u32 *block0, u32 *block1, u32 *block2, u32 *block3,
   tmp4 = hc_bytealign (in3, in4, offset);
   #endif
 
-  #ifdef IS_NV
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
   const int offset_mod_4 = offset & 3;
 
   const int offset_minus_4 = 4 - offset_mod_4;
 
+  #if defined IS_NV
   const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
+  #endif
+
+  #if (defined IS_AMD || defined IS_HIP)
+  const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
+  #endif
 
   u32 in0 = append[0];
   u32 in1 = append[1];
@@ -242,7 +254,7 @@ DECLSPEC void memcat8 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, const
   u32 tmp1;
   u32 tmp2;
 
-  #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
   u32 in0 = append[0];
   u32 in1 = append[1];
 
@@ -251,12 +263,18 @@ DECLSPEC void memcat8 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, const
   tmp2 = hc_bytealign (in1,   0, offset);
   #endif
 
-  #ifdef IS_NV
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
   const int offset_mod_4 = offset & 3;
 
   const int offset_minus_4 = 4 - offset_mod_4;
 
+  #if defined IS_NV
   const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
+  #endif
+
+  #if (defined IS_AMD || defined IS_HIP)
+  const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
+  #endif
 
   u32 in0 = append[0];
   u32 in1 = append[1];
diff --git a/OpenCL/m07400-optimized.cl b/OpenCL/m07400-optimized.cl
index 7efa5c94e..5fb83a2ad 100644
--- a/OpenCL/m07400-optimized.cl
+++ b/OpenCL/m07400-optimized.cl
@@ -45,7 +45,7 @@ DECLSPEC u32 memcat16 (u32 *block, const u32 offset, const u32 *append, const u3
   u32 in2 = append[2];
   u32 in3 = append[3];
 
-  #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
   const u32 tmp0 = hc_bytealign_be (  0, in0, offset);
   const u32 tmp1 = hc_bytealign_be (in0, in1, offset);
   const u32 tmp2 = hc_bytealign_be (in1, in2, offset);
@@ -53,8 +53,15 @@ DECLSPEC u32 memcat16 (u32 *block, const u32 offset, const u32 *append, const u3
   const u32 tmp4 = hc_bytealign_be (in3,   0, offset);
   #endif
 
-  #ifdef IS_NV
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
+
+  #if defined IS_NV
   const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
+  #endif
+
+  #if (defined IS_AMD || defined IS_HIP)
+  const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
+  #endif
 
   const u32 tmp0 = hc_byte_perm_S (in0,   0, selector);
   const u32 tmp1 = hc_byte_perm_S (in1, in0, selector);
@@ -165,7 +172,7 @@ DECLSPEC u32 memcat16c (u32 *block, const u32 offset, const u32 *append, const u
   u32 in2 = append[2];
   u32 in3 = append[3];
 
-  #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
   const u32 tmp0 = hc_bytealign_be (  0, in0, offset);
   const u32 tmp1 = hc_bytealign_be (in0, in1, offset);
   const u32 tmp2 = hc_bytealign_be (in1, in2, offset);
@@ -173,8 +180,15 @@ DECLSPEC u32 memcat16c (u32 *block, const u32 offset, const u32 *append, const u
   const u32 tmp4 = hc_bytealign_be (in3,   0, offset);
   #endif
 
-  #ifdef IS_NV
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
+
+  #if defined IS_NV
   const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
+  #endif
+
+  #if (defined IS_AMD || defined IS_HIP)
+  const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
+  #endif
 
   const u32 tmp0 = hc_byte_perm_S (in0,   0, selector);
   const u32 tmp1 = hc_byte_perm_S (in1, in0, selector);
@@ -322,7 +336,7 @@ DECLSPEC u32 memcat16s (u32 *block, const u32 offset, const u32 *append, const u
   u32 in3 = append[3];
   u32 in4 = append[4];
 
-  #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
   const u32 tmp0 = hc_bytealign_be (  0, in0, offset);
   const u32 tmp1 = hc_bytealign_be (in0, in1, offset);
   const u32 tmp2 = hc_bytealign_be (in1, in2, offset);
@@ -331,8 +345,15 @@ DECLSPEC u32 memcat16s (u32 *block, const u32 offset, const u32 *append, const u
   const u32 tmp5 = hc_bytealign_be (in4,   0, offset);
   #endif
 
-  #ifdef IS_NV
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
+
+  #if defined IS_NV
   const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
+  #endif
+
+  #if (defined IS_AMD || defined IS_HIP)
+  const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
+  #endif
 
   const u32 tmp0 = hc_byte_perm_S (in0,   0, selector);
   const u32 tmp1 = hc_byte_perm_S (in1, in0, selector);
@@ -456,7 +477,7 @@ DECLSPEC u32 memcat16sc (u32 *block, const u32 offset, const u32 *append, const
   u32 in3 = append[3];
   u32 in4 = append[4];
 
-  #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
   const u32 tmp0 = hc_bytealign_be (  0, in0, offset);
   const u32 tmp1 = hc_bytealign_be (in0, in1, offset);
   const u32 tmp2 = hc_bytealign_be (in1, in2, offset);
@@ -465,8 +486,15 @@ DECLSPEC u32 memcat16sc (u32 *block, const u32 offset, const u32 *append, const
   const u32 tmp5 = hc_bytealign_be (in4,   0, offset);
   #endif
 
-  #ifdef IS_NV
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
+
+  #if defined IS_NV
   const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
+  #endif
+
+  #if (defined IS_AMD || defined IS_HIP)
+  const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
+  #endif
 
   const u32 tmp0 = hc_byte_perm_S (in0,   0, selector);
   const u32 tmp1 = hc_byte_perm_S (in1, in0, selector);
@@ -756,7 +784,7 @@ DECLSPEC u32 memcat20 (u32 *block, const u32 offset, const u32 *append, const u3
   u32 in2 = append[2];
   u32 in3 = append[3];
 
-  #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
   const u32 tmp0 = hc_bytealign_be_S (  0, in0, offset);
   const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset);
   const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset);
@@ -764,8 +792,15 @@ DECLSPEC u32 memcat20 (u32 *block, const u32 offset, const u32 *append, const u3
   const u32 tmp4 = hc_bytealign_be_S (in3,   0, offset);
   #endif
 
-  #ifdef IS_NV
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
+
+  #if defined IS_NV
   const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
+  #endif
+
+  #if (defined IS_AMD || defined IS_HIP)
+  const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
+  #endif
 
   const u32 tmp0 = hc_byte_perm_S (in0,   0, selector);
   const u32 tmp1 = hc_byte_perm_S (in1, in0, selector);
@@ -915,7 +950,7 @@ DECLSPEC u32 memcat20_x80 (u32 *block, const u32 offset, const u32 *append, cons
   u32 in3 = append[3];
   u32 in4 = 0x80000000;
 
-  #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
   const u32 tmp0 = hc_bytealign_be_S (  0, in0, offset);
   const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset);
   const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset);
@@ -923,8 +958,15 @@ DECLSPEC u32 memcat20_x80 (u32 *block, const u32 offset, const u32 *append, cons
   const u32 tmp4 = hc_bytealign_be_S (in3, in4, offset);
   #endif
 
-  #ifdef IS_NV
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
+
+  #if defined IS_NV
   const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
+  #endif
+
+  #if (defined IS_AMD || defined IS_HIP)
+  const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
+  #endif
 
   const u32 tmp0 = hc_byte_perm_S (in0,   0, selector);
   const u32 tmp1 = hc_byte_perm_S (in1, in0, selector);
@@ -1074,7 +1116,7 @@ DECLSPEC u32 memcat24 (u32 *block, const u32 offset, const u32 *append, const u3
   u32 in3 = append[3];
   u32 in4 = append[4];
 
-  #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
   const u32 tmp0 = hc_bytealign_be_S (  0, in0, offset);
   const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset);
   const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset);
@@ -1083,8 +1125,15 @@ DECLSPEC u32 memcat24 (u32 *block, const u32 offset, const u32 *append, const u3
   const u32 tmp5 = hc_bytealign_be_S (in4,   0, offset);
   #endif
 
-  #ifdef IS_NV
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
+
+  #if defined IS_NV
   const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
+  #endif
+
+  #if (defined IS_AMD || defined IS_HIP)
+  const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
+  #endif
 
   const u32 tmp0 = hc_byte_perm_S (in0,   0, selector);
   const u32 tmp1 = hc_byte_perm_S (in1, in0, selector);
diff --git a/OpenCL/m10700-optimized.cl b/OpenCL/m10700-optimized.cl
index a9b50a6ac..9779c8fe6 100644
--- a/OpenCL/m10700-optimized.cl
+++ b/OpenCL/m10700-optimized.cl
@@ -232,7 +232,7 @@ DECLSPEC void make_sc (u32 *sc, const u32 *pw, const u32 pw_len, const u32 *bl,
 
     u32 i;
 
-    #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
+    #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
     for (i = 0; i < pd; i++) sc[idx++] = pw[i];
                              sc[idx++] = pw[i]
                                        | hc_bytealign_be (bl[0],         0, pm4);
@@ -242,8 +242,15 @@ DECLSPEC void make_sc (u32 *sc, const u32 *pw, const u32 pw_len, const u32 *bl,
                              sc[idx++] = hc_bytealign_be (    0, sc[i - 1], pm4);
     #endif
 
-    #ifdef IS_NV
-    int selector = (0x76543210 >> (pm4 * 4)) & 0xffff;
+    #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
+
+    #if defined IS_NV
+    const int selector = (0x76543210 >> ((pm4 & 3) * 4)) & 0xffff;
+    #endif
+
+    #if (defined IS_AMD || defined IS_HIP)
+    const int selector = l32_from_64_S (0x0706050403020100UL >> ((pm4 & 3) * 8));
+    #endif
 
     for (i = 0; i < pd; i++) sc[idx++] = pw[i];
                              sc[idx++] = pw[i]
@@ -263,16 +270,22 @@ DECLSPEC void make_pt_with_offset (u32 *pt, const u32 offset, const u32 *sc, con
   const u32 om = m % 4;
   const u32 od = m / 4;
 
-  #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
   pt[0] = hc_bytealign_be (sc[od + 1], sc[od + 0], om);
   pt[1] = hc_bytealign_be (sc[od + 2], sc[od + 1], om);
   pt[2] = hc_bytealign_be (sc[od + 3], sc[od + 2], om);
   pt[3] = hc_bytealign_be (sc[od + 4], sc[od + 3], om);
   #endif
 
-  #ifdef IS_NV
-  int selector = (0x76543210 >> (om * 4)) & 0xffff;
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
 
+  #if defined IS_NV
+  const int selector = (0x76543210 >> ((om & 3) * 4)) & 0xffff;
+  #endif
+
+  #if (defined IS_AMD || defined IS_HIP)
+  const int selector = l32_from_64_S (0x0706050403020100UL >> ((om & 3) * 8));
+  #endif
   pt[0] = hc_byte_perm (sc[od + 0], sc[od + 1], selector);
   pt[1] = hc_byte_perm (sc[od + 1], sc[od + 2], selector);
   pt[2] = hc_byte_perm (sc[od + 2], sc[od + 3], selector);
diff --git a/OpenCL/m11600-pure.cl b/OpenCL/m11600-pure.cl
index be42e185b..d321aee3a 100644
--- a/OpenCL/m11600-pure.cl
+++ b/OpenCL/m11600-pure.cl
@@ -42,13 +42,20 @@ DECLSPEC void memcat8c_be (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 len, co
   u32 tmp0;
   u32 tmp1;
 
-  #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
   tmp0 = hc_bytealign_be (0, append, func_len);
   tmp1 = hc_bytealign_be (append, 0, func_len);
   #endif
 
-  #ifdef IS_NV
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
+
+  #if defined IS_NV
   const int selector = (0x76543210 >> ((func_len & 3) * 4)) & 0xffff;
+  #endif
+
+  #if (defined IS_AMD || defined IS_HIP)
+  const int selector = l32_from_64_S (0x0706050403020100UL >> ((func_len & 3) * 8));
+  #endif
 
   tmp0 = hc_byte_perm (append, 0, selector);
   tmp1 = hc_byte_perm (0, append, selector);
diff --git a/OpenCL/m12500-pure.cl b/OpenCL/m12500-pure.cl
index f8ed47771..6112ec296 100644
--- a/OpenCL/m12500-pure.cl
+++ b/OpenCL/m12500-pure.cl
@@ -37,13 +37,20 @@ DECLSPEC void memcat8c_be (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 len, co
   u32 tmp0;
   u32 tmp1;
 
-  #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
   tmp0 = hc_bytealign_be (0, append, func_len);
   tmp1 = hc_bytealign_be (append, 0, func_len);
   #endif
 
-  #ifdef IS_NV
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
+
+  #if defined IS_NV
   const int selector = (0x76543210 >> ((func_len & 3) * 4)) & 0xffff;
+  #endif
+
+  #if (defined IS_AMD || defined IS_HIP)
+  const int selector = l32_from_64_S (0x0706050403020100UL >> ((func_len & 3) * 8));
+  #endif
 
   tmp0 = hc_byte_perm (append, 0, selector);
   tmp1 = hc_byte_perm (0, append, selector);
diff --git a/OpenCL/m13800_a0-optimized.cl b/OpenCL/m13800_a0-optimized.cl
index 6758ffbd4..043ed0d13 100644
--- a/OpenCL/m13800_a0-optimized.cl
+++ b/OpenCL/m13800_a0-optimized.cl
@@ -51,7 +51,7 @@ DECLSPEC void memcat64c_be (u32x *block, const u32 offset, u32x *carry)
   u32x tmp15;
   u32x tmp16;
 
-  #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
   tmp00 = hc_bytealign_be (        0, carry[ 0], offset);
   tmp01 = hc_bytealign_be (carry[ 0], carry[ 1], offset);
   tmp02 = hc_bytealign_be (carry[ 1], carry[ 2], offset);
@@ -71,8 +71,15 @@ DECLSPEC void memcat64c_be (u32x *block, const u32 offset, u32x *carry)
   tmp16 = hc_bytealign_be (carry[15],         0, offset);
   #endif
 
-  #ifdef IS_NV
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
+
+  #if defined IS_NV
   const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
+  #endif
+
+  #if (defined IS_AMD || defined IS_HIP)
+  const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
+  #endif
 
   tmp00 = hc_byte_perm (carry[ 0],         0, selector);
   tmp01 = hc_byte_perm (carry[ 1], carry[ 0], selector);
diff --git a/OpenCL/m13800_a1-optimized.cl b/OpenCL/m13800_a1-optimized.cl
index 85e711b94..4227e48d5 100644
--- a/OpenCL/m13800_a1-optimized.cl
+++ b/OpenCL/m13800_a1-optimized.cl
@@ -49,7 +49,7 @@ DECLSPEC void memcat64c_be (u32x *block, const u32 offset, u32x *carry)
   u32x tmp15;
   u32x tmp16;
 
-  #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
   tmp00 = hc_bytealign_be (        0, carry[ 0], offset);
   tmp01 = hc_bytealign_be (carry[ 0], carry[ 1], offset);
   tmp02 = hc_bytealign_be (carry[ 1], carry[ 2], offset);
@@ -69,8 +69,15 @@ DECLSPEC void memcat64c_be (u32x *block, const u32 offset, u32x *carry)
   tmp16 = hc_bytealign_be (carry[15],         0, offset);
   #endif
 
-  #ifdef IS_NV
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
+
+  #if defined IS_NV
   const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
+  #endif
+
+  #if (defined IS_AMD || defined IS_HIP)
+  const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
+  #endif
 
   tmp00 = hc_byte_perm (carry[ 0],         0, selector);
   tmp01 = hc_byte_perm (carry[ 1], carry[ 0], selector);
diff --git a/OpenCL/m13800_a3-optimized.cl b/OpenCL/m13800_a3-optimized.cl
index 65b759de0..895d4378c 100644
--- a/OpenCL/m13800_a3-optimized.cl
+++ b/OpenCL/m13800_a3-optimized.cl
@@ -48,7 +48,7 @@ DECLSPEC void memcat64c_be (u32x *block, const u32 offset, u32x *carry)
   u32x tmp15;
   u32x tmp16;
 
-  #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
   tmp00 = hc_bytealign_be (        0, carry[ 0], offset);
   tmp01 = hc_bytealign_be (carry[ 0], carry[ 1], offset);
   tmp02 = hc_bytealign_be (carry[ 1], carry[ 2], offset);
@@ -68,8 +68,15 @@ DECLSPEC void memcat64c_be (u32x *block, const u32 offset, u32x *carry)
   tmp16 = hc_bytealign_be (carry[15],         0, offset);
   #endif
 
-  #ifdef IS_NV
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
+
+  #if defined IS_NV
   const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
+  #endif
+
+  #if (defined IS_AMD || defined IS_HIP)
+  const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
+  #endif
 
   tmp00 = hc_byte_perm (carry[ 0],         0, selector);
   tmp01 = hc_byte_perm (carry[ 1], carry[ 0], selector);
diff --git a/OpenCL/m23700-pure.cl b/OpenCL/m23700-pure.cl
index af287574e..63e84cbf7 100644
--- a/OpenCL/m23700-pure.cl
+++ b/OpenCL/m23700-pure.cl
@@ -145,13 +145,20 @@ DECLSPEC void memcat8c_be (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 len, co
   u32 tmp0;
   u32 tmp1;
 
-  #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
   tmp0 = hc_bytealign_be (0, append, func_len);
   tmp1 = hc_bytealign_be (append, 0, func_len);
   #endif
 
-  #ifdef IS_NV
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
+
+  #if defined IS_NV
   const int selector = (0x76543210 >> ((func_len & 3) * 4)) & 0xffff;
+  #endif
+
+  #if (defined IS_AMD || defined IS_HIP)
+  const int selector = l32_from_64_S (0x0706050403020100UL >> ((func_len & 3) * 8));
+  #endif
 
   tmp0 = hc_byte_perm (append, 0, selector);
   tmp1 = hc_byte_perm (0, append, selector);
diff --git a/OpenCL/m23800-pure.cl b/OpenCL/m23800-pure.cl
index f6d345677..530c3268d 100644
--- a/OpenCL/m23800-pure.cl
+++ b/OpenCL/m23800-pure.cl
@@ -56,13 +56,20 @@ DECLSPEC void memcat8c_be (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 len, co
   u32 tmp0;
   u32 tmp1;
 
-  #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
   tmp0 = hc_bytealign_be (0, append, func_len);
   tmp1 = hc_bytealign_be (append, 0, func_len);
   #endif
 
-  #ifdef IS_NV
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
+
+  #if defined IS_NV
   const int selector = (0x76543210 >> ((func_len & 3) * 4)) & 0xffff;
+  #endif
+
+  #if (defined IS_AMD || defined IS_HIP)
+  const int selector = l32_from_64_S (0x0706050403020100UL >> ((func_len & 3) * 8));
+  #endif
 
   tmp0 = hc_byte_perm (append, 0, selector);
   tmp1 = hc_byte_perm (0, append, selector);
diff --git a/docs/changes.txt b/docs/changes.txt
index 7e0aab1a2..06b7f03ab 100644
--- a/docs/changes.txt
+++ b/docs/changes.txt
@@ -18,6 +18,7 @@
 ## Improvements
 ##
 
+- AMD GPUs: Add inline assembly code for md5crypt/sha256crypt, PDF 1.7, 7-Zip, RAR3, Samsung Android and Windows Phone 8+
 - Blake Kernels: Optimize BLAKE2B_ROUND() 64 bit rotates giving a 5% performance increase
 - Brain Session: Adds hashconfig specific opti_type and opts_type parameters to hashcat session computation to cover features like -O and -M
 - Kernel Threads: Use warp size / wavefront size query instead of hardcoded values as base for kernel threads
@@ -28,6 +29,7 @@
 ## Technical
 ##
 
+- ADL: Updated support for AMD Display Library to 14.0, updated datatypes and added support for OverDrive 7 and 8 based GPUs
 - Commandline: Throw an error if separator character given by the user with -p option is not exactly 1 byte
 - Kernel Cache: Add kernel threads into hash computation which is later used in the kernel cache filename
 - HIP Kernels: Got rid of hip/hip_runtime.h dependancy to enable more easy integration of the HIP backend on Windows
diff --git a/src/modules/module_01500.c b/src/modules/module_01500.c
index ea01dab96..dc7b7b47e 100644
--- a/src/modules/module_01500.c
+++ b/src/modules/module_01500.c
@@ -184,7 +184,11 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
   {
     if ((user_options->attack_mode == ATTACK_MODE_BF) && (hashes->salts_cnt == 1) && (user_options->slow_candidates == false))
     {
-      hc_asprintf (&jit_build_options, "-DDESCRYPT_SALT=%u -D _unroll", hashes->salts_buf[0].salt_buf[0] & 0xfff);
+      hc_asprintf (&jit_build_options, "-DDESCRYPT_SALT=%u -D _unroll -fno-experimental-new-pass-manager", hashes->salts_buf[0].salt_buf[0] & 0xfff);
+    }
+    else
+    {
+      hc_asprintf (&jit_build_options, "-D _unroll -fno-experimental-new-pass-manager");
     }
   }
   else

From f3f6cfadb79da9d6022b9c6b40bbb2ca28208f6a Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Mon, 26 Jul 2021 10:17:21 +0200
Subject: [PATCH 12/24] Password Safe v2: Backport optimizations reducing bank
 conflicts in bcrypt

---
 OpenCL/m09000-pure.cl | 141 +++++++++++++++++++++++++++++++-----------
 docs/changes.txt      |   3 +-
 2 files changed, 107 insertions(+), 37 deletions(-)

diff --git a/OpenCL/m09000-pure.cl b/OpenCL/m09000-pure.cl
index 737adde4e..323cf8387 100644
--- a/OpenCL/m09000-pure.cl
+++ b/OpenCL/m09000-pure.cl
@@ -310,6 +310,51 @@ CONSTANT_VK u32a c_pbox[18] =
   0x9216d5d9, 0x8979fb1b
 };
 
+// Yes, works only with CUDA atm
+
+#ifdef DYNAMIC_LOCAL
+#define BCRYPT_AVOID_BANK_CONFLICTS
+#endif
+
+#ifdef BCRYPT_AVOID_BANK_CONFLICTS
+
+// access pattern: minimize bank ID based on thread ID but thread ID is not saved from computation
+
+#define KEY32(lid,key) (((key) * FIXED_LOCAL_SIZE) + (lid))
+
+DECLSPEC u32 GET_KEY32 (LOCAL_AS u32 *S, const u64 key)
+{
+  const u64 lid = get_local_id (0);
+
+  return S[KEY32 (lid, key)];
+}
+
+DECLSPEC void SET_KEY32 (LOCAL_AS u32 *S, const u64 key, const u32 val)
+{
+  const u64 lid = get_local_id (0);
+
+  S[KEY32 (lid, key)] = val;
+}
+
+#undef KEY32
+
+#else
+
+// access pattern: linear access with S offset already set to right offset based on thread ID saving it from computation
+//                 makes sense if there are no thread IDs (for instance on CPU)
+
+DECLSPEC inline u32 GET_KEY32 (LOCAL_AS u32 *S, const u64 key)
+{
+  return S[key];
+}
+
+DECLSPEC inline void SET_KEY32 (LOCAL_AS u32 *S, const u64 key, const u32 val)
+{
+  S[key] = val;
+}
+
+#endif
+
 #define BF_ROUND(L,R,N)                       \
 {                                             \
   u32 tmp;                                    \
@@ -319,10 +364,10 @@ CONSTANT_VK u32a c_pbox[18] =
   const u32 r2 = unpack_v8b_from_v32_S ((L)); \
   const u32 r3 = unpack_v8a_from_v32_S ((L)); \
                                               \
-  tmp  = S0[r0];                              \
-  tmp += S1[r1];                              \
-  tmp ^= S2[r2];                              \
-  tmp += S3[r3];                              \
+  tmp  = GET_KEY32 (S0, r0);                  \
+  tmp += GET_KEY32 (S1, r1);                  \
+  tmp ^= GET_KEY32 (S2, r2);                  \
+  tmp += GET_KEY32 (S3, r3);                  \
                                               \
   (R) ^= tmp ^ P[(N)];                        \
 }
@@ -357,6 +402,10 @@ CONSTANT_VK u32a c_pbox[18] =
   L ^= P[17];           \
 }
 
+#ifdef DYNAMIC_LOCAL
+extern __shared__ u32 S[];
+#endif
+
 KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m09000_init (KERN_ATTR_TMPS (pwsafe2_tmp_t))
 {
   /**
@@ -471,22 +520,33 @@ KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m09000_init (KERN_ATTR_TMPS
     P[i] = c_pbox[i];
   }
 
+  #ifdef DYNAMIC_LOCAL
+  // from host
+  #else
   LOCAL_VK u32 S0_all[FIXED_LOCAL_SIZE][256];
   LOCAL_VK u32 S1_all[FIXED_LOCAL_SIZE][256];
   LOCAL_VK u32 S2_all[FIXED_LOCAL_SIZE][256];
   LOCAL_VK u32 S3_all[FIXED_LOCAL_SIZE][256];
+  #endif
 
+  #ifdef BCRYPT_AVOID_BANK_CONFLICTS
+  LOCAL_AS u32 *S0 = S + (FIXED_LOCAL_SIZE * 256 * 0);
+  LOCAL_AS u32 *S1 = S + (FIXED_LOCAL_SIZE * 256 * 1);
+  LOCAL_AS u32 *S2 = S + (FIXED_LOCAL_SIZE * 256 * 2);
+  LOCAL_AS u32 *S3 = S + (FIXED_LOCAL_SIZE * 256 * 3);
+  #else
   LOCAL_AS u32 *S0 = S0_all[lid];
   LOCAL_AS u32 *S1 = S1_all[lid];
   LOCAL_AS u32 *S2 = S2_all[lid];
   LOCAL_AS u32 *S3 = S3_all[lid];
+  #endif
 
   for (u32 i = 0; i < 256; i++)
   {
-    S0[i] = c_sbox0[i];
-    S1[i] = c_sbox1[i];
-    S2[i] = c_sbox2[i];
-    S3[i] = c_sbox3[i];
+    SET_KEY32 (S0, i, c_sbox0[i]);
+    SET_KEY32 (S1, i, c_sbox1[i]);
+    SET_KEY32 (S2, i, c_sbox2[i]);
+    SET_KEY32 (S3, i, c_sbox3[i]);
   }
 
   for (u32 i = 0; i < 18; i++)
@@ -509,59 +569,59 @@ KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m09000_init (KERN_ATTR_TMPS
   {
     BF_ENCRYPT (L0, R0);
 
-    S0[i + 0] = L0;
-    S0[i + 1] = R0;
+    SET_KEY32 (S0, i + 0, L0);
+    SET_KEY32 (S0, i + 1, R0);
 
     BF_ENCRYPT (L0, R0);
 
-    S0[i + 2] = L0;
-    S0[i + 3] = R0;
+    SET_KEY32 (S0, i + 2, L0);
+    SET_KEY32 (S0, i + 3, R0);
   }
 
   for (u32 i = 0; i < 256; i += 4)
   {
     BF_ENCRYPT (L0, R0);
 
-    S1[i + 0] = L0;
-    S1[i + 1] = R0;
+    SET_KEY32 (S1, i + 0, L0);
+    SET_KEY32 (S1, i + 1, R0);
 
     BF_ENCRYPT (L0, R0);
 
-    S1[i + 2] = L0;
-    S1[i + 3] = R0;
+    SET_KEY32 (S1, i + 2, L0);
+    SET_KEY32 (S1, i + 3, R0);
   }
 
   for (u32 i = 0; i < 256; i += 4)
   {
     BF_ENCRYPT (L0, R0);
 
-    S2[i + 0] = L0;
-    S2[i + 1] = R0;
+    SET_KEY32 (S2, i + 0, L0);
+    SET_KEY32 (S2, i + 1, R0);
 
     BF_ENCRYPT (L0, R0);
 
-    S2[i + 2] = L0;
-    S2[i + 3] = R0;
+    SET_KEY32 (S2, i + 2, L0);
+    SET_KEY32 (S2, i + 3, R0);
   }
 
   for (u32 i = 0; i < 256; i += 4)
   {
     BF_ENCRYPT (L0, R0);
 
-    S3[i + 0] = L0;
-    S3[i + 1] = R0;
+    SET_KEY32 (S3, i + 0, L0);
+    SET_KEY32 (S3, i + 1, R0);
 
     BF_ENCRYPT (L0, R0);
 
-    S3[i + 2] = L0;
-    S3[i + 3] = R0;
+    SET_KEY32 (S3, i + 2, L0);
+    SET_KEY32 (S3, i + 3, R0);
   }
 
-  // store
-
   tmps[gid].digest[0] = salt_buf[0];
   tmps[gid].digest[1] = salt_buf[1];
 
+  // store
+
   for (u32 i = 0; i < 18; i++)
   {
     tmps[gid].P[i] = P[i];
@@ -569,10 +629,10 @@ KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m09000_init (KERN_ATTR_TMPS
 
   for (u32 i = 0; i < 256; i++)
   {
-    tmps[gid].S0[i] = S0[i];
-    tmps[gid].S1[i] = S1[i];
-    tmps[gid].S2[i] = S2[i];
-    tmps[gid].S3[i] = S3[i];
+    tmps[gid].S0[i] = GET_KEY32 (S0, i);
+    tmps[gid].S1[i] = GET_KEY32 (S1, i);
+    tmps[gid].S2[i] = GET_KEY32 (S2, i);
+    tmps[gid].S3[i] = GET_KEY32 (S3, i);
   }
 }
 
@@ -602,22 +662,33 @@ KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m09000_loop (KERN_ATTR_TMPS
     P[i] = tmps[gid].P[i];
   }
 
+  #ifdef DYNAMIC_LOCAL
+  // from host
+  #else
   LOCAL_VK u32 S0_all[FIXED_LOCAL_SIZE][256];
   LOCAL_VK u32 S1_all[FIXED_LOCAL_SIZE][256];
   LOCAL_VK u32 S2_all[FIXED_LOCAL_SIZE][256];
   LOCAL_VK u32 S3_all[FIXED_LOCAL_SIZE][256];
+  #endif
 
+  #ifdef BCRYPT_AVOID_BANK_CONFLICTS
+  LOCAL_AS u32 *S0 = S + (FIXED_LOCAL_SIZE * 256 * 0);
+  LOCAL_AS u32 *S1 = S + (FIXED_LOCAL_SIZE * 256 * 1);
+  LOCAL_AS u32 *S2 = S + (FIXED_LOCAL_SIZE * 256 * 2);
+  LOCAL_AS u32 *S3 = S + (FIXED_LOCAL_SIZE * 256 * 3);
+  #else
   LOCAL_AS u32 *S0 = S0_all[lid];
   LOCAL_AS u32 *S1 = S1_all[lid];
   LOCAL_AS u32 *S2 = S2_all[lid];
   LOCAL_AS u32 *S3 = S3_all[lid];
+  #endif
 
   for (u32 i = 0; i < 256; i++)
   {
-    S0[i] = tmps[gid].S0[i];
-    S1[i] = tmps[gid].S1[i];
-    S2[i] = tmps[gid].S2[i];
-    S3[i] = tmps[gid].S3[i];
+    SET_KEY32 (S0, i, tmps[gid].S0[i]);
+    SET_KEY32 (S1, i, tmps[gid].S1[i]);
+    SET_KEY32 (S2, i, tmps[gid].S2[i]);
+    SET_KEY32 (S3, i, tmps[gid].S3[i]);
   }
 
   // loop
@@ -630,8 +701,6 @@ KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m09000_loop (KERN_ATTR_TMPS
     BF_ENCRYPT (L0, R0);
   }
 
-  // store
-
   tmps[gid].digest[0] = L0;
   tmps[gid].digest[1] = R0;
 }
diff --git a/docs/changes.txt b/docs/changes.txt
index 06b7f03ab..69c6c1ad0 100644
--- a/docs/changes.txt
+++ b/docs/changes.txt
@@ -22,6 +22,7 @@
 - Blake Kernels: Optimize BLAKE2B_ROUND() 64 bit rotates giving a 5% performance increase
 - Brain Session: Adds hashconfig specific opti_type and opts_type parameters to hashcat session computation to cover features like -O and -M
 - Kernel Threads: Use warp size / wavefront size query instead of hardcoded values as base for kernel threads
+- Password Safe v2: Backport optimizations reducing bank conflicts in bcrypt
 - Shared Memory: Calculate kernel dynamic memory size based on CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN
 - Slow Kernels: Set some of the slowest kernels to OPTS_TYPE_MP_MULTI_DISABLE
 
@@ -31,8 +32,8 @@
 
 - ADL: Updated support for AMD Display Library to 14.0, updated datatypes and added support for OverDrive 7 and 8 based GPUs
 - Commandline: Throw an error if separator character given by the user with -p option is not exactly 1 byte
-- Kernel Cache: Add kernel threads into hash computation which is later used in the kernel cache filename
 - HIP Kernels: Got rid of hip/hip_runtime.h dependancy to enable more easy integration of the HIP backend on Windows
+- Kernel Cache: Add kernel threads into hash computation which is later used in the kernel cache filename
 - SCRYPT Kernels: Add more optimized values for some new NV/AMD GPUs
 
 ##

From 7f419c68aff2006376b0f2bc6225eededc62d772 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Mon, 26 Jul 2021 13:38:39 +0200
Subject: [PATCH 13/24] Blowfish Kernels: Backport optimizations reducing bank
 conflicts from bcrypt to Password Safe v2 and Open Document Format (ODF) 1.1

---
 OpenCL/m18600-pure.cl      | 118 ++++++++++++++++++++++++++++---------
 docs/changes.txt           |   2 +-
 src/modules/module_18600.c |  63 ++++++++++++++++----
 3 files changed, 141 insertions(+), 42 deletions(-)

diff --git a/OpenCL/m18600-pure.cl b/OpenCL/m18600-pure.cl
index 061c61bd8..f98aca9c0 100644
--- a/OpenCL/m18600-pure.cl
+++ b/OpenCL/m18600-pure.cl
@@ -319,6 +319,51 @@ CONSTANT_VK u32a c_pbox[18] =
   0x9216d5d9, 0x8979fb1b
 };
 
+// Yes, works only with CUDA atm
+
+#ifdef DYNAMIC_LOCAL
+#define BCRYPT_AVOID_BANK_CONFLICTS
+#endif
+
+#ifdef BCRYPT_AVOID_BANK_CONFLICTS
+
+// access pattern: minimize bank ID based on thread ID but thread ID is not saved from computation
+
+#define KEY32(lid,key) (((key) * FIXED_LOCAL_SIZE_COMP) + (lid))
+
+DECLSPEC u32 GET_KEY32 (LOCAL_AS u32 *S, const u64 key)
+{
+  const u64 lid = get_local_id (0);
+
+  return S[KEY32 (lid, key)];
+}
+
+DECLSPEC void SET_KEY32 (LOCAL_AS u32 *S, const u64 key, const u32 val)
+{
+  const u64 lid = get_local_id (0);
+
+  S[KEY32 (lid, key)] = val;
+}
+
+#undef KEY32
+
+#else
+
+// access pattern: linear access with S offset already set to right offset based on thread ID saving it from computation
+//                 makes sense if there are no thread IDs (for instance on CPU)
+
+DECLSPEC inline u32 GET_KEY32 (LOCAL_AS u32 *S, const u64 key)
+{
+  return S[key];
+}
+
+DECLSPEC inline void SET_KEY32 (LOCAL_AS u32 *S, const u64 key, const u32 val)
+{
+  S[key] = val;
+}
+
+#endif
+
 #define BF_ROUND(L,R,N)                       \
 {                                             \
   u32 tmp;                                    \
@@ -328,10 +373,10 @@ CONSTANT_VK u32a c_pbox[18] =
   const u32 r2 = unpack_v8b_from_v32_S ((L)); \
   const u32 r3 = unpack_v8a_from_v32_S ((L)); \
                                               \
-  tmp  = S0[r0];                              \
-  tmp += S1[r1];                              \
-  tmp ^= S2[r2];                              \
-  tmp += S3[r3];                              \
+  tmp  = GET_KEY32 (S0, r0);                  \
+  tmp += GET_KEY32 (S1, r1);                  \
+  tmp ^= GET_KEY32 (S2, r2);                  \
+  tmp += GET_KEY32 (S3, r3);                  \
                                               \
   (R) ^= tmp ^ P[(N)];                        \
 }
@@ -366,6 +411,10 @@ CONSTANT_VK u32a c_pbox[18] =
   L ^= P[17];           \
 }
 
+#ifdef DYNAMIC_LOCAL
+extern __shared__ u32 S[];
+#endif
+
 DECLSPEC void hmac_sha1_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest)
 {
   digest[0] = ipad[0];
@@ -586,7 +635,7 @@ KERNEL_FQ void m18600_loop (KERN_ATTR_TMPS_ESALT (odf11_tmp_t, odf11_t))
   }
 }
 
-KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m18600_comp (KERN_ATTR_TMPS_ESALT (odf11_tmp_t, odf11_t))
+KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE_COMP) m18600_comp (KERN_ATTR_TMPS_ESALT (odf11_tmp_t, odf11_t))
 {
   const u64 gid = get_global_id (0);
   const u64 lid = get_local_id (0);
@@ -616,22 +665,33 @@ KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m18600_comp (KERN_ATTR_TMPS_
     P[i] = c_pbox[i] ^ ukey[i % 4];
   }
 
-  LOCAL_VK u32 S0_all[FIXED_LOCAL_SIZE][256];
-  LOCAL_VK u32 S1_all[FIXED_LOCAL_SIZE][256];
-  LOCAL_VK u32 S2_all[FIXED_LOCAL_SIZE][256];
-  LOCAL_VK u32 S3_all[FIXED_LOCAL_SIZE][256];
+  #ifdef DYNAMIC_LOCAL
+  // from host
+  #else
+  LOCAL_VK u32 S0_all[FIXED_LOCAL_SIZE_COMP][256];
+  LOCAL_VK u32 S1_all[FIXED_LOCAL_SIZE_COMP][256];
+  LOCAL_VK u32 S2_all[FIXED_LOCAL_SIZE_COMP][256];
+  LOCAL_VK u32 S3_all[FIXED_LOCAL_SIZE_COMP][256];
+  #endif
 
+  #ifdef BCRYPT_AVOID_BANK_CONFLICTS
+  LOCAL_AS u32 *S0 = S + (FIXED_LOCAL_SIZE_COMP * 256 * 0);
+  LOCAL_AS u32 *S1 = S + (FIXED_LOCAL_SIZE_COMP * 256 * 1);
+  LOCAL_AS u32 *S2 = S + (FIXED_LOCAL_SIZE_COMP * 256 * 2);
+  LOCAL_AS u32 *S3 = S + (FIXED_LOCAL_SIZE_COMP * 256 * 3);
+  #else
   LOCAL_AS u32 *S0 = S0_all[lid];
   LOCAL_AS u32 *S1 = S1_all[lid];
   LOCAL_AS u32 *S2 = S2_all[lid];
   LOCAL_AS u32 *S3 = S3_all[lid];
+  #endif
 
   for (u32 i = 0; i < 256; i++)
   {
-    S0[i] = c_sbox0[i];
-    S1[i] = c_sbox1[i];
-    S2[i] = c_sbox2[i];
-    S3[i] = c_sbox3[i];
+    SET_KEY32 (S0, i, c_sbox0[i]);
+    SET_KEY32 (S1, i, c_sbox1[i]);
+    SET_KEY32 (S2, i, c_sbox2[i]);
+    SET_KEY32 (S3, i, c_sbox3[i]);
   }
 
   u32 L0 = 0;
@@ -649,52 +709,52 @@ KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m18600_comp (KERN_ATTR_TMPS_
   {
     BF_ENCRYPT (L0, R0);
 
-    S0[i + 0] = L0;
-    S0[i + 1] = R0;
+    SET_KEY32 (S0, i + 0, L0);
+    SET_KEY32 (S0, i + 1, R0);
 
     BF_ENCRYPT (L0, R0);
 
-    S0[i + 2] = L0;
-    S0[i + 3] = R0;
+    SET_KEY32 (S0, i + 2, L0);
+    SET_KEY32 (S0, i + 3, R0);
   }
 
   for (u32 i = 0; i < 256; i += 4)
   {
     BF_ENCRYPT (L0, R0);
 
-    S1[i + 0] = L0;
-    S1[i + 1] = R0;
+    SET_KEY32 (S1, i + 0, L0);
+    SET_KEY32 (S1, i + 1, R0);
 
     BF_ENCRYPT (L0, R0);
 
-    S1[i + 2] = L0;
-    S1[i + 3] = R0;
+    SET_KEY32 (S1, i + 2, L0);
+    SET_KEY32 (S1, i + 3, R0);
   }
 
   for (u32 i = 0; i < 256; i += 4)
   {
     BF_ENCRYPT (L0, R0);
 
-    S2[i + 0] = L0;
-    S2[i + 1] = R0;
+    SET_KEY32 (S2, i + 0, L0);
+    SET_KEY32 (S2, i + 1, R0);
 
     BF_ENCRYPT (L0, R0);
 
-    S2[i + 2] = L0;
-    S2[i + 3] = R0;
+    SET_KEY32 (S2, i + 2, L0);
+    SET_KEY32 (S2, i + 3, R0);
   }
 
   for (u32 i = 0; i < 256; i += 4)
   {
     BF_ENCRYPT (L0, R0);
 
-    S3[i + 0] = L0;
-    S3[i + 1] = R0;
+    SET_KEY32 (S3, i + 0, L0);
+    SET_KEY32 (S3, i + 1, R0);
 
     BF_ENCRYPT (L0, R0);
 
-    S3[i + 2] = L0;
-    S3[i + 3] = R0;
+    SET_KEY32 (S3, i + 2, L0);
+    SET_KEY32 (S3, i + 3, R0);
   }
 
   GLOBAL_AS const odf11_t *es = &esalt_bufs[DIGESTS_OFFSET];
diff --git a/docs/changes.txt b/docs/changes.txt
index 69c6c1ad0..cc14c475e 100644
--- a/docs/changes.txt
+++ b/docs/changes.txt
@@ -20,9 +20,9 @@
 
 - AMD GPUs: Add inline assembly code for md5crypt/sha256crypt, PDF 1.7, 7-Zip, RAR3, Samsung Android and Windows Phone 8+
 - Blake Kernels: Optimize BLAKE2B_ROUND() 64 bit rotates giving a 5% performance increase
+- Blowfish Kernels: Backport optimizations reducing bank conflicts from bcrypt to Password Safe v2 and Open Document Format (ODF) 1.1
 - Brain Session: Adds hashconfig specific opti_type and opts_type parameters to hashcat session computation to cover features like -O and -M
 - Kernel Threads: Use warp size / wavefront size query instead of hardcoded values as base for kernel threads
-- Password Safe v2: Backport optimizations reducing bank conflicts in bcrypt
 - Shared Memory: Calculate kernel dynamic memory size based on CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN
 - Slow Kernels: Set some of the slowest kernels to OPTS_TYPE_MP_MULTI_DISABLE
 
diff --git a/src/modules/module_18600.c b/src/modules/module_18600.c
index c24ea37fe..8f2d05f4c 100644
--- a/src/modules/module_18600.c
+++ b/src/modules/module_18600.c
@@ -21,7 +21,8 @@ static const char *HASH_NAME      = "Open Document Format (ODF) 1.1 (SHA-1, Blow
 static const u64   KERN_TYPE      = 18600;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
                                   | OPTI_TYPE_SLOW_HASH_SIMD_LOOP;
-static const u64   OPTS_TYPE      = OPTS_TYPE_PT_GENERATE_LE;
+static const u64   OPTS_TYPE      = OPTS_TYPE_PT_GENERATE_LE
+                                  | OPTS_TYPE_DYNAMIC_SHARED;
 static const u32   SALT_TYPE      = SALT_TYPE_EMBEDDED;
 static const char *ST_PASS        = "hashcat";
 static const char *ST_HASH        = "$odf$*0*0*1024*16*bff753835f4ea15644b8a2f8e4b5be3d147b9576*8*ee371da34333b69d*16*a902eff54a4d782a26a899a31f97bef4*0*dae7e41fbc3a500d3ce152edd8876c4f38fb17d673ee2ac44ef1e0e283622cd2ae298a82d8d98f2ea737247881fc353e73a2f535c6e13e0cdc60821c1a61c53a4b0c46ff3a3b355d7b793fad50de15999fc7c1194321d1c54316c3806956c4a3ade7daabb912a2a36398eba883af088b3cb69b43365d9ba9fce3fb0c1524f73947a7e9fc1bf3adb5f85a367035feacb5d97c578b037144c2793f34aa09dcd04bdaa455aee0d4c52fe377248611dd56f2bd4eb294673525db905f5d905a28dec0909348e6bf94bcebf03ddd61a48797cd5728ce6dbb71037b268f526e806401abcf495f6edd0b5d87118671ec690d4627f86a43e51c7f6d42a75a56eec51204d47e115e813ed4425c97b16b195e02ce776c185194b9de43ae89f356e29face016cb393d6fb93af8ea305d921d5592dd184051ac790b9b90266f52b8d53ce1cb1d762942d6d5bbd0e3821be21af9fa6874ba0c60e64f41d3e5b6caca1c53b575afdc5d8f6a3edbf874dbe009c6cb296466fe9637aed4aed8a43a95ea7d26b4090ad33d4ee7a83844b0893e8bc0f04944205fb9576cb5720f019028cd75ca9ac47b3e5fa231354d74135564df43b659cfaea7e195c4a896e0e0e0c85dc9ce3a9ce9ba552bc2a6dbac4901c19558818e1957ed72d78662bb5ba53475ca584371f1825ae0c92322a4404e63c2baad92665aac29b5c6f96e1e6338d48fb0aef4d0b686063974f58b839484f8dcf0a02537cba67a7d2c4de13125d74820cb07ec72782035af1ea6c4db61c77016d1c021b63c8b07adb4e8510f5c41bbc501f60f3dd16462399b52eb146787e38e700147c7aa23ac4d5d22d9d1c93e67a01c92a197d4765cbf8d56a862a1205abb450a182913a69b8d5334a59924f86fb3ccd0dcfe7426053e26ba26b57c05f38d85863fff1f81135b0366e8cd8680663ae8aaf7d005317b849d5e08be882708fa0d8d02d47e89150124b507c34845c922b95e62aa0b3fef218773d7aeb572c67b35ad8787f31ecc6e1846b673b8ba6172223176eabf0020b6aa3aa71405b40b2fc2127bf9741a103f1d8eca21bf27328cdf15153f2f223eff7b831a72ed8ecacf4ea8df4ea44f3a3921e5a88fb2cfa355ece0f05cbc88fdd1ecd368d6e3b2dfabd999e5b708f1bccaeebb296c9d7b76659967742fe966aa6871cbbffe710b0cd838c6e02e6eb608cb5c81d066b60b5b3604396331d97d4a2c4c2317406e48c9f5387a2c72511d1e6899bd450e9ca88d535755bcfddb53a6df118cd9cdc7d8b4b814f7bc17684d8e5975defaa25d06f410ed
0724c16b8f69ec3869bc1f05c71483666968d1c04509875dadd72c6182733d564eb1a7d555dc34f6b817c5418626214d0b2c3901c5a46f5b20fddfdf9f71a7dfd75b9928778a3f65e1832dff22be973c2b259744d500a3027c2a2e08972eaaad4c5c4ec871";
@@ -66,16 +67,25 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
 {
   char *jit_build_options = NULL;
 
+  // this mode heavily depends on the available shared memory size
+  // note the kernel needs some special code changes in order to make use of the post-48k memory region
+  // we need to set some macros
+
+  bool use_dynamic = false;
+
+  if (device_param->is_cuda == true)
+  {
+    use_dynamic = true;
+  }
+
   // this uses some nice feedback effect.
   // based on the device_local_mem_size the reqd_work_group_size in the kernel is set to some value
   // which is then is read from the opencl host in the kernel_preferred_wgs_multiple1/2/3 result.
   // therefore we do not need to set module_kernel_threads_min/max except for CPU, where the threads are set to fixed 1.
 
-  u32 fixed_local_size = 0;
-
   if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU)
   {
-    fixed_local_size = 1;
+    hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE_COMP=%u", 1);
   }
   else
   {
@@ -91,29 +101,58 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
 
       if (device_param->is_opencl == true)
       {
-        overhead = 4;
+        overhead = 1;
       }
     }
 
     if (user_options->kernel_threads_chgd == true)
     {
-      fixed_local_size = user_options->kernel_threads;
+      u32 fixed_local_size = user_options->kernel_threads;
+
+      if (use_dynamic == true)
+      {
+        if ((fixed_local_size * 4096) > device_param->kernel_dynamic_local_mem_size_memset)
+        {
+          // otherwise out-of-bound reads
 
-      // otherwise out-of-bound reads
+          fixed_local_size = device_param->kernel_dynamic_local_mem_size_memset / 4096;
+        }
 
-      if ((fixed_local_size * 4096) > (device_param->device_local_mem_size - overhead))
+        hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE_COMP=%u -D DYNAMIC_LOCAL", fixed_local_size);
+      }
+      else
       {
-        fixed_local_size = (device_param->device_local_mem_size - overhead) / 4096;
+        if ((fixed_local_size * 4096) > (device_param->device_local_mem_size - overhead))
+        {
+          // otherwise out-of-bound reads
+
+          fixed_local_size = (device_param->device_local_mem_size - overhead) / 4096;
+        }
+
+        hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE_COMP=%u", fixed_local_size);
       }
     }
     else
     {
-      fixed_local_size = (device_param->device_local_mem_size - overhead) / 4096;
+      if (use_dynamic == true)
+      {
+        // using kernel_dynamic_local_mem_size_memset is a bit hackish.
+        // we had to brute-force this value out of an already loaded CUDA function.
+        // there's no official way to query for this value.
+
+        const u32 fixed_local_size = device_param->kernel_dynamic_local_mem_size_memset / 4096;
+
+        hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE_COMP=%u -D DYNAMIC_LOCAL", fixed_local_size);
+      }
+      else
+      {
+        const u32 fixed_local_size = (device_param->device_local_mem_size - overhead) / 4096;
+
+        hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE_COMP=%u", fixed_local_size);
+      }
     }
   }
 
-  hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u", fixed_local_size);
-
   return jit_build_options;
 }
 

From 45fce5d3a3f12fd851bece1bf4100488ab0bb43d Mon Sep 17 00:00:00 2001
From: Gabriele Gristina <matrix@users.noreply.github.com>
Date: Mon, 26 Jul 2021 19:04:30 +0200
Subject: [PATCH 14/24] fix snmpv3 md5/sha1, tested with real hashes

---
 OpenCL/m25100-pure.cl        |  4 ++--
 OpenCL/m25200-pure.cl        | 16 ++++++++--------
 src/modules/module_25100.c   | 20 ++++++++++----------
 src/modules/module_25200.c   | 27 ++++++++++++++-------------
 tools/test_modules/m25100.pm |  8 ++++----
 tools/test_modules/m25200.pm |  6 +++---
 6 files changed, 41 insertions(+), 40 deletions(-)

diff --git a/OpenCL/m25100-pure.cl b/OpenCL/m25100-pure.cl
index 72cb0ba47..1fc28c664 100644
--- a/OpenCL/m25100-pure.cl
+++ b/OpenCL/m25100-pure.cl
@@ -18,8 +18,8 @@
 #define COMPARE_M "inc_comp_multi.cl"
 
 #define SNMPV3_SALT_MAX             1500
-#define SNMPV3_ENGINEID_MAX         32
-#define SNMPV3_MSG_AUTH_PARAMS_MAX  12
+#define SNMPV3_ENGINEID_MAX         34
+#define SNMPV3_MSG_AUTH_PARAMS_LEN  12
 #define SNMPV3_ROUNDS               1048576
 #define SNMPV3_MAX_PW_LENGTH        64
 
diff --git a/OpenCL/m25200-pure.cl b/OpenCL/m25200-pure.cl
index f72fce044..e36caaf6e 100644
--- a/OpenCL/m25200-pure.cl
+++ b/OpenCL/m25200-pure.cl
@@ -18,13 +18,17 @@
 #define COMPARE_M "inc_comp_multi.cl"
 
 #define SNMPV3_SALT_MAX             1500
-#define SNMPV3_ENGINEID_MAX         32
-#define SNMPV3_MSG_AUTH_PARAMS_MAX  12
+#define SNMPV3_ENGINEID_MAX         34
+#define SNMPV3_MSG_AUTH_PARAMS_LEN  12
 #define SNMPV3_ROUNDS               1048576
 #define SNMPV3_MAX_PW_LENGTH        64
 
-#define SNMPV3_TMP_ELEMS  4096 // 4096 = (256 (max pw length) * 64) / sizeof (u32)
-#define SNMPV3_HASH_ELEMS 8    // 8 = aligned 5
+#define SNMPV3_TMP_ELEMS            4096 // 4096 = (256 (max pw length) * 64) / sizeof (u32)
+#define SNMPV3_HASH_ELEMS           8    // 8 = aligned 5
+
+#define SNMPV3_MAX_SALT_ELEMS       512 // 512 * 4 = 2048 > 1500, also has to be multiple of 64
+#define SNMPV3_MAX_ENGINE_ELEMS     16  // 16 * 4 = 64 > 34, also has to be multiple of 64
+#define SNMPV3_MAX_PNUM_ELEMS       4   // 4 * 4 = 16 > 9
 
 typedef struct hmac_sha1_tmp
 {
@@ -33,10 +37,6 @@ typedef struct hmac_sha1_tmp
 
 } hmac_sha1_tmp_t;
 
-#define SNMPV3_MAX_SALT_ELEMS    512 // 512 * 4 = 2048 > 1500, also has to be multiple of 64
-#define SNMPV3_MAX_ENGINE_ELEMS  16  // 16 * 4 = 64 > 32, also has to be multiple of 64
-#define SNMPV3_MAX_PNUM_ELEMS    4   // 4 * 4 = 16 > 9
-
 typedef struct snmpv3
 {
   u32 salt_buf[SNMPV3_MAX_SALT_ELEMS];
diff --git a/src/modules/module_25100.c b/src/modules/module_25100.c
index 22155ce9a..c0c9d68f9 100644
--- a/src/modules/module_25100.c
+++ b/src/modules/module_25100.c
@@ -24,8 +24,8 @@ static const u64   KERN_TYPE      = 25100;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE;
 static const u64   OPTS_TYPE      = OPTS_TYPE_PT_GENERATE_LE;
 static const u32   SALT_TYPE      = SALT_TYPE_EMBEDDED;
-static const char *ST_PASS        = "hashcat";
-static const char *ST_HASH        = "$SNMPv3$1$76$3081b10201033011020430f6f3d5020300ffe304010702010304373035040d80001f888059dc486145a2632202010802020ab90405706970706f040c00000000000000000000000004080000000103d5321a0460826ecf6443956d4c364bfc6f6ffc8ee0df000ffd0955af12d2c0f3c60fadea417d2bb80c0b2c1fa7a46ce44f9f16e15ee830a49881f60ecfa757d2f04000eb39a94058121d88ca20eeef4e6bf06784c67c15f144915d9bc2c6a0461da92a4abe$80001f888059dc486145a26322$c51ba677ad96869c1cb32196";
+static const char *ST_PASS        = "hashcat1";
+static const char *ST_HASH        = "$SNMPv3$1$45889431$30818f0201033011020409242fc0020300ffe304010102010304383036041180001f88808106d566db57fd600000000002011002020118040a6d61747269785f4d4435040c0000000000000000000000000400303d041180001f88808106d566db57fd60000000000400a226020411f319300201000201003018301606082b06010201010200060a2b06010401bf0803020a$80001f88808106d566db57fd6000000000$1b37c3ea872731f922959e90";
 
 u32         module_attack_exec    (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ATTACK_EXEC;     }
 u32         module_dgst_pos0      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS0;       }
@@ -45,8 +45,8 @@ const char *module_st_pass        (MAYBE_UNUSED const hashconfig_t *hashconfig,
 static const char *SIGNATURE_SNMPV3 = "$SNMPv3$1$";
 
 #define SNMPV3_SALT_MAX             1500
-#define SNMPV3_ENGINEID_MAX         32
-#define SNMPV3_MSG_AUTH_PARAMS_MAX  12
+#define SNMPV3_ENGINEID_MAX         34
+#define SNMPV3_MSG_AUTH_PARAMS_LEN  12
 #define SNMPV3_ROUNDS               1048576
 #define SNMPV3_MAX_PW_LENGTH        64
 
@@ -130,23 +130,23 @@ int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE
   token.attr[1]    = TOKEN_ATTR_VERIFY_LENGTH
                    | TOKEN_ATTR_VERIFY_DIGIT;
   // salt
-  token.len_min[2] = 12 * 2;
+  token.len_min[2] = SNMPV3_MSG_AUTH_PARAMS_LEN * 2;
   token.len_max[2] = SNMPV3_SALT_MAX * 2;
   token.sep[2]     = '$';
   token.attr[2]    = TOKEN_ATTR_VERIFY_LENGTH
                    | TOKEN_ATTR_VERIFY_HEX;
 
   // engineid
-  token.len_min[3] = 5;
+  token.len_min[3] = 26;
   token.len_max[3] = SNMPV3_ENGINEID_MAX;
   token.sep[3]     = '$';
-  token.attr[3]    = TOKEN_ATTR_VERIFY_LENGTH;
+  token.attr[3]    = TOKEN_ATTR_VERIFY_LENGTH
+                   | TOKEN_ATTR_VERIFY_HEX;
 
   // digest
-  token.len_min[4] = SNMPV3_MSG_AUTH_PARAMS_MAX * 2;
-  token.len_max[4] = SNMPV3_MSG_AUTH_PARAMS_MAX * 2;
+  token.len[4]     = SNMPV3_MSG_AUTH_PARAMS_LEN * 2;
   token.sep[4]     = '$';
-  token.attr[4]    = TOKEN_ATTR_VERIFY_LENGTH
+  token.attr[4]    = TOKEN_ATTR_FIXED_LENGTH
                    | TOKEN_ATTR_VERIFY_HEX;
 
   const int rc_tokenizer = input_tokenizer ((const u8 *) line_buf, line_len, &token);
diff --git a/src/modules/module_25200.c b/src/modules/module_25200.c
index c98a347ef..398098487 100644
--- a/src/modules/module_25200.c
+++ b/src/modules/module_25200.c
@@ -24,8 +24,8 @@ static const u64   KERN_TYPE      = 25200;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE;
 static const u64   OPTS_TYPE      = OPTS_TYPE_PT_GENERATE_LE;
 static const u32   SALT_TYPE      = SALT_TYPE_EMBEDDED;
-static const char *ST_PASS        = "hashcat";
-static const char *ST_HASH        = "$SNMPv3$2$66763052$13981919518623358902340156831753173612320956749283824166083320737667668557830898783481876963136410266762758410322896320705075044221495960812100760230106803899899467077793703068392752686845035561487927252457444567685389901239388468830507087105054207914325254376053788152029716918450770264047103676562621965276752797029332926039166807829108367446173251908238116020942421323633620301312478670302264165059728208402342845743839533979473825394866704960428648622730299023225638967097578710279784722583947877561544154219162080289188160001741612377820114739093961409809862173307722539556954826052612794054060797358016549602977742745078911393042420821004243620362464971828700104979572910001640083882586179153483503492341163054930853321963503411228241996417991605003371264529827508426941919673592574025732354318435733211018917539824570724324796232199960952117561108106623865308577977944499366806697863259301760429786001824121720055893438673268643594146796410437039466462606490272723136671298529920486664067752007564122205089571790718437001200506203464426405927405102300269665189637001279369690218157456566218400534722049383049029139069701182053729830585217732347396312967325628046845068493719801191260136945971516486442056102815519090214442808707545803919529217103430588641187558031052830941742920355893755319896626873275796534820394248837050567688575113833311009595128372820474678989203565094681918285106102363272728922586582037066265522397748326630668375500179630717875844561081542915676557961288028298248995547031274515608973804660067065502484039882958958452781062725550260382637592283691962996228392332833626159043179186189904614052189303508782635840692436969244901198720814518$79f7b1$57e964c7cb117647004cf132";
+static const char *ST_PASS        = "hashcat1";
+static const char *ST_HASH        = "$SNMPv3$2$45889431$30818f02010330110204371780f3020300ffe304010102010304383036041180001f88808106d566db57fd600000000002011002020118040a6d61747269785f534841040c0000000000000000000000000400303d041180001f88808106d566db57fd60000000000400a2260204073557d50201000201003018301606082b06010201010200060a2b06010401bf0803020a$80001f88808106d566db57fd6000000000$81f14f1930589f26f6755f6b";
 
 u32         module_attack_exec    (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ATTACK_EXEC;     }
 u32         module_dgst_pos0      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS0;       }
@@ -45,8 +45,8 @@ const char *module_st_pass        (MAYBE_UNUSED const hashconfig_t *hashconfig,
 static const char *SIGNATURE_SNMPV3 = "$SNMPv3$2$";
 
 #define SNMPV3_SALT_MAX             1500
-#define SNMPV3_ENGINEID_MAX         32
-#define SNMPV3_MSG_AUTH_PARAMS_MAX  12
+#define SNMPV3_ENGINEID_MAX         34
+#define SNMPV3_MSG_AUTH_PARAMS_LEN  12
 #define SNMPV3_ROUNDS               1048576
 #define SNMPV3_MAX_PW_LENGTH        64
 
@@ -130,23 +130,23 @@ int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE
   token.attr[1]    = TOKEN_ATTR_VERIFY_LENGTH
                    | TOKEN_ATTR_VERIFY_DIGIT;
   // salt
-  token.len_min[2] = 12 * 2;
+  token.len_min[2] = SNMPV3_MSG_AUTH_PARAMS_LEN * 2;
   token.len_max[2] = SNMPV3_SALT_MAX * 2;
   token.sep[2]     = '$';
   token.attr[2]    = TOKEN_ATTR_VERIFY_LENGTH
                    | TOKEN_ATTR_VERIFY_HEX;
 
   // engineid
-  token.len_min[3] = 5;
+  token.len_min[3] = 26;
   token.len_max[3] = SNMPV3_ENGINEID_MAX;
   token.sep[3]     = '$';
-  token.attr[3]    = TOKEN_ATTR_VERIFY_LENGTH;
+  token.attr[3]    = TOKEN_ATTR_VERIFY_LENGTH
+                   | TOKEN_ATTR_VERIFY_HEX;
 
   // digest
-  token.len_min[4] = SNMPV3_MSG_AUTH_PARAMS_MAX * 2;
-  token.len_max[4] = SNMPV3_MSG_AUTH_PARAMS_MAX * 2;
+  token.len[4]     = SNMPV3_MSG_AUTH_PARAMS_LEN * 2;
   token.sep[4]     = '$';
-  token.attr[4]    = TOKEN_ATTR_VERIFY_LENGTH
+  token.attr[4]    = TOKEN_ATTR_FIXED_LENGTH
                    | TOKEN_ATTR_VERIFY_HEX;
 
   const int rc_tokenizer = input_tokenizer ((const u8 *) line_buf, line_len, &token);
@@ -183,10 +183,10 @@ int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE
 
   // store sha1(snmpv3->salt_buf) in salt_buf
 
-  memcpy (salt->salt_buf, sha1_ctx.h, 20);
-
   salt->salt_len = 20;
 
+  memcpy (salt->salt_buf, sha1_ctx.h, salt->salt_len);
+
   // engineid
 
   const u8 *engineID_pos = token.buf[3];
@@ -203,12 +203,13 @@ int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE
   digest[0] = hex_to_u32 (hash_pos +  0);
   digest[1] = hex_to_u32 (hash_pos +  8);
   digest[2] = hex_to_u32 (hash_pos + 16);
-  digest[3] = 0;
 
   digest[0] = byte_swap_32 (digest[0]);
   digest[1] = byte_swap_32 (digest[1]);
   digest[2] = byte_swap_32 (digest[2]);
 
+  digest[3] = 0;
+
   return (PARSER_OK);
 }
 
diff --git a/tools/test_modules/m25100.pm b/tools/test_modules/m25100.pm
index 6249df9a8..2335f7f2f 100644
--- a/tools/test_modules/m25100.pm
+++ b/tools/test_modules/m25100.pm
@@ -11,14 +11,14 @@ use warnings;
 use Digest::MD5 qw (md5 md5_hex);
 use Digest::HMAC qw (hmac hmac_hex);
 
-sub module_constraints { [[1, 256], [24, 3000], [-1, -1], [-1, -1], [-1, -1]] }
+sub module_constraints { [[8, 256], [24, 3000], [-1, -1], [-1, -1], [-1, -1]] }
 
 sub module_generate_hash
 {
   my $word = shift;
   my $salt = shift;
-  my $pkt_num = shift // int(rand(99999999));
-  my $engineID = shift // random_hex_string(6);
+  my $pkt_num = shift // int(rand(100000000));
+  my $engineID = shift // random_hex_string(26, 34);
 
   # make even if needed
 
@@ -71,7 +71,7 @@ sub module_verify_hash
 
   my $word_packed = pack_if_HEX_notation ($word);
 
-  my $new_hash = module_generate_hash ($word_packed, $salt, $pkt_num, $engineID); #, $digest);
+  my $new_hash = module_generate_hash ($word_packed, $salt, $pkt_num, $engineID);
 
   return ($new_hash, $word);
 }
diff --git a/tools/test_modules/m25200.pm b/tools/test_modules/m25200.pm
index c44212825..d27908255 100644
--- a/tools/test_modules/m25200.pm
+++ b/tools/test_modules/m25200.pm
@@ -11,14 +11,14 @@ use warnings;
 use Digest::SHA qw (sha1 sha1_hex);
 use Digest::HMAC qw (hmac hmac_hex);
 
-sub module_constraints { [[1, 256], [24, 3000], [-1, -1], [-1, -1], [-1, -1]] }
+sub module_constraints { [[8, 256], [24, 3000], [-1, -1], [-1, -1], [-1, -1]] }
 
 sub module_generate_hash
 {
   my $word = shift;
   my $salt = shift;
-  my $pkt_num = shift // int(rand(99999999));
-  my $engineID = shift // random_hex_string(6);
+  my $pkt_num = shift // int(rand(100000000));
+  my $engineID = shift // random_hex_string(26, 34);
 
   # make even if needed
 

From e15fe3461e64a3668f39c04bf6d9d17d69840715 Mon Sep 17 00:00:00 2001
From: Gabriele Gristina <matrix@users.noreply.github.com>
Date: Mon, 26 Jul 2021 19:38:53 +0200
Subject: [PATCH 15/24] add missing pw_min() to snmpv3 md5/sha1 modules

---
 src/modules/module_25100.c | 9 ++++++++-
 src/modules/module_25200.c | 9 ++++++++-
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/src/modules/module_25100.c b/src/modules/module_25100.c
index c0c9d68f9..4fdbdf828 100644
--- a/src/modules/module_25100.c
+++ b/src/modules/module_25100.c
@@ -76,6 +76,13 @@ typedef struct snmpv3
 
 } snmpv3_t;
 
+u32 module_pw_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 pw_min = 8;
+
+  return pw_min;
+}
+
 u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
 {
   const u64 esalt_size = (const u64) sizeof (snmpv3_t);
@@ -303,7 +310,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_potfile_keep_all_hashes  = MODULE_DEFAULT;
   module_ctx->module_pwdump_column            = MODULE_DEFAULT;
   module_ctx->module_pw_max                   = MODULE_DEFAULT;
-  module_ctx->module_pw_min                   = MODULE_DEFAULT;
+  module_ctx->module_pw_min                   = module_pw_min;
   module_ctx->module_salt_max                 = MODULE_DEFAULT;
   module_ctx->module_salt_min                 = MODULE_DEFAULT;
   module_ctx->module_salt_type                = module_salt_type;
diff --git a/src/modules/module_25200.c b/src/modules/module_25200.c
index 398098487..66573cb5a 100644
--- a/src/modules/module_25200.c
+++ b/src/modules/module_25200.c
@@ -76,6 +76,13 @@ typedef struct snmpv3
 
 } snmpv3_t;
 
+u32 module_pw_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 pw_min = 8;
+
+  return pw_min;
+}
+
 u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
 {
   const u64 esalt_size = (const u64) sizeof (snmpv3_t);
@@ -314,7 +321,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_potfile_keep_all_hashes  = MODULE_DEFAULT;
   module_ctx->module_pwdump_column            = MODULE_DEFAULT;
   module_ctx->module_pw_max                   = MODULE_DEFAULT;
-  module_ctx->module_pw_min                   = MODULE_DEFAULT;
+  module_ctx->module_pw_min                   = module_pw_min;
   module_ctx->module_salt_max                 = MODULE_DEFAULT;
   module_ctx->module_salt_min                 = MODULE_DEFAULT;
   module_ctx->module_salt_type                = module_salt_type;

From c20ff01c390b4d18a25ed50ad48eea99c4c3446e Mon Sep 17 00:00:00 2001
From: Gabriele Gristina <matrix@users.noreply.github.com>
Date: Mon, 26 Jul 2021 22:25:15 +0200
Subject: [PATCH 16/24] using shared buffer between md5 and sha1
 SNMPV3_TMP_ELEMS_OPT, fix to crack real hashes

---
 OpenCL/m25000-pure.cl        | 97 ++++++++++++++++++++----------------
 src/modules/module_25000.c   | 33 +++++++-----
 tools/test_modules/m25000.pm |  6 +--
 3 files changed, 79 insertions(+), 57 deletions(-)

diff --git a/OpenCL/m25000-pure.cl b/OpenCL/m25000-pure.cl
index 33c1dcc8f..249aa95fd 100644
--- a/OpenCL/m25000-pure.cl
+++ b/OpenCL/m25000-pure.cl
@@ -19,8 +19,8 @@
 #define COMPARE_M "inc_comp_multi.cl"
 
 #define SNMPV3_SALT_MAX             1500
-#define SNMPV3_ENGINEID_MAX         32
-#define SNMPV3_MSG_AUTH_PARAMS_MAX  12
+#define SNMPV3_ENGINEID_MAX         34
+#define SNMPV3_MSG_AUTH_PARAMS_LEN  12
 #define SNMPV3_ROUNDS               1048576
 #define SNMPV3_MAX_PW_LENGTH        64
 
@@ -192,15 +192,15 @@ KERNEL_FQ void m25000_loop (KERN_ATTR_TMPS_ESALT (hmac_md5_tmp_t, snmpv3_t))
   #define SNMPV3_TMP_ELEMS_OPT 1024 // 1024 = (64 max pw length * 64) / sizeof (u32)
                                     // for pw length > 64 we use global memory reads
 
-  u32 tmp_md5[SNMPV3_TMP_ELEMS_OPT];
-  u32 tmp_sha1[SNMPV3_TMP_ELEMS_OPT];
-
   if (pw_len < 64)
   {
+    u32 tmp_shared[SNMPV3_TMP_ELEMS_OPT];
+
+    // md5
+
     for (int i = 0; i < pw_len64 / 4; i++)
     {
-      tmp_md5[i] = tmps[gid].tmp_md5[i];
-      tmp_sha1[i] = tmps[gid].tmp_sha1[i];
+      tmp_shared[i] = tmps[gid].tmp_md5[i];
     }
 
     for (int i = 0, j = loop_pos; i < loop_cnt; i += 64, j += 64)
@@ -212,45 +212,58 @@ KERNEL_FQ void m25000_loop (KERN_ATTR_TMPS_ESALT (hmac_md5_tmp_t, snmpv3_t))
       u32 w2[4];
       u32 w3[4];
 
-      // md5
-
-      w0[0] = tmp_md5[idx +  0];
-      w0[1] = tmp_md5[idx +  1];
-      w0[2] = tmp_md5[idx +  2];
-      w0[3] = tmp_md5[idx +  3];
-      w1[0] = tmp_md5[idx +  4];
-      w1[1] = tmp_md5[idx +  5];
-      w1[2] = tmp_md5[idx +  6];
-      w1[3] = tmp_md5[idx +  7];
-      w2[0] = tmp_md5[idx +  8];
-      w2[1] = tmp_md5[idx +  9];
-      w2[2] = tmp_md5[idx + 10];
-      w2[3] = tmp_md5[idx + 11];
-      w3[0] = tmp_md5[idx + 12];
-      w3[1] = tmp_md5[idx + 13];
-      w3[2] = tmp_md5[idx + 14];
-      w3[3] = tmp_md5[idx + 15];
+      w0[0] = tmp_shared[idx +  0];
+      w0[1] = tmp_shared[idx +  1];
+      w0[2] = tmp_shared[idx +  2];
+      w0[3] = tmp_shared[idx +  3];
+      w1[0] = tmp_shared[idx +  4];
+      w1[1] = tmp_shared[idx +  5];
+      w1[2] = tmp_shared[idx +  6];
+      w1[3] = tmp_shared[idx +  7];
+      w2[0] = tmp_shared[idx +  8];
+      w2[1] = tmp_shared[idx +  9];
+      w2[2] = tmp_shared[idx + 10];
+      w2[3] = tmp_shared[idx + 11];
+      w3[0] = tmp_shared[idx + 12];
+      w3[1] = tmp_shared[idx + 13];
+      w3[2] = tmp_shared[idx + 14];
+      w3[3] = tmp_shared[idx + 15];
 
       md5_transform (w0, w1, w2, w3, h_md5);
+    }
 
-      // sha1
+    // sha1
+
+    for (int i = 0; i < pw_len64 / 4; i++)
+    {
+      tmp_shared[i] = tmps[gid].tmp_sha1[i];
+    }
+
+    for (int i = 0, j = loop_pos; i < loop_cnt; i += 64, j += 64)
+    {
+      const int idx = (j % pw_len64) / 4; // the optimization trick is to be able to do this
+
+      u32 w0[4];
+      u32 w1[4];
+      u32 w2[4];
+      u32 w3[4];
 
-      w0[0] = tmp_sha1[idx +  0];
-      w0[1] = tmp_sha1[idx +  1];
-      w0[2] = tmp_sha1[idx +  2];
-      w0[3] = tmp_sha1[idx +  3];
-      w1[0] = tmp_sha1[idx +  4];
-      w1[1] = tmp_sha1[idx +  5];
-      w1[2] = tmp_sha1[idx +  6];
-      w1[3] = tmp_sha1[idx +  7];
-      w2[0] = tmp_sha1[idx +  8];
-      w2[1] = tmp_sha1[idx +  9];
-      w2[2] = tmp_sha1[idx + 10];
-      w2[3] = tmp_sha1[idx + 11];
-      w3[0] = tmp_sha1[idx + 12];
-      w3[1] = tmp_sha1[idx + 13];
-      w3[2] = tmp_sha1[idx + 14];
-      w3[3] = tmp_sha1[idx + 15];
+      w0[0] = tmp_shared[idx +  0];
+      w0[1] = tmp_shared[idx +  1];
+      w0[2] = tmp_shared[idx +  2];
+      w0[3] = tmp_shared[idx +  3];
+      w1[0] = tmp_shared[idx +  4];
+      w1[1] = tmp_shared[idx +  5];
+      w1[2] = tmp_shared[idx +  6];
+      w1[3] = tmp_shared[idx +  7];
+      w2[0] = tmp_shared[idx +  8];
+      w2[1] = tmp_shared[idx +  9];
+      w2[2] = tmp_shared[idx + 10];
+      w2[3] = tmp_shared[idx + 11];
+      w3[0] = tmp_shared[idx + 12];
+      w3[1] = tmp_shared[idx + 13];
+      w3[2] = tmp_shared[idx + 14];
+      w3[3] = tmp_shared[idx + 15];
 
       sha1_transform (w0, w1, w2, w3, h_sha1);
     }
diff --git a/src/modules/module_25000.c b/src/modules/module_25000.c
index 5baa18663..deac74055 100644
--- a/src/modules/module_25000.c
+++ b/src/modules/module_25000.c
@@ -24,8 +24,8 @@ static const u64   KERN_TYPE      = 25000;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE;
 static const u64   OPTS_TYPE      = OPTS_TYPE_PT_GENERATE_LE;
 static const u32   SALT_TYPE      = SALT_TYPE_EMBEDDED;
-static const char *ST_PASS        = "hashcat";
-static const char *ST_HASH        = "$SNMPv3$0$66763052$13981919518623358902340156831753173612320956749283824166083320737667668557830898783481876963136410266762758410322896320705075044221495960812100760230106803899899467077793703068392752686845035561487927252457444567685389901239388468830507087105054207914325254376053788152029716918450770264047103676562621965276752797029332926039166807829108367446173251908238116020942421323633620301312478670302264165059728208402342845743839533979473825394866704960428648622730299023225638967097578710279784722583947877561544154219162080289188160001741612377820114739093961409809862173307722539556954826052612794054060797358016549602977742745078911393042420821004243620362464971828700104979572910001640083882586179153483503492341163054930853321963503411228241996417991605003371264529827508426941919673592574025732354318435733211018917539824570724324796232199960952117561108106623865308577977944499366806697863259301760429786001824121720055893438673268643594146796410437039466462606490272723136671298529920486664067752007564122205089571790718437001200506203464426405927405102300269665189637001279369690218157456566218400534722049383049029139069701182053729830585217732347396312967325628046845068493719801191260136945971516486442056102815519090214442808707545803919529217103430588641187558031052830941742920355893755319896626873275796534820394248837050567688575113833311009595128372820474678989203565094681918285106102363272728922586582037066265522397748326630668375500179630717875844561081542915676557961288028298248995547031274515608973804660067065502484039882958958452781062725550260382637592283691962996228392332833626159043179186189904614052189303508782635840692436969244901198720814518$79f7b1$57e964c7cb117647004cf132";
+static const char *ST_PASS        = "hashcat1";
+static const char *ST_HASH        = "$SNMPv3$0$45889431$30818f0201033011020409242fc0020300ffe304010102010304383036041180001f88808106d566db57fd600000000002011002020118040a6d61747269785f4d4435040c0000000000000000000000000400303d041180001f88808106d566db57fd60000000000400a226020411f319300201000201003018301606082b06010201010200060a2b06010401bf0803020a$80001f88808106d566db57fd6000000000$1b37c3ea872731f922959e90";
 
 u32         module_attack_exec    (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ATTACK_EXEC;     }
 u32         module_dgst_pos0      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS0;       }
@@ -45,8 +45,8 @@ const char *module_st_pass        (MAYBE_UNUSED const hashconfig_t *hashconfig,
 static const char *SIGNATURE_SNMPV3 = "$SNMPv3$0$";
 
 #define SNMPV3_SALT_MAX             1500
-#define SNMPV3_ENGINEID_MAX         32
-#define SNMPV3_MSG_AUTH_PARAMS_MAX  12
+#define SNMPV3_ENGINEID_MAX         34
+#define SNMPV3_MSG_AUTH_PARAMS_LEN  12
 #define SNMPV3_ROUNDS               1048576
 #define SNMPV3_MAX_PW_LENGTH        64
 
@@ -62,6 +62,7 @@ typedef struct hmac_md5_tmp
 {
   u32 tmp_md5[SNMPV3_TMP_ELEMS];
   u32 tmp_sha1[SNMPV3_TMP_ELEMS];
+
   u32 h_md5[SNMPV3_HASH_ELEMS_MD5];
   u32 h_sha1[SNMPV3_HASH_ELEMS_SHA1];
 
@@ -79,6 +80,13 @@ typedef struct snmpv3
 
 } snmpv3_t;
 
+u32 module_pw_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 pw_min = 8;
+
+  return pw_min;
+}
+
 u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
 {
   const u64 esalt_size = (const u64) sizeof (snmpv3_t);
@@ -133,23 +141,23 @@ int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE
   token.attr[1]    = TOKEN_ATTR_VERIFY_LENGTH
                    | TOKEN_ATTR_VERIFY_DIGIT;
   // salt
-  token.len_min[2] = 12 * 2;
+  token.len_min[2] = SNMPV3_MSG_AUTH_PARAMS_LEN * 2;
   token.len_max[2] = SNMPV3_SALT_MAX * 2;
   token.sep[2]     = '$';
   token.attr[2]    = TOKEN_ATTR_VERIFY_LENGTH
                    | TOKEN_ATTR_VERIFY_HEX;
 
   // engineid
-  token.len_min[3] = 5;
+  token.len_min[3] = 26;
   token.len_max[3] = SNMPV3_ENGINEID_MAX;
   token.sep[3]     = '$';
-  token.attr[3]    = TOKEN_ATTR_VERIFY_LENGTH;
+  token.attr[3]    = TOKEN_ATTR_VERIFY_LENGTH
+                   | TOKEN_ATTR_VERIFY_HEX;
 
   // digest
-  token.len_min[4] = SNMPV3_MSG_AUTH_PARAMS_MAX * 2;
-  token.len_max[4] = SNMPV3_MSG_AUTH_PARAMS_MAX * 2;
+  token.len[4]     = SNMPV3_MSG_AUTH_PARAMS_LEN * 2;
   token.sep[4]     = '$';
-  token.attr[4]    = TOKEN_ATTR_VERIFY_LENGTH
+  token.attr[4]    = TOKEN_ATTR_FIXED_LENGTH
                    | TOKEN_ATTR_VERIFY_HEX;
 
   const int rc_tokenizer = input_tokenizer ((const u8 *) line_buf, line_len, &token);
@@ -206,7 +214,6 @@ int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE
   digest[0] = hex_to_u32 (hash_pos +  0);
   digest[1] = hex_to_u32 (hash_pos +  8);
   digest[2] = hex_to_u32 (hash_pos + 16);
-  digest[3] = 0;
 
   // prefer sha1 due to speed
 
@@ -214,6 +221,8 @@ int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE
   digest[1] = byte_swap_32 (digest[1]);
   digest[2] = byte_swap_32 (digest[2]);
 
+  digest[3] = 0;
+
   return (PARSER_OK);
 }
 
@@ -320,7 +329,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_potfile_keep_all_hashes  = MODULE_DEFAULT;
   module_ctx->module_pwdump_column            = MODULE_DEFAULT;
   module_ctx->module_pw_max                   = MODULE_DEFAULT;
-  module_ctx->module_pw_min                   = MODULE_DEFAULT;
+  module_ctx->module_pw_min                   = module_pw_min;
   module_ctx->module_salt_max                 = MODULE_DEFAULT;
   module_ctx->module_salt_min                 = MODULE_DEFAULT;
   module_ctx->module_salt_type                = module_salt_type;
diff --git a/tools/test_modules/m25000.pm b/tools/test_modules/m25000.pm
index 889de9611..71cc1c512 100644
--- a/tools/test_modules/m25000.pm
+++ b/tools/test_modules/m25000.pm
@@ -12,14 +12,14 @@ use Digest::MD5 qw (md5 md5_hex);
 use Digest::SHA qw (sha1 sha1_hex);
 use Digest::HMAC qw (hmac hmac_hex);
 
-sub module_constraints { [[1, 256], [24, 3000], [-1, -1], [-1, -1], [-1, -1]] }
+sub module_constraints { [[8, 256], [24, 3000], [-1, -1], [-1, -1], [-1, -1]] }
 
 sub module_generate_hash
 {
   my $word = shift;
   my $salt = shift;
-  my $pkt_num = shift // int(rand(99999999));
-  my $engineID = shift // random_hex_string(6);
+  my $pkt_num = shift // int(rand(100000000));
+  my $engineID = shift // random_hex_string(26, 34);
   my $mode = shift // int(rand(1)) + 1;
 
   # make even if needed

From d62fa617fb2e2517dcbbceedfa3949de86d053b8 Mon Sep 17 00:00:00 2001
From: Gabriele Gristina <matrix@users.noreply.github.com>
Date: Mon, 26 Jul 2021 22:45:02 +0200
Subject: [PATCH 17/24] fix to crack real hashes

---
 OpenCL/m26700-pure.cl        |  6 +++---
 src/modules/module_26700.c   | 29 ++++++++++++++++++-----------
 tools/test_modules/m26700.pm |  6 +++---
 3 files changed, 24 insertions(+), 17 deletions(-)

diff --git a/OpenCL/m26700-pure.cl b/OpenCL/m26700-pure.cl
index 6c8f029b3..8a5a04325 100644
--- a/OpenCL/m26700-pure.cl
+++ b/OpenCL/m26700-pure.cl
@@ -18,7 +18,7 @@
 #define COMPARE_M "inc_comp_multi.cl"
 
 #define SNMPV3_SALT_MAX             1500
-#define SNMPV3_ENGINEID_MAX         32
+#define SNMPV3_ENGINEID_MAX         34
 #define SNMPV3_MSG_AUTH_PARAMS_MAX  16
 #define SNMPV3_ROUNDS               1048576
 #define SNMPV3_MAX_PW_LENGTH        64
@@ -158,10 +158,10 @@ KERNEL_FQ void m26700_loop (KERN_ATTR_TMPS_ESALT (hmac_sha224_tmp_t, snmpv3_t))
   #define SNMPV3_TMP_ELEMS_OPT 1024 // 1024 = (64 max pw length * 64) / sizeof (u32)
                                     // for pw length > 64 we use global memory reads
 
-  u32 tmp[SNMPV3_TMP_ELEMS_OPT];
-
   if (pw_len < 64)
   {
+    u32 tmp[SNMPV3_TMP_ELEMS_OPT];
+
     for (int i = 0; i < pw_len64 / 4; i++)
     {
       tmp[i] = tmps[gid].tmp[i];
diff --git a/src/modules/module_26700.c b/src/modules/module_26700.c
index 15da3a60d..c64f269cf 100644
--- a/src/modules/module_26700.c
+++ b/src/modules/module_26700.c
@@ -24,8 +24,8 @@ static const u64   KERN_TYPE      = 26700;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE;
 static const u64   OPTS_TYPE      = OPTS_TYPE_PT_GENERATE_LE;
 static const u32   SALT_TYPE      = SALT_TYPE_EMBEDDED;
-static const char *ST_PASS        = "hashcat";
-static const char *ST_HASH        = "$SNMPv3$3$93139992$221741464175523704413635982825760096118979556553098267101930601704853783146704303603164898517490303649758413279881023268227639264274559738208032094697403441579675568418814746064423158072029964334558571907882883041105245436623239742039483870313304031171307046174561938247029298397351679655253476035738973220651902635644891207741346383906360172060617958001549207150418505701978225626879116088671275359841611906258964723020692629233701447389366763685772212471681367034365005843875040967496437639996409692554570118676609568987002911124689769902674963799843406930141309408517459025165858554235820857416473466773963181853809212740450911140184957236422993171860303971025966646341351680880393147830452957802708608458538439866404321876100995381875117293904251031322241811475664324823327065168205689694742596451920374170034310748505203093091474865128628752667403895211365282260392475024320221767588855410235114859725219681974195474606697679001625416351117081484601569226697700302476076379$1759ce$cb8436f8e5b49d52a60d0ee076a79a97";
+static const char *ST_PASS        = "hashcat1";
+static const char *ST_HASH        = "$SNMPv3$3$45889431$308197020103301102047aa1a79e020300ffe30401010201030440303e041180001f88808106d566db57fd600000000002011002020118040e6d61747269785f5348412d3232340410000000000000000000000000000000000400303d041180001f88808106d566db57fd60000000000400a2260204272f76620201000201003018301606082b06010201010200060a2b06010401bf0803020a$80001f88808106d566db57fd6000000000$2f7a3891dd2e27d3f567e4d6d0257962";
 
 u32         module_attack_exec    (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ATTACK_EXEC;     }
 u32         module_dgst_pos0      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS0;       }
@@ -45,8 +45,8 @@ const char *module_st_pass        (MAYBE_UNUSED const hashconfig_t *hashconfig,
 static const char *SIGNATURE_SNMPV3 = "$SNMPv3$3$";
 
 #define SNMPV3_SALT_MAX             1500
-#define SNMPV3_ENGINEID_MAX         32
-#define SNMPV3_MSG_AUTH_PARAMS_MAX  16
+#define SNMPV3_ENGINEID_MAX         34
+#define SNMPV3_MSG_AUTH_PARAMS_LEN  16
 #define SNMPV3_ROUNDS               1048576
 #define SNMPV3_MAX_PW_LENGTH        64
 
@@ -76,6 +76,13 @@ typedef struct snmpv3
 
 } snmpv3_t;
 
+u32 module_pw_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 pw_min = 8;
+
+  return pw_min;
+}
+
 u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
 {
   const u64 esalt_size = (const u64) sizeof (snmpv3_t);
@@ -130,23 +137,23 @@ int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE
   token.attr[1]    = TOKEN_ATTR_VERIFY_LENGTH
                    | TOKEN_ATTR_VERIFY_DIGIT;
   // salt
-  token.len_min[2] = 16 * 2;
+  token.len_min[2] = SNMPV3_MSG_AUTH_PARAMS_LEN * 2;
   token.len_max[2] = SNMPV3_SALT_MAX * 2;
   token.sep[2]     = '$';
   token.attr[2]    = TOKEN_ATTR_VERIFY_LENGTH
                    | TOKEN_ATTR_VERIFY_HEX;
 
   // engineid
-  token.len_min[3] = 5;
+  token.len_min[3] = 26;
   token.len_max[3] = SNMPV3_ENGINEID_MAX;
   token.sep[3]     = '$';
-  token.attr[3]    = TOKEN_ATTR_VERIFY_LENGTH;
+  token.attr[3]    = TOKEN_ATTR_VERIFY_LENGTH
+                   | TOKEN_ATTR_VERIFY_HEX;
 
   // digest
-  token.len_min[4] = SNMPV3_MSG_AUTH_PARAMS_MAX * 2;
-  token.len_max[4] = SNMPV3_MSG_AUTH_PARAMS_MAX * 2;
+  token.len[4]     = SNMPV3_MSG_AUTH_PARAMS_LEN * 2;
   token.sep[4]     = '$';
-  token.attr[4]    = TOKEN_ATTR_VERIFY_LENGTH
+  token.attr[4]    = TOKEN_ATTR_FIXED_LENGTH
                    | TOKEN_ATTR_VERIFY_HEX;
 
   const int rc_tokenizer = input_tokenizer ((const u8 *) line_buf, line_len, &token);
@@ -316,7 +323,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_potfile_keep_all_hashes  = MODULE_DEFAULT;
   module_ctx->module_pwdump_column            = MODULE_DEFAULT;
   module_ctx->module_pw_max                   = MODULE_DEFAULT;
-  module_ctx->module_pw_min                   = MODULE_DEFAULT;
+  module_ctx->module_pw_min                   = module_pw_min;
   module_ctx->module_salt_max                 = MODULE_DEFAULT;
   module_ctx->module_salt_min                 = MODULE_DEFAULT;
   module_ctx->module_salt_type                = module_salt_type;
diff --git a/tools/test_modules/m26700.pm b/tools/test_modules/m26700.pm
index 4d77179f1..a97bc246e 100644
--- a/tools/test_modules/m26700.pm
+++ b/tools/test_modules/m26700.pm
@@ -11,14 +11,14 @@ use warnings;
 use Digest::SHA qw (sha224 sha224_hex);
 use Digest::HMAC qw (hmac hmac_hex);
 
-sub module_constraints { [[1, 256], [32, 3000], [-1, -1], [-1, -1], [-1, -1]] }
+sub module_constraints { [[8, 256], [32, 3000], [-1, -1], [-1, -1], [-1, -1]] }
 
 sub module_generate_hash
 {
   my $word = shift;
   my $salt = shift;
-  my $pkt_num = shift // int(rand(99999999));
-  my $engineID = shift // random_hex_string(6);
+  my $pkt_num = shift // int(rand(100000000));
+  my $engineID = shift // random_hex_string(26, 34);
 
   # make even if needed
 

From fd2cb59d26082e1641c3f21118db7ab14a5e4930 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Tue, 27 Jul 2021 09:37:31 +0200
Subject: [PATCH 18/24] AMD GPUs: On Apple OpenCL platform, we ask for the
 preferred kernel thread size rather than hard-coding 32. ECC secp256k1:
 Removed the inline assembly code for AMD GPUs because the latest JIT
 compilers optimize it with the same efficiency

---
 OpenCL/inc_ecc_secp256k1.cl |  8 ++++++--
 src/backend.c               | 17 +++++++++++++++++
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/OpenCL/inc_ecc_secp256k1.cl b/OpenCL/inc_ecc_secp256k1.cl
index b3a70df78..a487152ec 100644
--- a/OpenCL/inc_ecc_secp256k1.cl
+++ b/OpenCL/inc_ecc_secp256k1.cl
@@ -124,7 +124,9 @@ DECLSPEC u32 sub (u32 *r, const u32 *a, const u32 *b)
     :  "r"(a[0]),  "r"(a[1]),  "r"(a[2]),  "r"(a[3]),  "r"(a[4]),  "r"(a[5]),  "r"(a[6]),  "r"(a[7]),
        "r"(b[0]),  "r"(b[1]),  "r"(b[2]),  "r"(b[3]),  "r"(b[4]),  "r"(b[5]),  "r"(b[6]),  "r"(b[7])
   );
-  #elif (defined IS_AMD || defined IS_HIP) && HAS_VSUB == 1 && HAS_VSUBB == 1
+  // HIP doesn't support these so we stick to OpenCL (aka IS_AMD) - is also faster without asm
+  //#elif (defined IS_AMD || defined IS_HIP) && HAS_VSUB == 1 && HAS_VSUBB == 1
+  #elif 0
   __asm__ __volatile__
   (
     "V_SUB_U32   %0,  %9, %17;"
@@ -176,7 +178,9 @@ DECLSPEC u32 add (u32 *r, const u32 *a, const u32 *b)
     :  "r"(a[0]),  "r"(a[1]),  "r"(a[2]),  "r"(a[3]),  "r"(a[4]),  "r"(a[5]),  "r"(a[6]),  "r"(a[7]),
        "r"(b[0]),  "r"(b[1]),  "r"(b[2]),  "r"(b[3]),  "r"(b[4]),  "r"(b[5]),  "r"(b[6]),  "r"(b[7])
   );
-  #elif (defined IS_AMD || defined IS_HIP) && HAS_VADD == 1 && HAS_VADDC == 1
+  // HIP doesn't support these so we stick to OpenCL (aka IS_AMD) - is also faster without asm
+  //#elif (defined IS_AMD || defined IS_HIP) && HAS_VADD == 1 && HAS_VADDC == 1
+  #elif 0
   __asm__ __volatile__
   (
     "V_ADD_U32   %0,  %9, %17;"
diff --git a/src/backend.c b/src/backend.c
index d8d213bc8..aa28e02d5 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -9366,6 +9366,19 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
         if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
         {
+          if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_vendor_id == VENDOR_ID_AMD))
+          {
+            // from https://www.khronos.org/registry/OpenCL/extensions/amd/cl_amd_device_attribute_query.txt
+            #define CL_DEVICE_WAVEFRONT_WIDTH_AMD                   0x4043
+
+            // crazy, but apple does not support this query!
+            // the best alternative is "Preferred work group size multiple (kernel)", but it requires specifying a kernel.
+            // so we set kernel_preferred_wgs_multiple intentionally to 0 because otherwise it is set to 8 by default.
+            // later, if it is still 0, we assign it the preferred wgs multiple queried from a small kernel like bzero.
+
+            device_param->kernel_preferred_wgs_multiple = 0;
+          }
+
           if ((device_param->opencl_platform_vendor_id == VENDOR_ID_AMD) && (device_param->opencl_device_vendor_id == VENDOR_ID_AMD))
           {
             cl_uint device_wavefront_width_amd;
@@ -12023,6 +12036,10 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
         if (get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_bzero, &device_param->kernel_preferred_wgs_multiple_bzero) == -1) return -1;
 
+        // apple hack, but perhaps also an alternative for other vendors
+
+        if (device_param->kernel_preferred_wgs_multiple == 0) device_param->kernel_preferred_wgs_multiple = device_param->kernel_preferred_wgs_multiple_bzero;
+
         // GPU autotune init
 
         if (hc_clCreateKernel (hashcat_ctx, device_param->opencl_program_shared, "gpu_atinit", &device_param->opencl_kernel_atinit) == -1) return -1;

From c9d79374a59bc4e2eed0620c59d11f9b07ab7ca4 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Tue, 27 Jul 2021 09:54:15 +0200
Subject: [PATCH 19/24] Add missing u64 vector datatypes mapping for OpenCL;
 improve u64 usage in OpenCL/inc_zip_inflate.cl

---
 OpenCL/inc_types.h        |  6 +++++-
 OpenCL/inc_zip_inflate.cl | 28 +++++++++++++++-------------
 2 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/OpenCL/inc_types.h b/OpenCL/inc_types.h
index 295caabc0..9611ec05d 100644
--- a/OpenCL/inc_types.h
+++ b/OpenCL/inc_types.h
@@ -26,7 +26,11 @@ typedef unsigned long long  ullong;
 #endif
 
 #ifdef IS_OPENCL
-typedef ulong               ullong;
+typedef ulong   ullong;
+typedef ulong2  ullong2;
+typedef ulong4  ullong4;
+typedef ulong8  ullong8;
+typedef ulong16 ullong16;
 #endif
 
 #ifdef KERNEL_STATIC
diff --git a/OpenCL/inc_zip_inflate.cl b/OpenCL/inc_zip_inflate.cl
index fcc31e76b..00f762d81 100644
--- a/OpenCL/inc_zip_inflate.cl
+++ b/OpenCL/inc_zip_inflate.cl
@@ -202,10 +202,6 @@ DECLSPEC void *memset(u8 *s, int c, u32 len){
 #define MZ_MIN(a, b) (((a) < (b)) ? (a) : (b))
 #define MZ_DEFAULT_WINDOW_BITS 15
 #define TINFL_LZ_DICT_SIZE 32768
-#define TINFL_MEMCPY(d, s, l) memcpy(d, s, l)
-#define TINFL_MEMCPY_G(d, s, l, p) memcpy_g(d, s, l, p)
-#define TINFL_MEMSET(p, c, l) memset(p, c, (u32)l)
-#define MZ_CLEAR_OBJ(obj) memset(&(obj), 0, sizeof(obj))
 
 // hashcat-patched/hashcat-specific:
 #ifdef CRC32_IN_INFLATE
@@ -581,7 +577,7 @@ DECLSPEC tinfl_status tinfl_decompress(tinfl_decompressor *r, MAYBE_GLOBAL const
                     TINFL_CR_RETURN(38, (decomp_flags & TINFL_FLAG_HAS_MORE_INPUT) ? TINFL_STATUS_NEEDS_MORE_INPUT : TINFL_STATUS_FAILED_CANNOT_MAKE_PROGRESS);
                 }
                 n = MZ_MIN(MZ_MIN((size_t)(pOut_buf_end - pOut_buf_cur), (size_t)(pIn_buf_end - pIn_buf_cur)), counter);
-                TINFL_MEMCPY_G(pOut_buf_cur, pIn_buf_cur, n, pStream);
+                memcpy_g(pOut_buf_cur, pIn_buf_cur, n, pStream);
                 pIn_buf_cur += n;
                 pOut_buf_cur += n;
                 counter -= (mz_uint)n;
@@ -599,7 +595,7 @@ DECLSPEC tinfl_status tinfl_decompress(tinfl_decompressor *r, MAYBE_GLOBAL const
                 mz_uint i;
                 r->m_table_sizes[0] = 288;
                 r->m_table_sizes[1] = 32;
-                TINFL_MEMSET(r->m_tables[1].m_code_size, 5, 32);
+                memset(r->m_tables[1].m_code_size, 5, 32);
                 for (i = 0; i <= 143; ++i)
                     *p++ = 8;
                 for (; i <= 255; ++i)
@@ -616,7 +612,8 @@ DECLSPEC tinfl_status tinfl_decompress(tinfl_decompressor *r, MAYBE_GLOBAL const
                     TINFL_GET_BITS(11, r->m_table_sizes[counter], "\05\05\04"[counter]);
                     r->m_table_sizes[counter] += s_min_table_sizes[counter];
                 }
-                MZ_CLEAR_OBJ(r->m_tables[2].m_code_size);
+                memset(r->m_tables[2].m_code_size, 0, TINFL_MAX_HUFF_SYMBOLS_0);
+
                 for (counter = 0; counter < r->m_table_sizes[2]; counter++)
                 {
                     mz_uint s;
@@ -631,9 +628,11 @@ DECLSPEC tinfl_status tinfl_decompress(tinfl_decompressor *r, MAYBE_GLOBAL const
                 tinfl_huff_table *pTable;
                 mz_uint i, j, used_syms, total, sym_index, next_code[17], total_syms[16];
                 pTable = &r->m_tables[r->m_type];
-                MZ_CLEAR_OBJ(total_syms);
-                MZ_CLEAR_OBJ(pTable->m_look_up);
-                MZ_CLEAR_OBJ(pTable->m_tree);
+
+                memset((u8 *) total_syms, 0, 64);
+                memset((u8 *) pTable->m_look_up, 0, TINFL_FAST_LOOKUP_SIZE * 2);
+                memset((u8 *) pTable->m_tree, 0, TINFL_MAX_HUFF_SYMBOLS_0 * 2 * 2);
+
                 for (i = 0; i < r->m_table_sizes[r->m_type]; ++i)
                     total_syms[pTable->m_code_size[i]]++;
                 used_syms = 0, total = 0;
@@ -705,15 +704,18 @@ DECLSPEC tinfl_status tinfl_decompress(tinfl_decompressor *r, MAYBE_GLOBAL const
                         num_extra = "\02\03\07"[dist - 16];
                         TINFL_GET_BITS(18, s, num_extra);
                         s += "\03\03\013"[dist - 16];
-                        TINFL_MEMSET(r->m_len_codes + counter, (dist == 16) ? r->m_len_codes[counter - 1] : 0, s);
+
+                        memset(r->m_len_codes + counter, (dist == 16) ? r->m_len_codes[counter - 1] : 0, s);
+
+
                         counter += s;
                     }
                     if ((r->m_table_sizes[0] + r->m_table_sizes[1]) != counter)
                     {
                         TINFL_CR_RETURN_FOREVER(21, TINFL_STATUS_FAILED);
                     }
-                    TINFL_MEMCPY(r->m_tables[0].m_code_size, r->m_len_codes, r->m_table_sizes[0]);
-                    TINFL_MEMCPY(r->m_tables[1].m_code_size, r->m_len_codes + r->m_table_sizes[0], r->m_table_sizes[1]);
+                    memcpy(r->m_tables[0].m_code_size, r->m_len_codes, r->m_table_sizes[0]);
+                    memcpy(r->m_tables[1].m_code_size, r->m_len_codes + r->m_table_sizes[0], r->m_table_sizes[1]);
                 }
             }
             for (;;)

From 532a1545428a6caf06a78f62783542dce5e572e8 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Tue, 27 Jul 2021 12:02:27 +0200
Subject: [PATCH 20/24] ADL: Updated support for AMD Display Library to 15.0,
 updated datatypes and added support for OverDrive 7 and 8 based GPUs

---
 docs/changes.txt  |   2 +-
 include/ext_ADL.h | 267 ++++++++++++++++++++--------------------------
 src/ext_ADL.c     |   1 -
 src/hwmon.c       |  16 ++-
 4 files changed, 132 insertions(+), 154 deletions(-)

diff --git a/docs/changes.txt b/docs/changes.txt
index 2190b9c98..0c31c4cdf 100644
--- a/docs/changes.txt
+++ b/docs/changes.txt
@@ -30,7 +30,7 @@
 ## Technical
 ##
 
-- ADL: Updated support for AMD Display Library to 14.0, updated datatypes and added support for OverDrive 7 and 8 based GPUs
+- ADL: Updated support for AMD Display Library to 15.0, updated datatypes and added support for OverDrive 7 and 8 based GPUs
 - Commandline: Throw an error if separator character given by the user with -p option is not exactly 1 byte
 - HIP Kernels: Got rid of hip/hip_runtime.h dependancy to enable more easy integration of the HIP backend on Windows
 - Kernel Cache: Add kernel threads into hash computation which is later used in the kernel cache filename
diff --git a/include/ext_ADL.h b/include/ext_ADL.h
index 238453b7c..369a8eb1a 100644
--- a/include/ext_ADL.h
+++ b/include/ext_ADL.h
@@ -13,22 +13,120 @@
 #include <windows.h>
 #endif // _WIN
 
-// Values taken from display-library-14.0.zip
+// Declarations from:
+// https://github.com/GPUOpen-LibrariesAndSDKs/display-library/blob/209538e1dc7273f7459411a3a5044ffe2437ed95/include/adl_defines.h
+// https://github.com/GPUOpen-LibrariesAndSDKs/display-library/blob/209538e1dc7273f7459411a3a5044ffe2437ed95/include/adl_structures.h
 
-/**
- * Declarations from adl_defines.h
- */
 
+/// Defines ADL_TRUE
+#define ADL_TRUE    1
+/// Defines ADL_FALSE
+#define ADL_FALSE        0
+
+//Define Performance Metrics Log max sensors number
+#define ADL_PMLOG_MAX_SENSORS  256
+
+typedef enum ADLSensorType
+{
+	SENSOR_MAXTYPES = 0,
+	PMLOG_CLK_GFXCLK = 1,
+	PMLOG_CLK_MEMCLK = 2,
+	PMLOG_CLK_SOCCLK = 3,
+	PMLOG_CLK_UVDCLK1 = 4,
+	PMLOG_CLK_UVDCLK2 = 5,
+	PMLOG_CLK_VCECLK = 6,
+	PMLOG_CLK_VCNCLK = 7,
+	PMLOG_TEMPERATURE_EDGE = 8,
+	PMLOG_TEMPERATURE_MEM = 9,
+	PMLOG_TEMPERATURE_VRVDDC = 10,
+	PMLOG_TEMPERATURE_VRMVDD = 11,
+	PMLOG_TEMPERATURE_LIQUID = 12,
+	PMLOG_TEMPERATURE_PLX = 13,
+	PMLOG_FAN_RPM = 14,
+	PMLOG_FAN_PERCENTAGE = 15,
+	PMLOG_SOC_VOLTAGE = 16,
+	PMLOG_SOC_POWER = 17,
+	PMLOG_SOC_CURRENT = 18,
+	PMLOG_INFO_ACTIVITY_GFX = 19,
+	PMLOG_INFO_ACTIVITY_MEM = 20,
+	PMLOG_GFX_VOLTAGE = 21,
+	PMLOG_MEM_VOLTAGE = 22,
+	PMLOG_ASIC_POWER = 23,
+	PMLOG_TEMPERATURE_VRSOC = 24,
+	PMLOG_TEMPERATURE_VRMVDD0 = 25,
+	PMLOG_TEMPERATURE_VRMVDD1 = 26,
+	PMLOG_TEMPERATURE_HOTSPOT = 27,
+        PMLOG_TEMPERATURE_GFX = 28,
+        PMLOG_TEMPERATURE_SOC = 29,
+        PMLOG_GFX_POWER = 30,
+        PMLOG_GFX_CURRENT = 31,
+        PMLOG_TEMPERATURE_CPU = 32,
+        PMLOG_CPU_POWER = 33,
+        PMLOG_CLK_CPUCLK = 34,
+        PMLOG_THROTTLER_STATUS = 35,
+        PMLOG_CLK_VCN1CLK1 = 36,
+        PMLOG_CLK_VCN1CLK2 = 37,
+        PMLOG_SMART_POWERSHIFT_CPU = 38,
+        PMLOG_SMART_POWERSHIFT_DGPU = 39,
+        PMLOG_BUS_SPEED = 40,
+        PMLOG_BUS_LANES = 41,
+	PMLOG_MAX_SENSORS_REAL
+} ADLSensorType;
+
+/// Defines the maximum string length
+#define ADL_MAX_CHAR                                    4096
+/// Defines the maximum string length
+#define ADL_MAX_PATH                                    256
+/// Defines the maximum number of supported adapters
+#define ADL_MAX_ADAPTERS                               250
+/// Defines the maxumum number of supported displays
+#define ADL_MAX_DISPLAYS                                150
+/// Defines the maxumum string length for device name
+#define ADL_MAX_DEVICENAME                                32
+/// Defines for all adapters
+#define ADL_ADAPTER_INDEX_ALL                            -1
+
+/// \defgroup define_adl_results Result Codes
+/// This group of definitions are the various results returned by all ADL functions \n
+/// @{
+/// All OK, but need to wait
+#define ADL_OK_WAIT                4
+/// All OK, but need restart
+#define ADL_OK_RESTART                3
+/// All OK but need mode change
+#define ADL_OK_MODE_CHANGE            2
+/// All OK, but with warning
+#define ADL_OK_WARNING                1
 /// ADL function completed successfully
 #define ADL_OK                    0
 /// Generic Error. Most likely one or more of the Escape calls to the driver failed!
 #define ADL_ERR                    -1
-
+/// ADL not initialized
+#define ADL_ERR_NOT_INIT            -2
+/// One of the parameter passed is invalid
+#define ADL_ERR_INVALID_PARAM            -3
+/// One of the parameter size is invalid
+#define ADL_ERR_INVALID_PARAM_SIZE        -4
+/// Invalid ADL index passed
+#define ADL_ERR_INVALID_ADL_IDX            -5
+/// Invalid controller index passed
+#define ADL_ERR_INVALID_CONTROLLER_IDX        -6
+/// Invalid display index passed
+#define ADL_ERR_INVALID_DIPLAY_IDX        -7
 /// Function  not supported by the driver
 #define ADL_ERR_NOT_SUPPORTED            -8
-
-/// Defines the maximum string length
-#define ADL_MAX_PATH                                    256
+/// Null Pointer error
+#define ADL_ERR_NULL_POINTER            -9
+/// Call can't be made due to disabled adapter
+#define ADL_ERR_DISABLED_ADAPTER        -10
+/// Invalid Callback
+#define ADL_ERR_INVALID_CALLBACK            -11
+/// Display Resource conflict
+#define ADL_ERR_RESOURCE_CONFLICT                -12
+//Failed to update some of the values. Can be returned by set request that include multiple values if not all values were successfully committed.
+#define ADL_ERR_SET_INCOMPLETE                 -20
+/// There's no Linux XDisplay in Linux Console environment
+#define ADL_ERR_NO_XDISPLAY                    -21
 
 //values for ADLFanSpeedValue.iSpeedType
 #define ADL_DL_FANCTRL_SPEED_TYPE_PERCENT    1
@@ -37,9 +135,6 @@
 //values for ADLFanSpeedValue.iFlags
 #define ADL_DL_FANCTRL_FLAG_USER_DEFINED_SPEED   1
 
-//Define Performance Metrics Log max sensors number
-#define ADL_PMLOG_MAX_SENSORS  256
-
 /**
  * Declarations from adl_structures.h
  */
@@ -90,6 +185,7 @@ typedef struct AdapterInfo
     char strPNPString[ADL_MAX_PATH];
 /// It is generated from EnumDisplayDevices.
     int iOSDisplayIndex;
+
 #endif /* (_WIN32) || (_WIN64) */
 
 #if defined (LINUX)
@@ -181,90 +277,6 @@ typedef struct ADLFanSpeedValue
   int iFlags;
 } ADLFanSpeedValue;
 
-/////////////////////////////////////////////////////////////////////////////////////////////
-///\brief Structure containing information about the display device.
-///
-/// This structure is used to store display device information
-/// such as display index, type, name, connection status, mapped adapter and controller indexes,
-/// whether or not multiple VPUs are supported, local display connections or not (through Lasso), etc.
-/// This information can be returned to the user. Alternatively, it can be used to access various driver calls to set
-/// or fetch various display device related settings upon the user's request.
-/// \nosubgrouping
-////////////////////////////////////////////////////////////////////////////////////////////
-typedef struct ADLDisplayID
-{
-/// The logical display index belonging to this adapter.
-    int iDisplayLogicalIndex;
-
-///\brief The physical display index.
-/// For example, display index 2 from adapter 2 can be used by current adapter 1.\n
-/// So current adapter may enumerate this adapter as logical display 7 but the physical display
-/// index is still 2.
-    int iDisplayPhysicalIndex;
-
-/// The persistent logical adapter index for the display.
-    int iDisplayLogicalAdapterIndex;
-
-///\brief The persistent physical adapter index for the display.
-/// It can be the current adapter or a non-local adapter. \n
-/// If this adapter index is different than the current adapter,
-/// the Display Non Local flag is set inside DisplayInfoValue.
-    int iDisplayPhysicalAdapterIndex;
-} ADLDisplayID, *LPADLDisplayID;
-
-/////////////////////////////////////////////////////////////////////////////////////////////
-///\brief Structure containing information about the display device.
-///
-/// This structure is used to store various information about the display device.  This
-/// information can be returned to the user, or used to access various driver calls to set
-/// or fetch various display-device-related settings upon the user's request
-/// \nosubgrouping
-////////////////////////////////////////////////////////////////////////////////////////////
-typedef struct ADLDisplayInfo
-{
-/// The DisplayID structure
-    ADLDisplayID displayID;
-
-///\deprecated The controller index to which the display is mapped.\n Will not be used in the future\n
-    int  iDisplayControllerIndex;
-
-/// The display's EDID name.
-    char strDisplayName[ADL_MAX_PATH];
-
-/// The display's manufacturer name.
-    char strDisplayManufacturerName[ADL_MAX_PATH];
-
-/// The Display type. For example: CRT, TV, CV, DFP.
-    int  iDisplayType;
-
-/// The display output type. For example: HDMI, SVIDEO, COMPONMNET VIDEO.
-    int  iDisplayOutputType;
-
-/// The connector type for the device.
-    int  iDisplayConnector;
-
-///\brief The bit mask identifies the number of bits ADLDisplayInfo is currently using. \n
-/// It will be the sum all the bit definitions in ADL_DISPLAY_DISPLAYINFO_xxx.
-    int  iDisplayInfoMask;
-
-/// The bit mask identifies the display status. \ref define_displayinfomask
-    int  iDisplayInfoValue;
-} ADLDisplayInfo, *LPADLDisplayInfo;
-
-/////////////////////////////////////////////////////////////////////////////////////////////
-/// \brief Structure containing information about the BIOS.
-///
-/// This structure is used to store various information about the Chipset.  This
-/// information can be returned to the user.
-/// \nosubgrouping
-////////////////////////////////////////////////////////////////////////////////////////////
-typedef struct ADLBiosInfo
-{
-    char strPartNumber[ADL_MAX_PATH];    ///< Part number.
-    char strVersion[ADL_MAX_PATH];        ///< Version number.
-    char strDate[ADL_MAX_PATH];        ///< BIOS date in yyyy/mm/dd hh:mm format.
-} ADLBiosInfo, *LPADLBiosInfo;
-
 /////////////////////////////////////////////////////////////////////////////////////////////
 ///\brief Structure containing information about current power management related activity.
 ///
@@ -346,7 +358,7 @@ typedef struct ADLODParameters
 /// This structure is used to store information about Overdrive 6 fan speed information
 /// \nosubgrouping
 ////////////////////////////////////////////////////////////////////////////////////////////
-typedef struct _ADLOD6FanSpeedInfo
+typedef struct ADLOD6FanSpeedInfo
 {
     /// Contains a bitmap of the valid fan speed type flags.  Possible values: \ref ADL_OD6_FANSPEED_TYPE_PERCENT, \ref ADL_OD6_FANSPEED_TYPE_RPM, \ref ADL_OD6_FANSPEED_USER_DEFINED
     int     iSpeedType;
@@ -368,7 +380,7 @@ typedef struct _ADLOD6FanSpeedInfo
 /// This structure is used to store information about Overdrive 6 fan speed value
 /// \nosubgrouping
 ////////////////////////////////////////////////////////////////////////////////////////////
-typedef struct _ADLOD6FanSpeedValue
+typedef struct ADLOD6FanSpeedValue
 {
     /// Indicates the units of the fan speed.  Possible values: \ref ADL_OD6_FANSPEED_TYPE_PERCENT, \ref ADL_OD6_FANSPEED_TYPE_RPM
     int     iSpeedType;
@@ -388,7 +400,7 @@ typedef struct _ADLOD6FanSpeedValue
 /// This structure is used to store information about current Overdrive 6 performance status.
 /// \nosubgrouping
 ////////////////////////////////////////////////////////////////////////////////////////////
-typedef struct _ADLOD6CurrentStatus
+typedef struct ADLOD6CurrentStatus
 {
     /// Current engine clock in 10 KHz.
     int     iEngineClock;
@@ -419,7 +431,7 @@ typedef struct _ADLOD6CurrentStatus
 /// This structure is used to store information about Overdrive 6 clock range
 /// \nosubgrouping
 ////////////////////////////////////////////////////////////////////////////////////////////
-typedef struct _ADLOD6ParameterRange
+typedef struct ADLOD6ParameterRange
 {
     /// The starting value of the clock range
     int     iMin;
@@ -436,7 +448,7 @@ typedef struct _ADLOD6ParameterRange
 /// This structure is used to store information about Overdrive 6 capabilities
 /// \nosubgrouping
 ////////////////////////////////////////////////////////////////////////////////////////////
-typedef struct _ADLOD6Capabilities
+typedef struct ADLOD6Capabilities
 {
     /// Contains a bitmap of the OD6 capability flags.  Possible values: \ref ADL_OD6_CAPABILITY_SCLK_CUSTOMIZATION,
     /// \ref ADL_OD6_CAPABILITY_MCLK_CUSTOMIZATION, \ref ADL_OD6_CAPABILITY_GPU_ACTIVITY_MONITOR
@@ -487,7 +499,7 @@ typedef struct ADLODPerformanceLevel
 /// This structure is used to store information about Overdrive 6 clock values.
 /// \nosubgrouping
 ////////////////////////////////////////////////////////////////////////////////////////////
-typedef struct _ADLOD6PerformanceLevel
+typedef struct ADLOD6PerformanceLevel
 {
     /// Engine (core) clock.
     int iEngineClock;
@@ -504,7 +516,7 @@ typedef struct _ADLOD6PerformanceLevel
 /// are contained in the aLevels array.
 /// \nosubgrouping
 ////////////////////////////////////////////////////////////////////////////////////////////
-typedef struct _ADLOD6StateInfo
+typedef struct ADLOD6StateInfo
 {
     /// Number of levels.  OD6 uses clock ranges instead of discrete performance levels.
     /// iNumberOfPerformanceLevels is always 2.  The 1st level indicates the minimum clocks
@@ -544,63 +556,18 @@ typedef struct ADLODPerformanceLevels
 /// This structure is used to store information about Performance Metrics data output
 /// \nosubgrouping
 ////////////////////////////////////////////////////////////////////////////////////////////
-typedef struct _ADLSingleSensorData
+typedef struct ADLSingleSensorData
 {
     int supported;
     int  value;
 } ADLSingleSensorData;
 
-typedef struct _ADLPMLogDataOutput
+typedef struct ADLPMLogDataOutput
 {
     int size;
     ADLSingleSensorData sensors[ADL_PMLOG_MAX_SENSORS];
 }ADLPMLogDataOutput;
 
-typedef enum _ADLSensorType
-{
-	SENSOR_MAXTYPES = 0,
-	PMLOG_CLK_GFXCLK = 1,
-	PMLOG_CLK_MEMCLK = 2,
-	PMLOG_CLK_SOCCLK = 3,
-	PMLOG_CLK_UVDCLK1 = 4,
-	PMLOG_CLK_UVDCLK2 = 5,
-	PMLOG_CLK_VCECLK = 6,
-	PMLOG_CLK_VCNCLK = 7,
-	PMLOG_TEMPERATURE_EDGE = 8,
-	PMLOG_TEMPERATURE_MEM = 9,
-	PMLOG_TEMPERATURE_VRVDDC = 10,
-	PMLOG_TEMPERATURE_VRMVDD = 11,
-	PMLOG_TEMPERATURE_LIQUID = 12,
-	PMLOG_TEMPERATURE_PLX = 13,
-	PMLOG_FAN_RPM = 14,
-	PMLOG_FAN_PERCENTAGE = 15,
-	PMLOG_SOC_VOLTAGE = 16,
-	PMLOG_SOC_POWER = 17,
-	PMLOG_SOC_CURRENT = 18,
-	PMLOG_INFO_ACTIVITY_GFX = 19,
-	PMLOG_INFO_ACTIVITY_MEM = 20,
-	PMLOG_GFX_VOLTAGE = 21,
-	PMLOG_MEM_VOLTAGE = 22,
-	PMLOG_ASIC_POWER = 23,
-	PMLOG_TEMPERATURE_VRSOC = 24,
-	PMLOG_TEMPERATURE_VRMVDD0 = 25,
-	PMLOG_TEMPERATURE_VRMVDD1 = 26,
-	PMLOG_TEMPERATURE_HOTSPOT = 27,
-        PMLOG_TEMPERATURE_GFX = 28,
-        PMLOG_TEMPERATURE_SOC = 29,
-        PMLOG_GFX_POWER = 30,
-        PMLOG_GFX_CURRENT = 31,
-        PMLOG_TEMPERATURE_CPU = 32,
-        PMLOG_CPU_POWER = 33,
-        PMLOG_CLK_CPUCLK = 34,
-        PMLOG_THROTTLER_STATUS = 35,
-        PMLOG_CLK_VCN1CLK1 = 36,
-        PMLOG_CLK_VCN1CLK2 = 37,
-        PMLOG_SMART_POWERSHIFT_CPU = 38,
-        PMLOG_SMART_POWERSHIFT_DGPU = 39,
-	PMLOG_MAX_SENSORS_REAL
-} ADLSensorType;
-
 /// \brief Handle to ADL client context.
 ///
 ///  ADL clients obtain context handle from initial call to \ref ADL2_Main_Control_Create.
@@ -628,7 +595,6 @@ typedef int HM_ADAPTER_ADL;
 typedef int (ADL_API_CALL *ADL_ADAPTER_ACTIVE_GET ) ( int, int* );
 typedef int (ADL_API_CALL *ADL_ADAPTER_ADAPTERINFO_GET ) ( LPAdapterInfo, int );
 typedef int (ADL_API_CALL *ADL_ADAPTER_NUMBEROFADAPTERS_GET ) ( int* );
-typedef int (ADL_API_CALL *ADL_DISPLAY_DISPLAYINFO_GET ) ( int, int *, ADLDisplayInfo **, int );
 typedef int (ADL_API_CALL *ADL_MAIN_CONTROL_CREATE )(ADL_MAIN_MALLOC_CALLBACK, int );
 typedef int (ADL_API_CALL *ADL_MAIN_CONTROL_DESTROY )();
 typedef int (ADL_API_CALL *ADL_OVERDRIVE5_CURRENTACTIVITY_GET ) (int iAdapterIndex, ADLPMActivity *lpActivity);
@@ -654,7 +620,6 @@ typedef struct hm_adl_lib
   ADL_ADAPTER_ACTIVE_GET ADL_Adapter_Active_Get;
   ADL_ADAPTER_ADAPTERINFO_GET ADL_Adapter_AdapterInfo_Get;
   ADL_ADAPTER_NUMBEROFADAPTERS_GET ADL_Adapter_NumberOfAdapters_Get;
-  ADL_DISPLAY_DISPLAYINFO_GET ADL_Display_DisplayInfo_Get;
   ADL_MAIN_CONTROL_CREATE ADL_Main_Control_Create;
   ADL_MAIN_CONTROL_DESTROY ADL_Main_Control_Destroy;
   ADL_OVERDRIVE5_CURRENTACTIVITY_GET ADL_Overdrive5_CurrentActivity_Get;
diff --git a/src/ext_ADL.c b/src/ext_ADL.c
index 08ba55e1e..9676c916f 100644
--- a/src/ext_ADL.c
+++ b/src/ext_ADL.c
@@ -53,7 +53,6 @@ int adl_init (void *hashcat_ctx)
   HC_LOAD_FUNC(adl, ADL_Adapter_Active_Get, ADL_ADAPTER_ACTIVE_GET, ADL, 0);
   HC_LOAD_FUNC(adl, ADL_Adapter_AdapterInfo_Get, ADL_ADAPTER_ADAPTERINFO_GET, ADL, 0);
   HC_LOAD_FUNC(adl, ADL_Adapter_NumberOfAdapters_Get, ADL_ADAPTER_NUMBEROFADAPTERS_GET, ADL, 0);
-  HC_LOAD_FUNC(adl, ADL_Display_DisplayInfo_Get, ADL_DISPLAY_DISPLAYINFO_GET, ADL, 0);
   HC_LOAD_FUNC(adl, ADL_Main_Control_Create, ADL_MAIN_CONTROL_CREATE, ADL, 0);
   HC_LOAD_FUNC(adl, ADL_Main_Control_Destroy, ADL_MAIN_CONTROL_DESTROY, ADL, 0);
   HC_LOAD_FUNC(adl, ADL_Overdrive5_CurrentActivity_Get, ADL_OVERDRIVE5_CURRENTACTIVITY_GET, ADL, 0);
diff --git a/src/hwmon.c b/src/hwmon.c
index ee5a001e8..4e6a4f986 100644
--- a/src/hwmon.c
+++ b/src/hwmon.c
@@ -682,7 +682,21 @@ int hm_get_buslanes_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int back
             return PMActivity.iCurrentBusLanes;
           }
 
-          // NO OD8
+          if (hwmon_ctx->hm_device[backend_device_idx].od_version == 8)
+          {
+            ADLPMLogDataOutput odlpDataOutput;
+
+            memset (&odlpDataOutput, 0, sizeof (ADLPMLogDataOutput));
+
+            if (hm_ADL2_New_QueryPMLogData_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, &odlpDataOutput) == -1)
+            {
+              hwmon_ctx->hm_device[backend_device_idx].buslanes_get_supported = false;
+
+              return -1;
+            }
+
+            return odlpDataOutput.sensors[PMLOG_BUS_LANES].value;
+          }
         }
 
         if (hwmon_ctx->hm_sysfs_amdgpu)

From 25f1c12e3c7ff4efc71b9dd10aa2de7f81dfc122 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Wed, 28 Jul 2021 07:51:27 +0200
Subject: [PATCH 21/24] SCRYPT Kernels: Add more optimized values for some new
 NV/AMD GPUs and new semi-automated derivation process description Blowfish
 Kernels: Backport optimizations reducing bank conflicts from bcrypt to
 Password Safe v2 and Open Document Format (ODF) 1.1

---
 docs/changes.txt           |  4 ++
 hashcat.hctune             | 88 ++++++++++++++++++++++++++------------
 src/modules/module_09000.c | 63 +++++++++++++++++++++------
 3 files changed, 115 insertions(+), 40 deletions(-)

diff --git a/docs/changes.txt b/docs/changes.txt
index 0c31c4cdf..8cad8aa31 100644
--- a/docs/changes.txt
+++ b/docs/changes.txt
@@ -19,6 +19,7 @@
 ##
 
 - AMD GPUs: Add inline assembly code for md5crypt/sha256crypt, PDF 1.7, 7-Zip, RAR3, Samsung Android and Windows Phone 8+
+- AMD GPUs: On Apple OpenCL platform, we ask for the preferred kernel thread size rather than hard-coding 32
 - Blake Kernels: Optimize BLAKE2B_ROUND() 64 bit rotates giving a 5% performance increase
 - Blowfish Kernels: Backport optimizations reducing bank conflicts from bcrypt to Password Safe v2 and Open Document Format (ODF) 1.1
 - Brain Session: Adds hashconfig specific opti_type and opts_type parameters to hashcat session computation to cover features like -O and -M
@@ -31,7 +32,10 @@
 ##
 
 - ADL: Updated support for AMD Display Library to 15.0, updated datatypes and added support for OverDrive 7 and 8 based GPUs
+- AMD Driver: Updated requirement for AMD Linux driver to ROCm 4.4 or later because of new HIP Interface
+- AMD Driver: Updated requirement for AMD Windows driver to Adrenalin 21.2.1 or later because of new ADL library
 - Commandline: Throw an error if separator character given by the user with -p option is not exactly 1 byte
+- ECC secp256k1: Removed the inline assembly code for AMD GPUs because the latest JIT compilers optimize it with the same efficiency
 - HIP Kernels: Got rid of hip/hip_runtime.h dependancy to enable more easy integration of the HIP backend on Windows
 - Kernel Cache: Add kernel threads into hash computation which is later used in the kernel cache filename
 - SCRYPT Kernels: Add more optimized values for some new NV/AMD GPUs
diff --git a/hashcat.hctune b/hashcat.hctune
index 2e1951eef..2b99ee149 100644
--- a/hashcat.hctune
+++ b/hashcat.hctune
@@ -279,7 +279,14 @@ GeForce_RTX_3090                                ALIAS_nv_sm50_or_higher
 ##
 
 Device_738c                                     ALIAS_AMD_MI100
+
+AMD_Radeon_(TM)_RX_480_Graphics                 ALIAS_AMD_RX480
+
+Vega_10_XL/XT_[Radeon_RX_Vega_56/64]            ALIAS_AMD_Vega64
+AMD_Radeon_Vega_64                              ALIAS_AMD_Vega64
+
 Device_73bf                                     ALIAS_AMD_RX6900XT
+AMD_Radeon_RX_6900_XT                           ALIAS_AMD_RX6900XT
 
 #############
 ## ENTRIES ##
@@ -486,22 +493,41 @@ DEVICE_TYPE_GPU                                 *       14500   1       A
 ##
 ## Find the ideal -n value, then store it here along with the proper compute device name.
 ## Formatting guidelines are availabe at the top of this document.
+##
+## -------------------------------------------------
+##
+## You can also ignore all theoretical derivations and semi-automate the process in the real scenario (I prefer this approach):
+##
+## 1. For example, to find the value for 8900, first create a valid hash for 8900 as follows:
+##
+## $ ./hashcat --example-hashes -m 8900 | grep Example.Hash | grep -v Format | cut -b 25- > tmp.hash.8900
+##
+## 2. Now let it iterate through all -n values up to a certain point. In this case, I'm using 200, but in general the upper bound should be at least twice the number of multiprocessors of the device. If you don't mind the longer runtime, you can simply leave it at 200.
+##
+## $ export i=1; while [ $i -ne 201 ]; do echo $i; ./hashcat --quiet tmp.hash.8900 --keep-guessing --self-test-disable --markov-disable --restore-disable --outfile-autohex-disable --wordlist-autohex-disable --potfile-disable --logfile-disable --hwmon-disable --status --status-timer 1 --runtime 28 --machine-readable --optimized-kernel-enable --workload-profile 3 --hash-type 8900 --attack-mode 3 ?b?b?b?b?b?b?b --backend-devices 1 --force -n $i; i=$(($i+1)); done | tee x
+##
+## 3. Determine the highest measured H/s speed. But don't just use the highest value. Instead, use the number that seems most stable, usually at the beginning.
+##
+## $ grep "$(printf 'STATUS\t3')" x | cut -f4 -d$'\t' | sort -n | tail
+##
+## 4. To map the speed you have chosen back to its -n value, simply search for that speed in the "x" file. Then scroll up to the start of the block in which you found it. The -n value is the single number printed just before the block starts. If multiple blocks show the same speed, choose the lowest -n value.
+##
 
 ## 4GB
-GeForce_GTX_980                                 *       8900    1      28       A
+GeForce_GTX_980                                 *       8900    1      29       A
 GeForce_GTX_980                                 *       9300    1     128       A
-GeForce_GTX_980                                 *       15700   1      28       A
-GeForce_GTX_980                                 *       22700   1      28       A
+GeForce_GTX_980                                 *       15700   1      24       A
+GeForce_GTX_980                                 *       22700   1      29       A
 
 ## 8GB
-GeForce_GTX_1080                                *       8900    1      14       A
+GeForce_GTX_1080                                *       8900    1      15       A
 GeForce_GTX_1080                                *       9300    1     256       A
-GeForce_GTX_1080                                *       15700   1      14       A
-GeForce_GTX_1080                                *       22700   1      14       A
+GeForce_GTX_1080                                *       15700   1      28       A
+GeForce_GTX_1080                                *       22700   1      15       A
 
 ## 11GB
 GeForce_RTX_2080_Ti                             *       8900    1      68       A
-GeForce_RTX_2080_Ti                             *       9300    1     532       A
+GeForce_RTX_2080_Ti                             *       9300    1     528       A
 GeForce_RTX_2080_Ti                             *       15700   1      68       A
 GeForce_RTX_2080_Ti                             *       22700   1      68       A
 
@@ -509,7 +535,7 @@ GeForce_RTX_2080_Ti                             *       22700   1      68
 GeForce_RTX_3060_Ti                             *       8900    1      51       A
 GeForce_RTX_3060_Ti                             *       9300    1     256       A
 GeForce_RTX_3060_Ti                             *       15700   1      11       A
-GeForce_RTX_3060_Ti                             *       22700   1      43       A
+GeForce_RTX_3060_Ti                             *       22700   1      51       A
 
 ## 8GB
 GeForce_RTX_3070                                *       8900    1      46       A
@@ -517,26 +543,32 @@ GeForce_RTX_3070                                *       9300    1     368
 GeForce_RTX_3070                                *       15700   1      22       A
 GeForce_RTX_3070                                *       22700   1      46       A
 
+## 24GB
+GeForce_RTX_3090                                *       8900    1      82       A
+GeForce_RTX_3090                                *       9300    1     984       A
+GeForce_RTX_3090                                *       15700   1      82       A
+GeForce_RTX_3090                                *       22700   1      82       A
+
 ## 4GB
-AMD_Radeon_(TM)_RX_480_Graphics                 *       8900    1      14       A
-AMD_Radeon_(TM)_RX_480_Graphics                 *       9300    1     126       A
-AMD_Radeon_(TM)_RX_480_Graphics                 *       15700   1      14       A
-AMD_Radeon_(TM)_RX_480_Graphics                 *       22700   1      14       A
+ALIAS_AMD_RX480                                 *       8900    1      15       A
+ALIAS_AMD_RX480                                 *       9300    1     232       A
+ALIAS_AMD_RX480                                 *       15700   1      58       A
+ALIAS_AMD_RX480                                 *       22700   1      15       A
 
 ## 8GB
-Vega_10_XL/XT_[Radeon_RX_Vega_56/64]            *       8900    1      28       A
-Vega_10_XL/XT_[Radeon_RX_Vega_56/64]            *       9300    1     442       A
-Vega_10_XL/XT_[Radeon_RX_Vega_56/64]            *       15700   1      28       A
-Vega_10_XL/XT_[Radeon_RX_Vega_56/64]            *       22700   1      28       A
-
-## 32GB, WF64
-ALIAS_AMD_MI100                                 *       8900    1      76       A
-ALIAS_AMD_MI100                                 *       9300    1     288       A
-ALIAS_AMD_MI100                                 *       15700   1      76       A
-ALIAS_AMD_MI100                                 *       22700   1      76       A
-
-## 16GB, WF32
-ALIAS_AMD_RX6900XT                              *       8900    1      62       A
-ALIAS_AMD_RX6900XT                              *       9300    1     704       A
-ALIAS_AMD_RX6900XT                              *       15700   1      62       A
-ALIAS_AMD_RX6900XT                              *       22700   1      62       A
+ALIAS_AMD_Vega64                                *       8900    1      31       A
+ALIAS_AMD_Vega64                                *       9300    1     440       A
+ALIAS_AMD_Vega64                                *       15700   1      53       A
+ALIAS_AMD_Vega64                                *       22700   1      31       A
+
+## 32GB
+ALIAS_AMD_MI100                                 *       8900    1      79       A
+ALIAS_AMD_MI100                                 *       9300    1    1000       A
+ALIAS_AMD_MI100                                 *       15700   1     120       A
+ALIAS_AMD_MI100                                 *       22700   1      79       A
+
+## 16GB
+ALIAS_AMD_RX6900XT                              *       8900    1      59       A
+ALIAS_AMD_RX6900XT                              *       9300    1     720       A
+ALIAS_AMD_RX6900XT                              *       15700   1      56       A
+ALIAS_AMD_RX6900XT                              *       22700   1      59       A
diff --git a/src/modules/module_09000.c b/src/modules/module_09000.c
index 3e3158c0a..28963a1fc 100644
--- a/src/modules/module_09000.c
+++ b/src/modules/module_09000.c
@@ -22,7 +22,8 @@ static const u64   KERN_TYPE      = 9000;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE;
 static const u64   OPTS_TYPE      = OPTS_TYPE_PT_GENERATE_LE
                                   | OPTS_TYPE_BINARY_HASHFILE
-                                  | OPTS_TYPE_AUTODETECT_DISABLE;
+                                  | OPTS_TYPE_AUTODETECT_DISABLE
+                                  | OPTS_TYPE_DYNAMIC_SHARED;
 static const u32   SALT_TYPE      = SALT_TYPE_EMBEDDED;
 static const char *ST_PASS        = "hashcat";
 static const char *ST_HASH        = "0a3f352686e5eb5be173e668a4fff5cd5df420927e1da2d5d4052340160637e3e6a5a92841a188ed240e13b919f3d91694bd4c0acba79271e9c08a83ea5ad387cbb74d5884066a1cb5a8caa80d847079168f84823847c631dbe3a834f1bc496acfebac3bff1608bf1c857717f8f428e07b5e2cb12aaeddfa83d7dcb6d840234d08b84f8ca6c6e562af73eea13148f7902bcaf0220d3e36eeeff1d37283dc421483a2791182614ebb";
@@ -75,16 +76,25 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
 {
   char *jit_build_options = NULL;
 
+  // this mode heavily depends on the available shared memory size
+  // note the kernel needs some special code changes in order to make use of the post-48k memory region
+  // we need to set some macros
+
+  bool use_dynamic = false;
+
+  if (device_param->is_cuda == true)
+  {
+    use_dynamic = true;
+  }
+
   // this uses some nice feedback effect.
   // based on the device_local_mem_size the reqd_work_group_size in the kernel is set to some value
   // which is then is read from the opencl host in the kernel_preferred_wgs_multiple1/2/3 result.
   // therefore we do not need to set module_kernel_threads_min/max except for CPU, where the threads are set to fixed 1.
 
-  u32 fixed_local_size = 0;
-
   if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU)
   {
-    fixed_local_size = 1;
+    hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u", 1);
   }
   else
   {
@@ -100,29 +110,58 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
 
       if (device_param->is_opencl == true)
       {
-        overhead = 4;
+        overhead = 1;
       }
     }
 
     if (user_options->kernel_threads_chgd == true)
     {
-      fixed_local_size = user_options->kernel_threads;
+      u32 fixed_local_size = user_options->kernel_threads;
+
+      if (use_dynamic == true)
+      {
+        if ((fixed_local_size * 4096) > device_param->kernel_dynamic_local_mem_size_memset)
+        {
+          // otherwise out-of-bound reads
 
-      // otherwise out-of-bound reads
+          fixed_local_size = device_param->kernel_dynamic_local_mem_size_memset / 4096;
+        }
 
-      if ((fixed_local_size * 4096) > (device_param->device_local_mem_size - overhead))
+        hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u -D DYNAMIC_LOCAL", fixed_local_size);
+      }
+      else
       {
-        fixed_local_size = (device_param->device_local_mem_size - overhead) / 4096;
+        if ((fixed_local_size * 4096) > (device_param->device_local_mem_size - overhead))
+        {
+          // otherwise out-of-bound reads
+
+          fixed_local_size = (device_param->device_local_mem_size - overhead) / 4096;
+        }
+
+        hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u", fixed_local_size);
       }
     }
     else
     {
-      fixed_local_size = (device_param->device_local_mem_size - overhead) / 4096;
+      if (use_dynamic == true)
+      {
+        // using kernel_dynamic_local_mem_size_memset is a bit hackish.
+        // we had to brute-force this value out of an already loaded CUDA function.
+        // there's no official way to query for this value.
+
+        const u32 fixed_local_size = device_param->kernel_dynamic_local_mem_size_memset / 4096;
+
+        hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u -D DYNAMIC_LOCAL", fixed_local_size);
+      }
+      else
+      {
+        const u32 fixed_local_size = (device_param->device_local_mem_size - overhead) / 4096;
+
+        hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u", fixed_local_size);
+      }
     }
   }
 
-  hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u", fixed_local_size);
-
   return jit_build_options;
 }
 

From 9c0a37accf9709dfb4613d1b7d29fb62743fa253 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Wed, 28 Jul 2021 07:56:34 +0200
Subject: [PATCH 22/24] Update driver requirement

---
 docs/readme.txt | 4 ++--
 src/backend.c   | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/docs/readme.txt b/docs/readme.txt
index ca10a4644..d70edfcb7 100644
--- a/docs/readme.txt
+++ b/docs/readme.txt
@@ -10,8 +10,8 @@
 hashcat v6.2.3
 ==============
 
-AMD GPUs on Linux require "RadeonOpenCompute (ROCm)" Software Platform (3.1 or later)
-AMD GPUs on Windows require "AMD Radeon Adrenalin 2020 Edition" (20.2.2 or later)
+AMD GPUs on Linux require "AMD ROCm" (4.4 or later)
+AMD GPUs on Windows require "AMD Radeon Adrenalin 2020 Edition" (21.2.1 or later)
 Intel CPUs require "OpenCL Runtime for Intel Core and Intel Xeon Processors" (16.1.1 or later)
 NVIDIA GPUs require "NVIDIA Driver" (440.64 or later) and "CUDA Toolkit" (9.0 or later)
 
diff --git a/src/backend.c b/src/backend.c
index cddd8cb5d..eef57d412 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -7224,10 +7224,10 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx)
 
       #if defined (__linux__)
       event_log_warning (hashcat_ctx, "* AMD GPUs on Linux require this driver:");
-      event_log_warning (hashcat_ctx, "  \"RadeonOpenCompute (ROCm)\" Software Platform (3.1 or later)");
+      event_log_warning (hashcat_ctx, "  \"AMD ROCm\" (4.4 or later)");
       #elif defined (_WIN)
       event_log_warning (hashcat_ctx, "* AMD GPUs on Windows require this driver:");
-      event_log_warning (hashcat_ctx, "  \"AMD Radeon Adrenalin 2020 Edition\" (20.2.2 or later)");
+      event_log_warning (hashcat_ctx, "  \"AMD Radeon Adrenalin 2020 Edition\" (21.2.1 or later)");
       #endif
 
       event_log_warning (hashcat_ctx, "* Intel CPUs require this runtime:");
@@ -7549,10 +7549,10 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx)
 
     #if defined (__linux__)
     event_log_warning (hashcat_ctx, "* AMD GPUs on Linux require this driver:");
-    event_log_warning (hashcat_ctx, "  \"RadeonOpenCompute (ROCm)\" Software Platform (3.1 or later)");
+    event_log_warning (hashcat_ctx, "  \"AMD ROCm\" (4.4 or later)");
     #elif defined (_WIN)
     event_log_warning (hashcat_ctx, "* AMD GPUs on Windows require this driver:");
-    event_log_warning (hashcat_ctx, "  \"AMD Radeon Adrenalin 2020 Edition\" (20.2.2 or later)");
+    event_log_warning (hashcat_ctx, "  \"AMD Radeon Adrenalin 2020 Edition\" (21.2.1 or later)");
     #endif
 
     event_log_warning (hashcat_ctx, "* Intel CPUs require this runtime:");

From a4299b74afc3c16d2243487a612d0aac21399c35 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Wed, 28 Jul 2021 20:50:05 +0200
Subject: [PATCH 23/24] Memory Management: Refactored the code responsible for
 limiting kernel accel in order to avoid out of -host- memory situations

---
 docs/changes.txt            |  1 +
 src/backend.c               | 40 ++++++++++++++++++++++++-------------
 src/modules/module_17200.c  |  2 +-
 src/modules/module_17220.c  |  2 +-
 src/modules/module_17225.c  |  2 +-
 src/modules/module_21800.c  |  3 ++-
 tools/cryptoloop2hashcat.py |  0
 7 files changed, 32 insertions(+), 18 deletions(-)
 mode change 100644 => 100755 tools/cryptoloop2hashcat.py

diff --git a/docs/changes.txt b/docs/changes.txt
index 8cad8aa31..3acde9a61 100644
--- a/docs/changes.txt
+++ b/docs/changes.txt
@@ -38,6 +38,7 @@
 - ECC secp256k1: Removed the inline assembly code for AMD GPUs because the latest JIT compilers optimize it with the same efficiency
 - HIP Kernels: Got rid of hip/hip_runtime.h dependancy to enable more easy integration of the HIP backend on Windows
 - Kernel Cache: Add kernel threads into hash computation which is later used in the kernel cache filename
+- Memory Management: Refactored the code responsible for limiting kernel accel in order to avoid out of -host- memory situations
 - SCRYPT Kernels: Add more optimized values for some new NV/AMD GPUs
 
 ##
diff --git a/src/backend.c b/src/backend.c
index aa28e02d5..63e0beb3b 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -14882,14 +14882,34 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
     u32 kernel_accel_min = device_param->kernel_accel_min;
     u32 kernel_accel_max = device_param->kernel_accel_max;
 
-    /**
-     * We need a kernel accel limiter otherwise we will allocate too much memory (Example 4* GTX1080):
-     * 4 (gpus) * 260 (sizeof pw_t) * 3 (pws, pws_comp, pw_pre) * 20 (MCU) * 1024 (threads) * 1024 (accel) = 65,431,142,400 bytes RAM!!
-     */
+    // We need to deal with the situation that the total video RAM > total host RAM.
+    // Especially in multi-GPU setups that is very likely.
+    // The buffers which actually take a lot of memory (except for SCRYPT) are the ones for the password candidates.
+    // They are stored in an aligned order for better performance, but this increases the memory pressure.
+    // The best way to keep these buffers to a reasonable size is by controlling the kernel_accel parameter.
+    //
+    // In theory this check could be disabled if we check if total video RAM < total host RAM,
+    // but at this point of initialization phase we don't have this information available.
+
+    // We need to hard-code some value, let's assume that (in 2021) the host has at least 8GB ram per active GPU
+
+    const u64 SIZE_8GB = 8UL * 1024 * 1024 * 1024;
+
+    u64 accel_limit = SIZE_8GB;
+
+    // this is device_processors * kernel_threads
 
-    const int max_gb = (hashconfig->opts_type & OPTS_TYPE_MP_MULTI_DISABLE) ? 1024 : 64;
+    accel_limit /= device_param->hardware_power;
 
-    const u32 accel_limit = CEILDIV ((max_gb * 1024), kernel_threads); // this should result in less than 4GB per GPU, but allow higher accel in case user reduces the threads manually using -T
+    // single password candidate size
+
+    accel_limit /= sizeof (pw_t);
+
+    // pws[], pws_comp[] and pw_pre[] are some large blocks with password candidates
+
+    accel_limit /= 3;
+
+    // I think vector size is not required because vector_size is dividing the pws_cnt in run_kernel()
 
     kernel_accel_max = MIN (kernel_accel_max, accel_limit);
 
@@ -14916,12 +14936,6 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
     u64 size_brain_link_out = 4;
     #endif
 
-    // instead of a thread limit we can also use a memory limit.
-    // this value should represent a reasonable amount of memory a host system has per GPU.
-    // note we're allocating 3 blocks of that size.
-
-    const u64 PWS_SPACE = 1024ULL * 1024ULL * 1024ULL;
-
     while (kernel_accel_max >= kernel_accel_min)
     {
       const u64 kernel_power_max = device_param->hardware_power * kernel_accel_max;
@@ -14971,8 +14985,6 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
       int memory_limit_hit = 0;
 
-      if (size_pws > PWS_SPACE) memory_limit_hit = 1;
-
       // sometimes device_available_mem and device_maxmem_alloc reported back from the opencl runtime are a bit inaccurate.
       // let's add some extra space just to be sure.
       // now depends on the kernel-accel value (where scrypt and similar benefits), but also hard minimum 64mb and maximum 1024mb limit
diff --git a/src/modules/module_17200.c b/src/modules/module_17200.c
index 44798110d..89123a26f 100644
--- a/src/modules/module_17200.c
+++ b/src/modules/module_17200.c
@@ -102,7 +102,7 @@ static const u32   HASH_CATEGORY  = HASH_CATEGORY_ARCHIVE;
 static const char *HASH_NAME      = "PKZIP (Compressed)";
 static const u64   KERN_TYPE      = 17200;
 static const u32   OPTI_TYPE      = 0;
-static const u64   OPTS_TYPE      = 0;
+static const u64   OPTS_TYPE      = OPTS_TYPE_NATIVE_THREADS;
 static const u32   SALT_TYPE      = SALT_TYPE_EMBEDDED;
 static const char *ST_PASS        = "hashcat";
 static const char *ST_HASH        = "$pkzip2$1*1*2*0*e3*1c5*eda7a8de*0*28*8*e3*eda7*5096*a9fc1f4e951c8fb3031a6f903e5f4e3211c8fdc4671547bf77f6f682afbfcc7475d83898985621a7af9bccd1349d1976500a68c48f630b7f22d7a0955524d768e34868880461335417ddd149c65a917c0eb0a4bf7224e24a1e04cf4ace5eef52205f4452e66ded937db9545f843a68b1e84a2e933cc05fb36d3db90e6c5faf1bee2249fdd06a7307849902a8bb24ec7e8a0886a4544ca47979a9dfeefe034bdfc5bd593904cfe9a5309dd199d337d3183f307c2cb39622549a5b9b8b485b7949a4803f63f67ca427a0640ad3793a519b2476c52198488e3e2e04cac202d624fb7d13c2*$/pkzip2$";
diff --git a/src/modules/module_17220.c b/src/modules/module_17220.c
index 9028040d9..991f39a0f 100644
--- a/src/modules/module_17220.c
+++ b/src/modules/module_17220.c
@@ -102,7 +102,7 @@ static const u32   HASH_CATEGORY  = HASH_CATEGORY_ARCHIVE;
 static const char *HASH_NAME      = "PKZIP (Compressed Multi-File)";
 static const u64   KERN_TYPE      = 17220;
 static const u32   OPTI_TYPE      = 0;
-static const u64   OPTS_TYPE      = 0;
+static const u64   OPTS_TYPE      = OPTS_TYPE_NATIVE_THREADS;
 static const u32   SALT_TYPE      = SALT_TYPE_EMBEDDED;
 static const char *ST_PASS        = "hashcat";
 static const char *ST_HASH        = "$pkzip2$3*1*1*0*8*24*a425*8827*d1730095cd829e245df04ebba6c52c0573d49d3bbeab6cb385b7fa8a28dcccd3098bfdd7*1*0*8*24*2a74*882a*51281ac874a60baedc375ca645888d29780e20d4076edd1e7154a99bde982152a736311f*2*0*e3*1c5*eda7a8de*0*29*8*e3*eda7*5096*1455781b59707f5151139e018bdcfeebfc89bc37e372883a7ec0670a5eafc622feb338f9b021b6601a674094898a91beac70e41e675f77702834ca6156111a1bf7361bc9f3715d77dfcdd626634c68354c6f2e5e0a7b1e1ce84a44e632d0f6e36019feeab92fb7eac9dda8df436e287aafece95d042059a1b27d533c5eab62c1c559af220dc432f2eb1a38a70f29e8f3cb5a207704274d1e305d7402180fd47e026522792f5113c52a116d5bb25b67074ffd6f4926b221555234aabddc69775335d592d5c7d22462b75de1259e8342a9ba71cb06223d13c7f51f13be2ad76352c3b8ed*$/pkzip2$";
diff --git a/src/modules/module_17225.c b/src/modules/module_17225.c
index 75c376c9e..fc82777f6 100644
--- a/src/modules/module_17225.c
+++ b/src/modules/module_17225.c
@@ -102,7 +102,7 @@ static const u32   HASH_CATEGORY  = HASH_CATEGORY_ARCHIVE;
 static const char *HASH_NAME      = "PKZIP (Mixed Multi-File)";
 static const u64   KERN_TYPE      = 17225;
 static const u32   OPTI_TYPE      = 0;
-static const u64   OPTS_TYPE      = 0;
+static const u64   OPTS_TYPE      = OPTS_TYPE_NATIVE_THREADS;
 static const u32   SALT_TYPE      = SALT_TYPE_EMBEDDED;
 static const char *ST_PASS        = "hashcat";
 static const char *ST_HASH        = "$pkzip2$3*1*1*0*0*24*3e2c*3ef8*0619e9d17ff3f994065b99b1fa8aef41c056edf9fa4540919c109742dcb32f797fc90ce0*1*0*8*24*431a*3f26*18e2461c0dbad89bd9cc763067a020c89b5e16195b1ac5fa7fb13bd246d000b6833a2988*2*0*23*17*1e3c1a16*2e4*2f*0*23*1e3c*3f2d*54ea4dbc711026561485bbd191bf300ae24fa0997f3779b688cdad323985f8d3bb8b0c*$/pkzip2$";
diff --git a/src/modules/module_21800.c b/src/modules/module_21800.c
index a3f3a00c2..c256d5d5c 100644
--- a/src/modules/module_21800.c
+++ b/src/modules/module_21800.c
@@ -24,7 +24,8 @@ static const u64   KERN_TYPE      = 21800;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
                                   | OPTI_TYPE_USES_BITS_64
                                   | OPTI_TYPE_SLOW_HASH_SIMD_LOOP;
-static const u64   OPTS_TYPE      = OPTS_TYPE_PT_GENERATE_LE;
+static const u64   OPTS_TYPE      = OPTS_TYPE_PT_GENERATE_LE
+                                  | OPTS_TYPE_NATIVE_THREADS;
 static const u32   SALT_TYPE      = SALT_TYPE_EMBEDDED;
 static const char *ST_PASS        = "hashcat";
 static const char *ST_HASH        = "$electrum$5*02170fee7c35f1ef3b229edc90fbd0793b688a0d6f41137a97aab2343d315cce16*94cf72d8f5d774932b414a3344984859e43721268d2eb35fa531de5a2fc7024b463c730a54f4f46229dd9fede5034b19ac415c2916e9c16b02094f845795df0c397ff76d597886b1f9e014ad1a8f64a3f617d9900aa645b3ba86f16ce542251fc22c41d93fa6bc118be96d9582917e19d2a299743331804cfc7ce2c035367b4cbcfb70adfb1e10a0f2795769f2165d8fd13daa8b45eeac495b5b63e91a87f63b42e483f84a881e49adecacf6519cb564694b42dd9fe80fcbc6cdb63cf5ae33f35255266f5c2524dd93d3cc15eba0f2ccdc3c109cc2d7e8f711b8b440f168caf8b005e8bcdfe694148e94a04d2a738f09349a96600bd8e8edae793b26ebae231022f24e96cb158db141ac40400a9e9ef099e673cfe017281537c57f82fb45c62bdb64462235a6eefb594961d5eb2c46537958e4d04250804c6e9f343ab7a0db07af6b8a9d1a6c5cfcd311b8fb8383ac9ed9d98d427d526c2f517fc97473bd87cb59899bd0e8fb8c57fa0f7e0d53daa57c972cf92764af4b1725a5fb8f504b663ec519731929b3caaa793d8ee74293eee27d0e208a60e26290bc546e6fa9ed865076e13febfea249729218c1b5752e912055fbf993fbac5df2cca2b37c5e0f9c30789858ceeb3c482a8db123966775aeed2eee2fc34efb160d164929f51589bff748ca773f38978bff3508d5a7591fb2d2795df983504a788071f469d78c88fd7899cabbc5804f458653d0206b82771a59522e1fa794d7de1536c51a437f5d6df5efd6654678e5794ca429b5752e1103340ed80786f1e9da7f5b39af628b2212e4d88cd36b8a7136d50a6b6e275ab406ba7c57cc70d77d01c4c16e9363901164fa92dc9e9b99219d5376f24862e775968605001e71b000e2c7123b4b43f3ca40db17efd729388782e46e64d43ccb947db4eb1473ff1a3836b74fe312cd1a33b73b8b8d80c087088932277773c329f2f66a01d6b3fc1e651c56959ebbed7b14a21b977f3acdedf1a0d98d519a74b50c39b3052d840106da4145345d86ec0461cddafacc2a4f0dd646457ad05bf04dcbcc80516a5c5ed14d2d639a70e77b686f19cbfb63f546d81ae19cc8ba35cce3f3b5b9602df25b678e14411fecec87b8347f5047513df415c6b1a3d39871a6bcb0f67d9cf8311596deae45fd1d84a04fd58f1fd55c5156b7309af09094c99a53674809cb87a45f95a2d69f9997a38085519cb4e056f9efd56672a2c1fe927d5ea8eec25b8aff6e56f9a2310f1a481daf407b8adf16201da267c59973920fd21bb087b88123ef98709839d6a3ee34efb8ccd5c15ed0e46cff3172682769531164b
66c8689c35a26299dd26d09233d1f64f9667474141cf9c6a6de7f2bc52c3bb44cfe679ff4b912c06df406283836b3581773cb76d375304f46239da5996594a8d03b14c02f1b35a432dc44a96331242ae31174*33a7ee59d6d17ed1ee99dc0a71771227e6f3734b17ba36eb589bdced56244135";
diff --git a/tools/cryptoloop2hashcat.py b/tools/cryptoloop2hashcat.py
old mode 100644
new mode 100755

From d38d40c8ba403ad0c37e43bbfe33da22cb21dcb5 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Thu, 29 Jul 2021 10:49:44 +0200
Subject: [PATCH 24/24] Unlock all GPU threads for AMD GPUs if WaveFront size
 is 32 (basically new models) Add new hash-modes to tools/benchmark_deep.pl
 Fix MINGW issue on 64 bit constant in refactored kernel-accel limiting
 section

---
 src/backend.c           | 29 ++++++++++++++++++++++-------
 tools/benchmark_deep.pl | 17 +++++++++++++++++
 2 files changed, 39 insertions(+), 7 deletions(-)

diff --git a/src/backend.c b/src/backend.c
index 63e0beb3b..057425250 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -10411,11 +10411,21 @@ static u32 get_kernel_threads (const hc_device_param_t *device_param)
     }
     else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)
     {
-      kernel_threads_max = MIN (kernel_threads_max, device_param->kernel_preferred_wgs_multiple);
+      if (device_param->kernel_preferred_wgs_multiple == 64)
+      {
+        // only older AMD GPUs with WaveFront size 64 benefit from this
+
+        kernel_threads_max = MIN (kernel_threads_max, device_param->kernel_preferred_wgs_multiple);
+      }
     }
     else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
     {
-      kernel_threads_max = MIN (kernel_threads_max, device_param->kernel_preferred_wgs_multiple);
+      if (device_param->kernel_preferred_wgs_multiple == 64)
+      {
+        // only older AMD GPUs with WaveFront size 64 benefit from this
+
+        kernel_threads_max = MIN (kernel_threads_max, device_param->kernel_preferred_wgs_multiple);
+      }
     }
   }
 
@@ -10719,7 +10729,7 @@ static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_p
       //hiprtc_options[1] = "--device-as-default-execution-space";
       //hiprtc_options[2] = "--gpu-architecture";
 
-      hc_asprintf (&hiprtc_options[0], "--gpu-max-threads-per-block=%d", (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : device_param->kernel_preferred_wgs_multiple);
+      hc_asprintf (&hiprtc_options[0], "--gpu-max-threads-per-block=%d", (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : ((device_param->kernel_preferred_wgs_multiple == 64) ? 64 : KERNEL_THREADS_MAX));
 
       //hiprtc_options[0] = "--gpu-max-threads-per-block=64";
       hiprtc_options[1] = "-nocudainc";
@@ -11804,7 +11814,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       device_param->device_name,
       device_param->opencl_device_version,
       device_param->opencl_driver_version,
-      (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : device_param->kernel_preferred_wgs_multiple);
+      (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : ((device_param->kernel_preferred_wgs_multiple == 64) ? 64 : KERNEL_THREADS_MAX));
 
     md5_ctx_t md5_ctx;
 
@@ -12139,7 +12149,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
         device_param->vector_width,
         hashconfig->kern_type,
         extra_value,
-        (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : device_param->kernel_preferred_wgs_multiple,
+        (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : ((device_param->kernel_preferred_wgs_multiple == 64) ? 64 : KERNEL_THREADS_MAX),
         build_options_module_buf);
 
       md5_ctx_t md5_ctx;
@@ -14883,6 +14893,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
     u32 kernel_accel_max = device_param->kernel_accel_max;
 
     // We need to deal with the situation that the total video RAM > total host RAM.
+    // (The opposite direction is handled in the loop section below.)
     // Especially in multi-GPU setups that is very likely.
     // The buffers which actually take a lot of memory (except for SCRYPT) are the ones for the password candidates.
     // They are stored in an aligned order for better performance, but this increases the memory pressure.
@@ -14893,7 +14904,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
     // We need to hard-code some value, let's assume that (in 2021) the host has at least 8GB ram per active GPU
 
-    const u64 SIZE_8GB = 8UL * 1024 * 1024 * 1024;
+    const u64 SIZE_8GB = 8ULL * 1024 * 1024 * 1024;
 
     u64 accel_limit = SIZE_8GB;
 
@@ -14909,6 +14920,10 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
     accel_limit /= 3;
 
+    // It is possible that the GPU simply has too many hardware resources and 8GB per GPU is not enough, but OTOH we can't go lower than 1
+
+    accel_limit = MAX (accel_limit, 1);
+
     // I think vector size is not required because vector_size is dividing the pws_cnt in run_kernel()
 
     kernel_accel_max = MIN (kernel_accel_max, accel_limit);
@@ -14921,7 +14936,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       return -1;
     }
 
-    // find out if we would request too much memory on memory blocks which are based on kernel_accel
+    // Opposite direction check: find out if we would request too much memory on memory blocks which are based on kernel_accel
 
     u64 size_pws      = 4;
     u64 size_pws_amp  = 4;
diff --git a/tools/benchmark_deep.pl b/tools/benchmark_deep.pl
index df6777441..ba2db8856 100755
--- a/tools/benchmark_deep.pl
+++ b/tools/benchmark_deep.pl
@@ -230,16 +230,19 @@ my @hash_types =
   13751,
   13761,
   13771,
+  13781,
   13800,
   13900,
   14000,
   14100,
   14400,
+  14500,
   14700,
   14800,
   14900,
   15000,
   15100,
+  15200,
   15300,
   15400,
   15500,
@@ -250,10 +253,13 @@ my @hash_types =
   16200,
   16300,
   16400,
+  16500,
   16600,
+  16700,
   16800,
   16801,
   16900,
+  17210,
   17300,
   17400,
   17500,
@@ -333,12 +339,23 @@ my @hash_types =
   24700,
   24800,
   24900,
+  25000,
+  25100,
+  25200,
   25300,
   25400,
   25500,
+  25700,
   25900,
   26000,
   26100,
+  26200,
+  26300,
+  26401,
+  26402,
+  26403,
+  26500,
+  26600,
 );
 
 if (scalar @ARGV)