diff --git a/OpenCL/inc_common.cl b/OpenCL/inc_common.cl
index bf8aeccd5..82b50b7c8 100644
--- a/OpenCL/inc_common.cl
+++ b/OpenCL/inc_common.cl
@@ -528,7 +528,7 @@ DECLSPEC u32x unpack_v8a_from_v32 (const u32x v32)
   asm volatile ("bfe.u32 %0, %1,  0, 8;" : "=r"(r.sf) : "r"(v32.sf));
   #endif
 
-  //#elif defined IS_AMD && HAS_VBFE == 1
+  //#elif (defined IS_AMD || defined IS_HIP) && HAS_VBFE == 1
   //__asm__ __volatile__ ("V_BFE_U32 %0, %1, 0, 8;" : "=v"(r) : "v"(v32));
   #else
   r = (v32 >> 0) & 0xff;
@@ -575,7 +575,7 @@ DECLSPEC u32x unpack_v8b_from_v32 (const u32x v32)
   asm volatile ("bfe.u32 %0, %1,  8, 8;" : "=r"(r.sf) : "r"(v32.sf));
   #endif
 
-  //#elif defined IS_AMD && HAS_VBFE == 1
+  //#elif (defined IS_AMD || defined IS_HIP) && HAS_VBFE == 1
   //__asm__ __volatile__ ("V_BFE_U32 %0, %1, 8, 8;" : "=v"(r) : "v"(v32));
   #else
   r = (v32 >> 8) & 0xff;
@@ -622,7 +622,7 @@ DECLSPEC u32x unpack_v8c_from_v32 (const u32x v32)
   asm volatile ("bfe.u32 %0, %1, 16, 8;" : "=r"(r.sf) : "r"(v32.sf));
   #endif
 
-  //#elif defined IS_AMD && HAS_VBFE == 1
+  //#elif (defined IS_AMD || defined IS_HIP) && HAS_VBFE == 1
   //__asm__ __volatile__ ("V_BFE_U32 %0, %1, 16, 8;" : "=v"(r) : "v"(v32));
   #else
   r = (v32 >> 16) & 0xff;
@@ -669,7 +669,7 @@ DECLSPEC u32x unpack_v8d_from_v32 (const u32x v32)
   asm volatile ("bfe.u32 %0, %1, 24, 8;" : "=r"(r.sf) : "r"(v32.sf));
   #endif
 
-  //#elif defined IS_AMD && HAS_VBFE == 1
+  //#elif (defined IS_AMD || defined IS_HIP) && HAS_VBFE == 1
   //__asm__ __volatile__ ("V_BFE_U32 %0, %1, 24, 8;" : "=v"(r) : "v"(v32));
   #else
   r = (v32 >> 24) & 0xff;
@@ -684,7 +684,7 @@ DECLSPEC u32 unpack_v8a_from_v32_S (const u32 v32)
 
   #if   defined IS_NV  && HAS_BFE  == 1
   asm volatile ("bfe.u32 %0, %1, 0, 8;" : "=r"(r) : "r"(v32));
-  //#elif defined IS_AMD && HAS_VBFE == 1
+  //#elif (defined IS_AMD || defined IS_HIP) && HAS_VBFE == 1
   //__asm__ __volatile__ ("V_BFE_U32 %0, %1, 0, 8;" : "=v"(r) : "v"(v32));
   #else
   r = (v32 >> 0) & 0xff;
@@ -699,7 +699,7 @@ DECLSPEC u32 unpack_v8b_from_v32_S (const u32 v32)
 
   #if   defined IS_NV  && HAS_BFE  == 1
   asm volatile ("bfe.u32 %0, %1, 8, 8;" : "=r"(r) : "r"(v32));
-  //#elif defined IS_AMD && HAS_VBFE == 1
+  //#elif (defined IS_AMD || defined IS_HIP) && HAS_VBFE == 1
   //__asm__ __volatile__ ("V_BFE_U32 %0, %1, 8, 8;" : "=v"(r) : "v"(v32));
   #else
   r = (v32 >> 8) & 0xff;
@@ -714,7 +714,7 @@ DECLSPEC u32 unpack_v8c_from_v32_S (const u32 v32)
 
   #if   defined IS_NV  && HAS_BFE  == 1
   asm volatile ("bfe.u32 %0, %1, 16, 8;" : "=r"(r) : "r"(v32));
-  //#elif defined IS_AMD && HAS_VBFE == 1
+  //#elif (defined IS_AMD || defined IS_HIP) && HAS_VBFE == 1
   //__asm__ __volatile__ ("V_BFE_U32 %0, %1, 16, 8;" : "=v"(r) : "v"(v32));
   #else
   r = (v32 >> 16) & 0xff;
@@ -729,7 +729,7 @@ DECLSPEC u32 unpack_v8d_from_v32_S (const u32 v32)
 
   #if   defined IS_NV  && HAS_BFE  == 1
   asm volatile ("bfe.u32 %0, %1, 24, 8;" : "=r"(r) : "r"(v32));
-  //#elif defined IS_AMD && HAS_VBFE == 1
+  //#elif (defined IS_AMD || defined IS_HIP) && HAS_VBFE == 1
   //__asm__ __volatile__ ("V_BFE_U32 %0, %1, 24, 8;" : "=v"(r) : "v"(v32));
   #else
   r = (v32 >> 24) & 0xff;
@@ -879,7 +879,7 @@ DECLSPEC u32x hc_rotl32 (const u32x a, const int n)
 {
   #if   defined _CPU_OPENCL_EMU_H
   return rotl32 (a, n);
-  #elif defined IS_CUDA
+  #elif defined IS_CUDA || defined IS_HIP
   return rotl32 (a, n);
   #else
   #ifdef USE_ROTATE
@@ -894,7 +894,7 @@ DECLSPEC u32x hc_rotr32 (const u32x a, const int n)
 {
   #if   defined _CPU_OPENCL_EMU_H
   return rotr32 (a, n);
-  #elif defined IS_CUDA
+  #elif defined IS_CUDA || defined IS_HIP
   return rotr32 (a, n);
   #else
   #ifdef USE_ROTATE
@@ -909,7 +909,7 @@ DECLSPEC u32 hc_rotl32_S (const u32 a, const int n)
 {
   #if   defined _CPU_OPENCL_EMU_H
   return rotl32 (a, n);
-  #elif defined IS_CUDA
+  #elif defined IS_CUDA || defined IS_HIP
   return rotl32_S (a, n);
   #else
   #ifdef USE_ROTATE
@@ -924,7 +924,7 @@ DECLSPEC u32 hc_rotr32_S (const u32 a, const int n)
 {
   #if   defined _CPU_OPENCL_EMU_H
   return rotr32 (a, n);
-  #elif defined IS_CUDA
+  #elif defined IS_CUDA || defined IS_HIP
   return rotr32_S (a, n);
   #else
   #ifdef USE_ROTATE
@@ -941,7 +941,7 @@ DECLSPEC u64x hc_rotl64 (const u64x a, const int n)
   return rotl64 (a, n);
   #elif defined IS_CUDA
   return rotl64 (a, n);
-  #elif defined IS_AMD
+  #elif (defined IS_AMD || defined IS_HIP)
   return rotl64 (a, n);
   #else
   #ifdef USE_ROTATE
@@ -958,7 +958,7 @@ DECLSPEC u64x hc_rotr64 (const u64x a, const int n)
   return rotr64 (a, n);
   #elif defined IS_CUDA
   return rotr64 (a, n);
-  #elif defined IS_AMD
+  #elif (defined IS_AMD || defined IS_HIP)
   return rotr64 (a, n);
   #else
   #ifdef USE_ROTATE
@@ -975,7 +975,7 @@ DECLSPEC u64 hc_rotl64_S (const u64 a, const int n)
   return rotl64 (a, n);
   #elif defined IS_CUDA
   return rotl64_S (a, n);
-  #elif defined IS_AMD
+  #elif (defined IS_AMD || defined IS_HIP)
   return rotl64_S (a, n);
   #else
   #ifdef USE_ROTATE
@@ -992,7 +992,7 @@ DECLSPEC u64 hc_rotr64_S (const u64 a, const int n)
   return rotr64 (a, n);
   #elif defined IS_CUDA
   return rotr64_S (a, n);
-  #elif defined IS_AMD
+  #elif (defined IS_AMD || defined IS_HIP)
   return rotr64_S (a, n);
   #else
   #ifdef USE_ROTATE
@@ -1012,7 +1012,7 @@ DECLSPEC u32x hc_swap32 (const u32x v)
   #ifdef _CPU_OPENCL_EMU_H
   r = byte_swap_32 (v);
   #else
-  #if   defined IS_AMD && HAS_VPERM == 1
+  #if   (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1
 
   const u32 m = 0x00010203;
 
@@ -1109,7 +1109,7 @@ DECLSPEC u32 hc_swap32_S (const u32 v)
   #ifdef _CPU_OPENCL_EMU_H
   r = byte_swap_32 (v);
   #else
-  #if   defined IS_AMD && HAS_VPERM == 1
+  #if   (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1
   __asm__ __volatile__ ("V_PERM_B32 %0, 0, %1, %2;" : "=v"(r) : "v"(v), "v"(0x00010203));
   #elif defined IS_NV  && HAS_PRMT  == 1
   asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(r) : "r"(v));
@@ -1135,7 +1135,7 @@ DECLSPEC u64x hc_swap64 (const u64x v)
   #ifdef _CPU_OPENCL_EMU_H
   r = byte_swap_64 (v);
   #else
-  #if   defined IS_AMD && HAS_VPERM == 1
+  #if   (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1
 
   const u32 m = 0x00010203;
 
@@ -1354,7 +1354,7 @@ DECLSPEC u64 hc_swap64_S (const u64 v)
   #ifdef _CPU_OPENCL_EMU_H
   r = byte_swap_64 (v);
   #else
-  #if   defined IS_AMD && HAS_VPERM == 1
+  #if   (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1
   const u32 m = 0x00010203;
 
   const u32 v0 = h32_from_64_S (v);
@@ -1399,7 +1399,7 @@ DECLSPEC u64 hc_swap64_S (const u64 v)
   return r;
 }
 
-#ifdef IS_AMD
+#if (defined IS_AMD || defined IS_HIP)
 
 DECLSPEC u32x hc_bfe (const u32x a, const u32x b, const u32x c)
 {
@@ -2767,7 +2767,7 @@ DECLSPEC void make_utf16be (const u32x *in, u32x *out1, u32x *out2)
   out1[1] = hc_byte_perm (in[0], 0, 0x3727);
   out1[0] = hc_byte_perm (in[0], 0, 0x1707);
 
-  #elif defined IS_AMD && HAS_VPERM
+  #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM // value test: HAS_VPERM may be defined as 0
 
   out2[3] = hc_byte_perm (in[3], 0, 0x03070207);
   out2[2] = hc_byte_perm (in[3], 0, 0x01070007);
@@ -2805,7 +2805,7 @@ DECLSPEC void make_utf16beN (const u32x *in, u32x *out1, u32x *out2)
   out1[1] = hc_byte_perm (in[0], 0, 0x1707);
   out1[0] = hc_byte_perm (in[0], 0, 0x3727);
 
-  #elif defined IS_AMD && HAS_VPERM
+  #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM // value test: HAS_VPERM may be defined as 0
 
   out2[3] = hc_byte_perm (in[3], 0, 0x01070007);
   out2[2] = hc_byte_perm (in[3], 0, 0x03070207);
@@ -2843,7 +2843,7 @@ DECLSPEC void make_utf16le (const u32x *in, u32x *out1, u32x *out2)
   out1[1] = hc_byte_perm (in[0], 0, 0x7372);
   out1[0] = hc_byte_perm (in[0], 0, 0x7170);
 
-  #elif defined IS_AMD && HAS_VPERM
+  #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM // value test: HAS_VPERM may be defined as 0
 
   out2[3] = hc_byte_perm (in[3], 0, 0x07030702);
   out2[2] = hc_byte_perm (in[3], 0, 0x07010700);
@@ -2881,7 +2881,7 @@ DECLSPEC void make_utf16leN (const u32x *in, u32x *out1, u32x *out2)
   out1[1] = hc_byte_perm (in[0], 0, 0x7170);
   out1[0] = hc_byte_perm (in[0], 0, 0x7372);
 
-  #elif defined IS_AMD && HAS_VPERM
+  #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM // value test: HAS_VPERM may be defined as 0
 
   out2[3] = hc_byte_perm (in[3], 0, 0x07010700);
   out2[2] = hc_byte_perm (in[3], 0, 0x07030702);
@@ -2915,7 +2915,7 @@ DECLSPEC void undo_utf16be (const u32x *in1, const u32x *in2, u32x *out)
   out[2] = hc_byte_perm (in2[0], in2[1], 0x4602);
   out[3] = hc_byte_perm (in2[2], in2[3], 0x4602);
 
-  #elif defined IS_AMD && HAS_VPERM
+  #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM // value test: HAS_VPERM may be defined as 0
 
   out[0] = hc_byte_perm (in1[0], in1[1], 0x04060002);
   out[1] = hc_byte_perm (in1[2], in1[3], 0x04060002);
@@ -2945,7 +2945,7 @@ DECLSPEC void undo_utf16le (const u32x *in1, const u32x *in2, u32x *out)
   out[2] = hc_byte_perm (in2[0], in2[1], 0x6420);
   out[3] = hc_byte_perm (in2[2], in2[3], 0x6420);
 
-  #elif defined IS_AMD && HAS_VPERM
+  #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM // value test: HAS_VPERM may be defined as 0
 
   out[0] = hc_byte_perm (in1[0], in1[1], 0x06040200);
   out[1] = hc_byte_perm (in1[2], in1[3], 0x06040200);
@@ -3069,7 +3069,7 @@ DECLSPEC void switch_buffer_by_offset_le (u32x *w0, u32x *w1, u32x *w2, u32x *w3
 {
   const int offset_switch = offset / 4;
 
-  #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
   switch (offset_switch)
   {
     case  0:
@@ -3394,7 +3394,7 @@ DECLSPEC void switch_buffer_by_offset_le (u32x *w0, u32x *w1, u32x *w2, u32x *w3
   }
   #endif
 
-  #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
 
   const int offset_mod_4 = offset & 3;
 
@@ -3404,7 +3404,7 @@ DECLSPEC void switch_buffer_by_offset_le (u32x *w0, u32x *w1, u32x *w2, u32x *w3
   const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
   #endif
 
-  #if defined IS_AMD
+  #if (defined IS_AMD || defined IS_HIP)
   const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
   #endif
 
@@ -3737,7 +3737,7 @@ DECLSPEC void switch_buffer_by_offset_carry_le (u32x *w0, u32x *w1, u32x *w2, u3
 {
   const int offset_switch = offset / 4;
 
-  #if defined IS_AMD || defined IS_GENERIC
+  #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
   switch (offset_switch)
   {
     case  0:
@@ -4665,7 +4665,7 @@ DECLSPEC void switch_buffer_by_offset_be (u32x *w0, u32x *w1, u32x *w2, u32x *w3
 {
   const int offset_switch = offset / 4;
 
-  #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
   switch (offset_switch)
   {
     case  0:
@@ -4990,13 +4990,13 @@ DECLSPEC void switch_buffer_by_offset_be (u32x *w0, u32x *w1, u32x *w2, u32x *w3
   }
   #endif
 
-  #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
 
   #if defined IS_NV
   const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
   #endif
 
-  #if defined IS_AMD
+  #if (defined IS_AMD || defined IS_HIP)
   const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
   #endif
 
@@ -5329,7 +5329,7 @@ DECLSPEC void switch_buffer_by_offset_carry_be (u32x *w0, u32x *w1, u32x *w2, u3
 {
   const int offset_switch = offset / 4;
 
-  #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
   switch (offset_switch)
   {
     case  0:
@@ -5790,13 +5790,13 @@ DECLSPEC void switch_buffer_by_offset_carry_be (u32x *w0, u32x *w1, u32x *w2, u3
   }
   #endif
 
-  #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
 
   #if defined IS_NV
   const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
   #endif
 
-  #if defined IS_AMD
+  #if (defined IS_AMD || defined IS_HIP)
   const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
   #endif
 
@@ -6265,7 +6265,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_le (u32x *w0, u32x *w1, u32x *w2, u32x
 {
   const int offset_switch = offset / 4;
 
-  #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
   switch (offset_switch)
   {
     case  0:
@@ -7422,7 +7422,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_le (u32x *w0, u32x *w1, u32x *w2, u32x
   }
   #endif
 
-  #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
 
   const int offset_mod_4 = offset & 3;
 
@@ -7432,7 +7432,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_le (u32x *w0, u32x *w1, u32x *w2, u32x
   const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
   #endif
 
-  #if defined IS_AMD
+  #if (defined IS_AMD || defined IS_HIP)
   const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
   #endif
 
@@ -8005,7 +8005,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_le (u32x *w0, u32x *w1, u32x *w2
 {
   const int offset_switch = offset / 4;
 
-  #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
   switch (offset_switch)
   {
     case  0:
@@ -9690,7 +9690,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_le (u32x *w0, u32x *w1, u32x *w2
   }
   #endif
 
-  #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
 
   const int offset_mod_4 = offset & 3;
 
@@ -9700,7 +9700,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_le (u32x *w0, u32x *w1, u32x *w2
   const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
   #endif
 
-  #if defined IS_AMD
+  #if (defined IS_AMD || defined IS_HIP)
   const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
   #endif
 
@@ -11393,7 +11393,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_be (u32x *w0, u32x *w1, u32x *w2, u32x
 {
   const int offset_switch = offset / 4;
 
-  #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
   switch (offset_switch)
   {
     case  0:
@@ -12550,13 +12550,13 @@ DECLSPEC void switch_buffer_by_offset_8x4_be (u32x *w0, u32x *w1, u32x *w2, u32x
   }
   #endif
 
-  #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
 
   #if defined IS_NV
   const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
   #endif
 
-  #if defined IS_AMD
+  #if (defined IS_AMD || defined IS_HIP)
   const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
   #endif
 
@@ -13721,7 +13721,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_be (u32x *w0, u32x *w1, u32x *w2
 {
   const int offset_switch = offset / 4;
 
-  #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
   switch (offset_switch)
   {
     case  0:
@@ -15406,13 +15406,13 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_be (u32x *w0, u32x *w1, u32x *w2
   }
   #endif
 
-  #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
 
   #if defined IS_NV
   const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
   #endif
 
-  #if defined IS_AMD
+  #if (defined IS_AMD || defined IS_HIP)
   const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
   #endif
 
@@ -17105,7 +17105,7 @@ DECLSPEC void switch_buffer_by_offset_1x64_le (u32x *w, const u32 offset)
 {
   const int offset_switch = offset / 4;
 
-  #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
   switch (offset_switch)
   {
     case  0:
@@ -21462,7 +21462,7 @@ DECLSPEC void switch_buffer_by_offset_1x64_le (u32x *w, const u32 offset)
   }
   #endif
 
-  #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
 
   const int offset_mod_4 = offset & 3;
 
@@ -21472,7 +21472,7 @@ DECLSPEC void switch_buffer_by_offset_1x64_le (u32x *w, const u32 offset)
   const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
   #endif
 
-  #if defined IS_AMD
+  #if (defined IS_AMD || defined IS_HIP)
   const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
   #endif
 
@@ -25837,7 +25837,7 @@ DECLSPEC void switch_buffer_by_offset_1x64_be (u32x *w, const u32 offset)
 {
   const int offset_switch = offset / 4;
 
-  #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
   switch (offset_switch)
   {
     case  0:
@@ -30194,13 +30194,13 @@ DECLSPEC void switch_buffer_by_offset_1x64_be (u32x *w, const u32 offset)
   }
   #endif
 
-  #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
 
   #if defined IS_NV
   const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
   #endif
 
-  #if defined IS_AMD
+  #if (defined IS_AMD || defined IS_HIP)
   const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
   #endif
 
@@ -36533,7 +36533,7 @@ DECLSPEC void make_utf16be_S (const u32 *in, u32 *out1, u32 *out2)
   out1[1] = hc_byte_perm_S (in[0], 0, 0x3727);
   out1[0] = hc_byte_perm_S (in[0], 0, 0x1707);
 
-  #elif defined IS_AMD && HAS_VPERM
+  #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM
 
   out2[3] = hc_byte_perm_S (in[3], 0, 0x03070207);
   out2[2] = hc_byte_perm_S (in[3], 0, 0x01070007);
@@ -36571,7 +36571,7 @@ DECLSPEC void make_utf16le_S (const u32 *in, u32 *out1, u32 *out2)
   out1[1] = hc_byte_perm_S (in[0], 0, 0x7372);
   out1[0] = hc_byte_perm_S (in[0], 0, 0x7170);
 
-  #elif defined IS_AMD && HAS_VPERM
+  #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM
 
   out2[3] = hc_byte_perm_S (in[3], 0, 0x07030702);
   out2[2] = hc_byte_perm_S (in[3], 0, 0x07010700);
@@ -36605,7 +36605,7 @@ DECLSPEC void undo_utf16be_S (const u32 *in1, const u32 *in2, u32 *out)
   out[2] = hc_byte_perm_S (in2[0], in2[1], 0x4602);
   out[3] = hc_byte_perm_S (in2[2], in2[3], 0x4602);
 
-  #elif defined IS_AMD && HAS_VPERM
+  #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM
 
   out[0] = hc_byte_perm_S (in1[0], in1[1], 0x04060002);
   out[1] = hc_byte_perm_S (in1[2], in1[3], 0x04060002);
@@ -36635,7 +36635,7 @@ DECLSPEC void undo_utf16le_S (const u32 *in1, const u32 *in2, u32 *out)
   out[2] = hc_byte_perm_S (in2[0], in2[1], 0x6420);
   out[3] = hc_byte_perm_S (in2[2], in2[3], 0x6420);
 
-  #elif defined IS_AMD && HAS_VPERM
+  #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM
 
   out[0] = hc_byte_perm_S (in1[0], in1[1], 0x06040200);
   out[1] = hc_byte_perm_S (in1[2], in1[3], 0x06040200);
@@ -36660,7 +36660,7 @@ DECLSPEC void switch_buffer_by_offset_le_S (u32 *w0, u32 *w1, u32 *w2, u32 *w3,
 {
   const int offset_switch = offset / 4;
 
-  #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
   switch (offset_switch)
   {
     case  0:
@@ -36985,7 +36985,7 @@ DECLSPEC void switch_buffer_by_offset_le_S (u32 *w0, u32 *w1, u32 *w2, u32 *w3,
   }
   #endif
 
-  #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
 
   const int offset_mod_4 = offset & 3;
 
@@ -36995,7 +36995,7 @@ DECLSPEC void switch_buffer_by_offset_le_S (u32 *w0, u32 *w1, u32 *w2, u32 *w3,
   const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
   #endif
 
-  #if defined IS_AMD
+  #if (defined IS_AMD || defined IS_HIP)
   const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
   #endif
 
@@ -37328,7 +37328,7 @@ DECLSPEC void switch_buffer_by_offset_carry_le_S (u32 *w0, u32 *w1, u32 *w2, u32
 {
   const int offset_switch = offset / 4;
 
-  #if defined IS_AMD || defined IS_GENERIC
+  #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
   switch (offset_switch)
   {
     case  0:
@@ -38256,7 +38256,7 @@ DECLSPEC void switch_buffer_by_offset_be_S (u32 *w0, u32 *w1, u32 *w2, u32 *w3,
 {
   const int offset_switch = offset / 4;
 
-  #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
   switch (offset_switch)
   {
     case  0:
@@ -38581,13 +38581,13 @@ DECLSPEC void switch_buffer_by_offset_be_S (u32 *w0, u32 *w1, u32 *w2, u32 *w3,
   }
   #endif
 
-  #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
 
   #if defined IS_NV
   const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
   #endif
 
-  #if defined IS_AMD
+  #if (defined IS_AMD || defined IS_HIP)
   const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
   #endif
 
@@ -38920,7 +38920,7 @@ DECLSPEC void switch_buffer_by_offset_carry_be_S (u32 *w0, u32 *w1, u32 *w2, u32
 {
   const int offset_switch = offset / 4;
 
-  #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
   switch (offset_switch)
   {
     case  0:
@@ -39381,13 +39381,13 @@ DECLSPEC void switch_buffer_by_offset_carry_be_S (u32 *w0, u32 *w1, u32 *w2, u32
   }
   #endif
 
-  #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
 
   #if defined IS_NV
   const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
   #endif
 
-  #if defined IS_AMD
+  #if (defined IS_AMD || defined IS_HIP)
   const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
   #endif
 
@@ -39856,7 +39856,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_le_S (u32 *w0, u32 *w1, u32 *w2, u32 *
 {
   const int offset_switch = offset / 4;
 
-  #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
   switch (offset_switch)
   {
     case  0:
@@ -41013,7 +41013,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_le_S (u32 *w0, u32 *w1, u32 *w2, u32 *
   }
   #endif
 
-  #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
 
   const int offset_mod_4 = offset & 3;
 
@@ -41023,7 +41023,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_le_S (u32 *w0, u32 *w1, u32 *w2, u32 *
   const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
   #endif
 
-  #if defined IS_AMD
+  #if (defined IS_AMD || defined IS_HIP)
   const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
   #endif
 
@@ -41596,7 +41596,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_le_S (u32 *w0, u32 *w1, u32 *w2,
 {
   const int offset_switch = offset / 4;
 
-  #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
   switch (offset_switch)
   {
     case  0:
@@ -43281,7 +43281,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_le_S (u32 *w0, u32 *w1, u32 *w2,
   }
   #endif
 
-  #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
 
   const int offset_mod_4 = offset & 3;
 
@@ -43291,7 +43291,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_le_S (u32 *w0, u32 *w1, u32 *w2,
   const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
   #endif
 
-  #if defined IS_AMD
+  #if (defined IS_AMD || defined IS_HIP)
   const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
   #endif
 
@@ -44984,7 +44984,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_be_S (u32 *w0, u32 *w1, u32 *w2, u32 *
 {
   const int offset_switch = offset / 4;
 
-  #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
   switch (offset_switch)
   {
     case  0:
@@ -46141,13 +46141,13 @@ DECLSPEC void switch_buffer_by_offset_8x4_be_S (u32 *w0, u32 *w1, u32 *w2, u32 *
   }
   #endif
 
-  #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
 
   #if defined IS_NV
   const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
   #endif
 
-  #if defined IS_AMD
+  #if (defined IS_AMD || defined IS_HIP)
   const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
   #endif
 
@@ -47312,7 +47312,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_be_S (u32 *w0, u32 *w1, u32 *w2,
 {
   const int offset_switch = offset / 4;
 
-  #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
   switch (offset_switch)
   {
     case  0:
@@ -48997,13 +48997,13 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_be_S (u32 *w0, u32 *w1, u32 *w2,
   }
   #endif
 
-  #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
 
   #if defined IS_NV
   const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
   #endif
 
-  #if defined IS_AMD
+  #if (defined IS_AMD || defined IS_HIP)
   const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
   #endif
 
@@ -50696,7 +50696,7 @@ DECLSPEC void switch_buffer_by_offset_1x64_le_S (u32 *w, const u32 offset)
 {
   const int offset_switch = offset / 4;
 
-  #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
   switch (offset_switch)
   {
     case  0:
@@ -55053,7 +55053,7 @@ DECLSPEC void switch_buffer_by_offset_1x64_le_S (u32 *w, const u32 offset)
   }
   #endif
 
-  #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
 
   const int offset_mod_4 = offset & 3;
 
@@ -55063,7 +55063,7 @@ DECLSPEC void switch_buffer_by_offset_1x64_le_S (u32 *w, const u32 offset)
   const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
   #endif
 
-  #if defined IS_AMD
+  #if (defined IS_AMD || defined IS_HIP)
   const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
   #endif
 
@@ -59428,7 +59428,7 @@ DECLSPEC void switch_buffer_by_offset_1x64_be_S (u32 *w, const u32 offset)
 {
   const int offset_switch = offset / 4;
 
-  #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
   switch (offset_switch)
   {
     case  0:
@@ -63785,13 +63785,13 @@ DECLSPEC void switch_buffer_by_offset_1x64_be_S (u32 *w, const u32 offset)
   }
   #endif
 
-  #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
 
   #if defined IS_NV
   const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
   #endif
 
-  #if defined IS_AMD
+  #if (defined IS_AMD || defined IS_HIP)
   const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
   #endif
 
diff --git a/OpenCL/inc_common.h b/OpenCL/inc_common.h
index 91d59b736..ebd0107c1 100644
--- a/OpenCL/inc_common.h
+++ b/OpenCL/inc_common.h
@@ -26,7 +26,7 @@
  *   - P19: Type of the esalt_bufs structure with additional data, or void.
  */
 
-#ifdef IS_CUDA
+#if defined IS_CUDA || defined IS_HIP
 #define KERN_ATTR(p2,p4,p5,p6,p19)                                  \
   MAYBE_UNUSED GLOBAL_AS       pw_t          *pws,                  \
   MAYBE_UNUSED p2        const kernel_rule_t *g_rules_buf,          \
@@ -113,7 +113,7 @@
  * do not use rules or tmps, etc.
  */
 
-#ifdef IS_CUDA
+#if defined IS_CUDA || defined IS_HIP
 #define KERN_ATTR_BASIC()                 KERN_ATTR (GLOBAL_AS,   GLOBAL_AS   const bf_t      *g_bfs_buf,     void, void, void)
 #define KERN_ATTR_BITSLICE()              KERN_ATTR (GLOBAL_AS,   GLOBAL_AS   const bs_word_t *g_words_buf_s, void, void, void)
 #define KERN_ATTR_ESALT(e)                KERN_ATTR (GLOBAL_AS,   GLOBAL_AS   const bf_t      *g_bfs_buf,     void, void, e)
diff --git a/OpenCL/inc_ecc_secp256k1.cl b/OpenCL/inc_ecc_secp256k1.cl
index e21f528d6..b3a70df78 100644
--- a/OpenCL/inc_ecc_secp256k1.cl
+++ b/OpenCL/inc_ecc_secp256k1.cl
@@ -124,7 +124,7 @@ DECLSPEC u32 sub (u32 *r, const u32 *a, const u32 *b)
     :  "r"(a[0]),  "r"(a[1]),  "r"(a[2]),  "r"(a[3]),  "r"(a[4]),  "r"(a[5]),  "r"(a[6]),  "r"(a[7]),
        "r"(b[0]),  "r"(b[1]),  "r"(b[2]),  "r"(b[3]),  "r"(b[4]),  "r"(b[5]),  "r"(b[6]),  "r"(b[7])
   );
-  #elif defined IS_AMD && HAS_VSUB == 1 && HAS_VSUBB == 1
+  #elif (defined IS_AMD || defined IS_HIP) && HAS_VSUB == 1 && HAS_VSUBB == 1
   __asm__ __volatile__
   (
     "V_SUB_U32   %0,  %9, %17;"
@@ -176,7 +176,7 @@ DECLSPEC u32 add (u32 *r, const u32 *a, const u32 *b)
     :  "r"(a[0]),  "r"(a[1]),  "r"(a[2]),  "r"(a[3]),  "r"(a[4]),  "r"(a[5]),  "r"(a[6]),  "r"(a[7]),
        "r"(b[0]),  "r"(b[1]),  "r"(b[2]),  "r"(b[3]),  "r"(b[4]),  "r"(b[5]),  "r"(b[6]),  "r"(b[7])
   );
-  #elif defined IS_AMD && HAS_VADD == 1 && HAS_VADDC == 1
+  #elif (defined IS_AMD || defined IS_HIP) && HAS_VADD == 1 && HAS_VADDC == 1
   __asm__ __volatile__
   (
     "V_ADD_U32   %0,  %9, %17;"
diff --git a/OpenCL/inc_platform.cl b/OpenCL/inc_platform.cl
index 52eaa8121..8ccb034aa 100644
--- a/OpenCL/inc_platform.cl
+++ b/OpenCL/inc_platform.cl
@@ -60,7 +60,7 @@ DECLSPEC u64 rotr64_S (const u64 a, const int n)
 
 #endif
 
-#ifdef IS_CUDA
+#if defined IS_CUDA
 
 #if ATTACK_EXEC == 11
 
@@ -164,6 +164,143 @@ DECLSPEC u64 rotr64_S (const u64 a, const int n)
 #define SYNC_THREADS() __syncthreads ()
 #endif
 
+#if defined IS_HIP
+
+#if ATTACK_EXEC == 11
+// One shared array declared with CONSTANT_VK; the macros below pick, per ATTACK_KERN, which *_buf names are served from it.
+CONSTANT_VK u32 generic_constant[8192] __attribute__((used)); // 32k
+
+#if   ATTACK_KERN == 0 // only rules_buf is redirected to generic_constant
+#define bfs_buf     g_bfs_buf
+#define rules_buf   ((const kernel_rule_t *) generic_constant)
+#define words_buf_s g_words_buf_s
+#define words_buf_r g_words_buf_r
+#elif ATTACK_KERN == 1 // nothing is redirected, every name maps to its g_* kernel argument
+#define bfs_buf     g_bfs_buf
+#define rules_buf   g_rules_buf
+#define words_buf_s g_words_buf_s
+#define words_buf_r g_words_buf_r
+#elif ATTACK_KERN == 3 // bfs_buf and both words_buf_* names are typed views of generic_constant
+#define rules_buf   g_rules_buf
+#define bfs_buf     ((const bf_t *)      generic_constant)
+#define words_buf_s ((const bs_word_t *) generic_constant)
+#define words_buf_r ((const u32x *)      generic_constant)
+#endif
+
+#endif // ATTACK_EXEC == 11
+
+DECLSPEC u32 hc_atomic_dec (volatile GLOBAL_AS u32 *p)
+{
+  volatile const u32 val = 1;
+  // Subtracts 1 from *p via atomicSub and returns its result; volatile (required by the inc_platform.h prototype) is cast away only for the builtin.
+  return atomicSub ((GLOBAL_AS u32 *) p, val);
+}
+
+DECLSPEC u32 hc_atomic_inc (volatile GLOBAL_AS u32 *p)
+{
+  volatile const u32 val = 1;
+  // Adds 1 to *p via atomicAdd and returns its result; volatile (required by the inc_platform.h prototype) is cast away only for the builtin.
+  return atomicAdd ((GLOBAL_AS u32 *) p, val);
+}
+
+DECLSPEC u32 hc_atomic_or (volatile GLOBAL_AS u32 *p, volatile const u32 val)
+{
+  return atomicOr ((GLOBAL_AS u32 *) p, val); // ORs val into *p; volatile (matching the inc_platform.h prototype) is dropped only for the builtin
+}
+
+DECLSPEC size_t get_global_id  (const u32 dimindx __attribute__((unused)))
+{
+  return threadIdx.x + (blockDim.x * blockIdx.x); // flat index built from the x components only; dimindx is ignored
+}
+
+DECLSPEC size_t get_local_id (const u32 dimindx __attribute__((unused)))
+{
+  return threadIdx.x; // x component only; dimindx is ignored
+}
+
+DECLSPEC size_t get_local_size (const u32 dimindx __attribute__((unused)))
+{
+  // NOTE(verify): only the x extent of the block is reported; dimindx is ignored
+  return blockDim.x;
+}
+
+DECLSPEC u32x rotl32 (const u32x a, const int n)
+{
+  return (a >> (32 - n)) | (a << n); // rotate left by n, written as two shifts OR-ed together
+}
+
+DECLSPEC u32x rotr32 (const u32x a, const int n)
+{
+  return (a << (32 - n)) | (a >> n); // rotate right by n, written as two shifts OR-ed together
+}
+
+DECLSPEC u32 rotl32_S (const u32 a, const int n)
+{
+  return (a >> (32 - n)) | (a << n); // scalar rotate left by n, written as two shifts OR-ed together
+}
+
+DECLSPEC u32 rotr32_S (const u32 a, const int n)
+{
+  return (a << (32 - n)) | (a >> n); // scalar rotate right by n, written as two shifts OR-ed together
+}
+
+DECLSPEC u64x rotl64 (const u64x a, const int n)
+{
+  return rotr64 (a, 64 - n); // left-rotate by n expressed as a right-rotate by 64 - n
+}
+
+DECLSPEC u32 amd_bitalign_S (const u32 a, const u32 b, const int n)
+{
+  u32 r = 0;
+  // Emits V_ALIGNBIT_B32 with a and b as "v" operands and n as an "I" operand. NOTE(review): "I" presumably requires a compile-time constant n - confirm.
+  __asm__ ("V_ALIGNBIT_B32 %0, %1, %2, %3;" : "=v"(r): "v"(a), "v"(b), "I"(n));
+
+  return r;
+}
+
+DECLSPEC u64x rotr64 (const u64x a, const int n)
+{
+  #if VECT_SIZE != 1
+  return (a << (64 - n)) | (a >> n); // vector build: plain shift-and-OR rotate
+  #else
+  return rotr64_S (a, n); // VECT_SIZE == 1: hand off to the scalar helper
+  #endif
+}
+
+DECLSPEC u64 rotl64_S (const u64 a, const int n)
+{
+  return rotr64_S (a, 64 - n); // left-rotate by n expressed as a right-rotate by 64 - n
+}
+
+DECLSPEC u64 rotr64_S (const u64 a, const int n)
+{
+  vconv64_t in;
+  // Split the 64-bit input into its two u32 members (v32.a, v32.b) through vconv64_t.
+  in.v64 = a;
+
+  const u32 a0 = in.v32.a;
+  const u32 a1 = in.v32.b;
+
+  vconv64_t out;
+  // Each output half is one amd_bitalign_S call over both input halves; the n < 32 test picks operand order and shift amount.
+  if (n < 32)
+  {
+    out.v32.a = amd_bitalign_S (a1, a0, n);
+    out.v32.b = amd_bitalign_S (a0, a1, n);
+  }
+  else
+  {
+    out.v32.a = amd_bitalign_S (a0, a1, n - 32); // operands swapped relative to the n < 32 case, shift reduced by 32
+    out.v32.b = amd_bitalign_S (a1, a0, n - 32);
+  }
+
+  return out.v64;
+}
+
+#define FIXED_THREAD_COUNT(n) __launch_bounds__((n), 0) // kernel attribute: n is the first __launch_bounds__ argument, the second is fixed at 0
+#define SYNC_THREADS() __syncthreads () // thin alias for the __syncthreads builtin
+#endif // IS_HIP
+
 #ifdef IS_OPENCL
 
 DECLSPEC u32 hc_atomic_dec (volatile GLOBAL_AS u32 *p)
diff --git a/OpenCL/inc_platform.h b/OpenCL/inc_platform.h
index c405bed0e..c65891a74 100644
--- a/OpenCL/inc_platform.h
+++ b/OpenCL/inc_platform.h
@@ -43,4 +43,26 @@ DECLSPEC u64  rotr64_S (const u64  a, const int n);
 #define bitselect(a,b,c) ((a) ^ ((c) & ((b) ^ (a))))
 #endif
 
+#ifdef IS_HIP // prototypes for the HIP helpers defined under IS_HIP in inc_platform.cl
+DECLSPEC u32 hc_atomic_dec (volatile GLOBAL_AS u32 *p);
+DECLSPEC u32 hc_atomic_inc (volatile GLOBAL_AS u32 *p);
+DECLSPEC u32 hc_atomic_or  (volatile GLOBAL_AS u32 *p, volatile const u32 val);
+// Work-item index queries; dimindx is marked unused in every prototype.
+DECLSPEC size_t get_global_id   (const u32 dimindx __attribute__((unused)));
+DECLSPEC size_t get_local_id    (const u32 dimindx __attribute__((unused)));
+DECLSPEC size_t get_local_size  (const u32 dimindx __attribute__((unused)));
+// Rotates: the _S variants take scalar u32/u64, the others take u32x/u64x.
+DECLSPEC u32x rotl32   (const u32x a, const int n);
+DECLSPEC u32x rotr32   (const u32x a, const int n);
+DECLSPEC u32  rotl32_S (const u32  a, const int n);
+DECLSPEC u32  rotr32_S (const u32  a, const int n);
+DECLSPEC u64x rotl64   (const u64x a, const int n);
+DECLSPEC u64x rotr64   (const u64x a, const int n);
+DECLSPEC u64  rotl64_S (const u64  a, const int n);
+DECLSPEC u64  rotr64_S (const u64  a, const int n);
+// bitselect expands (a) twice - avoid passing an argument with side effects.
+//#define rotate(a,n) (((a) << (n)) | ((a) >> (32 - (n))))
+#define bitselect(a,b,c) ((a) ^ ((c) & ((b) ^ (a))))
+#endif // IS_HIP
+
 #endif // _INC_PLATFORM_H
diff --git a/OpenCL/inc_rp_optimized.cl b/OpenCL/inc_rp_optimized.cl
index dc3754907..026198f09 100644
--- a/OpenCL/inc_rp_optimized.cl
+++ b/OpenCL/inc_rp_optimized.cl
@@ -781,7 +781,7 @@ DECLSPEC void append_block8_optimized (const u32 offset, u32 *buf0, u32 *buf1, c
 
   const int offset_switch = offset / 4;
 
-  #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
   const u32 src_r00 = src_r0[0];
   const u32 src_r01 = src_r0[1];
   const u32 src_r02 = src_r0[2];
@@ -884,7 +884,7 @@ DECLSPEC void append_block8_optimized (const u32 offset, u32 *buf0, u32 *buf1, c
   }
   #endif
 
-  #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
+  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
 
   const int offset_mod_4 = offset & 3;
 
@@ -894,7 +894,7 @@ DECLSPEC void append_block8_optimized (const u32 offset, u32 *buf0, u32 *buf1, c
   const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
   #endif
 
-  #if defined IS_AMD
+  #if (defined IS_AMD || defined IS_HIP)
   const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
   #endif
 
@@ -1359,11 +1359,7 @@ DECLSPEC u32 rule_op_mangle_delete_at (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED c
   const u32 ml = (1 << ((p0 & 3) * 8)) - 1;
   const u32 mr = ~ml;
 
-  #ifdef IS_AMD
   const int p0_switch = p0 / 4;
-  #else
-  const int p0_switch = p0 / 4;
-  #endif
 
   switch (p0_switch)
   {
@@ -1466,11 +1462,7 @@ DECLSPEC u32 rule_op_mangle_omit (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const
   const u32 ml = (1 << ((p0 & 3) * 8)) - 1;
   const u32 mr = ~ml;
 
-  #ifdef IS_AMD
   const int p0_switch = p0 / 4;
-  #else
-  const int p0_switch = p0 / 4;
-  #endif
 
   switch (p0_switch)
   {
@@ -1552,11 +1544,7 @@ DECLSPEC u32 rule_op_mangle_insert (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED cons
 
   const u32 mr = 0xffffff00 << ((p0 & 3) * 8);
 
-  #ifdef IS_AMD
   const int p0_switch = p0 / 4;
-  #else
-  const int p0_switch = p0 / 4;
-  #endif
 
   switch (p0_switch)
   {
diff --git a/OpenCL/inc_types.h b/OpenCL/inc_types.h
index 4c608abe0..cbd2fde69 100644
--- a/OpenCL/inc_types.h
+++ b/OpenCL/inc_types.h
@@ -68,7 +68,7 @@ typedef u64  u64x;
 #define make_u64x (u64)
 
 #else
-#ifdef IS_CUDA
+#if defined IS_CUDA
 
 #if VECT_SIZE == 2
 
@@ -824,6 +824,766 @@ typedef __device_builtin__ struct u64x u64x;
 #define make_u32x u32x
 #define make_u64x u64x
 
+#elif defined IS_HIP
+
+// seems to work, but slow
+
+/*
+#if VECT_SIZE == 2
+
+struct u8x
+{
+  u8 s0;
+  u8 s1;
+
+  inline __device__  u8x (const u8 a, const u8 b) : s0(a), s1(b) { }
+  inline __device__  u8x (const u8 a)             : s0(a), s1(a) { }
+
+  inline __device__  u8x (void) : s0(0), s1(0) { }
+  inline __device__ ~u8x (void) { }
+};
+
+struct u16x
+{
+  u16 s0;
+  u16 s1;
+
+  inline __device__  u16x (const u16 a, const u16 b) : s0(a), s1(b) { }
+  inline __device__  u16x (const u16 a)              : s0(a), s1(a) { }
+
+  inline __device__  u16x (void) : s0(0), s1(0) { }
+  inline __device__ ~u16x (void) { }
+};
+
+struct u32x
+{
+  u32 s0;
+  u32 s1;
+
+  inline __device__  u32x (const u32 a, const u32 b) : s0(a), s1(b) { }
+  inline __device__  u32x (const u32 a)              : s0(a), s1(a) { }
+
+  inline __device__  u32x (void) : s0(0), s1(0) { }
+  inline __device__ ~u32x (void) { }
+};
+
+struct u64x
+{
+  u64 s0;
+  u64 s1;
+
+  inline __device__  u64x (const u64 a, const u64 b) : s0(a), s1(b) { }
+  inline __device__  u64x (const u64 a)              : s0(a), s1(a) { }
+
+  inline __device__  u64x (void) : s0(0), s1(0) { }
+  inline __device__ ~u64x (void) { }
+};
+
+inline __device__ bool operator != (const u32x a, const u32  b) { return ((a.s0 != b)    && (a.s1 != b));    }
+inline __device__ bool operator != (const u32x a, const u32x b) { return ((a.s0 != b.s0) && (a.s1 != b.s1)); }
+
+inline __device__ void operator ^= (u32x &a, const u32  b) { a.s0 ^= b;    a.s1 ^= b;     }
+inline __device__ void operator ^= (u32x &a, const u32x b) { a.s0 ^= b.s0; a.s1 ^= b.s1;  }
+
+inline __device__ void operator |= (u32x &a, const u32  b) { a.s0 |= b;    a.s1 |= b;     }
+inline __device__ void operator |= (u32x &a, const u32x b) { a.s0 |= b.s0; a.s1 |= b.s1;  }
+
+inline __device__ void operator &= (u32x &a, const u32  b) { a.s0 &= b;    a.s1 &= b;     }
+inline __device__ void operator &= (u32x &a, const u32x b) { a.s0 &= b.s0; a.s1 &= b.s1;  }
+
+inline __device__ void operator += (u32x &a, const u32  b) { a.s0 += b;    a.s1 += b;     }
+inline __device__ void operator += (u32x &a, const u32x b) { a.s0 += b.s0; a.s1 += b.s1;  }
+
+inline __device__ void operator -= (u32x &a, const u32  b) { a.s0 -= b;    a.s1 -= b;     }
+inline __device__ void operator -= (u32x &a, const u32x b) { a.s0 -= b.s0; a.s1 -= b.s1;  }
+
+inline __device__ void operator *= (u32x &a, const u32  b) { a.s0 *= b;    a.s1 *= b;     }
+inline __device__ void operator *= (u32x &a, const u32x b) { a.s0 *= b.s0; a.s1 *= b.s1;  }
+
+inline __device__ void operator >>= (u32x &a, const u32  b) { a.s0 >>= b;    a.s1 >>= b;     }
+inline __device__ void operator >>= (u32x &a, const u32x b) { a.s0 >>= b.s0; a.s1 >>= b.s1;  }
+
+inline __device__ void operator <<= (u32x &a, const u32  b) { a.s0 <<= b;    a.s1 <<= b;     }
+inline __device__ void operator <<= (u32x &a, const u32x b) { a.s0 <<= b.s0; a.s1 <<= b.s1;  }
+
+inline __device__ u32x operator << (const u32x a, const u32  b) { return u32x ((a.s0 << b),    (a.s1 << b)   );  }
+inline __device__ u32x operator << (const u32x a, const u32x b) { return u32x ((a.s0 << b.s0), (a.s1 << b.s1));  }
+
+inline __device__ u32x operator >> (const u32x a, const u32  b) { return u32x ((a.s0 >> b),    (a.s1 >> b)   );  }
+inline __device__ u32x operator >> (const u32x a, const u32x b) { return u32x ((a.s0 >> b.s0), (a.s1 >> b.s1));  }
+
+inline __device__ u32x operator ^  (const u32x a, const u32  b) { return u32x ((a.s0 ^  b),    (a.s1 ^  b)   );  }
+inline __device__ u32x operator ^  (const u32x a, const u32x b) { return u32x ((a.s0 ^  b.s0), (a.s1 ^  b.s1));  }
+
+inline __device__ u32x operator |  (const u32x a, const u32  b) { return u32x ((a.s0 |  b),    (a.s1 |  b)   );  }
+inline __device__ u32x operator |  (const u32x a, const u32x b) { return u32x ((a.s0 |  b.s0), (a.s1 |  b.s1));  }
+
+inline __device__ u32x operator &  (const u32x a, const u32  b) { return u32x ((a.s0 &  b),    (a.s1 &  b)   );  }
+inline __device__ u32x operator &  (const u32x a, const u32x b) { return u32x ((a.s0 &  b.s0), (a.s1 &  b.s1));  }
+
+inline __device__ u32x operator +  (const u32x a, const u32  b) { return u32x ((a.s0 +  b),    (a.s1 +  b)   );  }
+inline __device__ u32x operator +  (const u32x a, const u32x b) { return u32x ((a.s0 +  b.s0), (a.s1 +  b.s1));  }
+
+inline __device__ u32x operator -  (const u32x a, const u32  b) { return u32x ((a.s0 -  b),    (a.s1 -  b)   );  }
+inline __device__ u32x operator -  (const u32x a, const u32x b) { return u32x ((a.s0 -  b.s0), (a.s1 -  b.s1));  }
+
+inline __device__ u32x operator *  (const u32x a, const u32  b) { return u32x ((a.s0 *  b),    (a.s1 *  b)   );  }
+inline __device__ u32x operator *  (const u32x a, const u32x b) { return u32x ((a.s0 *  b.s0), (a.s1 *  b.s1));  }
+
+inline __device__ u32x operator %  (const u32x a, const u32  b) { return u32x ((a.s0 %  b),    (a.s1 %  b)   );  }
+inline __device__ u32x operator %  (const u32x a, const u32x b) { return u32x ((a.s0 %  b.s0), (a.s1 %  b.s1));  }
+
+inline __device__ u32x operator ~  (const u32x a) { return u32x (~a.s0, ~a.s1); }
+
+inline __device__ bool operator != (const u64x a, const u64  b) { return ((a.s0 != b)    && (a.s1 != b));    }
+inline __device__ bool operator != (const u64x a, const u64x b) { return ((a.s0 != b.s0) && (a.s1 != b.s1)); }
+
+inline __device__ void operator ^= (u64x &a, const u64  b) { a.s0 ^= b;    a.s1 ^= b;     }
+inline __device__ void operator ^= (u64x &a, const u64x b) { a.s0 ^= b.s0; a.s1 ^= b.s1;  }
+
+inline __device__ void operator |= (u64x &a, const u64  b) { a.s0 |= b;    a.s1 |= b;     }
+inline __device__ void operator |= (u64x &a, const u64x b) { a.s0 |= b.s0; a.s1 |= b.s1;  }
+
+inline __device__ void operator &= (u64x &a, const u64  b) { a.s0 &= b;    a.s1 &= b;     }
+inline __device__ void operator &= (u64x &a, const u64x b) { a.s0 &= b.s0; a.s1 &= b.s1;  }
+
+inline __device__ void operator += (u64x &a, const u64  b) { a.s0 += b;    a.s1 += b;     }
+inline __device__ void operator += (u64x &a, const u64x b) { a.s0 += b.s0; a.s1 += b.s1;  }
+
+inline __device__ void operator -= (u64x &a, const u64  b) { a.s0 -= b;    a.s1 -= b;     }
+inline __device__ void operator -= (u64x &a, const u64x b) { a.s0 -= b.s0; a.s1 -= b.s1;  }
+
+inline __device__ void operator *= (u64x &a, const u64  b) { a.s0 *= b;    a.s1 *= b;     }
+inline __device__ void operator *= (u64x &a, const u64x b) { a.s0 *= b.s0; a.s1 *= b.s1;  }
+
+inline __device__ void operator >>= (u64x &a, const u64  b) { a.s0 >>= b;    a.s1 >>= b;     }
+inline __device__ void operator >>= (u64x &a, const u64x b) { a.s0 >>= b.s0; a.s1 >>= b.s1;  }
+
+inline __device__ void operator <<= (u64x &a, const u64  b) { a.s0 <<= b;    a.s1 <<= b;     }
+inline __device__ void operator <<= (u64x &a, const u64x b) { a.s0 <<= b.s0; a.s1 <<= b.s1;  }
+
+inline __device__ u64x operator << (const u64x a, const u64  b) { return u64x ((a.s0 << b),    (a.s1 << b)   );  }
+inline __device__ u64x operator << (const u64x a, const u64x b) { return u64x ((a.s0 << b.s0), (a.s1 << b.s1));  }
+
+inline __device__ u64x operator >> (const u64x a, const u64  b) { return u64x ((a.s0 >> b),    (a.s1 >> b)   );  }
+inline __device__ u64x operator >> (const u64x a, const u64x b) { return u64x ((a.s0 >> b.s0), (a.s1 >> b.s1));  }
+
+inline __device__ u64x operator ^  (const u64x a, const u64  b) { return u64x ((a.s0 ^  b),    (a.s1 ^  b)   );  }
+inline __device__ u64x operator ^  (const u64x a, const u64x b) { return u64x ((a.s0 ^  b.s0), (a.s1 ^  b.s1));  }
+
+inline __device__ u64x operator |  (const u64x a, const u64  b) { return u64x ((a.s0 |  b),    (a.s1 |  b)   );  }
+inline __device__ u64x operator |  (const u64x a, const u64x b) { return u64x ((a.s0 |  b.s0), (a.s1 |  b.s1));  }
+
+inline __device__ u64x operator &  (const u64x a, const u64  b) { return u64x ((a.s0 &  b),    (a.s1 &  b)   );  }
+inline __device__ u64x operator &  (const u64x a, const u64x b) { return u64x ((a.s0 &  b.s0), (a.s1 &  b.s1));  }
+
+inline __device__ u64x operator +  (const u64x a, const u64  b) { return u64x ((a.s0 +  b),    (a.s1 +  b)   );  }
+inline __device__ u64x operator +  (const u64x a, const u64x b) { return u64x ((a.s0 +  b.s0), (a.s1 +  b.s1));  }
+
+inline __device__ u64x operator -  (const u64x a, const u64  b) { return u64x ((a.s0 -  b),    (a.s1 -  b)   );  }
+inline __device__ u64x operator -  (const u64x a, const u64x b) { return u64x ((a.s0 -  b.s0), (a.s1 -  b.s1));  }
+
+inline __device__ u64x operator *  (const u64x a, const u64  b) { return u64x ((a.s0 *  b),    (a.s1 *  b)   );  }
+inline __device__ u64x operator *  (const u64x a, const u64x b) { return u64x ((a.s0 *  b.s0), (a.s1 *  b.s1));  }
+
+inline __device__ u64x operator %  (const u64x a, const u64  b) { return u64x ((a.s0 %  b),    (a.s1 %  b)   );  }
+inline __device__ u64x operator %  (const u64x a, const u64x b) { return u64x ((a.s0 %  b.s0), (a.s1 %  b.s1));  }
+
+inline __device__ u64x operator ~  (const u64x a) { return u64x (~a.s0, ~a.s1); }
+
+#endif
+
+#if VECT_SIZE == 4
+
+struct u8x
+{
+  u8 s0;
+  u8 s1;
+  u8 s2;
+  u8 s3;
+
+  inline __device__  u8x (const u8 a, const u8 b, const u8 c, const u8 d) : s0(a), s1(b), s2(c), s3(d) { }
+  inline __device__  u8x (const u8 a)                                     : s0(a), s1(a), s2(a), s3(a) { }
+
+  inline __device__  u8x (void) : s0(0), s1(0), s2(0), s3(0) { }
+  inline __device__ ~u8x (void) { }
+};
+
+struct u16x
+{
+  u16 s0;
+  u16 s1;
+  u16 s2;
+  u16 s3;
+
+  inline __device__  u16x (const u16 a, const u16 b, const u16 c, const u16 d) : s0(a), s1(b), s2(c), s3(d) { }
+  inline __device__  u16x (const u16 a)                                        : s0(a), s1(a), s2(a), s3(a) { }
+
+  inline __device__  u16x (void) : s0(0), s1(0), s2(0), s3(0) { }
+  inline __device__ ~u16x (void) { }
+};
+
+struct u32x
+{
+  u32 s0;
+  u32 s1;
+  u32 s2;
+  u32 s3;
+
+  inline __device__  u32x (const u32 a, const u32 b, const u32 c, const u32 d) : s0(a), s1(b), s2(c), s3(d) { }
+  inline __device__  u32x (const u32 a)                                        : s0(a), s1(a), s2(a), s3(a) { }
+
+  inline __device__  u32x (void) : s0(0), s1(0), s2(0), s3(0) { }
+  inline __device__ ~u32x (void) { }
+};
+
+struct u64x
+{
+  u64 s0;
+  u64 s1;
+  u64 s2;
+  u64 s3;
+
+  inline __device__  u64x (const u64 a, const u64 b, const u64 c, const u64 d) : s0(a), s1(b), s2(c), s3(d) { }
+  inline __device__  u64x (const u64 a)                                        : s0(a), s1(a), s2(a), s3(a) { }
+
+  inline __device__  u64x (void) : s0(0), s1(0), s2(0), s3(0) { }
+  inline __device__ ~u64x (void) { }
+};
+
+inline __device__ bool operator != (const u32x a, const u32  b) { return ((a.s0 != b)    && (a.s1 != b)    && (a.s2 != b)    && (a.s3 != b)   ); }
+inline __device__ bool operator != (const u32x a, const u32x b) { return ((a.s0 != b.s0) && (a.s1 != b.s1) && (a.s2 != b.s2) && (a.s3 != b.s3)); }
+
+inline __device__ void operator ^= (u32x &a, const u32  b) { a.s0 ^= b;    a.s1 ^= b;    a.s2 ^= b;    a.s3 ^= b;     }
+inline __device__ void operator ^= (u32x &a, const u32x b) { a.s0 ^= b.s0; a.s1 ^= b.s1; a.s2 ^= b.s2; a.s3 ^= b.s3;  }
+
+inline __device__ void operator |= (u32x &a, const u32  b) { a.s0 |= b;    a.s1 |= b;    a.s2 |= b;    a.s3 |= b;     }
+inline __device__ void operator |= (u32x &a, const u32x b) { a.s0 |= b.s0; a.s1 |= b.s1; a.s2 |= b.s2; a.s3 |= b.s3;  }
+
+inline __device__ void operator &= (u32x &a, const u32  b) { a.s0 &= b;    a.s1 &= b;    a.s2 &= b;    a.s3 &= b;     }
+inline __device__ void operator &= (u32x &a, const u32x b) { a.s0 &= b.s0; a.s1 &= b.s1; a.s2 &= b.s2; a.s3 &= b.s3;  }
+
+inline __device__ void operator += (u32x &a, const u32  b) { a.s0 += b;    a.s1 += b;    a.s2 += b;    a.s3 += b;     }
+inline __device__ void operator += (u32x &a, const u32x b) { a.s0 += b.s0; a.s1 += b.s1; a.s2 += b.s2; a.s3 += b.s3;  }
+
+inline __device__ void operator -= (u32x &a, const u32  b) { a.s0 -= b;    a.s1 -= b;    a.s2 -= b;    a.s3 -= b;     }
+inline __device__ void operator -= (u32x &a, const u32x b) { a.s0 -= b.s0; a.s1 -= b.s1; a.s2 -= b.s2; a.s3 -= b.s3;  }
+
+inline __device__ void operator *= (u32x &a, const u32  b) { a.s0 *= b;    a.s1 *= b;    a.s2 *= b;    a.s3 *= b;     }
+inline __device__ void operator *= (u32x &a, const u32x b) { a.s0 *= b.s0; a.s1 *= b.s1; a.s2 *= b.s2; a.s3 *= b.s3;  }
+
+inline __device__ void operator >>= (u32x &a, const u32  b) { a.s0 >>= b;    a.s1 >>= b;    a.s2 >>= b;    a.s3 >>= b;     }
+inline __device__ void operator >>= (u32x &a, const u32x b) { a.s0 >>= b.s0; a.s1 >>= b.s1; a.s2 >>= b.s2; a.s3 >>= b.s3;  }
+
+inline __device__ void operator <<= (u32x &a, const u32  b) { a.s0 <<= b;    a.s1 <<= b;    a.s2 <<= b;    a.s3 <<= b;     }
+inline __device__ void operator <<= (u32x &a, const u32x b) { a.s0 <<= b.s0; a.s1 <<= b.s1; a.s2 <<= b.s2; a.s3 <<= b.s3;  }
+
+inline __device__ u32x operator << (const u32x a, const u32  b) { return u32x ((a.s0 << b),    (a.s1 << b)   , (a.s2 << b),    (a.s3 << b)   );  }
+inline __device__ u32x operator << (const u32x a, const u32x b) { return u32x ((a.s0 << b.s0), (a.s1 << b.s1), (a.s2 << b.s2), (a.s3 << b.s3));  }
+
+inline __device__ u32x operator >> (const u32x a, const u32  b) { return u32x ((a.s0 >> b),    (a.s1 >> b)   , (a.s2 >> b),    (a.s3 >> b)   );  }
+inline __device__ u32x operator >> (const u32x a, const u32x b) { return u32x ((a.s0 >> b.s0), (a.s1 >> b.s1), (a.s2 >> b.s2), (a.s3 >> b.s3));  }
+
+inline __device__ u32x operator ^  (const u32x a, const u32  b) { return u32x ((a.s0 ^  b),    (a.s1 ^  b)   , (a.s2 ^  b),    (a.s3 ^  b)   );  }
+inline __device__ u32x operator ^  (const u32x a, const u32x b) { return u32x ((a.s0 ^  b.s0), (a.s1 ^  b.s1), (a.s2 ^  b.s2), (a.s3 ^  b.s3));  }
+
+inline __device__ u32x operator |  (const u32x a, const u32  b) { return u32x ((a.s0 |  b),    (a.s1 |  b)   , (a.s2 |  b),    (a.s3 |  b)   );  }
+inline __device__ u32x operator |  (const u32x a, const u32x b) { return u32x ((a.s0 |  b.s0), (a.s1 |  b.s1), (a.s2 |  b.s2), (a.s3 |  b.s3));  }
+
+inline __device__ u32x operator &  (const u32x a, const u32  b) { return u32x ((a.s0 &  b),    (a.s1 &  b)   , (a.s2 &  b),    (a.s3 &  b)   );  }
+inline __device__ u32x operator &  (const u32x a, const u32x b) { return u32x ((a.s0 &  b.s0), (a.s1 &  b.s1), (a.s2 &  b.s2), (a.s3 &  b.s3));  }
+
+inline __device__ u32x operator +  (const u32x a, const u32  b) { return u32x ((a.s0 +  b),    (a.s1 +  b)   , (a.s2 +  b),    (a.s3 +  b)   );  }
+inline __device__ u32x operator +  (const u32x a, const u32x b) { return u32x ((a.s0 +  b.s0), (a.s1 +  b.s1), (a.s2 +  b.s2), (a.s3 +  b.s3));  }
+
+inline __device__ u32x operator -  (const u32x a, const u32  b) { return u32x ((a.s0 -  b),    (a.s1 -  b)   , (a.s2 -  b),    (a.s3 -  b)   );  }
+inline __device__ u32x operator -  (const u32x a, const u32x b) { return u32x ((a.s0 -  b.s0), (a.s1 -  b.s1), (a.s2 -  b.s2), (a.s3 -  b.s3));  }
+
+inline __device__ u32x operator *  (const u32x a, const u32  b) { return u32x ((a.s0 *  b),    (a.s1 *  b)   , (a.s2 *  b),    (a.s3 *  b)   );  }
+inline __device__ u32x operator *  (const u32x a, const u32x b) { return u32x ((a.s0 *  b.s0), (a.s1 *  b.s1), (a.s2 *  b.s2), (a.s3 *  b.s3));  }
+
+inline __device__ u32x operator %  (const u32x a, const u32  b) { return u32x ((a.s0 %  b),    (a.s1 %  b)   , (a.s2 %  b),    (a.s3 %  b)   );  }
+inline __device__ u32x operator %  (const u32x a, const u32x b) { return u32x ((a.s0 %  b.s0), (a.s1 %  b.s1), (a.s2 %  b.s2), (a.s3 %  b.s3));  }
+
+inline __device__ u32x operator ~  (const u32x a) { return u32x (~a.s0, ~a.s1, ~a.s2, ~a.s3); }
+
+inline __device__ bool operator != (const u64x a, const u64  b) { return ((a.s0 != b)    && (a.s1 != b)    && (a.s2 != b)    && (a.s3 != b)   ); }
+inline __device__ bool operator != (const u64x a, const u64x b) { return ((a.s0 != b.s0) && (a.s1 != b.s1) && (a.s2 != b.s2) && (a.s3 != b.s3)); }
+
+inline __device__ void operator ^= (u64x &a, const u64  b) { a.s0 ^= b;    a.s1 ^= b;    a.s2 ^= b;    a.s3 ^= b;     }
+inline __device__ void operator ^= (u64x &a, const u64x b) { a.s0 ^= b.s0; a.s1 ^= b.s1; a.s2 ^= b.s2; a.s3 ^= b.s3;  }
+
+inline __device__ void operator |= (u64x &a, const u64  b) { a.s0 |= b;    a.s1 |= b;    a.s2 |= b;    a.s3 |= b;     }
+inline __device__ void operator |= (u64x &a, const u64x b) { a.s0 |= b.s0; a.s1 |= b.s1; a.s2 |= b.s2; a.s3 |= b.s3;  }
+
+inline __device__ void operator &= (u64x &a, const u64  b) { a.s0 &= b;    a.s1 &= b;    a.s2 &= b;    a.s3 &= b;     }
+inline __device__ void operator &= (u64x &a, const u64x b) { a.s0 &= b.s0; a.s1 &= b.s1; a.s2 &= b.s2; a.s3 &= b.s3;  }
+
+inline __device__ void operator += (u64x &a, const u64  b) { a.s0 += b;    a.s1 += b;    a.s2 += b;    a.s3 += b;     }
+inline __device__ void operator += (u64x &a, const u64x b) { a.s0 += b.s0; a.s1 += b.s1; a.s2 += b.s2; a.s3 += b.s3;  }
+
+inline __device__ void operator -= (u64x &a, const u64  b) { a.s0 -= b;    a.s1 -= b;    a.s2 -= b;    a.s3 -= b;     }
+inline __device__ void operator -= (u64x &a, const u64x b) { a.s0 -= b.s0; a.s1 -= b.s1; a.s2 -= b.s2; a.s3 -= b.s3;  }
+
+inline __device__ void operator *= (u64x &a, const u64  b) { a.s0 *= b;    a.s1 *= b;    a.s2 *= b;    a.s3 *= b;     }
+inline __device__ void operator *= (u64x &a, const u64x b) { a.s0 *= b.s0; a.s1 *= b.s1; a.s2 *= b.s2; a.s3 *= b.s3;  }
+
+inline __device__ void operator >>= (u64x &a, const u64  b) { a.s0 >>= b;    a.s1 >>= b;    a.s2 >>= b;    a.s3 >>= b;     }
+inline __device__ void operator >>= (u64x &a, const u64x b) { a.s0 >>= b.s0; a.s1 >>= b.s1; a.s2 >>= b.s2; a.s3 >>= b.s3;  }
+
+inline __device__ void operator <<= (u64x &a, const u64  b) { a.s0 <<= b;    a.s1 <<= b;    a.s2 <<= b;    a.s3 <<= b;     }
+inline __device__ void operator <<= (u64x &a, const u64x b) { a.s0 <<= b.s0; a.s1 <<= b.s1; a.s2 <<= b.s2; a.s3 <<= b.s3;  }
+
+inline __device__ u64x operator << (const u64x a, const u64  b) { return u64x ((a.s0 << b),    (a.s1 << b)   , (a.s2 << b),    (a.s3 << b)   );  }
+inline __device__ u64x operator << (const u64x a, const u64x b) { return u64x ((a.s0 << b.s0), (a.s1 << b.s1), (a.s2 << b.s2), (a.s3 << b.s3));  }
+
+inline __device__ u64x operator >> (const u64x a, const u64  b) { return u64x ((a.s0 >> b),    (a.s1 >> b)   , (a.s2 >> b),    (a.s3 >> b)   );  }
+inline __device__ u64x operator >> (const u64x a, const u64x b) { return u64x ((a.s0 >> b.s0), (a.s1 >> b.s1), (a.s2 >> b.s2), (a.s3 >> b.s3));  }
+
+inline __device__ u64x operator ^  (const u64x a, const u64  b) { return u64x ((a.s0 ^  b),    (a.s1 ^  b)   , (a.s2 ^  b),    (a.s3 ^  b)   );  }
+inline __device__ u64x operator ^  (const u64x a, const u64x b) { return u64x ((a.s0 ^  b.s0), (a.s1 ^  b.s1), (a.s2 ^  b.s2), (a.s3 ^  b.s3));  }
+
+inline __device__ u64x operator |  (const u64x a, const u64  b) { return u64x ((a.s0 |  b),    (a.s1 |  b)   , (a.s2 |  b),    (a.s3 |  b)   );  }
+inline __device__ u64x operator |  (const u64x a, const u64x b) { return u64x ((a.s0 |  b.s0), (a.s1 |  b.s1), (a.s2 |  b.s2), (a.s3 |  b.s3));  }
+
+inline __device__ u64x operator &  (const u64x a, const u64  b) { return u64x ((a.s0 &  b),    (a.s1 &  b)   , (a.s2 &  b),    (a.s3 &  b)   );  }
+inline __device__ u64x operator &  (const u64x a, const u64x b) { return u64x ((a.s0 &  b.s0), (a.s1 &  b.s1), (a.s2 &  b.s2), (a.s3 &  b.s3));  }
+
+inline __device__ u64x operator +  (const u64x a, const u64  b) { return u64x ((a.s0 +  b),    (a.s1 +  b)   , (a.s2 +  b),    (a.s3 +  b)   );  }
+inline __device__ u64x operator +  (const u64x a, const u64x b) { return u64x ((a.s0 +  b.s0), (a.s1 +  b.s1), (a.s2 +  b.s2), (a.s3 +  b.s3));  }
+
+inline __device__ u64x operator -  (const u64x a, const u64  b) { return u64x ((a.s0 -  b),    (a.s1 -  b)   , (a.s2 -  b),    (a.s3 -  b)   );  }
+inline __device__ u64x operator -  (const u64x a, const u64x b) { return u64x ((a.s0 -  b.s0), (a.s1 -  b.s1), (a.s2 -  b.s2), (a.s3 -  b.s3));  }
+
+inline __device__ u64x operator *  (const u64x a, const u64  b) { return u64x ((a.s0 *  b),    (a.s1 *  b)   , (a.s2 *  b),    (a.s3 *  b)   );  }
+inline __device__ u64x operator *  (const u64x a, const u64x b) { return u64x ((a.s0 *  b.s0), (a.s1 *  b.s1), (a.s2 *  b.s2), (a.s3 *  b.s3));  }
+
+inline __device__ u64x operator %  (const u64x a, const u32  b) { return u64x ((a.s0 %  b),    (a.s1 %  b)   , (a.s2 %  b),    (a.s3 %  b)   );  }
+inline __device__ u64x operator %  (const u64x a, const u64x b) { return u64x ((a.s0 %  b.s0), (a.s1 %  b.s1), (a.s2 %  b.s2), (a.s3 %  b.s3));  }
+
+inline __device__ u64x operator ~  (const u64x a) { return u64x (~a.s0, ~a.s1, ~a.s2, ~a.s3); }
+
+#endif
+
+#if VECT_SIZE == 8
+
+struct u8x
+{
+  u8 s0;
+  u8 s1;
+  u8 s2;
+  u8 s3;
+  u8 s4;
+  u8 s5;
+  u8 s6;
+  u8 s7;
+
+  inline __device__  u8x (const u8 a, const u8 b, const u8 c, const u8 d, const u8 e, const u8 f, const u8 g, const u8 h) : s0(a), s1(b), s2(c), s3(d), s4(e), s5(f), s6(g), s7(h) { }
+  inline __device__  u8x (const u8 a)                                                                                     : s0(a), s1(a), s2(a), s3(a), s4(a), s5(a), s6(a), s7(a) { }
+
+  inline __device__  u8x (void) : s0(0), s1(0), s2(0), s3(0), s4(0), s5(0), s6(0), s7(0) { }
+  inline __device__ ~u8x (void) { }
+};
+
+struct u16x
+{
+  u16 s0;
+  u16 s1;
+  u16 s2;
+  u16 s3;
+  u16 s4;
+  u16 s5;
+  u16 s6;
+  u16 s7;
+
+  inline __device__  u16x (const u16 a, const u16 b, const u16 c, const u16 d, const u16 e, const u16 f, const u16 g, const u16 h) : s0(a), s1(b), s2(c), s3(d), s4(e), s5(f), s6(g), s7(h) { }
+  inline __device__  u16x (const u16 a)                                                                                            : s0(a), s1(a), s2(a), s3(a), s4(a), s5(a), s6(a), s7(a) { }
+
+  inline __device__  u16x (void) : s0(0), s1(0), s2(0), s3(0), s4(0), s5(0), s6(0), s7(0) { }
+  inline __device__ ~u16x (void) { }
+};
+
+struct u32x
+{
+  u32 s0;
+  u32 s1;
+  u32 s2;
+  u32 s3;
+  u32 s4;
+  u32 s5;
+  u32 s6;
+  u32 s7;
+
+  inline __device__  u32x (const u32 a, const u32 b, const u32 c, const u32 d, const u32 e, const u32 f, const u32 g, const u32 h) : s0(a), s1(b), s2(c), s3(d), s4(e), s5(f), s6(g), s7(h) { }
+  inline __device__  u32x (const u32 a)                                                                                            : s0(a), s1(a), s2(a), s3(a), s4(a), s5(a), s6(a), s7(a) { }
+
+  inline __device__  u32x (void) : s0(0), s1(0), s2(0), s3(0), s4(0), s5(0), s6(0), s7(0) { }
+  inline __device__ ~u32x (void) { }
+};
+
+struct u64x
+{
+  u64 s0;
+  u64 s1;
+  u64 s2;
+  u64 s3;
+  u64 s4;
+  u64 s5;
+  u64 s6;
+  u64 s7;
+
+  inline __device__  u64x (const u64 a, const u64 b, const u64 c, const u64 d, const u64 e, const u64 f, const u64 g, const u64 h) : s0(a), s1(b), s2(c), s3(d), s4(e), s5(f), s6(g), s7(h) { }
+  inline __device__  u64x (const u64 a)                                                                                            : s0(a), s1(a), s2(a), s3(a), s4(a), s5(a), s6(a), s7(a) { }
+
+  inline __device__  u64x (void) : s0(0), s1(0), s2(0), s3(0), s4(0), s5(0), s6(0), s7(0) { }
+  inline __device__ ~u64x (void) { }
+};
+
+inline __device__ bool operator != (const u32x a, const u32  b) { return ((a.s0 != b)    && (a.s1 != b)    && (a.s2 != b)    && (a.s3 != b)    && (a.s4 != b)    && (a.s5 != b)    && (a.s6 != b)    && (a.s7 != b)   ); }
+inline __device__ bool operator != (const u32x a, const u32x b) { return ((a.s0 != b.s0) && (a.s1 != b.s1) && (a.s2 != b.s2) && (a.s3 != b.s3) && (a.s4 != b.s4) && (a.s5 != b.s5) && (a.s6 != b.s6) && (a.s7 != b.s7)); }
+
+inline __device__ void operator ^= (u32x &a, const u32  b) { a.s0 ^= b;    a.s1 ^= b;    a.s2 ^= b;    a.s3 ^= b;    a.s4 ^= b;    a.s5 ^= b;    a.s6 ^= b;    a.s7 ^= b;     }
+inline __device__ void operator ^= (u32x &a, const u32x b) { a.s0 ^= b.s0; a.s1 ^= b.s1; a.s2 ^= b.s2; a.s3 ^= b.s3; a.s4 ^= b.s4; a.s5 ^= b.s5; a.s6 ^= b.s6; a.s7 ^= b.s7;  }
+
+inline __device__ void operator |= (u32x &a, const u32  b) { a.s0 |= b;    a.s1 |= b;    a.s2 |= b;    a.s3 |= b;    a.s4 |= b;    a.s5 |= b;    a.s6 |= b;    a.s7 |= b;     }
+inline __device__ void operator |= (u32x &a, const u32x b) { a.s0 |= b.s0; a.s1 |= b.s1; a.s2 |= b.s2; a.s3 |= b.s3; a.s4 |= b.s4; a.s5 |= b.s5; a.s6 |= b.s6; a.s7 |= b.s7;  }
+
+inline __device__ void operator &= (u32x &a, const u32  b) { a.s0 &= b;    a.s1 &= b;    a.s2 &= b;    a.s3 &= b;    a.s4 &= b;    a.s5 &= b;    a.s6 &= b;    a.s7 &= b;     }
+inline __device__ void operator &= (u32x &a, const u32x b) { a.s0 &= b.s0; a.s1 &= b.s1; a.s2 &= b.s2; a.s3 &= b.s3; a.s4 &= b.s4; a.s5 &= b.s5; a.s6 &= b.s6; a.s7 &= b.s7;  }
+
+inline __device__ void operator += (u32x &a, const u32  b) { a.s0 += b;    a.s1 += b;    a.s2 += b;    a.s3 += b;    a.s4 += b;    a.s5 += b;    a.s6 += b;    a.s7 += b;     }
+inline __device__ void operator += (u32x &a, const u32x b) { a.s0 += b.s0; a.s1 += b.s1; a.s2 += b.s2; a.s3 += b.s3; a.s4 += b.s4; a.s5 += b.s5; a.s6 += b.s6; a.s7 += b.s7;  }
+
+inline __device__ void operator -= (u32x &a, const u32  b) { a.s0 = a.s0 - b; a.s1 = a.s1 - b; a.s2 = a.s2 - b; a.s3 = a.s3 - b; a.s4 = a.s4 - b; a.s5 = a.s5 - b; a.s6 = a.s6 - b; a.s7 = a.s7 - b; } // subtract scalar b from every lane
+inline __device__ void operator -= (u32x &a, const u32x b) { a.s0 = a.s0 - b.s0; a.s1 = a.s1 - b.s1; a.s2 = a.s2 - b.s2; a.s3 = a.s3 - b.s3; a.s4 = a.s4 - b.s4; a.s5 = a.s5 - b.s5; a.s6 = a.s6 - b.s6; a.s7 = a.s7 - b.s7; } // lane-wise subtract
+
+inline __device__ void operator *= (u32x &a, const u32  b) { a.s0 = a.s0 * b; a.s1 = a.s1 * b; a.s2 = a.s2 * b; a.s3 = a.s3 * b; a.s4 = a.s4 * b; a.s5 = a.s5 * b; a.s6 = a.s6 * b; a.s7 = a.s7 * b; } // multiply every lane by scalar b
+inline __device__ void operator *= (u32x &a, const u32x b) { a.s0 = a.s0 * b.s0; a.s1 = a.s1 * b.s1; a.s2 = a.s2 * b.s2; a.s3 = a.s3 * b.s3; a.s4 = a.s4 * b.s4; a.s5 = a.s5 * b.s5; a.s6 = a.s6 * b.s6; a.s7 = a.s7 * b.s7; } // lane-wise multiply
+
+inline __device__ void operator >>= (u32x &a, const u32  b) { a.s0 = a.s0 >> b; a.s1 = a.s1 >> b; a.s2 = a.s2 >> b; a.s3 = a.s3 >> b; a.s4 = a.s4 >> b; a.s5 = a.s5 >> b; a.s6 = a.s6 >> b; a.s7 = a.s7 >> b; } // shift every lane right by scalar b
+inline __device__ void operator >>= (u32x &a, const u32x b) { a.s0 = a.s0 >> b.s0; a.s1 = a.s1 >> b.s1; a.s2 = a.s2 >> b.s2; a.s3 = a.s3 >> b.s3; a.s4 = a.s4 >> b.s4; a.s5 = a.s5 >> b.s5; a.s6 = a.s6 >> b.s6; a.s7 = a.s7 >> b.s7; } // each lane shifted right by the matching lane of b
+
+inline __device__ void operator <<= (u32x &a, const u32  b) { a.s0 = a.s0 << b; a.s1 = a.s1 << b; a.s2 = a.s2 << b; a.s3 = a.s3 << b; a.s4 = a.s4 << b; a.s5 = a.s5 << b; a.s6 = a.s6 << b; a.s7 = a.s7 << b; } // shift every lane left by scalar b
+inline __device__ void operator <<= (u32x &a, const u32x b) { a.s0 = a.s0 << b.s0; a.s1 = a.s1 << b.s1; a.s2 = a.s2 << b.s2; a.s3 = a.s3 << b.s3; a.s4 = a.s4 << b.s4; a.s5 = a.s5 << b.s5; a.s6 = a.s6 << b.s6; a.s7 = a.s7 << b.s7; } // each lane shifted left by the matching lane of b
+
+inline __device__ u32x operator << (const u32x a, const u32  b) { u32x r = a; r.s0 <<= b; r.s1 <<= b; r.s2 <<= b; r.s3 <<= b; r.s4 <<= b; r.s5 <<= b; r.s6 <<= b; r.s7 <<= b; return r; } // copy of a with every lane shifted left by scalar b
+inline __device__ u32x operator << (const u32x a, const u32x b) { u32x r = a; r.s0 <<= b.s0; r.s1 <<= b.s1; r.s2 <<= b.s2; r.s3 <<= b.s3; r.s4 <<= b.s4; r.s5 <<= b.s5; r.s6 <<= b.s6; r.s7 <<= b.s7; return r; } // copy of a with each lane shifted left by the matching lane of b
+
+inline __device__ u32x operator >> (const u32x a, const u32  b) { u32x r = a; r.s0 >>= b; r.s1 >>= b; r.s2 >>= b; r.s3 >>= b; r.s4 >>= b; r.s5 >>= b; r.s6 >>= b; r.s7 >>= b; return r; } // copy of a with every lane shifted right by scalar b
+inline __device__ u32x operator >> (const u32x a, const u32x b) { u32x r = a; r.s0 >>= b.s0; r.s1 >>= b.s1; r.s2 >>= b.s2; r.s3 >>= b.s3; r.s4 >>= b.s4; r.s5 >>= b.s5; r.s6 >>= b.s6; r.s7 >>= b.s7; return r; } // copy of a with each lane shifted right by the matching lane of b
+
+inline __device__ u32x operator ^  (const u32x a, const u32  b) { u32x r = a; r.s0 ^= b; r.s1 ^= b; r.s2 ^= b; r.s3 ^= b; r.s4 ^= b; r.s5 ^= b; r.s6 ^= b; r.s7 ^= b; return r; } // copy of a with scalar b XORed into every lane
+inline __device__ u32x operator ^  (const u32x a, const u32x b) { u32x r = a; r.s0 ^= b.s0; r.s1 ^= b.s1; r.s2 ^= b.s2; r.s3 ^= b.s3; r.s4 ^= b.s4; r.s5 ^= b.s5; r.s6 ^= b.s6; r.s7 ^= b.s7; return r; } // lane-wise XOR of a and b
+
+inline __device__ u32x operator |  (const u32x a, const u32  b) { u32x r = a; r.s0 |= b; r.s1 |= b; r.s2 |= b; r.s3 |= b; r.s4 |= b; r.s5 |= b; r.s6 |= b; r.s7 |= b; return r; } // copy of a with scalar b ORed into every lane
+inline __device__ u32x operator |  (const u32x a, const u32x b) { u32x r = a; r.s0 |= b.s0; r.s1 |= b.s1; r.s2 |= b.s2; r.s3 |= b.s3; r.s4 |= b.s4; r.s5 |= b.s5; r.s6 |= b.s6; r.s7 |= b.s7; return r; } // lane-wise OR of a and b
+
+inline __device__ u32x operator &  (const u32x a, const u32  b) { u32x r = a; r.s0 &= b; r.s1 &= b; r.s2 &= b; r.s3 &= b; r.s4 &= b; r.s5 &= b; r.s6 &= b; r.s7 &= b; return r; } // copy of a with every lane ANDed with scalar b
+inline __device__ u32x operator &  (const u32x a, const u32x b) { u32x r = a; r.s0 &= b.s0; r.s1 &= b.s1; r.s2 &= b.s2; r.s3 &= b.s3; r.s4 &= b.s4; r.s5 &= b.s5; r.s6 &= b.s6; r.s7 &= b.s7; return r; } // lane-wise AND of a and b
+
+inline __device__ u32x operator +  (const u32x a, const u32  b) { u32x r = a; r.s0 += b; r.s1 += b; r.s2 += b; r.s3 += b; r.s4 += b; r.s5 += b; r.s6 += b; r.s7 += b; return r; } // copy of a with scalar b added to every lane
+inline __device__ u32x operator +  (const u32x a, const u32x b) { u32x r = a; r.s0 += b.s0; r.s1 += b.s1; r.s2 += b.s2; r.s3 += b.s3; r.s4 += b.s4; r.s5 += b.s5; r.s6 += b.s6; r.s7 += b.s7; return r; } // lane-wise sum of a and b
+
+inline __device__ u32x operator -  (const u32x a, const u32  b) { u32x r = a; r.s0 -= b; r.s1 -= b; r.s2 -= b; r.s3 -= b; r.s4 -= b; r.s5 -= b; r.s6 -= b; r.s7 -= b; return r; } // copy of a with scalar b subtracted from every lane
+inline __device__ u32x operator -  (const u32x a, const u32x b) { u32x r = a; r.s0 -= b.s0; r.s1 -= b.s1; r.s2 -= b.s2; r.s3 -= b.s3; r.s4 -= b.s4; r.s5 -= b.s5; r.s6 -= b.s6; r.s7 -= b.s7; return r; } // lane-wise difference a - b
+
+inline __device__ u32x operator *  (const u32x a, const u32  b) { u32x r = a; r.s0 *= b; r.s1 *= b; r.s2 *= b; r.s3 *= b; r.s4 *= b; r.s5 *= b; r.s6 *= b; r.s7 *= b; return r; } // copy of a with every lane multiplied by scalar b
+inline __device__ u32x operator *  (const u32x a, const u32x b) { u32x r = a; r.s0 *= b.s0; r.s1 *= b.s1; r.s2 *= b.s2; r.s3 *= b.s3; r.s4 *= b.s4; r.s5 *= b.s5; r.s6 *= b.s6; r.s7 *= b.s7; return r; } // lane-wise product of a and b
+
+inline __device__ u32x operator %  (const u32x a, const u32  b) { u32x r = a; r.s0 %= b; r.s1 %= b; r.s2 %= b; r.s3 %= b; r.s4 %= b; r.s5 %= b; r.s6 %= b; r.s7 %= b; return r; } // copy of a with every lane reduced modulo scalar b
+inline __device__ u32x operator %  (const u32x a, const u32x b) { u32x r = a; r.s0 %= b.s0; r.s1 %= b.s1; r.s2 %= b.s2; r.s3 %= b.s3; r.s4 %= b.s4; r.s5 %= b.s5; r.s6 %= b.s6; r.s7 %= b.s7; return r; } // lane-wise remainder a % b
+
+inline __device__ u32x operator ~  (const u32x a) { u32x r = a; r.s0 = ~r.s0; r.s1 = ~r.s1; r.s2 = ~r.s2; r.s3 = ~r.s3; r.s4 = ~r.s4; r.s5 = ~r.s5; r.s6 = ~r.s6; r.s7 = ~r.s7; return r; } // bitwise complement of every lane
+
+inline __device__ bool operator != (const u64x a, const u64  b) { return !((a.s0 == b) || (a.s1 == b) || (a.s2 == b) || (a.s3 == b) || (a.s4 == b) || (a.s5 == b) || (a.s6 == b) || (a.s7 == b)); } // true only when every lane differs from b
+inline __device__ bool operator != (const u64x a, const u64x b) { return !((a.s0 == b.s0) || (a.s1 == b.s1) || (a.s2 == b.s2) || (a.s3 == b.s3) || (a.s4 == b.s4) || (a.s5 == b.s5) || (a.s6 == b.s6) || (a.s7 == b.s7)); } // true only when every lane of a differs from the same lane of b
+
+inline __device__ void operator ^= (u64x &a, const u64  b) { a.s0 = a.s0 ^ b; a.s1 = a.s1 ^ b; a.s2 = a.s2 ^ b; a.s3 = a.s3 ^ b; a.s4 = a.s4 ^ b; a.s5 = a.s5 ^ b; a.s6 = a.s6 ^ b; a.s7 = a.s7 ^ b; } // XOR scalar b into every lane
+inline __device__ void operator ^= (u64x &a, const u64x b) { a.s0 = a.s0 ^ b.s0; a.s1 = a.s1 ^ b.s1; a.s2 = a.s2 ^ b.s2; a.s3 = a.s3 ^ b.s3; a.s4 = a.s4 ^ b.s4; a.s5 = a.s5 ^ b.s5; a.s6 = a.s6 ^ b.s6; a.s7 = a.s7 ^ b.s7; } // lane-wise XOR
+
+inline __device__ void operator |= (u64x &a, const u64  b) { a.s0 = a.s0 | b; a.s1 = a.s1 | b; a.s2 = a.s2 | b; a.s3 = a.s3 | b; a.s4 = a.s4 | b; a.s5 = a.s5 | b; a.s6 = a.s6 | b; a.s7 = a.s7 | b; } // OR scalar b into every lane
+inline __device__ void operator |= (u64x &a, const u64x b) { a.s0 = a.s0 | b.s0; a.s1 = a.s1 | b.s1; a.s2 = a.s2 | b.s2; a.s3 = a.s3 | b.s3; a.s4 = a.s4 | b.s4; a.s5 = a.s5 | b.s5; a.s6 = a.s6 | b.s6; a.s7 = a.s7 | b.s7; } // lane-wise OR
+
+inline __device__ void operator &= (u64x &a, const u64  b) { a.s0 = a.s0 & b; a.s1 = a.s1 & b; a.s2 = a.s2 & b; a.s3 = a.s3 & b; a.s4 = a.s4 & b; a.s5 = a.s5 & b; a.s6 = a.s6 & b; a.s7 = a.s7 & b; } // AND every lane with scalar b
+inline __device__ void operator &= (u64x &a, const u64x b) { a.s0 = a.s0 & b.s0; a.s1 = a.s1 & b.s1; a.s2 = a.s2 & b.s2; a.s3 = a.s3 & b.s3; a.s4 = a.s4 & b.s4; a.s5 = a.s5 & b.s5; a.s6 = a.s6 & b.s6; a.s7 = a.s7 & b.s7; } // lane-wise AND
+
+inline __device__ void operator += (u64x &a, const u64  b) { a.s0 = a.s0 + b; a.s1 = a.s1 + b; a.s2 = a.s2 + b; a.s3 = a.s3 + b; a.s4 = a.s4 + b; a.s5 = a.s5 + b; a.s6 = a.s6 + b; a.s7 = a.s7 + b; } // add scalar b to every lane
+inline __device__ void operator += (u64x &a, const u64x b) { a.s0 = a.s0 + b.s0; a.s1 = a.s1 + b.s1; a.s2 = a.s2 + b.s2; a.s3 = a.s3 + b.s3; a.s4 = a.s4 + b.s4; a.s5 = a.s5 + b.s5; a.s6 = a.s6 + b.s6; a.s7 = a.s7 + b.s7; } // lane-wise add
+
+inline __device__ void operator -= (u64x &a, const u64  b) { a.s0 = a.s0 - b; a.s1 = a.s1 - b; a.s2 = a.s2 - b; a.s3 = a.s3 - b; a.s4 = a.s4 - b; a.s5 = a.s5 - b; a.s6 = a.s6 - b; a.s7 = a.s7 - b; } // subtract scalar b from every lane
+inline __device__ void operator -= (u64x &a, const u64x b) { a.s0 = a.s0 - b.s0; a.s1 = a.s1 - b.s1; a.s2 = a.s2 - b.s2; a.s3 = a.s3 - b.s3; a.s4 = a.s4 - b.s4; a.s5 = a.s5 - b.s5; a.s6 = a.s6 - b.s6; a.s7 = a.s7 - b.s7; } // lane-wise subtract
+
+inline __device__ void operator *= (u64x &a, const u64  b) { a.s0 = a.s0 * b; a.s1 = a.s1 * b; a.s2 = a.s2 * b; a.s3 = a.s3 * b; a.s4 = a.s4 * b; a.s5 = a.s5 * b; a.s6 = a.s6 * b; a.s7 = a.s7 * b; } // multiply every lane by scalar b
+inline __device__ void operator *= (u64x &a, const u64x b) { a.s0 = a.s0 * b.s0; a.s1 = a.s1 * b.s1; a.s2 = a.s2 * b.s2; a.s3 = a.s3 * b.s3; a.s4 = a.s4 * b.s4; a.s5 = a.s5 * b.s5; a.s6 = a.s6 * b.s6; a.s7 = a.s7 * b.s7; } // lane-wise multiply
+
+inline __device__ void operator >>= (u64x &a, const u64  b) { a.s0 = a.s0 >> b; a.s1 = a.s1 >> b; a.s2 = a.s2 >> b; a.s3 = a.s3 >> b; a.s4 = a.s4 >> b; a.s5 = a.s5 >> b; a.s6 = a.s6 >> b; a.s7 = a.s7 >> b; } // shift every lane right by scalar b
+inline __device__ void operator >>= (u64x &a, const u64x b) { a.s0 = a.s0 >> b.s0; a.s1 = a.s1 >> b.s1; a.s2 = a.s2 >> b.s2; a.s3 = a.s3 >> b.s3; a.s4 = a.s4 >> b.s4; a.s5 = a.s5 >> b.s5; a.s6 = a.s6 >> b.s6; a.s7 = a.s7 >> b.s7; } // each lane shifted right by the matching lane of b
+
+inline __device__ void operator <<= (u64x &a, const u64  b) { a.s0 = a.s0 << b; a.s1 = a.s1 << b; a.s2 = a.s2 << b; a.s3 = a.s3 << b; a.s4 = a.s4 << b; a.s5 = a.s5 << b; a.s6 = a.s6 << b; a.s7 = a.s7 << b; } // shift every lane left by scalar b
+inline __device__ void operator <<= (u64x &a, const u64x b) { a.s0 = a.s0 << b.s0; a.s1 = a.s1 << b.s1; a.s2 = a.s2 << b.s2; a.s3 = a.s3 << b.s3; a.s4 = a.s4 << b.s4; a.s5 = a.s5 << b.s5; a.s6 = a.s6 << b.s6; a.s7 = a.s7 << b.s7; } // each lane shifted left by the matching lane of b
+
+inline __device__ u64x operator << (const u64x a, const u64  b) { u64x r = a; r.s0 <<= b; r.s1 <<= b; r.s2 <<= b; r.s3 <<= b; r.s4 <<= b; r.s5 <<= b; r.s6 <<= b; r.s7 <<= b; return r; } // copy of a with every lane shifted left by scalar b
+inline __device__ u64x operator << (const u64x a, const u64x b) { u64x r = a; r.s0 <<= b.s0; r.s1 <<= b.s1; r.s2 <<= b.s2; r.s3 <<= b.s3; r.s4 <<= b.s4; r.s5 <<= b.s5; r.s6 <<= b.s6; r.s7 <<= b.s7; return r; } // copy of a with each lane shifted left by the matching lane of b
+
+inline __device__ u64x operator >> (const u64x a, const u64  b) { u64x r = a; r.s0 >>= b; r.s1 >>= b; r.s2 >>= b; r.s3 >>= b; r.s4 >>= b; r.s5 >>= b; r.s6 >>= b; r.s7 >>= b; return r; } // copy of a with every lane shifted right by scalar b
+inline __device__ u64x operator >> (const u64x a, const u64x b) { u64x r = a; r.s0 >>= b.s0; r.s1 >>= b.s1; r.s2 >>= b.s2; r.s3 >>= b.s3; r.s4 >>= b.s4; r.s5 >>= b.s5; r.s6 >>= b.s6; r.s7 >>= b.s7; return r; } // copy of a with each lane shifted right by the matching lane of b
+
+inline __device__ u64x operator ^  (const u64x a, const u64  b) { u64x r = a; r.s0 ^= b; r.s1 ^= b; r.s2 ^= b; r.s3 ^= b; r.s4 ^= b; r.s5 ^= b; r.s6 ^= b; r.s7 ^= b; return r; } // copy of a with scalar b XORed into every lane
+inline __device__ u64x operator ^  (const u64x a, const u64x b) { u64x r = a; r.s0 ^= b.s0; r.s1 ^= b.s1; r.s2 ^= b.s2; r.s3 ^= b.s3; r.s4 ^= b.s4; r.s5 ^= b.s5; r.s6 ^= b.s6; r.s7 ^= b.s7; return r; } // lane-wise XOR of a and b
+
+inline __device__ u64x operator |  (const u64x a, const u64  b) { u64x r = a; r.s0 |= b; r.s1 |= b; r.s2 |= b; r.s3 |= b; r.s4 |= b; r.s5 |= b; r.s6 |= b; r.s7 |= b; return r; } // copy of a with scalar b ORed into every lane
+inline __device__ u64x operator |  (const u64x a, const u64x b) { u64x r = a; r.s0 |= b.s0; r.s1 |= b.s1; r.s2 |= b.s2; r.s3 |= b.s3; r.s4 |= b.s4; r.s5 |= b.s5; r.s6 |= b.s6; r.s7 |= b.s7; return r; } // lane-wise OR of a and b
+
+inline __device__ u64x operator &  (const u64x a, const u64  b) { u64x r = a; r.s0 &= b; r.s1 &= b; r.s2 &= b; r.s3 &= b; r.s4 &= b; r.s5 &= b; r.s6 &= b; r.s7 &= b; return r; } // copy of a with every lane ANDed with scalar b
+inline __device__ u64x operator &  (const u64x a, const u64x b) { u64x r = a; r.s0 &= b.s0; r.s1 &= b.s1; r.s2 &= b.s2; r.s3 &= b.s3; r.s4 &= b.s4; r.s5 &= b.s5; r.s6 &= b.s6; r.s7 &= b.s7; return r; } // lane-wise AND of a and b
+
+inline __device__ u64x operator +  (const u64x a, const u64  b) { u64x r = a; r.s0 += b; r.s1 += b; r.s2 += b; r.s3 += b; r.s4 += b; r.s5 += b; r.s6 += b; r.s7 += b; return r; } // copy of a with scalar b added to every lane
+inline __device__ u64x operator +  (const u64x a, const u64x b) { u64x r = a; r.s0 += b.s0; r.s1 += b.s1; r.s2 += b.s2; r.s3 += b.s3; r.s4 += b.s4; r.s5 += b.s5; r.s6 += b.s6; r.s7 += b.s7; return r; } // lane-wise sum of a and b
+
+inline __device__ u64x operator -  (const u64x a, const u64  b) { u64x r = a; r.s0 -= b; r.s1 -= b; r.s2 -= b; r.s3 -= b; r.s4 -= b; r.s5 -= b; r.s6 -= b; r.s7 -= b; return r; } // copy of a with scalar b subtracted from every lane
+inline __device__ u64x operator -  (const u64x a, const u64x b) { u64x r = a; r.s0 -= b.s0; r.s1 -= b.s1; r.s2 -= b.s2; r.s3 -= b.s3; r.s4 -= b.s4; r.s5 -= b.s5; r.s6 -= b.s6; r.s7 -= b.s7; return r; } // lane-wise difference a - b
+
+inline __device__ u64x operator *  (const u64x a, const u64  b) { u64x r = a; r.s0 *= b; r.s1 *= b; r.s2 *= b; r.s3 *= b; r.s4 *= b; r.s5 *= b; r.s6 *= b; r.s7 *= b; return r; } // copy of a with every lane multiplied by scalar b
+inline __device__ u64x operator *  (const u64x a, const u64x b) { u64x r = a; r.s0 *= b.s0; r.s1 *= b.s1; r.s2 *= b.s2; r.s3 *= b.s3; r.s4 *= b.s4; r.s5 *= b.s5; r.s6 *= b.s6; r.s7 *= b.s7; return r; } // lane-wise product of a and b
+
+inline __device__ u64x operator %  (const u64x a, const u64  b) { u64x r = a; r.s0 %= b; r.s1 %= b; r.s2 %= b; r.s3 %= b; r.s4 %= b; r.s5 %= b; r.s6 %= b; r.s7 %= b; return r; } // copy of a with every lane reduced modulo scalar b
+inline __device__ u64x operator %  (const u64x a, const u64x b) { u64x r = a; r.s0 %= b.s0; r.s1 %= b.s1; r.s2 %= b.s2; r.s3 %= b.s3; r.s4 %= b.s4; r.s5 %= b.s5; r.s6 %= b.s6; r.s7 %= b.s7; return r; } // lane-wise remainder a % b
+
+inline __device__ u64x operator ~  (const u64x a) { u64x r = a; r.s0 = ~r.s0; r.s1 = ~r.s1; r.s2 = ~r.s2; r.s3 = ~r.s3; r.s4 = ~r.s4; r.s5 = ~r.s5; r.s6 = ~r.s6; r.s7 = ~r.s7; return r; } // bitwise complement of every lane
+
+#endif
+
+#if VECT_SIZE == 16
+
+struct u8x // 16 lanes of u8, named s0..s9 then sa..sf
+{
+  u8 s0;
+  u8 s1;
+  u8 s2;
+  u8 s3;
+  u8 s4;
+  u8 s5;
+  u8 s6;
+  u8 s7;
+  u8 s8;
+  u8 s9;
+  u8 sa;
+  u8 sb;
+  u8 sc;
+  u8 sd;
+  u8 se;
+  u8 sf;
+
+  inline __device__  u8x (const u8 v0, const u8 v1, const u8 v2, const u8 v3, const u8 v4, const u8 v5, const u8 v6, const u8 v7, const u8 v8, const u8 v9, const u8 va, const u8 vb, const u8 vc, const u8 vd, const u8 ve, const u8 vf) : s0(v0), s1(v1), s2(v2), s3(v3), s4(v4), s5(v5), s6(v6), s7(v7), s8(v8), s9(v9), sa(va), sb(vb), sc(vc), sd(vd), se(ve), sf(vf) { } // one argument per lane, in lane order
+  inline __device__  u8x (const u8 v) { s0 = s1 = s2 = s3 = s4 = s5 = s6 = s7 = s8 = s9 = sa = sb = sc = sd = se = sf = v; } // same value in every lane
+
+  inline __device__  u8x (void) { s0 = s1 = s2 = s3 = s4 = s5 = s6 = s7 = s8 = s9 = sa = sb = sc = sd = se = sf = 0; } // every lane zero
+  inline __device__ ~u8x (void) { }
+};
+
+struct u16x // 16 lanes of u16, named s0..s9 then sa..sf
+{
+  u16 s0;
+  u16 s1;
+  u16 s2;
+  u16 s3;
+  u16 s4;
+  u16 s5;
+  u16 s6;
+  u16 s7;
+  u16 s8;
+  u16 s9;
+  u16 sa;
+  u16 sb;
+  u16 sc;
+  u16 sd;
+  u16 se;
+  u16 sf;
+
+  inline __device__  u16x (const u16 v0, const u16 v1, const u16 v2, const u16 v3, const u16 v4, const u16 v5, const u16 v6, const u16 v7, const u16 v8, const u16 v9, const u16 va, const u16 vb, const u16 vc, const u16 vd, const u16 ve, const u16 vf) : s0(v0), s1(v1), s2(v2), s3(v3), s4(v4), s5(v5), s6(v6), s7(v7), s8(v8), s9(v9), sa(va), sb(vb), sc(vc), sd(vd), se(ve), sf(vf) { } // one argument per lane, in lane order
+  inline __device__  u16x (const u16 v) { s0 = s1 = s2 = s3 = s4 = s5 = s6 = s7 = s8 = s9 = sa = sb = sc = sd = se = sf = v; } // same value in every lane
+
+  inline __device__  u16x (void) { s0 = s1 = s2 = s3 = s4 = s5 = s6 = s7 = s8 = s9 = sa = sb = sc = sd = se = sf = 0; } // every lane zero
+  inline __device__ ~u16x (void) { }
+};
+
+struct u32x // 16 lanes of u32, named s0..s9 then sa..sf
+{
+  u32 s0;
+  u32 s1;
+  u32 s2;
+  u32 s3;
+  u32 s4;
+  u32 s5;
+  u32 s6;
+  u32 s7;
+  u32 s8;
+  u32 s9;
+  u32 sa;
+  u32 sb;
+  u32 sc;
+  u32 sd;
+  u32 se;
+  u32 sf;
+
+  inline __device__  u32x (const u32 v0, const u32 v1, const u32 v2, const u32 v3, const u32 v4, const u32 v5, const u32 v6, const u32 v7, const u32 v8, const u32 v9, const u32 va, const u32 vb, const u32 vc, const u32 vd, const u32 ve, const u32 vf) : s0(v0), s1(v1), s2(v2), s3(v3), s4(v4), s5(v5), s6(v6), s7(v7), s8(v8), s9(v9), sa(va), sb(vb), sc(vc), sd(vd), se(ve), sf(vf) { } // one argument per lane, in lane order
+  inline __device__  u32x (const u32 v) { s0 = s1 = s2 = s3 = s4 = s5 = s6 = s7 = s8 = s9 = sa = sb = sc = sd = se = sf = v; } // same value in every lane
+
+  inline __device__  u32x (void) { s0 = s1 = s2 = s3 = s4 = s5 = s6 = s7 = s8 = s9 = sa = sb = sc = sd = se = sf = 0; } // every lane zero
+  inline __device__ ~u32x (void) { }
+};
+
+struct u64x // 16 lanes of u64, named s0..s9 then sa..sf
+{
+  u64 s0;
+  u64 s1;
+  u64 s2;
+  u64 s3;
+  u64 s4;
+  u64 s5;
+  u64 s6;
+  u64 s7;
+  u64 s8;
+  u64 s9;
+  u64 sa;
+  u64 sb;
+  u64 sc;
+  u64 sd;
+  u64 se;
+  u64 sf;
+
+  inline __device__  u64x (const u64 v0, const u64 v1, const u64 v2, const u64 v3, const u64 v4, const u64 v5, const u64 v6, const u64 v7, const u64 v8, const u64 v9, const u64 va, const u64 vb, const u64 vc, const u64 vd, const u64 ve, const u64 vf) : s0(v0), s1(v1), s2(v2), s3(v3), s4(v4), s5(v5), s6(v6), s7(v7), s8(v8), s9(v9), sa(va), sb(vb), sc(vc), sd(vd), se(ve), sf(vf) { } // one argument per lane, in lane order
+  inline __device__  u64x (const u64 v) { s0 = s1 = s2 = s3 = s4 = s5 = s6 = s7 = s8 = s9 = sa = sb = sc = sd = se = sf = v; } // same value in every lane
+
+  inline __device__  u64x (void) { s0 = s1 = s2 = s3 = s4 = s5 = s6 = s7 = s8 = s9 = sa = sb = sc = sd = se = sf = 0; } // every lane zero
+  inline __device__ ~u64x (void) { }
+};
+
+inline __device__ bool operator != (const u32x a, const u32  b) { return !((a.s0 == b) || (a.s1 == b) || (a.s2 == b) || (a.s3 == b) || (a.s4 == b) || (a.s5 == b) || (a.s6 == b) || (a.s7 == b) || (a.s8 == b) || (a.s9 == b) || (a.sa == b) || (a.sb == b) || (a.sc == b) || (a.sd == b) || (a.se == b) || (a.sf == b)); } // true only when every lane differs from b
+inline __device__ bool operator != (const u32x a, const u32x b) { return !((a.s0 == b.s0) || (a.s1 == b.s1) || (a.s2 == b.s2) || (a.s3 == b.s3) || (a.s4 == b.s4) || (a.s5 == b.s5) || (a.s6 == b.s6) || (a.s7 == b.s7) || (a.s8 == b.s8) || (a.s9 == b.s9) || (a.sa == b.sa) || (a.sb == b.sb) || (a.sc == b.sc) || (a.sd == b.sd) || (a.se == b.se) || (a.sf == b.sf)); } // true only when every lane of a differs from the same lane of b
+
+inline __device__ void operator ^= (u32x &a, const u32  b) { a.s0 = a.s0 ^ b; a.s1 = a.s1 ^ b; a.s2 = a.s2 ^ b; a.s3 = a.s3 ^ b; a.s4 = a.s4 ^ b; a.s5 = a.s5 ^ b; a.s6 = a.s6 ^ b; a.s7 = a.s7 ^ b; a.s8 = a.s8 ^ b; a.s9 = a.s9 ^ b; a.sa = a.sa ^ b; a.sb = a.sb ^ b; a.sc = a.sc ^ b; a.sd = a.sd ^ b; a.se = a.se ^ b; a.sf = a.sf ^ b; } // XOR scalar b into every lane
+inline __device__ void operator ^= (u32x &a, const u32x b) { a.s0 = a.s0 ^ b.s0; a.s1 = a.s1 ^ b.s1; a.s2 = a.s2 ^ b.s2; a.s3 = a.s3 ^ b.s3; a.s4 = a.s4 ^ b.s4; a.s5 = a.s5 ^ b.s5; a.s6 = a.s6 ^ b.s6; a.s7 = a.s7 ^ b.s7; a.s8 = a.s8 ^ b.s8; a.s9 = a.s9 ^ b.s9; a.sa = a.sa ^ b.sa; a.sb = a.sb ^ b.sb; a.sc = a.sc ^ b.sc; a.sd = a.sd ^ b.sd; a.se = a.se ^ b.se; a.sf = a.sf ^ b.sf; } // lane-wise XOR
+
+inline __device__ void operator |= (u32x &a, const u32  b) { a.s0 = a.s0 | b; a.s1 = a.s1 | b; a.s2 = a.s2 | b; a.s3 = a.s3 | b; a.s4 = a.s4 | b; a.s5 = a.s5 | b; a.s6 = a.s6 | b; a.s7 = a.s7 | b; a.s8 = a.s8 | b; a.s9 = a.s9 | b; a.sa = a.sa | b; a.sb = a.sb | b; a.sc = a.sc | b; a.sd = a.sd | b; a.se = a.se | b; a.sf = a.sf | b; } // OR scalar b into every lane
+inline __device__ void operator |= (u32x &a, const u32x b) { a.s0 = a.s0 | b.s0; a.s1 = a.s1 | b.s1; a.s2 = a.s2 | b.s2; a.s3 = a.s3 | b.s3; a.s4 = a.s4 | b.s4; a.s5 = a.s5 | b.s5; a.s6 = a.s6 | b.s6; a.s7 = a.s7 | b.s7; a.s8 = a.s8 | b.s8; a.s9 = a.s9 | b.s9; a.sa = a.sa | b.sa; a.sb = a.sb | b.sb; a.sc = a.sc | b.sc; a.sd = a.sd | b.sd; a.se = a.se | b.se; a.sf = a.sf | b.sf; } // lane-wise OR
+
+inline __device__ void operator &= (u32x &a, const u32  b) { a.s0 = a.s0 & b; a.s1 = a.s1 & b; a.s2 = a.s2 & b; a.s3 = a.s3 & b; a.s4 = a.s4 & b; a.s5 = a.s5 & b; a.s6 = a.s6 & b; a.s7 = a.s7 & b; a.s8 = a.s8 & b; a.s9 = a.s9 & b; a.sa = a.sa & b; a.sb = a.sb & b; a.sc = a.sc & b; a.sd = a.sd & b; a.se = a.se & b; a.sf = a.sf & b; } // AND every lane with scalar b
+inline __device__ void operator &= (u32x &a, const u32x b) { a.s0 = a.s0 & b.s0; a.s1 = a.s1 & b.s1; a.s2 = a.s2 & b.s2; a.s3 = a.s3 & b.s3; a.s4 = a.s4 & b.s4; a.s5 = a.s5 & b.s5; a.s6 = a.s6 & b.s6; a.s7 = a.s7 & b.s7; a.s8 = a.s8 & b.s8; a.s9 = a.s9 & b.s9; a.sa = a.sa & b.sa; a.sb = a.sb & b.sb; a.sc = a.sc & b.sc; a.sd = a.sd & b.sd; a.se = a.se & b.se; a.sf = a.sf & b.sf; } // lane-wise AND
+
+inline __device__ void operator += (u32x &a, const u32  b) { a.s0 = a.s0 + b; a.s1 = a.s1 + b; a.s2 = a.s2 + b; a.s3 = a.s3 + b; a.s4 = a.s4 + b; a.s5 = a.s5 + b; a.s6 = a.s6 + b; a.s7 = a.s7 + b; a.s8 = a.s8 + b; a.s9 = a.s9 + b; a.sa = a.sa + b; a.sb = a.sb + b; a.sc = a.sc + b; a.sd = a.sd + b; a.se = a.se + b; a.sf = a.sf + b; } // add scalar b to every lane
+inline __device__ void operator += (u32x &a, const u32x b) { a.s0 = a.s0 + b.s0; a.s1 = a.s1 + b.s1; a.s2 = a.s2 + b.s2; a.s3 = a.s3 + b.s3; a.s4 = a.s4 + b.s4; a.s5 = a.s5 + b.s5; a.s6 = a.s6 + b.s6; a.s7 = a.s7 + b.s7; a.s8 = a.s8 + b.s8; a.s9 = a.s9 + b.s9; a.sa = a.sa + b.sa; a.sb = a.sb + b.sb; a.sc = a.sc + b.sc; a.sd = a.sd + b.sd; a.se = a.se + b.se; a.sf = a.sf + b.sf; } // lane-wise add
+
+inline __device__ void operator -= (u32x &a, const u32  b) { a.s0 = a.s0 - b; a.s1 = a.s1 - b; a.s2 = a.s2 - b; a.s3 = a.s3 - b; a.s4 = a.s4 - b; a.s5 = a.s5 - b; a.s6 = a.s6 - b; a.s7 = a.s7 - b; a.s8 = a.s8 - b; a.s9 = a.s9 - b; a.sa = a.sa - b; a.sb = a.sb - b; a.sc = a.sc - b; a.sd = a.sd - b; a.se = a.se - b; a.sf = a.sf - b; } // subtract scalar b from every lane
+inline __device__ void operator -= (u32x &a, const u32x b) { a.s0 = a.s0 - b.s0; a.s1 = a.s1 - b.s1; a.s2 = a.s2 - b.s2; a.s3 = a.s3 - b.s3; a.s4 = a.s4 - b.s4; a.s5 = a.s5 - b.s5; a.s6 = a.s6 - b.s6; a.s7 = a.s7 - b.s7; a.s8 = a.s8 - b.s8; a.s9 = a.s9 - b.s9; a.sa = a.sa - b.sa; a.sb = a.sb - b.sb; a.sc = a.sc - b.sc; a.sd = a.sd - b.sd; a.se = a.se - b.se; a.sf = a.sf - b.sf; } // lane-wise subtract
+
+inline __device__ void operator *= (u32x &a, const u32  b) { a.s0 = a.s0 * b; a.s1 = a.s1 * b; a.s2 = a.s2 * b; a.s3 = a.s3 * b; a.s4 = a.s4 * b; a.s5 = a.s5 * b; a.s6 = a.s6 * b; a.s7 = a.s7 * b; a.s8 = a.s8 * b; a.s9 = a.s9 * b; a.sa = a.sa * b; a.sb = a.sb * b; a.sc = a.sc * b; a.sd = a.sd * b; a.se = a.se * b; a.sf = a.sf * b; } // multiply every lane by scalar b
+inline __device__ void operator *= (u32x &a, const u32x b) { a.s0 = a.s0 * b.s0; a.s1 = a.s1 * b.s1; a.s2 = a.s2 * b.s2; a.s3 = a.s3 * b.s3; a.s4 = a.s4 * b.s4; a.s5 = a.s5 * b.s5; a.s6 = a.s6 * b.s6; a.s7 = a.s7 * b.s7; a.s8 = a.s8 * b.s8; a.s9 = a.s9 * b.s9; a.sa = a.sa * b.sa; a.sb = a.sb * b.sb; a.sc = a.sc * b.sc; a.sd = a.sd * b.sd; a.se = a.se * b.se; a.sf = a.sf * b.sf; } // lane-wise multiply
+
+inline __device__ void operator >>= (u32x &a, const u32  b) { a.s0 = a.s0 >> b; a.s1 = a.s1 >> b; a.s2 = a.s2 >> b; a.s3 = a.s3 >> b; a.s4 = a.s4 >> b; a.s5 = a.s5 >> b; a.s6 = a.s6 >> b; a.s7 = a.s7 >> b; a.s8 = a.s8 >> b; a.s9 = a.s9 >> b; a.sa = a.sa >> b; a.sb = a.sb >> b; a.sc = a.sc >> b; a.sd = a.sd >> b; a.se = a.se >> b; a.sf = a.sf >> b; } // shift every lane right by scalar b
+inline __device__ void operator >>= (u32x &a, const u32x b) { a.s0 = a.s0 >> b.s0; a.s1 = a.s1 >> b.s1; a.s2 = a.s2 >> b.s2; a.s3 = a.s3 >> b.s3; a.s4 = a.s4 >> b.s4; a.s5 = a.s5 >> b.s5; a.s6 = a.s6 >> b.s6; a.s7 = a.s7 >> b.s7; a.s8 = a.s8 >> b.s8; a.s9 = a.s9 >> b.s9; a.sa = a.sa >> b.sa; a.sb = a.sb >> b.sb; a.sc = a.sc >> b.sc; a.sd = a.sd >> b.sd; a.se = a.se >> b.se; a.sf = a.sf >> b.sf; } // each lane shifted right by the matching lane of b
+
+inline __device__ void operator <<= (u32x &a, const u32  b) { a.s0 = a.s0 << b; a.s1 = a.s1 << b; a.s2 = a.s2 << b; a.s3 = a.s3 << b; a.s4 = a.s4 << b; a.s5 = a.s5 << b; a.s6 = a.s6 << b; a.s7 = a.s7 << b; a.s8 = a.s8 << b; a.s9 = a.s9 << b; a.sa = a.sa << b; a.sb = a.sb << b; a.sc = a.sc << b; a.sd = a.sd << b; a.se = a.se << b; a.sf = a.sf << b; } // shift every lane left by scalar b
+inline __device__ void operator <<= (u32x &a, const u32x b) { a.s0 = a.s0 << b.s0; a.s1 = a.s1 << b.s1; a.s2 = a.s2 << b.s2; a.s3 = a.s3 << b.s3; a.s4 = a.s4 << b.s4; a.s5 = a.s5 << b.s5; a.s6 = a.s6 << b.s6; a.s7 = a.s7 << b.s7; a.s8 = a.s8 << b.s8; a.s9 = a.s9 << b.s9; a.sa = a.sa << b.sa; a.sb = a.sb << b.sb; a.sc = a.sc << b.sc; a.sd = a.sd << b.sd; a.se = a.se << b.se; a.sf = a.sf << b.sf; } // each lane shifted left by the matching lane of b
+
+inline __device__ u32x operator << (const u32x a, const u32  b) { u32x r = a; r.s0 <<= b; r.s1 <<= b; r.s2 <<= b; r.s3 <<= b; r.s4 <<= b; r.s5 <<= b; r.s6 <<= b; r.s7 <<= b; r.s8 <<= b; r.s9 <<= b; r.sa <<= b; r.sb <<= b; r.sc <<= b; r.sd <<= b; r.se <<= b; r.sf <<= b; return r; } // copy of a with every lane shifted left by scalar b
+inline __device__ u32x operator << (const u32x a, const u32x b) { u32x r = a; r.s0 <<= b.s0; r.s1 <<= b.s1; r.s2 <<= b.s2; r.s3 <<= b.s3; r.s4 <<= b.s4; r.s5 <<= b.s5; r.s6 <<= b.s6; r.s7 <<= b.s7; r.s8 <<= b.s8; r.s9 <<= b.s9; r.sa <<= b.sa; r.sb <<= b.sb; r.sc <<= b.sc; r.sd <<= b.sd; r.se <<= b.se; r.sf <<= b.sf; return r; } // copy of a with each lane shifted left by the matching lane of b
+
+inline __device__ u32x operator >> (const u32x a, const u32  b) { u32x r = a; r.s0 >>= b; r.s1 >>= b; r.s2 >>= b; r.s3 >>= b; r.s4 >>= b; r.s5 >>= b; r.s6 >>= b; r.s7 >>= b; r.s8 >>= b; r.s9 >>= b; r.sa >>= b; r.sb >>= b; r.sc >>= b; r.sd >>= b; r.se >>= b; r.sf >>= b; return r; } // copy of a with every lane shifted right by scalar b
+inline __device__ u32x operator >> (const u32x a, const u32x b) { u32x r = a; r.s0 >>= b.s0; r.s1 >>= b.s1; r.s2 >>= b.s2; r.s3 >>= b.s3; r.s4 >>= b.s4; r.s5 >>= b.s5; r.s6 >>= b.s6; r.s7 >>= b.s7; r.s8 >>= b.s8; r.s9 >>= b.s9; r.sa >>= b.sa; r.sb >>= b.sb; r.sc >>= b.sc; r.sd >>= b.sd; r.se >>= b.se; r.sf >>= b.sf; return r; } // copy of a with each lane shifted right by the matching lane of b
+
+inline __device__ u32x operator ^  (const u32x a, const u32  b) { u32x r = a; r.s0 ^= b; r.s1 ^= b; r.s2 ^= b; r.s3 ^= b; r.s4 ^= b; r.s5 ^= b; r.s6 ^= b; r.s7 ^= b; r.s8 ^= b; r.s9 ^= b; r.sa ^= b; r.sb ^= b; r.sc ^= b; r.sd ^= b; r.se ^= b; r.sf ^= b; return r; } // copy of a with scalar b XORed into every lane
+inline __device__ u32x operator ^  (const u32x a, const u32x b) { u32x r = a; r.s0 ^= b.s0; r.s1 ^= b.s1; r.s2 ^= b.s2; r.s3 ^= b.s3; r.s4 ^= b.s4; r.s5 ^= b.s5; r.s6 ^= b.s6; r.s7 ^= b.s7; r.s8 ^= b.s8; r.s9 ^= b.s9; r.sa ^= b.sa; r.sb ^= b.sb; r.sc ^= b.sc; r.sd ^= b.sd; r.se ^= b.se; r.sf ^= b.sf; return r; } // lane-wise XOR of a and b
+
+inline __device__ u32x operator |  (const u32x a, const u32  b) { u32x r = a; r.s0 |= b; r.s1 |= b; r.s2 |= b; r.s3 |= b; r.s4 |= b; r.s5 |= b; r.s6 |= b; r.s7 |= b; r.s8 |= b; r.s9 |= b; r.sa |= b; r.sb |= b; r.sc |= b; r.sd |= b; r.se |= b; r.sf |= b; return r; } // copy of a with scalar b ORed into every lane
+inline __device__ u32x operator |  (const u32x a, const u32x b) { u32x r = a; r.s0 |= b.s0; r.s1 |= b.s1; r.s2 |= b.s2; r.s3 |= b.s3; r.s4 |= b.s4; r.s5 |= b.s5; r.s6 |= b.s6; r.s7 |= b.s7; r.s8 |= b.s8; r.s9 |= b.s9; r.sa |= b.sa; r.sb |= b.sb; r.sc |= b.sc; r.sd |= b.sd; r.se |= b.se; r.sf |= b.sf; return r; } // lane-wise OR of a and b
+
+inline __device__ u32x operator &  (const u32x a, const u32  b) { u32x r = a; r.s0 &= b; r.s1 &= b; r.s2 &= b; r.s3 &= b; r.s4 &= b; r.s5 &= b; r.s6 &= b; r.s7 &= b; r.s8 &= b; r.s9 &= b; r.sa &= b; r.sb &= b; r.sc &= b; r.sd &= b; r.se &= b; r.sf &= b; return r; } // copy of a with every lane ANDed with scalar b
+inline __device__ u32x operator &  (const u32x a, const u32x b) { u32x r = a; r.s0 &= b.s0; r.s1 &= b.s1; r.s2 &= b.s2; r.s3 &= b.s3; r.s4 &= b.s4; r.s5 &= b.s5; r.s6 &= b.s6; r.s7 &= b.s7; r.s8 &= b.s8; r.s9 &= b.s9; r.sa &= b.sa; r.sb &= b.sb; r.sc &= b.sc; r.sd &= b.sd; r.se &= b.se; r.sf &= b.sf; return r; } // lane-wise AND of a and b
+
+inline __device__ u32x operator +  (const u32x a, const u32  b) { u32x r = a; r.s0 += b; r.s1 += b; r.s2 += b; r.s3 += b; r.s4 += b; r.s5 += b; r.s6 += b; r.s7 += b; r.s8 += b; r.s9 += b; r.sa += b; r.sb += b; r.sc += b; r.sd += b; r.se += b; r.sf += b; return r; } // copy of a with scalar b added to every lane
+inline __device__ u32x operator +  (const u32x a, const u32x b) { u32x r = a; r.s0 += b.s0; r.s1 += b.s1; r.s2 += b.s2; r.s3 += b.s3; r.s4 += b.s4; r.s5 += b.s5; r.s6 += b.s6; r.s7 += b.s7; r.s8 += b.s8; r.s9 += b.s9; r.sa += b.sa; r.sb += b.sb; r.sc += b.sc; r.sd += b.sd; r.se += b.se; r.sf += b.sf; return r; } // lane-wise sum of a and b
+
+inline __device__ u32x operator -  (const u32x a, const u32  b) { u32x r = a; r.s0 -= b; r.s1 -= b; r.s2 -= b; r.s3 -= b; r.s4 -= b; r.s5 -= b; r.s6 -= b; r.s7 -= b; r.s8 -= b; r.s9 -= b; r.sa -= b; r.sb -= b; r.sc -= b; r.sd -= b; r.se -= b; r.sf -= b; return r; } // copy of a with scalar b subtracted from every lane
+inline __device__ u32x operator -  (const u32x a, const u32x b) { u32x r = a; r.s0 -= b.s0; r.s1 -= b.s1; r.s2 -= b.s2; r.s3 -= b.s3; r.s4 -= b.s4; r.s5 -= b.s5; r.s6 -= b.s6; r.s7 -= b.s7; r.s8 -= b.s8; r.s9 -= b.s9; r.sa -= b.sa; r.sb -= b.sb; r.sc -= b.sc; r.sd -= b.sd; r.se -= b.se; r.sf -= b.sf; return r; } // lane-wise difference a - b
+
+inline __device__ u32x operator *  (const u32x a, const u32  b) { u32x r = a; r.s0 *= b; r.s1 *= b; r.s2 *= b; r.s3 *= b; r.s4 *= b; r.s5 *= b; r.s6 *= b; r.s7 *= b; r.s8 *= b; r.s9 *= b; r.sa *= b; r.sb *= b; r.sc *= b; r.sd *= b; r.se *= b; r.sf *= b; return r; } // copy of a with every lane multiplied by scalar b
+inline __device__ u32x operator *  (const u32x a, const u32x b) { u32x r = a; r.s0 *= b.s0; r.s1 *= b.s1; r.s2 *= b.s2; r.s3 *= b.s3; r.s4 *= b.s4; r.s5 *= b.s5; r.s6 *= b.s6; r.s7 *= b.s7; r.s8 *= b.s8; r.s9 *= b.s9; r.sa *= b.sa; r.sb *= b.sb; r.sc *= b.sc; r.sd *= b.sd; r.se *= b.se; r.sf *= b.sf; return r; } // lane-wise product of a and b
+
+inline __device__ u32x operator %  (const u32x a, const u32  b) { u32x r = a; r.s0 %= b; r.s1 %= b; r.s2 %= b; r.s3 %= b; r.s4 %= b; r.s5 %= b; r.s6 %= b; r.s7 %= b; r.s8 %= b; r.s9 %= b; r.sa %= b; r.sb %= b; r.sc %= b; r.sd %= b; r.se %= b; r.sf %= b; return r; } // copy of a with every lane reduced modulo scalar b
+inline __device__ u32x operator %  (const u32x a, const u32x b) { u32x r = a; r.s0 %= b.s0; r.s1 %= b.s1; r.s2 %= b.s2; r.s3 %= b.s3; r.s4 %= b.s4; r.s5 %= b.s5; r.s6 %= b.s6; r.s7 %= b.s7; r.s8 %= b.s8; r.s9 %= b.s9; r.sa %= b.sa; r.sb %= b.sb; r.sc %= b.sc; r.sd %= b.sd; r.se %= b.se; r.sf %= b.sf; return r; } // lane-wise remainder a % b
+
+inline __device__ u32x operator ~  (const u32x a) { u32x r = a; r.s0 = ~r.s0; r.s1 = ~r.s1; r.s2 = ~r.s2; r.s3 = ~r.s3; r.s4 = ~r.s4; r.s5 = ~r.s5; r.s6 = ~r.s6; r.s7 = ~r.s7; r.s8 = ~r.s8; r.s9 = ~r.s9; r.sa = ~r.sa; r.sb = ~r.sb; r.sc = ~r.sc; r.sd = ~r.sd; r.se = ~r.se; r.sf = ~r.sf; return r; } // bitwise complement of every lane
+
+inline __device__ bool operator != (const u64x a, const u64  b) { return !((a.s0 == b) || (a.s1 == b) || (a.s2 == b) || (a.s3 == b) || (a.s4 == b) || (a.s5 == b) || (a.s6 == b) || (a.s7 == b) || (a.s8 == b) || (a.s9 == b) || (a.sa == b) || (a.sb == b) || (a.sc == b) || (a.sd == b) || (a.se == b) || (a.sf == b)); } // true only when every lane differs from b
+inline __device__ bool operator != (const u64x a, const u64x b) { return !((a.s0 == b.s0) || (a.s1 == b.s1) || (a.s2 == b.s2) || (a.s3 == b.s3) || (a.s4 == b.s4) || (a.s5 == b.s5) || (a.s6 == b.s6) || (a.s7 == b.s7) || (a.s8 == b.s8) || (a.s9 == b.s9) || (a.sa == b.sa) || (a.sb == b.sb) || (a.sc == b.sc) || (a.sd == b.sd) || (a.se == b.se) || (a.sf == b.sf)); } // true only when every lane of a differs from the same lane of b
+
+inline __device__ void operator ^= (u64x &a, const u64  b) { a.s0 = a.s0 ^ b; a.s1 = a.s1 ^ b; a.s2 = a.s2 ^ b; a.s3 = a.s3 ^ b; a.s4 = a.s4 ^ b; a.s5 = a.s5 ^ b; a.s6 = a.s6 ^ b; a.s7 = a.s7 ^ b; a.s8 = a.s8 ^ b; a.s9 = a.s9 ^ b; a.sa = a.sa ^ b; a.sb = a.sb ^ b; a.sc = a.sc ^ b; a.sd = a.sd ^ b; a.se = a.se ^ b; a.sf = a.sf ^ b; } // XOR scalar b into every lane
+inline __device__ void operator ^= (u64x &a, const u64x b) { a.s0 = a.s0 ^ b.s0; a.s1 = a.s1 ^ b.s1; a.s2 = a.s2 ^ b.s2; a.s3 = a.s3 ^ b.s3; a.s4 = a.s4 ^ b.s4; a.s5 = a.s5 ^ b.s5; a.s6 = a.s6 ^ b.s6; a.s7 = a.s7 ^ b.s7; a.s8 = a.s8 ^ b.s8; a.s9 = a.s9 ^ b.s9; a.sa = a.sa ^ b.sa; a.sb = a.sb ^ b.sb; a.sc = a.sc ^ b.sc; a.sd = a.sd ^ b.sd; a.se = a.se ^ b.se; a.sf = a.sf ^ b.sf; } // lane-wise XOR
+
+inline __device__ void operator |= (u64x &a, const u64  b) { a.s0 = a.s0 | b; a.s1 = a.s1 | b; a.s2 = a.s2 | b; a.s3 = a.s3 | b; a.s4 = a.s4 | b; a.s5 = a.s5 | b; a.s6 = a.s6 | b; a.s7 = a.s7 | b; a.s8 = a.s8 | b; a.s9 = a.s9 | b; a.sa = a.sa | b; a.sb = a.sb | b; a.sc = a.sc | b; a.sd = a.sd | b; a.se = a.se | b; a.sf = a.sf | b; } // OR scalar b into every lane
+inline __device__ void operator |= (u64x &a, const u64x b) { a.s0 = a.s0 | b.s0; a.s1 = a.s1 | b.s1; a.s2 = a.s2 | b.s2; a.s3 = a.s3 | b.s3; a.s4 = a.s4 | b.s4; a.s5 = a.s5 | b.s5; a.s6 = a.s6 | b.s6; a.s7 = a.s7 | b.s7; a.s8 = a.s8 | b.s8; a.s9 = a.s9 | b.s9; a.sa = a.sa | b.sa; a.sb = a.sb | b.sb; a.sc = a.sc | b.sc; a.sd = a.sd | b.sd; a.se = a.se | b.se; a.sf = a.sf | b.sf; } // lane-wise OR
+
+inline __device__ void operator &= (u64x &a, const u64  b) { a.s0 = a.s0 & b; a.s1 = a.s1 & b; a.s2 = a.s2 & b; a.s3 = a.s3 & b; a.s4 = a.s4 & b; a.s5 = a.s5 & b; a.s6 = a.s6 & b; a.s7 = a.s7 & b; a.s8 = a.s8 & b; a.s9 = a.s9 & b; a.sa = a.sa & b; a.sb = a.sb & b; a.sc = a.sc & b; a.sd = a.sd & b; a.se = a.se & b; a.sf = a.sf & b; } // AND every lane with scalar b
+inline __device__ void operator &= (u64x &a, const u64x b) { a.s0 = a.s0 & b.s0; a.s1 = a.s1 & b.s1; a.s2 = a.s2 & b.s2; a.s3 = a.s3 & b.s3; a.s4 = a.s4 & b.s4; a.s5 = a.s5 & b.s5; a.s6 = a.s6 & b.s6; a.s7 = a.s7 & b.s7; a.s8 = a.s8 & b.s8; a.s9 = a.s9 & b.s9; a.sa = a.sa & b.sa; a.sb = a.sb & b.sb; a.sc = a.sc & b.sc; a.sd = a.sd & b.sd; a.se = a.se & b.se; a.sf = a.sf & b.sf; } // lane-wise AND
+
+inline __device__ void operator += (u64x &a, const u64  b) { a.s0 = a.s0 + b; a.s1 = a.s1 + b; a.s2 = a.s2 + b; a.s3 = a.s3 + b; a.s4 = a.s4 + b; a.s5 = a.s5 + b; a.s6 = a.s6 + b; a.s7 = a.s7 + b; a.s8 = a.s8 + b; a.s9 = a.s9 + b; a.sa = a.sa + b; a.sb = a.sb + b; a.sc = a.sc + b; a.sd = a.sd + b; a.se = a.se + b; a.sf = a.sf + b; } // add scalar b to every lane
+inline __device__ void operator += (u64x &a, const u64x b) { a.s0 = a.s0 + b.s0; a.s1 = a.s1 + b.s1; a.s2 = a.s2 + b.s2; a.s3 = a.s3 + b.s3; a.s4 = a.s4 + b.s4; a.s5 = a.s5 + b.s5; a.s6 = a.s6 + b.s6; a.s7 = a.s7 + b.s7; a.s8 = a.s8 + b.s8; a.s9 = a.s9 + b.s9; a.sa = a.sa + b.sa; a.sb = a.sb + b.sb; a.sc = a.sc + b.sc; a.sd = a.sd + b.sd; a.se = a.se + b.se; a.sf = a.sf + b.sf; } // lane-wise add
+
+inline __device__ void operator -= (u64x &a, const u64  b) { a.s0 = a.s0 - b; a.s1 = a.s1 - b; a.s2 = a.s2 - b; a.s3 = a.s3 - b; a.s4 = a.s4 - b; a.s5 = a.s5 - b; a.s6 = a.s6 - b; a.s7 = a.s7 - b; a.s8 = a.s8 - b; a.s9 = a.s9 - b; a.sa = a.sa - b; a.sb = a.sb - b; a.sc = a.sc - b; a.sd = a.sd - b; a.se = a.se - b; a.sf = a.sf - b; } // subtract scalar b from every lane
+inline __device__ void operator -= (u64x &a, const u64x b) { a.s0 = a.s0 - b.s0; a.s1 = a.s1 - b.s1; a.s2 = a.s2 - b.s2; a.s3 = a.s3 - b.s3; a.s4 = a.s4 - b.s4; a.s5 = a.s5 - b.s5; a.s6 = a.s6 - b.s6; a.s7 = a.s7 - b.s7; a.s8 = a.s8 - b.s8; a.s9 = a.s9 - b.s9; a.sa = a.sa - b.sa; a.sb = a.sb - b.sb; a.sc = a.sc - b.sc; a.sd = a.sd - b.sd; a.se = a.se - b.se; a.sf = a.sf - b.sf; } // lane-wise subtract
+
+inline __device__ void operator *= (u64x &a, const u64  b) { a.s0 *= b;    a.s1 *= b;    a.s2 *= b;    a.s3 *= b;    a.s4 *= b;    a.s5 *= b;    a.s6 *= b;    a.s7 *= b;    a.s8 *= b;    a.s9 *= b;    a.sa *= b;    a.sb *= b;    a.sc *= b;    a.sd *= b;    a.se *= b;    a.sf *= b;    }
+inline __device__ void operator *= (u64x &a, const u64x b) { a.s0 *= b.s0; a.s1 *= b.s1; a.s2 *= b.s2; a.s3 *= b.s3; a.s4 *= b.s4; a.s5 *= b.s5; a.s6 *= b.s6; a.s7 *= b.s7; a.s8 *= b.s8; a.s9 *= b.s9; a.sa *= b.sa; a.sb *= b.sb; a.sc *= b.sc; a.sd *= b.sd; a.se *= b.se; a.sf *= b.sf; }
+
+inline __device__ void operator >>= (u64x &a, const u64  b) { a.s0 >>= b;    a.s1 >>= b;    a.s2 >>= b;    a.s3 >>= b;    a.s4 >>= b;    a.s5 >>= b;    a.s6 >>= b;    a.s7 >>= b;    a.s8 >>= b;    a.s9 >>= b;    a.sa >>= b;    a.sb >>= b;    a.sc >>= b;    a.sd >>= b;    a.se >>= b;    a.sf >>= b;    }
+inline __device__ void operator >>= (u64x &a, const u64x b) { a.s0 >>= b.s0; a.s1 >>= b.s1; a.s2 >>= b.s2; a.s3 >>= b.s3; a.s4 >>= b.s4; a.s5 >>= b.s5; a.s6 >>= b.s6; a.s7 >>= b.s7; a.s8 >>= b.s8; a.s9 >>= b.s9; a.sa >>= b.sa; a.sb >>= b.sb; a.sc >>= b.sc; a.sd >>= b.sd; a.se >>= b.se; a.sf >>= b.sf; }
+
+inline __device__ void operator <<= (u64x &a, const u64  b) { a.s0 <<= b;    a.s1 <<= b;    a.s2 <<= b;    a.s3 <<= b;    a.s4 <<= b;    a.s5 <<= b;    a.s6 <<= b;    a.s7 <<= b;    a.s8 <<= b;    a.s9 <<= b;    a.sa <<= b;    a.sb <<= b;    a.sc <<= b;    a.sd <<= b;    a.se <<= b;    a.sf <<= b;    }
+inline __device__ void operator <<= (u64x &a, const u64x b) { a.s0 <<= b.s0; a.s1 <<= b.s1; a.s2 <<= b.s2; a.s3 <<= b.s3; a.s4 <<= b.s4; a.s5 <<= b.s5; a.s6 <<= b.s6; a.s7 <<= b.s7; a.s8 <<= b.s8; a.s9 <<= b.s9; a.sa <<= b.sa; a.sb <<= b.sb; a.sc <<= b.sc; a.sd <<= b.sd; a.se <<= b.se; a.sf <<= b.sf; }
+
+inline __device__ u64x operator << (const u64x a, const u64  b) { return u64x ((a.s0 << b),    (a.s1 << b)   , (a.s2 << b),    (a.s3 << b)   , (a.s4 << b),    (a.s5 << b)   , (a.s6 << b),    (a.s7 << b),    (a.s8 << b),    (a.s9 << b)   , (a.sa << b),    (a.sb << b)   , (a.sc << b),    (a.sd << b)   , (a.se << b),    (a.sf << b)   );  }
+inline __device__ u64x operator << (const u64x a, const u64x b) { return u64x ((a.s0 << b.s0), (a.s1 << b.s1), (a.s2 << b.s2), (a.s3 << b.s3), (a.s4 << b.s4), (a.s5 << b.s5), (a.s6 << b.s6), (a.s7 << b.s7), (a.s8 << b.s8), (a.s9 << b.s9), (a.sa << b.sa), (a.sb << b.sb), (a.sc << b.sc), (a.sd << b.sd), (a.se << b.se), (a.sf << b.sf));  }
+
+inline __device__ u64x operator >> (const u64x a, const u64  b) { return u64x ((a.s0 >> b),    (a.s1 >> b)   , (a.s2 >> b),    (a.s3 >> b)   , (a.s4 >> b),    (a.s5 >> b)   , (a.s6 >> b),    (a.s7 >> b),    (a.s8 >> b),    (a.s9 >> b)   , (a.sa >> b),    (a.sb >> b)   , (a.sc >> b),    (a.sd >> b)   , (a.se >> b),    (a.sf >> b)   );  }
+inline __device__ u64x operator >> (const u64x a, const u64x b) { return u64x ((a.s0 >> b.s0), (a.s1 >> b.s1), (a.s2 >> b.s2), (a.s3 >> b.s3), (a.s4 >> b.s4), (a.s5 >> b.s5), (a.s6 >> b.s6), (a.s7 >> b.s7), (a.s8 >> b.s8), (a.s9 >> b.s9), (a.sa >> b.sa), (a.sb >> b.sb), (a.sc >> b.sc), (a.sd >> b.sd), (a.se >> b.se), (a.sf >> b.sf));  }
+
+inline __device__ u64x operator ^  (const u64x a, const u64  b) { return u64x ((a.s0 ^  b),    (a.s1 ^  b)   , (a.s2 ^  b),    (a.s3 ^  b)   , (a.s4 ^  b),    (a.s5 ^  b)   , (a.s6 ^  b),    (a.s7 ^  b),    (a.s8 ^  b),    (a.s9 ^  b)   , (a.sa ^  b),    (a.sb ^  b)   , (a.sc ^  b),    (a.sd ^  b)   , (a.se ^  b),    (a.sf ^  b)   );  }
+inline __device__ u64x operator ^  (const u64x a, const u64x b) { return u64x ((a.s0 ^  b.s0), (a.s1 ^  b.s1), (a.s2 ^  b.s2), (a.s3 ^  b.s3), (a.s4 ^  b.s4), (a.s5 ^  b.s5), (a.s6 ^  b.s6), (a.s7 ^  b.s7), (a.s8 ^  b.s8), (a.s9 ^  b.s9), (a.sa ^  b.sa), (a.sb ^  b.sb), (a.sc ^  b.sc), (a.sd ^  b.sd), (a.se ^  b.se), (a.sf ^  b.sf));  }
+
+inline __device__ u64x operator |  (const u64x a, const u64  b) { return u64x ((a.s0 |  b),    (a.s1 |  b)   , (a.s2 |  b),    (a.s3 |  b)   , (a.s4 |  b),    (a.s5 |  b)   , (a.s6 |  b),    (a.s7 |  b),    (a.s8 |  b),    (a.s9 |  b)   , (a.sa |  b),    (a.sb |  b)   , (a.sc |  b),    (a.sd |  b)   , (a.se |  b),    (a.sf |  b)   );  }
+inline __device__ u64x operator |  (const u64x a, const u64x b) { return u64x ((a.s0 |  b.s0), (a.s1 |  b.s1), (a.s2 |  b.s2), (a.s3 |  b.s3), (a.s4 |  b.s4), (a.s5 |  b.s5), (a.s6 |  b.s6), (a.s7 |  b.s7), (a.s8 |  b.s8), (a.s9 |  b.s9), (a.sa |  b.sa), (a.sb |  b.sb), (a.sc |  b.sc), (a.sd |  b.sd), (a.se |  b.se), (a.sf |  b.sf));  }
+
+inline __device__ u64x operator &  (const u64x a, const u64  b) { return u64x ((a.s0 &  b),    (a.s1 &  b)   , (a.s2 &  b),    (a.s3 &  b)   , (a.s4 &  b),    (a.s5 &  b)   , (a.s6 &  b),    (a.s7 &  b),    (a.s8 &  b),    (a.s9 &  b)   , (a.sa &  b),    (a.sb &  b)   , (a.sc &  b),    (a.sd &  b)   , (a.se &  b),    (a.sf &  b)   );  }
+inline __device__ u64x operator &  (const u64x a, const u64x b) { return u64x ((a.s0 &  b.s0), (a.s1 &  b.s1), (a.s2 &  b.s2), (a.s3 &  b.s3), (a.s4 &  b.s4), (a.s5 &  b.s5), (a.s6 &  b.s6), (a.s7 &  b.s7), (a.s8 &  b.s8), (a.s9 &  b.s9), (a.sa &  b.sa), (a.sb &  b.sb), (a.sc &  b.sc), (a.sd &  b.sd), (a.se &  b.se), (a.sf &  b.sf));  }
+
+inline __device__ u64x operator +  (const u64x a, const u64  b) { return u64x ((a.s0 +  b),    (a.s1 +  b)   , (a.s2 +  b),    (a.s3 +  b)   , (a.s4 +  b),    (a.s5 +  b)   , (a.s6 +  b),    (a.s7 +  b),    (a.s8 +  b),    (a.s9 +  b)   , (a.sa +  b),    (a.sb +  b)   , (a.sc +  b),    (a.sd +  b)   , (a.se +  b),    (a.sf +  b)   );  }
+inline __device__ u64x operator +  (const u64x a, const u64x b) { return u64x ((a.s0 +  b.s0), (a.s1 +  b.s1), (a.s2 +  b.s2), (a.s3 +  b.s3), (a.s4 +  b.s4), (a.s5 +  b.s5), (a.s6 +  b.s6), (a.s7 +  b.s7), (a.s8 +  b.s8), (a.s9 +  b.s9), (a.sa +  b.sa), (a.sb +  b.sb), (a.sc +  b.sc), (a.sd +  b.sd), (a.se +  b.se), (a.sf +  b.sf));  }
+
+inline __device__ u64x operator -  (const u64x a, const u64  b) { return u64x ((a.s0 -  b),    (a.s1 -  b)   , (a.s2 -  b),    (a.s3 -  b)   , (a.s4 -  b),    (a.s5 -  b)   , (a.s6 -  b),    (a.s7 -  b),    (a.s8 -  b),    (a.s9 -  b)   , (a.sa -  b),    (a.sb -  b)   , (a.sc -  b),    (a.sd -  b)   , (a.se -  b),    (a.sf -  b)   );  }
+inline __device__ u64x operator -  (const u64x a, const u64x b) { return u64x ((a.s0 -  b.s0), (a.s1 -  b.s1), (a.s2 -  b.s2), (a.s3 -  b.s3), (a.s4 -  b.s4), (a.s5 -  b.s5), (a.s6 -  b.s6), (a.s7 -  b.s7), (a.s8 -  b.s8), (a.s9 -  b.s9), (a.sa -  b.sa), (a.sb -  b.sb), (a.sc -  b.sc), (a.sd -  b.sd), (a.se -  b.se), (a.sf -  b.sf));  }
+
+inline __device__ u64x operator *  (const u64x a, const u64  b) { return u64x ((a.s0 *  b),    (a.s1 *  b)   , (a.s2 *  b),    (a.s3 *  b)   , (a.s4 *  b),    (a.s5 *  b)   , (a.s6 *  b),    (a.s7 *  b),    (a.s8 *  b),    (a.s9 *  b)   , (a.sa *  b),    (a.sb *  b)   , (a.sc *  b),    (a.sd *  b)   , (a.se *  b),    (a.sf *  b)   );  }
+inline __device__ u64x operator *  (const u64x a, const u64x b) { return u64x ((a.s0 *  b.s0), (a.s1 *  b.s1), (a.s2 *  b.s2), (a.s3 *  b.s3), (a.s4 *  b.s4), (a.s5 *  b.s5), (a.s6 *  b.s6), (a.s7 *  b.s7), (a.s8 *  b.s8), (a.s9 *  b.s9), (a.sa *  b.sa), (a.sb *  b.sb), (a.sc *  b.sc), (a.sd *  b.sd), (a.se *  b.se), (a.sf *  b.sf));  }
+
+inline __device__ u64x operator %  (const u64x a, const u64  b) { return u64x ((a.s0 %  b),    (a.s1 %  b)   , (a.s2 %  b),    (a.s3 %  b)   , (a.s4 %  b),    (a.s5 %  b)   , (a.s6 %  b),    (a.s7 %  b),    (a.s8 %  b),    (a.s9 %  b)   , (a.sa %  b),    (a.sb %  b)   , (a.sc %  b),    (a.sd %  b)   , (a.se %  b),    (a.sf %  b)   );  }
+inline __device__ u64x operator %  (const u64x a, const u64x b) { return u64x ((a.s0 %  b.s0), (a.s1 %  b.s1), (a.s2 %  b.s2), (a.s3 %  b.s3), (a.s4 %  b.s4), (a.s5 %  b.s5), (a.s6 %  b.s6), (a.s7 %  b.s7), (a.s8 %  b.s8), (a.s9 %  b.s9), (a.sa %  b.sa), (a.sb %  b.sb), (a.sc %  b.sc), (a.sd %  b.sd), (a.se %  b.se), (a.sf %  b.sf));  }
+
+inline __device__ u64x operator ~  (const u64x a) { return u64x (~a.s0, ~a.s1, ~a.s2, ~a.s3, ~a.s4, ~a.s5, ~a.s6, ~a.s7, ~a.s8, ~a.s9, ~a.sa, ~a.sb, ~a.sc, ~a.sd, ~a.se, ~a.sf); }
+
+#endif
+
+typedef struct u8x  u8x;
+typedef struct u16x u16x;
+typedef struct u32x u32x;
+typedef struct u64x u64x;
+
+#define make_u8x  u8x
+#define make_u16x u16x
+#define make_u32x u32x
+#define make_u64x u64x
+*/
+
 #else
 typedef VTYPE(uchar,  VECT_SIZE)  u8x;
 typedef VTYPE(ushort, VECT_SIZE) u16x;
diff --git a/OpenCL/inc_vendor.h b/OpenCL/inc_vendor.h
index 9299caa3f..f4c31f59a 100644
--- a/OpenCL/inc_vendor.h
+++ b/OpenCL/inc_vendor.h
@@ -10,10 +10,16 @@
 #define IS_NATIVE
 #elif defined __CUDACC__
 #define IS_CUDA
+#elif defined __HIPCC__
+#define IS_HIP
 #else
 #define IS_OPENCL
 #endif
 
+#ifdef IS_HIP
+#include <hip/hip_runtime.h>
+#endif
+
 #if defined IS_NATIVE
 #define CONSTANT_VK
 #define CONSTANT_AS
@@ -28,6 +34,13 @@
 #define LOCAL_VK    __shared__
 #define LOCAL_AS
 #define KERNEL_FQ   extern "C" __global__
+#elif defined IS_HIP
+#define CONSTANT_VK __constant__
+#define CONSTANT_AS
+#define GLOBAL_AS
+#define LOCAL_VK    __shared__
+#define LOCAL_AS
+#define KERNEL_FQ   extern "C" __global__
 #elif defined IS_OPENCL
 #define CONSTANT_VK __constant
 #define CONSTANT_AS __constant
@@ -80,6 +93,8 @@
 #elif VENDOR_ID == (1 << 6)
 #define IS_POCL
 #define IS_GENERIC
+#elif VENDOR_ID == (1 << 8)
+#define IS_AMD_USE_HIP
 #else
 #define IS_GENERIC
 #endif
@@ -113,6 +128,8 @@
 
 #if defined IS_AMD && defined IS_GPU
 #define DECLSPEC inline static
+#elif defined IS_HIP
+#define DECLSPEC inline static __device__
 #else
 #define DECLSPEC
 #endif
@@ -138,6 +155,11 @@
 #define USE_ROTATE
 #endif
 
+#ifdef IS_HIP
+#define USE_BITSELECT
+#define USE_ROTATE
+#endif
+
 #ifdef IS_ROCM
 #define USE_BITSELECT
 #define USE_ROTATE
diff --git a/OpenCL/m00500-optimized.cl b/OpenCL/m00500-optimized.cl
index 6ea000442..19f7153ff 100644
--- a/OpenCL/m00500-optimized.cl
+++ b/OpenCL/m00500-optimized.cl
@@ -32,7 +32,7 @@ DECLSPEC void memcat16 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, cons
   u32 tmp3;
   u32 tmp4;
 
-  #if defined IS_AMD || defined IS_GENERIC
+  #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
   u32 in0 = append[0];
   u32 in1 = append[1];
   u32 in2 = append[2];
@@ -139,7 +139,7 @@ DECLSPEC void memcat16_x80 (u32 *block0, u32 *block1, u32 *block2, u32 *block3,
   u32 tmp3;
   u32 tmp4;
 
-  #if defined IS_AMD || defined IS_GENERIC
+  #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
   u32 in0 = append[0];
   u32 in1 = append[1];
   u32 in2 = append[2];
@@ -246,7 +246,7 @@ DECLSPEC void memcat8 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, const
   u32 tmp1;
   u32 tmp2;
 
-  #if defined IS_AMD || defined IS_GENERIC
+  #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
   u32 in0 = append[0];
   u32 in1 = append[1];
 
diff --git a/OpenCL/m01500_a3-pure.cl b/OpenCL/m01500_a3-pure.cl
index f7a8ad45c..c2c4245e1 100644
--- a/OpenCL/m01500_a3-pure.cl
+++ b/OpenCL/m01500_a3-pure.cl
@@ -19,7 +19,7 @@
 #define KXX_DECL
 #endif
 
-#ifdef IS_AMD
+#if (defined IS_AMD || defined IS_HIP)
 #define KXX_DECL
 #endif
 
@@ -896,7 +896,7 @@ DECLSPEC void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const
 #endif
 #endif
 
-#if defined IS_AMD || defined IS_GENERIC
+#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
 
 /*
  * Bitslice DES S-boxes for x86 with MMX/SSE2/AVX and for typical RISC
diff --git a/OpenCL/m01600-optimized.cl b/OpenCL/m01600-optimized.cl
index 6489a04b8..cfaad44cc 100644
--- a/OpenCL/m01600-optimized.cl
+++ b/OpenCL/m01600-optimized.cl
@@ -31,7 +31,7 @@ DECLSPEC void memcat16 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, cons
   u32 tmp3;
   u32 tmp4;
 
-  #if defined IS_AMD || defined IS_GENERIC
+  #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
   u32 in0 = append[0];
   u32 in1 = append[1];
   u32 in2 = append[2];
@@ -138,7 +138,7 @@ DECLSPEC void memcat16_x80 (u32 *block0, u32 *block1, u32 *block2, u32 *block3,
   u32 tmp3;
   u32 tmp4;
 
-  #if defined IS_AMD || defined IS_GENERIC
+  #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
   u32 in0 = append[0];
   u32 in1 = append[1];
   u32 in2 = append[2];
@@ -245,7 +245,7 @@ DECLSPEC void memcat8 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, const
   u32 tmp1;
   u32 tmp2;
 
-  #if defined IS_AMD || defined IS_GENERIC
+  #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
   u32 in0 = append[0];
   u32 in1 = append[1];
 
diff --git a/OpenCL/m01700_a0-optimized.cl b/OpenCL/m01700_a0-optimized.cl
index b78fa21e2..dcc4c217e 100644
--- a/OpenCL/m01700_a0-optimized.cl
+++ b/OpenCL/m01700_a0-optimized.cl
@@ -86,7 +86,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
 
   ROUND_STEP (0);
 
-  #ifdef IS_CUDA
+  #if defined IS_CUDA
   ROUND_EXPAND (); ROUND_STEP (16);
   ROUND_EXPAND (); ROUND_STEP (32);
   ROUND_EXPAND (); ROUND_STEP (48);
diff --git a/OpenCL/m01700_a1-optimized.cl b/OpenCL/m01700_a1-optimized.cl
index 1c0f82f8e..6ca96c818 100644
--- a/OpenCL/m01700_a1-optimized.cl
+++ b/OpenCL/m01700_a1-optimized.cl
@@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
 
   ROUND_STEP (0);
 
-  #ifdef IS_CUDA
+  #if defined IS_CUDA
   ROUND_EXPAND (); ROUND_STEP (16);
   ROUND_EXPAND (); ROUND_STEP (32);
   ROUND_EXPAND (); ROUND_STEP (48);
diff --git a/OpenCL/m01700_a3-optimized.cl b/OpenCL/m01700_a3-optimized.cl
index 62c60582d..6444cfae0 100644
--- a/OpenCL/m01700_a3-optimized.cl
+++ b/OpenCL/m01700_a3-optimized.cl
@@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
 
   ROUND_STEP (0);
 
-  #ifdef IS_CUDA
+  #if defined IS_CUDA
   ROUND_EXPAND (); ROUND_STEP (16);
   ROUND_EXPAND (); ROUND_STEP (32);
   ROUND_EXPAND (); ROUND_STEP (48);
diff --git a/OpenCL/m01710_a0-optimized.cl b/OpenCL/m01710_a0-optimized.cl
index a58b84b93..2c72b062f 100644
--- a/OpenCL/m01710_a0-optimized.cl
+++ b/OpenCL/m01710_a0-optimized.cl
@@ -86,7 +86,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
 
   ROUND_STEP (0);
 
-  #ifdef IS_CUDA
+  #if defined IS_CUDA
   ROUND_EXPAND (); ROUND_STEP (16);
   ROUND_EXPAND (); ROUND_STEP (32);
   ROUND_EXPAND (); ROUND_STEP (48);
diff --git a/OpenCL/m01710_a1-optimized.cl b/OpenCL/m01710_a1-optimized.cl
index c7779551c..45111549b 100644
--- a/OpenCL/m01710_a1-optimized.cl
+++ b/OpenCL/m01710_a1-optimized.cl
@@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
 
   ROUND_STEP (0);
 
-  #ifdef IS_CUDA
+  #if defined IS_CUDA
   ROUND_EXPAND (); ROUND_STEP (16);
   ROUND_EXPAND (); ROUND_STEP (32);
   ROUND_EXPAND (); ROUND_STEP (48);
diff --git a/OpenCL/m01710_a3-optimized.cl b/OpenCL/m01710_a3-optimized.cl
index d8d03e473..76a331f7e 100644
--- a/OpenCL/m01710_a3-optimized.cl
+++ b/OpenCL/m01710_a3-optimized.cl
@@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
 
   ROUND_STEP (0);
 
-  #ifdef IS_CUDA
+  #if defined IS_CUDA
   ROUND_EXPAND (); ROUND_STEP (16);
   ROUND_EXPAND (); ROUND_STEP (32);
   ROUND_EXPAND (); ROUND_STEP (48);
diff --git a/OpenCL/m01720_a0-optimized.cl b/OpenCL/m01720_a0-optimized.cl
index 03c365680..bfb0bb37e 100644
--- a/OpenCL/m01720_a0-optimized.cl
+++ b/OpenCL/m01720_a0-optimized.cl
@@ -86,7 +86,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
 
   ROUND_STEP (0);
 
-  #ifdef IS_CUDA
+  #if defined IS_CUDA
   ROUND_EXPAND (); ROUND_STEP (16);
   ROUND_EXPAND (); ROUND_STEP (32);
   ROUND_EXPAND (); ROUND_STEP (48);
diff --git a/OpenCL/m01720_a1-optimized.cl b/OpenCL/m01720_a1-optimized.cl
index fd93cf79e..ade402112 100644
--- a/OpenCL/m01720_a1-optimized.cl
+++ b/OpenCL/m01720_a1-optimized.cl
@@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
 
   ROUND_STEP (0);
 
-  #ifdef IS_CUDA
+  #if defined IS_CUDA
   ROUND_EXPAND (); ROUND_STEP (16);
   ROUND_EXPAND (); ROUND_STEP (32);
   ROUND_EXPAND (); ROUND_STEP (48);
diff --git a/OpenCL/m01720_a3-optimized.cl b/OpenCL/m01720_a3-optimized.cl
index 58b50041c..5dfc2b9d2 100644
--- a/OpenCL/m01720_a3-optimized.cl
+++ b/OpenCL/m01720_a3-optimized.cl
@@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
 
   ROUND_STEP (0);
 
-  #ifdef IS_CUDA
+  #if defined IS_CUDA
   ROUND_EXPAND (); ROUND_STEP (16);
   ROUND_EXPAND (); ROUND_STEP (32);
   ROUND_EXPAND (); ROUND_STEP (48);
diff --git a/OpenCL/m01730_a0-optimized.cl b/OpenCL/m01730_a0-optimized.cl
index 7e59e14f0..14a965c4b 100644
--- a/OpenCL/m01730_a0-optimized.cl
+++ b/OpenCL/m01730_a0-optimized.cl
@@ -86,7 +86,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
 
   ROUND_STEP (0);
 
-  #ifdef IS_CUDA
+  #if defined IS_CUDA
   ROUND_EXPAND (); ROUND_STEP (16);
   ROUND_EXPAND (); ROUND_STEP (32);
   ROUND_EXPAND (); ROUND_STEP (48);
diff --git a/OpenCL/m01730_a1-optimized.cl b/OpenCL/m01730_a1-optimized.cl
index 99fffaf7c..0fe9c945d 100644
--- a/OpenCL/m01730_a1-optimized.cl
+++ b/OpenCL/m01730_a1-optimized.cl
@@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
 
   ROUND_STEP (0);
 
-  #ifdef IS_CUDA
+  #if defined IS_CUDA
   ROUND_EXPAND (); ROUND_STEP (16);
   ROUND_EXPAND (); ROUND_STEP (32);
   ROUND_EXPAND (); ROUND_STEP (48);
diff --git a/OpenCL/m01730_a3-optimized.cl b/OpenCL/m01730_a3-optimized.cl
index e151b8195..e533b3e95 100644
--- a/OpenCL/m01730_a3-optimized.cl
+++ b/OpenCL/m01730_a3-optimized.cl
@@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
 
   ROUND_STEP (0);
 
-  #ifdef IS_CUDA
+  #if defined IS_CUDA
   ROUND_EXPAND (); ROUND_STEP (16);
   ROUND_EXPAND (); ROUND_STEP (32);
   ROUND_EXPAND (); ROUND_STEP (48);
diff --git a/OpenCL/m01740_a0-optimized.cl b/OpenCL/m01740_a0-optimized.cl
index c18340e6b..b9be203f5 100644
--- a/OpenCL/m01740_a0-optimized.cl
+++ b/OpenCL/m01740_a0-optimized.cl
@@ -86,7 +86,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
 
   ROUND_STEP (0);
 
-  #ifdef IS_CUDA
+  #if defined IS_CUDA
   ROUND_EXPAND (); ROUND_STEP (16);
   ROUND_EXPAND (); ROUND_STEP (32);
   ROUND_EXPAND (); ROUND_STEP (48);
diff --git a/OpenCL/m01740_a1-optimized.cl b/OpenCL/m01740_a1-optimized.cl
index c0fd01233..599cdcbad 100644
--- a/OpenCL/m01740_a1-optimized.cl
+++ b/OpenCL/m01740_a1-optimized.cl
@@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
 
   ROUND_STEP (0);
 
-  #ifdef IS_CUDA
+  #if defined IS_CUDA
   ROUND_EXPAND (); ROUND_STEP (16);
   ROUND_EXPAND (); ROUND_STEP (32);
   ROUND_EXPAND (); ROUND_STEP (48);
diff --git a/OpenCL/m01740_a3-optimized.cl b/OpenCL/m01740_a3-optimized.cl
index bc93dc5c2..d432b4f4f 100644
--- a/OpenCL/m01740_a3-optimized.cl
+++ b/OpenCL/m01740_a3-optimized.cl
@@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
 
   ROUND_STEP (0);
 
-  #ifdef IS_CUDA
+  #if defined IS_CUDA
   ROUND_EXPAND (); ROUND_STEP (16);
   ROUND_EXPAND (); ROUND_STEP (32);
   ROUND_EXPAND (); ROUND_STEP (48);
diff --git a/OpenCL/m02500-pure.cl b/OpenCL/m02500-pure.cl
index cc2d2af72..5bcd35d63 100644
--- a/OpenCL/m02500-pure.cl
+++ b/OpenCL/m02500-pure.cl
@@ -775,7 +775,7 @@ KERNEL_FQ void m02500_aux3 (KERN_ATTR_TMPS_ESALT (wpa_pbkdf2_tmp_t, wpa_eapol_t)
     s_te4[i] = te4[i];
   }
 
-  #ifdef IS_CUDA
+  #if defined IS_CUDA || defined IS_HIP
   __syncthreads();
   #else
   SYNC_THREADS ();
diff --git a/OpenCL/m03000_a3-pure.cl b/OpenCL/m03000_a3-pure.cl
index a44b6f065..65beaabda 100644
--- a/OpenCL/m03000_a3-pure.cl
+++ b/OpenCL/m03000_a3-pure.cl
@@ -19,7 +19,7 @@
 #define KXX_DECL
 #endif
 
-#ifdef IS_AMD
+#if (defined IS_AMD || defined IS_HIP)
 #define KXX_DECL
 #endif
 
@@ -896,7 +896,7 @@ DECLSPEC void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const
 #endif
 #endif
 
-#if defined IS_AMD || defined IS_GENERIC
+#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
 
 /*
  * Bitslice DES S-boxes for x86 with MMX/SSE2/AVX and for typical RISC
diff --git a/OpenCL/m05800-optimized.cl b/OpenCL/m05800-optimized.cl
index b247b05e0..38099159f 100644
--- a/OpenCL/m05800-optimized.cl
+++ b/OpenCL/m05800-optimized.cl
@@ -2119,7 +2119,7 @@ DECLSPEC void append_salt (u32 *w0, u32 *w1, u32 *w2, const u32 *append, const u
   u32 tmp4;
   u32 tmp5;
 
-  #if defined IS_AMD || defined IS_GENERIC
+  #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
   u32 in0 = append[0];
   u32 in1 = append[1];
   u32 in2 = append[2];
diff --git a/OpenCL/m06300-optimized.cl b/OpenCL/m06300-optimized.cl
index c3d320c95..b7c9ddddd 100644
--- a/OpenCL/m06300-optimized.cl
+++ b/OpenCL/m06300-optimized.cl
@@ -28,7 +28,7 @@ DECLSPEC void memcat16 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, cons
   u32 tmp3;
   u32 tmp4;
 
-  #if defined IS_AMD || defined IS_GENERIC
+  #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
   u32 in0 = append[0];
   u32 in1 = append[1];
   u32 in2 = append[2];
@@ -135,7 +135,7 @@ DECLSPEC void memcat16_x80 (u32 *block0, u32 *block1, u32 *block2, u32 *block3,
   u32 tmp3;
   u32 tmp4;
 
-  #if defined IS_AMD || defined IS_GENERIC
+  #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
   u32 in0 = append[0];
   u32 in1 = append[1];
   u32 in2 = append[2];
@@ -242,7 +242,7 @@ DECLSPEC void memcat8 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, const
   u32 tmp1;
   u32 tmp2;
 
-  #if defined IS_AMD || defined IS_GENERIC
+  #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
   u32 in0 = append[0];
   u32 in1 = append[1];
 
diff --git a/OpenCL/m07400-optimized.cl b/OpenCL/m07400-optimized.cl
index df1f3478f..7efa5c94e 100644
--- a/OpenCL/m07400-optimized.cl
+++ b/OpenCL/m07400-optimized.cl
@@ -45,7 +45,7 @@ DECLSPEC u32 memcat16 (u32 *block, const u32 offset, const u32 *append, const u3
   u32 in2 = append[2];
   u32 in3 = append[3];
 
-  #if defined IS_AMD || defined IS_GENERIC
+  #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
   const u32 tmp0 = hc_bytealign_be (  0, in0, offset);
   const u32 tmp1 = hc_bytealign_be (in0, in1, offset);
   const u32 tmp2 = hc_bytealign_be (in1, in2, offset);
@@ -165,7 +165,7 @@ DECLSPEC u32 memcat16c (u32 *block, const u32 offset, const u32 *append, const u
   u32 in2 = append[2];
   u32 in3 = append[3];
 
-  #if defined IS_AMD || defined IS_GENERIC
+  #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
   const u32 tmp0 = hc_bytealign_be (  0, in0, offset);
   const u32 tmp1 = hc_bytealign_be (in0, in1, offset);
   const u32 tmp2 = hc_bytealign_be (in1, in2, offset);
@@ -322,7 +322,7 @@ DECLSPEC u32 memcat16s (u32 *block, const u32 offset, const u32 *append, const u
   u32 in3 = append[3];
   u32 in4 = append[4];
 
-  #if defined IS_AMD || defined IS_GENERIC
+  #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
   const u32 tmp0 = hc_bytealign_be (  0, in0, offset);
   const u32 tmp1 = hc_bytealign_be (in0, in1, offset);
   const u32 tmp2 = hc_bytealign_be (in1, in2, offset);
@@ -456,7 +456,7 @@ DECLSPEC u32 memcat16sc (u32 *block, const u32 offset, const u32 *append, const
   u32 in3 = append[3];
   u32 in4 = append[4];
 
-  #if defined IS_AMD || defined IS_GENERIC
+  #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
   const u32 tmp0 = hc_bytealign_be (  0, in0, offset);
   const u32 tmp1 = hc_bytealign_be (in0, in1, offset);
   const u32 tmp2 = hc_bytealign_be (in1, in2, offset);
@@ -756,7 +756,7 @@ DECLSPEC u32 memcat20 (u32 *block, const u32 offset, const u32 *append, const u3
   u32 in2 = append[2];
   u32 in3 = append[3];
 
-  #if defined IS_AMD || defined IS_GENERIC
+  #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
   const u32 tmp0 = hc_bytealign_be_S (  0, in0, offset);
   const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset);
   const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset);
@@ -915,7 +915,7 @@ DECLSPEC u32 memcat20_x80 (u32 *block, const u32 offset, const u32 *append, cons
   u32 in3 = append[3];
   u32 in4 = 0x80000000;
 
-  #if defined IS_AMD || defined IS_GENERIC
+  #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
   const u32 tmp0 = hc_bytealign_be_S (  0, in0, offset);
   const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset);
   const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset);
@@ -1074,7 +1074,7 @@ DECLSPEC u32 memcat24 (u32 *block, const u32 offset, const u32 *append, const u3
   u32 in3 = append[3];
   u32 in4 = append[4];
 
-  #if defined IS_AMD || defined IS_GENERIC
+  #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
   const u32 tmp0 = hc_bytealign_be_S (  0, in0, offset);
   const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset);
   const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset);
diff --git a/OpenCL/m07700_a0-optimized.cl b/OpenCL/m07700_a0-optimized.cl
index 81a69e83e..165691e6d 100644
--- a/OpenCL/m07700_a0-optimized.cl
+++ b/OpenCL/m07700_a0-optimized.cl
@@ -17,13 +17,8 @@
 #include "inc_hash_md5.cl"
 #endif
 
-#ifdef IS_AMD
-#define GETCHAR(a,p)  (((a)[(p) / 4] >> (((p) & 3) * 8)) & 0xff)
-#define PUTCHAR(a,p,c) ((a)[(p) / 4] = (((a)[(p) / 4] & ~(0xff << (((p) & 3) * 8))) | ((c) << (((p) & 3) * 8))))
-#else
 #define GETCHAR(a,p)   ((u8 *)(a))[(p)]
 #define PUTCHAR(a,p,c) ((u8 *)(a))[(p)] = (u8) (c)
-#endif
 
 #define SETSHIFTEDINT(a,n,v)                  \
 {                                             \
diff --git a/OpenCL/m07700_a1-optimized.cl b/OpenCL/m07700_a1-optimized.cl
index 9431d66a6..77a3bb26a 100644
--- a/OpenCL/m07700_a1-optimized.cl
+++ b/OpenCL/m07700_a1-optimized.cl
@@ -15,13 +15,8 @@
 #include "inc_hash_md5.cl"
 #endif
 
-#ifdef IS_AMD
-#define GETCHAR(a,p)  (((a)[(p) / 4] >> (((p) & 3) * 8)) & 0xff)
-#define PUTCHAR(a,p,c) ((a)[(p) / 4] = (((a)[(p) / 4] & ~(0xff << (((p) & 3) * 8))) | ((c) << (((p) & 3) * 8))))
-#else
 #define GETCHAR(a,p)   ((u8 *)(a))[(p)]
 #define PUTCHAR(a,p,c) ((u8 *)(a))[(p)] = (u8) (c)
-#endif
 
 #define SETSHIFTEDINT(a,n,v)                  \
 {                                             \
diff --git a/OpenCL/m07700_a3-optimized.cl b/OpenCL/m07700_a3-optimized.cl
index 53dbb1fe2..e867cb070 100644
--- a/OpenCL/m07700_a3-optimized.cl
+++ b/OpenCL/m07700_a3-optimized.cl
@@ -15,13 +15,8 @@
 #include "inc_hash_md5.cl"
 #endif
 
-#ifdef IS_AMD
-#define GETCHAR(a,p)  (((a)[(p) / 4] >> (((p) & 3) * 8)) & 0xff)
-#define PUTCHAR(a,p,c) ((a)[(p) / 4] = (((a)[(p) / 4] & ~(0xff << (((p) & 3) * 8))) | ((c) << (((p) & 3) * 8))))
-#else
 #define GETCHAR(a,p)   ((u8 *)(a))[(p)]
 #define PUTCHAR(a,p,c) ((u8 *)(a))[(p)] = (u8) (c)
-#endif
 
 CONSTANT_VK u32a sapb_trans_tbl[256] =
 {
diff --git a/OpenCL/m07701_a0-optimized.cl b/OpenCL/m07701_a0-optimized.cl
index b530785a7..55430df32 100644
--- a/OpenCL/m07701_a0-optimized.cl
+++ b/OpenCL/m07701_a0-optimized.cl
@@ -17,13 +17,8 @@
 #include "inc_hash_md5.cl"
 #endif
 
-#ifdef IS_AMD
-#define GETCHAR(a,p)  (((a)[(p) / 4] >> (((p) & 3) * 8)) & 0xff)
-#define PUTCHAR(a,p,c) ((a)[(p) / 4] = (((a)[(p) / 4] & ~(0xff << (((p) & 3) * 8))) | ((c) << (((p) & 3) * 8))))
-#else
 #define GETCHAR(a,p)   ((u8 *)(a))[(p)]
 #define PUTCHAR(a,p,c) ((u8 *)(a))[(p)] = (u8) (c)
-#endif
 
 #define SETSHIFTEDINT(a,n,v)                  \
 {                                             \
diff --git a/OpenCL/m07701_a1-optimized.cl b/OpenCL/m07701_a1-optimized.cl
index e1ae00412..425bb3a04 100644
--- a/OpenCL/m07701_a1-optimized.cl
+++ b/OpenCL/m07701_a1-optimized.cl
@@ -15,13 +15,8 @@
 #include "inc_hash_md5.cl"
 #endif
 
-#ifdef IS_AMD
-#define GETCHAR(a,p)  (((a)[(p) / 4] >> (((p) & 3) * 8)) & 0xff)
-#define PUTCHAR(a,p,c) ((a)[(p) / 4] = (((a)[(p) / 4] & ~(0xff << (((p) & 3) * 8))) | ((c) << (((p) & 3) * 8))))
-#else
 #define GETCHAR(a,p)   ((u8 *)(a))[(p)]
 #define PUTCHAR(a,p,c) ((u8 *)(a))[(p)] = (u8) (c)
-#endif
 
 #define SETSHIFTEDINT(a,n,v)                  \
 {                                             \
diff --git a/OpenCL/m07701_a3-optimized.cl b/OpenCL/m07701_a3-optimized.cl
index ae6762e90..934c943f0 100644
--- a/OpenCL/m07701_a3-optimized.cl
+++ b/OpenCL/m07701_a3-optimized.cl
@@ -15,13 +15,8 @@
 #include "inc_hash_md5.cl"
 #endif
 
-#ifdef IS_AMD
-#define GETCHAR(a,p)  (((a)[(p) / 4] >> (((p) & 3) * 8)) & 0xff)
-#define PUTCHAR(a,p,c) ((a)[(p) / 4] = (((a)[(p) / 4] & ~(0xff << (((p) & 3) * 8))) | ((c) << (((p) & 3) * 8))))
-#else
 #define GETCHAR(a,p)   ((u8 *)(a))[(p)]
 #define PUTCHAR(a,p,c) ((u8 *)(a))[(p)] = (u8) (c)
-#endif
 
 CONSTANT_VK u32a sapb_trans_tbl[256] =
 {
diff --git a/OpenCL/m08000_a0-optimized.cl b/OpenCL/m08000_a0-optimized.cl
index 495e6fde6..67bdf8d61 100644
--- a/OpenCL/m08000_a0-optimized.cl
+++ b/OpenCL/m08000_a0-optimized.cl
@@ -86,7 +86,7 @@ DECLSPEC void sha256_transform_m (u32x *digest, const u32x *w)
 
   ROUND_STEP (0);
 
-  #ifdef IS_CUDA
+  #if defined IS_CUDA
   ROUND_EXPAND (); ROUND_STEP (16);
   ROUND_EXPAND (); ROUND_STEP (32);
   ROUND_EXPAND (); ROUND_STEP (48);
@@ -143,7 +143,7 @@ DECLSPEC void sha256_transform_z (u32x *digest)
 
   ROUND_STEP_Z (0);
 
-  #ifdef IS_CUDA
+  #if defined IS_CUDA
   ROUND_STEP_Z (16);
   ROUND_STEP_Z (32);
   ROUND_STEP_Z (48);
diff --git a/OpenCL/m08000_a1-optimized.cl b/OpenCL/m08000_a1-optimized.cl
index 93406b2e5..01c925243 100644
--- a/OpenCL/m08000_a1-optimized.cl
+++ b/OpenCL/m08000_a1-optimized.cl
@@ -84,7 +84,7 @@ DECLSPEC void sha256_transform_m (u32x *digest, const u32x *w)
 
   ROUND_STEP (0);
 
-  #ifdef IS_CUDA
+  #if defined IS_CUDA
   ROUND_EXPAND (); ROUND_STEP (16);
   ROUND_EXPAND (); ROUND_STEP (32);
   ROUND_EXPAND (); ROUND_STEP (48);
@@ -141,7 +141,7 @@ DECLSPEC void sha256_transform_z (u32x *digest)
 
   ROUND_STEP_Z (0);
 
-  #ifdef IS_CUDA
+  #if defined IS_CUDA
   ROUND_STEP_Z (16);
   ROUND_STEP_Z (32);
   ROUND_STEP_Z (48);
diff --git a/OpenCL/m08000_a3-optimized.cl b/OpenCL/m08000_a3-optimized.cl
index 2fe762be3..14ee12d12 100644
--- a/OpenCL/m08000_a3-optimized.cl
+++ b/OpenCL/m08000_a3-optimized.cl
@@ -84,7 +84,7 @@ DECLSPEC void sha256_transform_m (u32x *digest, const u32x *w)
 
   ROUND_STEP (0);
 
-  #ifdef IS_CUDA
+  #if defined IS_CUDA
   ROUND_EXPAND (); ROUND_STEP (16);
   ROUND_EXPAND (); ROUND_STEP (32);
   ROUND_EXPAND (); ROUND_STEP (48);
@@ -141,7 +141,7 @@ DECLSPEC void sha256_transform_z (u32x *digest)
 
   ROUND_STEP_Z (0);
 
-  #ifdef IS_CUDA
+  #if defined IS_CUDA
   ROUND_STEP_Z (16);
   ROUND_STEP_Z (32);
   ROUND_STEP_Z (48);
diff --git a/OpenCL/m08900-pure.cl b/OpenCL/m08900-pure.cl
index 162b6b9a4..0f282b509 100644
--- a/OpenCL/m08900-pure.cl
+++ b/OpenCL/m08900-pure.cl
@@ -24,7 +24,7 @@ typedef struct
 
 } scrypt_tmp_t;
 
-#ifdef IS_CUDA
+#if defined IS_CUDA || defined IS_HIP
 
 inline __device__ uint4 operator &  (const uint4  a, const u32   b) { return make_uint4 ((a.x &  b  ), (a.y &  b  ), (a.z &  b  ), (a.w &  b  ));  }
 inline __device__ uint4 operator << (const uint4  a, const u32   b) { return make_uint4 ((a.x << b  ), (a.y << b  ), (a.z << b  ), (a.w << b  ));  }
@@ -57,7 +57,7 @@ DECLSPEC uint4 hc_swap32_4 (uint4 v)
 
 #define ADD_ROTATE_XOR(r,i1,i2,s) (r) ^= rotate ((i1) + (i2), (s));
 
-#ifdef IS_CUDA
+#if defined IS_CUDA || defined IS_HIP
 
 #define SALSA20_2R()                        \
 {                                           \
@@ -303,7 +303,7 @@ KERNEL_FQ void m08900_init (KERN_ATTR_TMPS (scrypt_tmp_t))
     digest[6] = sha256_hmac_ctx2.opad.h[6];
     digest[7] = sha256_hmac_ctx2.opad.h[7];
 
-    #ifdef IS_CUDA
+    #if defined IS_CUDA || defined IS_HIP
     const uint4 tmp0 = make_uint4 (digest[0], digest[1], digest[2], digest[3]);
     const uint4 tmp1 = make_uint4 (digest[4], digest[5], digest[6], digest[7]);
     #else
@@ -331,7 +331,7 @@ KERNEL_FQ void m08900_init (KERN_ATTR_TMPS (scrypt_tmp_t))
 
     uint4 X[4];
 
-    #ifdef IS_CUDA
+    #if defined IS_CUDA || defined IS_HIP
     X[0] = make_uint4 (T[0].x, T[1].y, T[2].z, T[3].w);
     X[1] = make_uint4 (T[1].x, T[2].y, T[3].z, T[0].w);
     X[2] = make_uint4 (T[2].x, T[3].y, T[0].z, T[1].w);
@@ -441,7 +441,7 @@ KERNEL_FQ void m08900_comp (KERN_ATTR_TMPS (scrypt_tmp_t))
 
     uint4 T[4];
 
-    #ifdef IS_CUDA
+    #if defined IS_CUDA || defined IS_HIP
     T[0] = make_uint4 (X[0].x, X[3].y, X[2].z, X[1].w);
     T[1] = make_uint4 (X[1].x, X[0].y, X[3].z, X[2].w);
     T[2] = make_uint4 (X[2].x, X[1].y, X[0].z, X[3].w);
diff --git a/OpenCL/m10700-optimized.cl b/OpenCL/m10700-optimized.cl
index bf311a22e..a9b50a6ac 100644
--- a/OpenCL/m10700-optimized.cl
+++ b/OpenCL/m10700-optimized.cl
@@ -232,7 +232,7 @@ DECLSPEC void make_sc (u32 *sc, const u32 *pw, const u32 pw_len, const u32 *bl,
 
     u32 i;
 
-    #if defined IS_AMD || defined IS_GENERIC
+    #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
     for (i = 0; i < pd; i++) sc[idx++] = pw[i];
                              sc[idx++] = pw[i]
                                        | hc_bytealign_be (bl[0],         0, pm4);
@@ -263,7 +263,7 @@ DECLSPEC void make_pt_with_offset (u32 *pt, const u32 offset, const u32 *sc, con
   const u32 om = m % 4;
   const u32 od = m / 4;
 
-  #if defined IS_AMD || defined IS_GENERIC
+  #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
   pt[0] = hc_bytealign_be (sc[od + 1], sc[od + 0], om);
   pt[1] = hc_bytealign_be (sc[od + 2], sc[od + 1], om);
   pt[2] = hc_bytealign_be (sc[od + 3], sc[od + 2], om);
diff --git a/OpenCL/m10800_a0-optimized.cl b/OpenCL/m10800_a0-optimized.cl
index b4fc83464..26d5ac84f 100644
--- a/OpenCL/m10800_a0-optimized.cl
+++ b/OpenCL/m10800_a0-optimized.cl
@@ -86,7 +86,7 @@ DECLSPEC void sha384_transform_intern (const u32x *w0, const u32x *w1, const u32
 
   ROUND_STEP (0);
 
-  #ifdef IS_CUDA
+  #if defined IS_CUDA
   ROUND_EXPAND (); ROUND_STEP (16);
   ROUND_EXPAND (); ROUND_STEP (32);
   ROUND_EXPAND (); ROUND_STEP (48);
diff --git a/OpenCL/m10800_a1-optimized.cl b/OpenCL/m10800_a1-optimized.cl
index 5ae9309cd..f09627684 100644
--- a/OpenCL/m10800_a1-optimized.cl
+++ b/OpenCL/m10800_a1-optimized.cl
@@ -84,7 +84,7 @@ DECLSPEC void sha384_transform_intern (const u32x *w0, const u32x *w1, const u32
 
   ROUND_STEP (0);
 
-  #ifdef IS_CUDA
+  #if defined IS_CUDA
   ROUND_EXPAND (); ROUND_STEP (16);
   ROUND_EXPAND (); ROUND_STEP (32);
   ROUND_EXPAND (); ROUND_STEP (48);
diff --git a/OpenCL/m10800_a3-optimized.cl b/OpenCL/m10800_a3-optimized.cl
index 922404d67..eb3a08a41 100644
--- a/OpenCL/m10800_a3-optimized.cl
+++ b/OpenCL/m10800_a3-optimized.cl
@@ -84,7 +84,7 @@ DECLSPEC void sha384_transform_intern (const u32x *w0, const u32x *w1, const u32
 
   ROUND_STEP (0);
 
-  #ifdef IS_CUDA
+  #if defined IS_CUDA
   ROUND_EXPAND (); ROUND_STEP (16);
   ROUND_EXPAND (); ROUND_STEP (32);
   ROUND_EXPAND (); ROUND_STEP (48);
diff --git a/OpenCL/m11600-pure.cl b/OpenCL/m11600-pure.cl
index 469365ae6..be42e185b 100644
--- a/OpenCL/m11600-pure.cl
+++ b/OpenCL/m11600-pure.cl
@@ -42,7 +42,7 @@ DECLSPEC void memcat8c_be (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 len, co
   u32 tmp0;
   u32 tmp1;
 
-  #if defined IS_AMD || defined IS_GENERIC
+  #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
   tmp0 = hc_bytealign_be (0, append, func_len);
   tmp1 = hc_bytealign_be (append, 0, func_len);
   #endif
diff --git a/OpenCL/m12500-pure.cl b/OpenCL/m12500-pure.cl
index ce1b2cb53..f8ed47771 100644
--- a/OpenCL/m12500-pure.cl
+++ b/OpenCL/m12500-pure.cl
@@ -37,7 +37,7 @@ DECLSPEC void memcat8c_be (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 len, co
   u32 tmp0;
   u32 tmp1;
 
-  #if defined IS_AMD || defined IS_GENERIC
+  #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
   tmp0 = hc_bytealign_be (0, append, func_len);
   tmp1 = hc_bytealign_be (append, 0, func_len);
   #endif
diff --git a/OpenCL/m13600-pure.cl b/OpenCL/m13600-pure.cl
index 1e0ff6d1c..0202cf0bf 100644
--- a/OpenCL/m13600-pure.cl
+++ b/OpenCL/m13600-pure.cl
@@ -37,7 +37,7 @@ typedef struct zip2
   u32 verify_bytes;
   u32 compress_length;
   u32 data_len;
-  u32 data_buf[2048];
+  u32 data_buf[0x4000000];
   u32 auth_len;
   u32 auth_buf[4];
 
diff --git a/OpenCL/m13800_a0-optimized.cl b/OpenCL/m13800_a0-optimized.cl
index a036044b0..6758ffbd4 100644
--- a/OpenCL/m13800_a0-optimized.cl
+++ b/OpenCL/m13800_a0-optimized.cl
@@ -51,7 +51,7 @@ DECLSPEC void memcat64c_be (u32x *block, const u32 offset, u32x *carry)
   u32x tmp15;
   u32x tmp16;
 
-  #if defined IS_AMD || defined IS_GENERIC
+  #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
   tmp00 = hc_bytealign_be (        0, carry[ 0], offset);
   tmp01 = hc_bytealign_be (carry[ 0], carry[ 1], offset);
   tmp02 = hc_bytealign_be (carry[ 1], carry[ 2], offset);
diff --git a/OpenCL/m13800_a1-optimized.cl b/OpenCL/m13800_a1-optimized.cl
index 3b462466e..85e711b94 100644
--- a/OpenCL/m13800_a1-optimized.cl
+++ b/OpenCL/m13800_a1-optimized.cl
@@ -49,7 +49,7 @@ DECLSPEC void memcat64c_be (u32x *block, const u32 offset, u32x *carry)
   u32x tmp15;
   u32x tmp16;
 
-  #if defined IS_AMD || defined IS_GENERIC
+  #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
   tmp00 = hc_bytealign_be (        0, carry[ 0], offset);
   tmp01 = hc_bytealign_be (carry[ 0], carry[ 1], offset);
   tmp02 = hc_bytealign_be (carry[ 1], carry[ 2], offset);
diff --git a/OpenCL/m13800_a3-optimized.cl b/OpenCL/m13800_a3-optimized.cl
index 9ad06a344..65b759de0 100644
--- a/OpenCL/m13800_a3-optimized.cl
+++ b/OpenCL/m13800_a3-optimized.cl
@@ -48,7 +48,7 @@ DECLSPEC void memcat64c_be (u32x *block, const u32 offset, u32x *carry)
   u32x tmp15;
   u32x tmp16;
 
-  #if defined IS_AMD || defined IS_GENERIC
+  #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
   tmp00 = hc_bytealign_be (        0, carry[ 0], offset);
   tmp01 = hc_bytealign_be (carry[ 0], carry[ 1], offset);
   tmp02 = hc_bytealign_be (carry[ 1], carry[ 2], offset);
diff --git a/OpenCL/m14000_a3-pure.cl b/OpenCL/m14000_a3-pure.cl
index f44e0bbfc..84b419923 100644
--- a/OpenCL/m14000_a3-pure.cl
+++ b/OpenCL/m14000_a3-pure.cl
@@ -19,7 +19,7 @@
 #define KXX_DECL
 #endif
 
-#ifdef IS_AMD
+#if (defined IS_AMD || defined IS_HIP)
 #define KXX_DECL
 #endif
 
@@ -896,7 +896,7 @@ DECLSPEC void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const
 #endif
 #endif
 
-#if defined IS_AMD || defined IS_GENERIC
+#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
 
 /*
  * Bitslice DES S-boxes for x86 with MMX/SSE2/AVX and for typical RISC
diff --git a/OpenCL/m15700-pure.cl b/OpenCL/m15700-pure.cl
index 4e46bb4e4..70b4ed4fd 100644
--- a/OpenCL/m15700-pure.cl
+++ b/OpenCL/m15700-pure.cl
@@ -31,7 +31,7 @@ typedef struct ethereum_scrypt
 
 } ethereum_scrypt_t;
 
-#ifdef IS_CUDA
+#if defined IS_CUDA || defined IS_HIP
 
 inline __device__ uint4 operator &  (const uint4  a, const u32   b) { return make_uint4 ((a.x &  b  ), (a.y &  b  ), (a.z &  b  ), (a.w &  b  ));  }
 inline __device__ uint4 operator << (const uint4  a, const u32   b) { return make_uint4 ((a.x << b  ), (a.y << b  ), (a.z << b  ), (a.w << b  ));  }
@@ -64,7 +64,7 @@ DECLSPEC uint4 hc_swap32_4 (uint4 v)
 
 #define ADD_ROTATE_XOR(r,i1,i2,s) (r) ^= rotate ((i1) + (i2), (s));
 
-#ifdef IS_CUDA
+#if defined IS_CUDA || defined IS_HIP
 
 #define SALSA20_2R()                        \
 {                                           \
@@ -439,7 +439,7 @@ KERNEL_FQ void m15700_init (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, ethereum_scrypt_
     digest[6] = sha256_hmac_ctx2.opad.h[6];
     digest[7] = sha256_hmac_ctx2.opad.h[7];
 
-    #ifdef IS_CUDA
+    #if defined IS_CUDA || defined IS_HIP
     const uint4 tmp0 = make_uint4 (digest[0], digest[1], digest[2], digest[3]);
     const uint4 tmp1 = make_uint4 (digest[4], digest[5], digest[6], digest[7]);
     #else
@@ -467,7 +467,7 @@ KERNEL_FQ void m15700_init (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, ethereum_scrypt_
 
     uint4 X[4];
 
-    #ifdef IS_CUDA
+    #if defined IS_CUDA || defined IS_HIP
     X[0] = make_uint4 (T[0].x, T[1].y, T[2].z, T[3].w);
     X[1] = make_uint4 (T[1].x, T[2].y, T[3].z, T[0].w);
     X[2] = make_uint4 (T[2].x, T[3].y, T[0].z, T[1].w);
@@ -577,7 +577,7 @@ KERNEL_FQ void m15700_comp (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, ethereum_scrypt_
 
     uint4 T[4];
 
-    #ifdef IS_CUDA
+    #if defined IS_CUDA || defined IS_HIP
     T[0] = make_uint4 (X[0].x, X[3].y, X[2].z, X[1].w);
     T[1] = make_uint4 (X[1].x, X[0].y, X[3].z, X[2].w);
     T[2] = make_uint4 (X[2].x, X[1].y, X[0].z, X[3].w);
diff --git a/OpenCL/m21000_a0-optimized.cl b/OpenCL/m21000_a0-optimized.cl
index 137a633e2..d0f88c06a 100644
--- a/OpenCL/m21000_a0-optimized.cl
+++ b/OpenCL/m21000_a0-optimized.cl
@@ -89,7 +89,7 @@ DECLSPEC void sha512_transform_opt (const u32x *w0, const u32x *w1, const u32x *
 
   ROUND_STEP (0);
 
-  #ifdef IS_CUDA
+  #if defined IS_CUDA
   ROUND_EXPAND (); ROUND_STEP (16);
   ROUND_EXPAND (); ROUND_STEP (32);
   ROUND_EXPAND (); ROUND_STEP (48);
diff --git a/OpenCL/m21000_a1-optimized.cl b/OpenCL/m21000_a1-optimized.cl
index 67ba5514e..f9110176d 100644
--- a/OpenCL/m21000_a1-optimized.cl
+++ b/OpenCL/m21000_a1-optimized.cl
@@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_full (const u32x *w0, const u32x *w1, const u32x
 
   ROUND_STEP (0);
 
-  #ifdef IS_CUDA
+  #if defined IS_CUDA
   ROUND_EXPAND (); ROUND_STEP (16);
   ROUND_EXPAND (); ROUND_STEP (32);
   ROUND_EXPAND (); ROUND_STEP (48);
@@ -182,7 +182,7 @@ DECLSPEC void sha512_transform_opt (const u32x *w0, const u32x *w1, const u32x *
 
   ROUND_STEP (0);
 
-  #ifdef IS_CUDA
+  #if defined IS_CUDA
   ROUND_EXPAND (); ROUND_STEP (16);
   ROUND_EXPAND (); ROUND_STEP (32);
   ROUND_EXPAND (); ROUND_STEP (48);
diff --git a/OpenCL/m21000_a3-optimized.cl b/OpenCL/m21000_a3-optimized.cl
index a0c23197e..cdb3f0038 100644
--- a/OpenCL/m21000_a3-optimized.cl
+++ b/OpenCL/m21000_a3-optimized.cl
@@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_full (const u32x *w0, const u32x *w1, const u32x
 
   ROUND_STEP (0);
 
-  #ifdef IS_CUDA
+  #if defined IS_CUDA
   ROUND_EXPAND (); ROUND_STEP (16);
   ROUND_EXPAND (); ROUND_STEP (32);
   ROUND_EXPAND (); ROUND_STEP (48);
@@ -182,7 +182,7 @@ DECLSPEC void sha512_transform_opt (const u32x *w0, const u32x *w1, const u32x *
 
   ROUND_STEP (0);
 
-  #ifdef IS_CUDA
+  #if defined IS_CUDA
   ROUND_EXPAND (); ROUND_STEP (16);
   ROUND_EXPAND (); ROUND_STEP (32);
   ROUND_EXPAND (); ROUND_STEP (48);
diff --git a/OpenCL/m22000-pure.cl b/OpenCL/m22000-pure.cl
index 93774795e..cfe645bc7 100644
--- a/OpenCL/m22000-pure.cl
+++ b/OpenCL/m22000-pure.cl
@@ -797,7 +797,7 @@ KERNEL_FQ void m22000_aux3 (KERN_ATTR_TMPS_ESALT (wpa_pbkdf2_tmp_t, wpa_t))
     s_te4[i] = te4[i];
   }
 
-  #ifdef IS_CUDA
+  #if defined IS_CUDA || defined IS_HIP
   __syncthreads();
   #else
   SYNC_THREADS ();
diff --git a/OpenCL/m22001-pure.cl b/OpenCL/m22001-pure.cl
index 95e0e0395..95431e08f 100644
--- a/OpenCL/m22001-pure.cl
+++ b/OpenCL/m22001-pure.cl
@@ -610,7 +610,7 @@ KERNEL_FQ void m22001_aux3 (KERN_ATTR_TMPS_ESALT (wpa_pmk_tmp_t, wpa_t))
     s_te4[i] = te4[i];
   }
 
-  #ifdef IS_CUDA
+  #if defined IS_CUDA || defined IS_HIP
   __syncthreads();
   #else
   SYNC_THREADS ();
diff --git a/OpenCL/m22200_a0-optimized.cl b/OpenCL/m22200_a0-optimized.cl
index 112a4de7a..150380d44 100644
--- a/OpenCL/m22200_a0-optimized.cl
+++ b/OpenCL/m22200_a0-optimized.cl
@@ -86,7 +86,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
 
   ROUND_STEP (0);
 
-  #ifdef IS_CUDA
+  #if defined IS_CUDA
   ROUND_EXPAND (); ROUND_STEP (16);
   ROUND_EXPAND (); ROUND_STEP (32);
   ROUND_EXPAND (); ROUND_STEP (48);
diff --git a/OpenCL/m22200_a1-optimized.cl b/OpenCL/m22200_a1-optimized.cl
index 3cdc1e2fe..01d192b99 100644
--- a/OpenCL/m22200_a1-optimized.cl
+++ b/OpenCL/m22200_a1-optimized.cl
@@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
 
   ROUND_STEP (0);
 
-  #ifdef IS_CUDA
+  #if defined IS_CUDA
   ROUND_EXPAND (); ROUND_STEP (16);
   ROUND_EXPAND (); ROUND_STEP (32);
   ROUND_EXPAND (); ROUND_STEP (48);
diff --git a/OpenCL/m22200_a3-optimized.cl b/OpenCL/m22200_a3-optimized.cl
index a30697b83..48cf61584 100644
--- a/OpenCL/m22200_a3-optimized.cl
+++ b/OpenCL/m22200_a3-optimized.cl
@@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
 
   ROUND_STEP (0);
 
-  #ifdef IS_CUDA
+  #if defined IS_CUDA
   ROUND_EXPAND (); ROUND_STEP (16);
   ROUND_EXPAND (); ROUND_STEP (32);
   ROUND_EXPAND (); ROUND_STEP (48);
diff --git a/OpenCL/m22700-pure.cl b/OpenCL/m22700-pure.cl
index 033d2b0b7..4660843a2 100644
--- a/OpenCL/m22700-pure.cl
+++ b/OpenCL/m22700-pure.cl
@@ -72,7 +72,7 @@ DECLSPEC int is_valid_bitcoinj (const u32 *w)
   return 1;
 }
 
-#ifdef IS_CUDA
+#if defined IS_CUDA || defined IS_HIP
 
 inline __device__ uint4 operator &  (const uint4  a, const u32   b) { return make_uint4 ((a.x &  b  ), (a.y &  b  ), (a.z &  b  ), (a.w &  b  ));  }
 inline __device__ uint4 operator << (const uint4  a, const u32   b) { return make_uint4 ((a.x << b  ), (a.y << b  ), (a.z << b  ), (a.w << b  ));  }
@@ -105,7 +105,7 @@ DECLSPEC uint4 hc_swap32_4 (uint4 v)
 
 #define ADD_ROTATE_XOR(r,i1,i2,s) (r) ^= rotate ((i1) + (i2), (s));
 
-#ifdef IS_CUDA
+#if defined IS_CUDA || defined IS_HIP
 
 #define SALSA20_2R()                        \
 {                                           \
@@ -374,7 +374,7 @@ KERNEL_FQ void m22700_init (KERN_ATTR_TMPS (scrypt_tmp_t))
     digest[6] = sha256_hmac_ctx2.opad.h[6];
     digest[7] = sha256_hmac_ctx2.opad.h[7];
 
-    #ifdef IS_CUDA
+    #if defined IS_CUDA || defined IS_HIP
     const uint4 tmp0 = make_uint4 (digest[0], digest[1], digest[2], digest[3]);
     const uint4 tmp1 = make_uint4 (digest[4], digest[5], digest[6], digest[7]);
     #else
@@ -402,7 +402,7 @@ KERNEL_FQ void m22700_init (KERN_ATTR_TMPS (scrypt_tmp_t))
 
     uint4 X[4];
 
-    #ifdef IS_CUDA
+    #if defined IS_CUDA || defined IS_HIP
     X[0] = make_uint4 (T[0].x, T[1].y, T[2].z, T[3].w);
     X[1] = make_uint4 (T[1].x, T[2].y, T[3].z, T[0].w);
     X[2] = make_uint4 (T[2].x, T[3].y, T[0].z, T[1].w);
@@ -575,7 +575,7 @@ KERNEL_FQ void m22700_comp (KERN_ATTR_TMPS (scrypt_tmp_t))
 
     uint4 T[4];
 
-    #ifdef IS_CUDA
+    #if defined IS_CUDA || defined IS_HIP
     T[0] = make_uint4 (X[0].x, X[3].y, X[2].z, X[1].w);
     T[1] = make_uint4 (X[1].x, X[0].y, X[3].z, X[2].w);
     T[2] = make_uint4 (X[2].x, X[1].y, X[0].z, X[3].w);
diff --git a/OpenCL/m23700-pure.cl b/OpenCL/m23700-pure.cl
index ec62394a1..af287574e 100644
--- a/OpenCL/m23700-pure.cl
+++ b/OpenCL/m23700-pure.cl
@@ -145,7 +145,7 @@ DECLSPEC void memcat8c_be (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 len, co
   u32 tmp0;
   u32 tmp1;
 
-  #if defined IS_AMD || defined IS_GENERIC
+  #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
   tmp0 = hc_bytealign_be (0, append, func_len);
   tmp1 = hc_bytealign_be (append, 0, func_len);
   #endif
diff --git a/OpenCL/m23800-pure.cl b/OpenCL/m23800-pure.cl
index 1629433c0..f6d345677 100644
--- a/OpenCL/m23800-pure.cl
+++ b/OpenCL/m23800-pure.cl
@@ -56,7 +56,7 @@ DECLSPEC void memcat8c_be (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 len, co
   u32 tmp0;
   u32 tmp1;
 
-  #if defined IS_AMD || defined IS_GENERIC
+  #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
   tmp0 = hc_bytealign_be (0, append, func_len);
   tmp1 = hc_bytealign_be (append, 0, func_len);
   #endif
diff --git a/OpenCL/shared.cl b/OpenCL/shared.cl
index a6ae38988..3cc96e79f 100644
--- a/OpenCL/shared.cl
+++ b/OpenCL/shared.cl
@@ -126,6 +126,11 @@ KERNEL_FQ void gpu_memset (GLOBAL_AS uint4 *buf, const u32 value, const u64 gid_
   r.y = value;
   r.z = value;
   r.w = value;
+  #elif defined IS_HIP
+  r.x = value;
+  r.y = value;
+  r.z = value;
+  r.w = value;
   #endif
 
   buf[gid] = r;
diff --git a/docs/changes.txt b/docs/changes.txt
index d9e895e53..b5df97f59 100644
--- a/docs/changes.txt
+++ b/docs/changes.txt
@@ -1,22 +1,25 @@
-* changes v6.2.2 -> v6.2.x
+* changes v6.2.3 -> v6.2.x
+
+* changes v6.2.2 -> v6.2.3
 
 ##
 ## Algorithms
 ##
 
-- Added hash-mode: VeraCrypt PBKDF2-HMAC-Streebog-512 + XTS 512 bit + boot-mode
-- Added hash-mode: VeraCrypt PBKDF2-HMAC-Streebog-512 + XTS 1024 bit + boot-mode
-- Added hash-mode: VeraCrypt PBKDF2-HMAC-Streebog-512 + XTS 1536 bit + boot-mode
 - Added hash-mode: AES-128-ECB NOKDF (PT = $salt, key = $pass)
 - Added hash-mode: AES-192-ECB NOKDF (PT = $salt, key = $pass)
 - Added hash-mode: AES-256-ECB NOKDF (PT = $salt, key = $pass)
 - Added hash-mode: iPhone passcode (UID key + System Keybag)
 - Added hash-mode: MetaMask Wallet
+- Added hash-mode: VeraCrypt PBKDF2-HMAC-Streebog-512 + XTS 512 bit  + boot-mode
+- Added hash-mode: VeraCrypt PBKDF2-HMAC-Streebog-512 + XTS 1024 bit + boot-mode
+- Added hash-mode: VeraCrypt PBKDF2-HMAC-Streebog-512 + XTS 1536 bit + boot-mode
 
 ##
 ## Features
 ##
 
+- Added new backend support for HIP, an OpenCL alternative API for AMD GPUs (similar to CUDA for NVIDIA GPUs)
 - Added option --markov-inverse to inverse markov statistics, with the idea of reversing the order of the password candidates
 - Added temperature watchdog and fanspeed readings for CPU and GPU on macOS using iokit
 - Added temperature watchdog and utilization for CPU on linux using sysfs and procfs
@@ -41,13 +44,13 @@
 
 - Alias Devices: Show a warning in case the user specifically listed a device to use which in a later step is skipped because it is an alias of another active device
 - Backend Information: Added new column showing the PCI Address per CUDA/OpenCL device to easier identify broken cards
+- Bcrypt-SHA1/MD5 Kernels: Get rid of local memory use for binary to hex conversion to avoid false negatives on several OpenCL runtimes
 - CPU Affinity: Allow mask up to 64 processors in Windows and remove call to SetThreadAffinityMask as SetProcessAffinityMask limits all threads in process
 - Debug Rules: HEX-ify rules debug non-rules outputs that contain colons
 - KeePass: Increase supported size for KeePass 1 databases from 300kB to 16MB
 - Potfile: Disable potfile for hash-mode 99999
 - VeraCrypt: Increase password length support for non-boot volumes from 64 to 128
 - WPA Kernels: Increased performance by 3.5% for backend devices controlled by CUDA backend
-- Bcrypt-SHA1/MD5 Kernels: Get rid of local memory use for binary to hex conversion to avoid false negatives on several OpenCL runtimes
 
 ##
 ## Technical
@@ -63,6 +66,7 @@
 - OpenCL Runtime: Workaround JiT crash (SC failed. No reason given.) on macOS by limiting local memory allocations to 32k
 - Status View: Include time and duration info when pausing and resuming
 - Tests: Changed tests for VeraCrypt from -a 0 to -a 3, because password extension is not available to all shells
+- WinZip Kernel: Increase supported data length from 8kB to 16MB
 
 * changes v6.2.1 -> v6.2.2
 
diff --git a/docs/readme.txt b/docs/readme.txt
index 07605c0cc..db616273c 100644
--- a/docs/readme.txt
+++ b/docs/readme.txt
@@ -7,7 +7,7 @@
            |_____|     /___________/     |_____|  /_____/        /_______|
                  :                             :                         :
 
-hashcat v6.2.2
+hashcat v6.2.3
 ==============
 
 AMD GPUs on Linux require "RadeonOpenCompute (ROCm)" Software Platform (3.1 or later)
diff --git a/extra/tab_completion/hashcat.sh b/extra/tab_completion/hashcat.sh
index a1bd0842c..54440e636 100644
--- a/extra/tab_completion/hashcat.sh
+++ b/extra/tab_completion/hashcat.sh
@@ -411,7 +411,7 @@ _hashcat_contains ()
 
 _hashcat ()
 {
-  local VERSION=6.2.2
+  local VERSION=6.2.3
 
   local ATTACK_MODES="0 1 3 6 7 9"
   local HCCAPX_MESSAGE_PAIRS="0 1 2 3 4 5"
diff --git a/hashcat.hctune b/hashcat.hctune
index 077e42e85..b11441ba2 100644
--- a/hashcat.hctune
+++ b/hashcat.hctune
@@ -274,6 +274,12 @@ GeForce_RTX_3070                                ALIAS_nv_sm50_or_higher
 GeForce_RTX_3080                                ALIAS_nv_sm50_or_higher
 GeForce_RTX_3090                                ALIAS_nv_sm50_or_higher
 
+##
+## Unmapped GPU
+##
+
+Device_738c                                     ALIAS_AMD_MI100
+
 #############
 ## ENTRIES ##
 #############
@@ -498,14 +504,20 @@ GeForce_RTX_2080_Ti                             *       9300    1     532
 GeForce_RTX_2080_Ti                             *       15700   1      68       A
 GeForce_RTX_2080_Ti                             *       22700   1      68       A
 
+## 4GB
+AMD_Radeon_(TM)_RX_480_Graphics                 *       8900    1      14       A
+AMD_Radeon_(TM)_RX_480_Graphics                 *       9300    1     126       A
+AMD_Radeon_(TM)_RX_480_Graphics                 *       15700   1      14       A
+AMD_Radeon_(TM)_RX_480_Graphics                 *       22700   1      14       A
+
 ## 8GB
 Vega_10_XL/XT_[Radeon_RX_Vega_56/64]            *       8900    1      28       A
 Vega_10_XL/XT_[Radeon_RX_Vega_56/64]            *       9300    1     442       A
 Vega_10_XL/XT_[Radeon_RX_Vega_56/64]            *       15700   1      28       A
 Vega_10_XL/XT_[Radeon_RX_Vega_56/64]            *       22700   1      28       A
 
-## 4GB
-AMD_Radeon_(TM)_RX_480_Graphics                 *       8900    1      14       A
-AMD_Radeon_(TM)_RX_480_Graphics                 *       9300    1     126       A
-AMD_Radeon_(TM)_RX_480_Graphics                 *       15700   1      14       A
-AMD_Radeon_(TM)_RX_480_Graphics                 *       22700   1      14       A
+## 32GB
+ALIAS_AMD_MI100                                 *       8900    1      76       A
+ALIAS_AMD_MI100                                 *       9300    1      63       A
+ALIAS_AMD_MI100                                 *       15700   1      76       A
+ALIAS_AMD_MI100                                 *       22700   1      76       A
diff --git a/include/backend.h b/include/backend.h
index 65132d2be..a3d997ca8 100644
--- a/include/backend.h
+++ b/include/backend.h
@@ -22,14 +22,20 @@ static const char CL_VENDOR_MESA[]            = "Mesa";
 static const char CL_VENDOR_NV[]              = "NVIDIA Corporation";
 static const char CL_VENDOR_POCL[]            = "The pocl project";
 
-int  cuda_init   (hashcat_ctx_t *hashcat_ctx);
-void cuda_close  (hashcat_ctx_t *hashcat_ctx);
+int  cuda_init    (hashcat_ctx_t *hashcat_ctx);
+void cuda_close   (hashcat_ctx_t *hashcat_ctx);
 
-int  nvrtc_init  (hashcat_ctx_t *hashcat_ctx);
-void nvrtc_close (hashcat_ctx_t *hashcat_ctx);
+int  hip_init     (hashcat_ctx_t *hashcat_ctx);
+void hip_close    (hashcat_ctx_t *hashcat_ctx);
 
-int  ocl_init    (hashcat_ctx_t *hashcat_ctx);
-void ocl_close   (hashcat_ctx_t *hashcat_ctx);
+int  ocl_init     (hashcat_ctx_t *hashcat_ctx);
+void ocl_close    (hashcat_ctx_t *hashcat_ctx);
+
+int  nvrtc_init   (hashcat_ctx_t *hashcat_ctx);
+void nvrtc_close  (hashcat_ctx_t *hashcat_ctx);
+
+int  hiprtc_init  (hashcat_ctx_t *hashcat_ctx);
+void hiprtc_close (hashcat_ctx_t *hashcat_ctx);
 
 int hc_nvrtcCreateProgram        (hashcat_ctx_t *hashcat_ctx, nvrtcProgram *prog, const char *src, const char *name, int numHeaders, const char * const *headers, const char * const *includeNames);
 int hc_nvrtcDestroyProgram       (hashcat_ctx_t *hashcat_ctx, nvrtcProgram *prog);
@@ -79,6 +85,54 @@ int hc_cuLinkAddData             (hashcat_ctx_t *hashcat_ctx, CUlinkState state,
 int hc_cuLinkDestroy             (hashcat_ctx_t *hashcat_ctx, CUlinkState state);
 int hc_cuLinkComplete            (hashcat_ctx_t *hashcat_ctx, CUlinkState state, void **cubinOut, size_t *sizeOut);
 
+int hc_nvrtcCreateProgram        (hashcat_ctx_t *hashcat_ctx, nvrtcProgram *prog, const char *src, const char *name, int numHeaders, const char * const *headers, const char * const *includeNames);
+int hc_nvrtcDestroyProgram       (hashcat_ctx_t *hashcat_ctx, nvrtcProgram *prog);
+int hc_nvrtcCompileProgram       (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, int numOptions, const char * const *options);
+int hc_nvrtcGetProgramLogSize    (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, size_t *logSizeRet);
+int hc_nvrtcGetProgramLog        (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, char *log);
+int hc_nvrtcGetPTXSize           (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, size_t *ptxSizeRet);
+int hc_nvrtcGetPTX               (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, char *ptx);
+int hc_nvrtcVersion              (hashcat_ctx_t *hashcat_ctx, int *major, int *minor);
+
+int hc_hipCtxCreate              (hashcat_ctx_t *hashcat_ctx, HIPcontext *pctx, unsigned int flags, HIPdevice dev);
+int hc_hipCtxDestroy             (hashcat_ctx_t *hashcat_ctx, HIPcontext ctx);
+int hc_hipCtxSetCurrent          (hashcat_ctx_t *hashcat_ctx, HIPcontext ctx);
+int hc_hipCtxSetCacheConfig      (hashcat_ctx_t *hashcat_ctx, HIPfunc_cache config);
+int hc_hipCtxSynchronize         (hashcat_ctx_t *hashcat_ctx);
+int hc_hipDeviceGetAttribute     (hashcat_ctx_t *hashcat_ctx, int *pi, HIPdevice_attribute attrib, HIPdevice dev);
+int hc_hipDeviceGetCount         (hashcat_ctx_t *hashcat_ctx, int *count);
+int hc_hipDeviceGet              (hashcat_ctx_t *hashcat_ctx, HIPdevice *device, int ordinal);
+int hc_hipDeviceGetName          (hashcat_ctx_t *hashcat_ctx, char *name, int len, HIPdevice dev);
+int hc_hipDeviceTotalMem         (hashcat_ctx_t *hashcat_ctx, size_t *bytes, HIPdevice dev);
+int hc_hipDriverGetVersion       (hashcat_ctx_t *hashcat_ctx, int *driverVersion);
+int hc_hipEventCreate            (hashcat_ctx_t *hashcat_ctx, HIPevent *phEvent, unsigned int Flags);
+int hc_hipEventDestroy           (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent);
+int hc_hipEventElapsedTime       (hashcat_ctx_t *hashcat_ctx, float *pMilliseconds, HIPevent hStart, HIPevent hEnd);
+int hc_hipEventQuery             (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent);
+int hc_hipEventRecord            (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent, HIPstream hStream);
+int hc_hipEventSynchronize       (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent);
+int hc_hipFuncGetAttribute       (hashcat_ctx_t *hashcat_ctx, int *pi, HIPfunction_attribute attrib, HIPfunction hfunc);
+//int hc_hipFuncSetAttribute       (hashcat_ctx_t *hashcat_ctx, HIPfunction hfunc, HIPfunction_attribute attrib, int value);
+int hc_hipInit                   (hashcat_ctx_t *hashcat_ctx, unsigned int Flags);
+int hc_hipLaunchKernel           (hashcat_ctx_t *hashcat_ctx, HIPfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, HIPstream hStream, void **kernelParams, void **extra);
+int hc_hipMemAlloc               (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr *dptr, size_t bytesize);
+int hc_hipMemcpyDtoD             (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dstDevice, HIPdeviceptr srcDevice, size_t ByteCount);
+int hc_hipMemcpyDtoH             (hashcat_ctx_t *hashcat_ctx, void *dstHost, HIPdeviceptr srcDevice, size_t ByteCount);
+int hc_hipMemcpyHtoD             (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dstDevice, const void *srcHost, size_t ByteCount);
+int hc_hipMemFree                (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dptr);
+int hc_hipModuleGetFunction      (hashcat_ctx_t *hashcat_ctx, HIPfunction *hfunc, HIPmodule hmod, const char *name);
+int hc_hipModuleLoadDataEx       (hashcat_ctx_t *hashcat_ctx, HIPmodule *module, const void *image, unsigned int numOptions, HIPjit_option *options, void **optionValues);
+int hc_hipModuleUnload           (hashcat_ctx_t *hashcat_ctx, HIPmodule hmod);
+int hc_hipStreamCreate           (hashcat_ctx_t *hashcat_ctx, HIPstream *phStream, unsigned int Flags);
+int hc_hipStreamDestroy          (hashcat_ctx_t *hashcat_ctx, HIPstream hStream);
+int hc_hipStreamSynchronize      (hashcat_ctx_t *hashcat_ctx, HIPstream hStream);
+int hc_hipCtxPushCurrent         (hashcat_ctx_t *hashcat_ctx, HIPcontext ctx);
+int hc_hipCtxPopCurrent          (hashcat_ctx_t *hashcat_ctx, HIPcontext *pctx);
+int hc_hipLinkCreate             (hashcat_ctx_t *hashcat_ctx, unsigned int numOptions, HIPjit_option *options, void **optionValues, HIPlinkState *stateOut);
+int hc_hipLinkAddData            (hashcat_ctx_t *hashcat_ctx, HIPlinkState state, HIPjitInputType type, void *data, size_t size, const char *name, unsigned int numOptions, HIPjit_option *options, void **optionValues);
+int hc_hipLinkDestroy            (hashcat_ctx_t *hashcat_ctx, HIPlinkState state);
+int hc_hipLinkComplete           (hashcat_ctx_t *hashcat_ctx, HIPlinkState state, void **cubinOut, size_t *sizeOut);
+
 int hc_clBuildProgram            (hashcat_ctx_t *hashcat_ctx, cl_program program, cl_uint num_devices, const cl_device_id *device_list, const char *options, void (CL_CALLBACK *pfn_notify) (cl_program program, void *user_data), void *user_data);
 int hc_clCompileProgram          (hashcat_ctx_t *hashcat_ctx, cl_program program, cl_uint num_devices, const cl_device_id *device_list, const char *options, cl_uint num_input_headers, const cl_program *input_headers, const char **header_include_names, void (CL_CALLBACK *pfn_notify) (cl_program program, void *user_data), void *user_data);
 int hc_clCreateBuffer            (hashcat_ctx_t *hashcat_ctx, cl_context context, cl_mem_flags flags, size_t size, void *host_ptr, cl_mem *mem);
@@ -121,15 +175,20 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
 
 void rebuild_pws_compressed_append (hc_device_param_t *device_param, const u64 pws_cnt, const u8 chr);
 
-int run_cuda_kernel_atinit    (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 num);
-int run_cuda_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 num);
-int run_cuda_kernel_memset    (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u32 value, const u64 size);
-int run_cuda_kernel_bzero     (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 size);
+int run_cuda_kernel_atinit          (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 num);
+int run_cuda_kernel_utf8toutf16le   (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 num);
+int run_cuda_kernel_memset          (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u32 value, const u64 size);
+int run_cuda_kernel_bzero           (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 size);
 
-int run_opencl_kernel_atinit  (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 num);
+int run_hip_kernel_atinit           (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, HIPdeviceptr buf, const u64 num);
+int run_hip_kernel_utf8toutf16le    (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, HIPdeviceptr buf, const u64 num);
+int run_hip_kernel_memset           (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, HIPdeviceptr buf, const u32 value, const u64 size);
+int run_hip_kernel_bzero            (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, HIPdeviceptr buf, const u64 size);
+
+int run_opencl_kernel_atinit        (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 num);
 int run_opencl_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 num);
-int run_opencl_kernel_memset  (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u32 value, const u64 size);
-int run_opencl_kernel_bzero   (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 size);
+int run_opencl_kernel_memset        (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u32 value, const u64 size);
+int run_opencl_kernel_bzero         (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 size);
 
 int run_kernel                (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u32 kern_run, const u64 pws_pos, const u64 num, const u32 event_update, const u32 iteration);
 int run_kernel_mp             (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u32 kern_run, const u64 num);
diff --git a/include/ext_hip.h b/include/ext_hip.h
new file mode 100644
index 000000000..15840d671
--- /dev/null
+++ b/include/ext_hip.h
@@ -0,0 +1,1131 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+#ifndef _EXT_HIP_H
+#define _EXT_HIP_H
+
+/**
+ * TODO: FIX ME
+ */
+
+#define __HIP_API_VERSION 4221131
+
+/**
+ * HIP device pointer
+ * HIPdeviceptr is defined as an unsigned integer type whose size matches the size of a pointer on the target platform.
+ */
+#if __HIP_API_VERSION >= 3020 /* true for the 4221131 value defined above in this header */
+
+#if defined(_WIN64) || defined(__LP64__)
+typedef unsigned long long HIPdeviceptr; /* 64-bit Windows (_WIN64) and LP64 targets */
+#else
+typedef unsigned int HIPdeviceptr; /* every other target; presumably 32-bit pointers - confirm */
+#endif
+
+#endif /* __HIP_API_VERSION >= 3020 */
+
+typedef int HIPdevice;                                     /**< HIP device */
+typedef struct HIPctx_st *HIPcontext;                       /**< HIP context */
+typedef struct HIPevent_st *HIPevent;                       /**< HIP event */
+typedef struct HIPfunc_st *HIPfunction;                     /**< HIP function */
+typedef struct HIPmod_st *HIPmodule;                        /**< HIP module */
+typedef struct HIPstream_st *HIPstream;                     /**< HIP stream */
+typedef struct HIPlinkState_st *HIPlinkState;               /**< HIP link state (presumably for the JIT linker; confirm) */
+
+
+typedef enum hipError_enum { /* HIP API result codes; exposed below as HIPresult */
+    /**
+     * The API call returned with no errors. In the case of query calls, this
+     * also means that the operation being queried is complete (see
+     * ::hipEventQuery() and ::hipStreamQuery()).
+     */
+    HIP_SUCCESS                              = 0,
+
+    /**
+     * This indicates that one or more of the parameters passed to the API call
+     * is not within an acceptable range of values.
+     */
+    HIP_ERROR_INVALID_VALUE                  = 1,
+
+    /**
+     * The API call failed because it was unable to allocate enough memory to
+     * perform the requested operation.
+     */
+    HIP_ERROR_OUT_OF_MEMORY                  = 2,
+
+    /**
+     * This indicates that the HIP driver has not been initialized with
+     * ::hipInit() or that initialization has failed.
+     */
+    HIP_ERROR_NOT_INITIALIZED                = 3,
+
+    /**
+     * This indicates that the HIP driver is in the process of shutting down.
+     */
+    HIP_ERROR_DEINITIALIZED                  = 4,
+
+    /**
+     * This indicates profiler is not initialized for this run. This can
+     * happen when the application is running with external profiling tools
+     * like visual profiler.
+     */
+    HIP_ERROR_PROFILER_DISABLED              = 5,
+
+    /**
+     * \deprecated
+     * This error return is deprecated as of HIP 5.0. It is no longer an error
+     * to attempt to enable/disable the profiling via ::hipProfilerStart or
+     * ::hipProfilerStop without initialization.
+     */
+    HIP_ERROR_PROFILER_NOT_INITIALIZED       = 6,
+
+    /**
+     * \deprecated
+     * This error return is deprecated as of HIP 5.0. It is no longer an error
+     * to call hipProfilerStart() when profiling is already enabled.
+     */
+    HIP_ERROR_PROFILER_ALREADY_STARTED       = 7,
+
+    /**
+     * \deprecated
+     * This error return is deprecated as of HIP 5.0. It is no longer an error
+     * to call hipProfilerStop() when profiling is already disabled.
+     */
+    HIP_ERROR_PROFILER_ALREADY_STOPPED       = 8,
+
+    /**
+     * This indicates that no HIP-capable devices were detected by the installed
+     * HIP driver.
+     */
+    HIP_ERROR_NO_DEVICE                      = 100,
+
+    /**
+     * This indicates that the device ordinal supplied by the user does not
+     * correspond to a valid HIP device.
+     */
+    HIP_ERROR_INVALID_DEVICE                 = 101,
+
+
+    /**
+     * This indicates that the device kernel image is invalid. This can also
+     * indicate an invalid HIP module.
+     */
+    HIP_ERROR_INVALID_IMAGE                  = 200,
+
+    /**
+     * This most frequently indicates that there is no context bound to the
+     * current thread. This can also be returned if the context passed to an
+     * API call is not a valid handle (such as a context that has had
+     * ::hipCtxDestroy() invoked on it). This can also be returned if a user
+     * mixes different API versions (i.e. 3010 context with 3020 API calls).
+     * See ::hipCtxGetApiVersion() for more details.
+     */
+    HIP_ERROR_INVALID_CONTEXT                = 201,
+
+    /**
+     * This indicates that the context being supplied as a parameter to the
+     * API call was already the active context.
+     * \deprecated
+     * This error return is deprecated as of HIP 3.2. It is no longer an
+     * error to attempt to push the active context via ::hipCtxPushCurrent().
+     */
+    HIP_ERROR_CONTEXT_ALREADY_CURRENT        = 202,
+
+    /**
+     * This indicates that a map or register operation has failed.
+     */
+    HIP_ERROR_MAP_FAILED                     = 205,
+
+    /**
+     * This indicates that an unmap or unregister operation has failed.
+     */
+    HIP_ERROR_UNMAP_FAILED                   = 206,
+
+    /**
+     * This indicates that the specified array is currently mapped and thus
+     * cannot be destroyed.
+     */
+    HIP_ERROR_ARRAY_IS_MAPPED                = 207,
+
+    /**
+     * This indicates that the resource is already mapped.
+     */
+    HIP_ERROR_ALREADY_MAPPED                 = 208,
+
+    /**
+     * This indicates that there is no kernel image available that is suitable
+     * for the device. This can occur when a user specifies code generation
+     * options for a particular HIP source file that do not include the
+     * corresponding device configuration.
+     */
+    HIP_ERROR_NO_BINARY_FOR_GPU              = 209,
+
+    /**
+     * This indicates that a resource has already been acquired.
+     */
+    HIP_ERROR_ALREADY_ACQUIRED               = 210,
+
+    /**
+     * This indicates that a resource is not mapped.
+     */
+    HIP_ERROR_NOT_MAPPED                     = 211,
+
+    /**
+     * This indicates that a mapped resource is not available for access as an
+     * array.
+     */
+    HIP_ERROR_NOT_MAPPED_AS_ARRAY            = 212,
+
+    /**
+     * This indicates that a mapped resource is not available for access as a
+     * pointer.
+     */
+    HIP_ERROR_NOT_MAPPED_AS_POINTER          = 213,
+
+    /**
+     * This indicates that an uncorrectable ECC error was detected during
+     * execution.
+     */
+    HIP_ERROR_ECC_UNCORRECTABLE              = 214,
+
+    /**
+     * This indicates that the ::HIPlimit passed to the API call is not
+     * supported by the active device.
+     */
+    HIP_ERROR_UNSUPPORTED_LIMIT              = 215,
+
+    /**
+     * This indicates that the ::HIPcontext passed to the API call can
+     * only be bound to a single CPU thread at a time but is already
+     * bound to a CPU thread.
+     */
+    HIP_ERROR_CONTEXT_ALREADY_IN_USE         = 216,
+
+    /**
+     * This indicates that peer access is not supported across the given
+     * devices.
+     */
+    HIP_ERROR_PEER_ACCESS_UNSUPPORTED        = 217,
+
+    /**
+     * This indicates that a PTX JIT compilation failed.
+     */
+    HIP_ERROR_INVALID_PTX                    = 218,
+
+    /**
+     * This indicates an error with OpenGL or DirectX context.
+     */
+    HIP_ERROR_INVALID_GRAPHICS_CONTEXT       = 219,
+
+    /**
+     * This indicates that an uncorrectable NVLink error was detected during the
+     * execution.
+     */
+    HIP_ERROR_NVLINK_UNCORRECTABLE           = 220,
+
+    /**
+     * This indicates that the PTX JIT compiler library was not found.
+     */
+    HIP_ERROR_JIT_COMPILER_NOT_FOUND         = 221,
+
+    /**
+     * This indicates that the device kernel source is invalid.
+     */
+    HIP_ERROR_INVALID_SOURCE                 = 300,
+
+    /**
+     * This indicates that the file specified was not found.
+     */
+    HIP_ERROR_FILE_NOT_FOUND                 = 301,
+
+    /**
+     * This indicates that a link to a shared object failed to resolve.
+     */
+    HIP_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302,
+
+    /**
+     * This indicates that initialization of a shared object failed.
+     */
+    HIP_ERROR_SHARED_OBJECT_INIT_FAILED      = 303,
+
+    /**
+     * This indicates that an OS call failed.
+     */
+    HIP_ERROR_OPERATING_SYSTEM               = 304,
+
+    /**
+     * This indicates that a resource handle passed to the API call was not
+     * valid. Resource handles are opaque types like ::HIPstream and ::HIPevent.
+     */
+    HIP_ERROR_INVALID_HANDLE                 = 400,
+
+    /**
+     * This indicates that a resource required by the API call is not in a
+     * valid state to perform the requested operation.
+     */
+    HIP_ERROR_ILLEGAL_STATE                  = 401,
+
+    /**
+     * This indicates that a named symbol was not found. Examples of symbols
+     * are global/constant variable names, texture names, and surface names.
+     */
+    HIP_ERROR_NOT_FOUND                      = 500,
+
+    /**
+     * This indicates that asynchronous operations issued previously have not
+     * completed yet. This result is not actually an error, but must be indicated
+     * differently than ::HIP_SUCCESS (which indicates completion). Calls that
+     * may return this value include ::hipEventQuery() and ::hipStreamQuery().
+     */
+    HIP_ERROR_NOT_READY                      = 600,
+
+    /**
+     * While executing a kernel, the device encountered a
+     * load or store instruction on an invalid memory address.
+     * This leaves the process in an inconsistent state and any further HIP work
+     * will return the same error. To continue using HIP, the process must be terminated
+     * and relaunched.
+     */
+    HIP_ERROR_ILLEGAL_ADDRESS                = 700,
+
+    /**
+     * This indicates that a launch did not occur because it did not have
+     * appropriate resources. This error usually indicates that the user has
+     * attempted to pass too many arguments to the device kernel, or the
+     * kernel launch specifies too many threads for the kernel's register
+     * count. Passing arguments of the wrong size (i.e. a 64-bit pointer
+     * when a 32-bit int is expected) is equivalent to passing too many
+     * arguments and can also result in this error.
+     */
+    HIP_ERROR_LAUNCH_OUT_OF_RESOURCES        = 701,
+
+    /**
+     * This indicates that the device kernel took too long to execute. This can
+     * only occur if timeouts are enabled - see the device attribute
+     * ::HIP_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT for more information.
+     * This leaves the process in an inconsistent state and any further HIP work
+     * will return the same error. To continue using HIP, the process must be terminated
+     * and relaunched.
+     */
+    HIP_ERROR_LAUNCH_TIMEOUT                 = 702,
+
+    /**
+     * This error indicates a kernel launch that uses an incompatible texturing
+     * mode.
+     */
+    HIP_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING  = 703,
+
+    /**
+     * This error indicates that a call to ::hipCtxEnablePeerAccess() is
+     * trying to re-enable peer access to a context which has already
+     * had peer access to it enabled.
+     */
+    HIP_ERROR_PEER_ACCESS_ALREADY_ENABLED    = 704,
+
+    /**
+     * This error indicates that ::hipCtxDisablePeerAccess() is
+     * trying to disable peer access which has not been enabled yet
+     * via ::hipCtxEnablePeerAccess().
+     */
+    HIP_ERROR_PEER_ACCESS_NOT_ENABLED        = 705,
+
+    /**
+     * This error indicates that the primary context for the specified device
+     * has already been initialized.
+     */
+    HIP_ERROR_PRIMARY_CONTEXT_ACTIVE         = 708,
+
+    /**
+     * This error indicates that the context current to the calling thread
+     * has been destroyed using ::hipCtxDestroy, or is a primary context which
+     * has not yet been initialized.
+     */
+    HIP_ERROR_CONTEXT_IS_DESTROYED           = 709,
+
+    /**
+     * A device-side assert triggered during kernel execution. The context
+     * cannot be used anymore, and must be destroyed. All existing device
+     * memory allocations from this context are invalid and must be
+     * reconstructed if the program is to continue using HIP.
+     */
+    HIP_ERROR_ASSERT                         = 710,
+
+    /**
+     * This error indicates that the hardware resources required to enable
+     * peer access have been exhausted for one or more of the devices
+     * passed to ::hipCtxEnablePeerAccess().
+     */
+    HIP_ERROR_TOO_MANY_PEERS                 = 711,
+
+    /**
+     * This error indicates that the memory range passed to ::hipMemHostRegister()
+     * has already been registered.
+     */
+    HIP_ERROR_HOST_MEMORY_ALREADY_REGISTERED = 712,
+
+    /**
+     * This error indicates that the pointer passed to ::hipMemHostUnregister()
+     * does not correspond to any currently registered memory region.
+     */
+    HIP_ERROR_HOST_MEMORY_NOT_REGISTERED     = 713,
+
+    /**
+     * While executing a kernel, the device encountered a stack error.
+     * This can be due to stack corruption or exceeding the stack size limit.
+     * This leaves the process in an inconsistent state and any further HIP work
+     * will return the same error. To continue using HIP, the process must be terminated
+     * and relaunched.
+     */
+    HIP_ERROR_HARDWARE_STACK_ERROR           = 714,
+
+    /**
+     * While executing a kernel, the device encountered an illegal instruction.
+     * This leaves the process in an inconsistent state and any further HIP work
+     * will return the same error. To continue using HIP, the process must be terminated
+     * and relaunched.
+     */
+    HIP_ERROR_ILLEGAL_INSTRUCTION            = 715,
+
+    /**
+     * While executing a kernel, the device encountered a load or store instruction
+     * on a memory address which is not aligned.
+     * This leaves the process in an inconsistent state and any further HIP work
+     * will return the same error. To continue using HIP, the process must be terminated
+     * and relaunched.
+     */
+    HIP_ERROR_MISALIGNED_ADDRESS             = 716,
+
+    /**
+     * While executing a kernel, the device encountered an instruction
+     * which can only operate on memory locations in certain address spaces
+     * (global, shared, or local), but was supplied a memory address not
+     * belonging to an allowed address space.
+     * This leaves the process in an inconsistent state and any further HIP work
+     * will return the same error. To continue using HIP, the process must be terminated
+     * and relaunched.
+     */
+    HIP_ERROR_INVALID_ADDRESS_SPACE          = 717,
+
+    /**
+     * While executing a kernel, the device program counter wrapped its address space.
+     * This leaves the process in an inconsistent state and any further HIP work
+     * will return the same error. To continue using HIP, the process must be terminated
+     * and relaunched.
+     */
+    HIP_ERROR_INVALID_PC                     = 718,
+
+    /**
+     * An exception occurred on the device while executing a kernel. Common
+     * causes include dereferencing an invalid device pointer and accessing
+     * out of bounds shared memory. Less common cases can be system specific - more
+     * information about these cases can be found in the system specific user guide.
+     * This leaves the process in an inconsistent state and any further HIP work
+     * will return the same error. To continue using HIP, the process must be terminated
+     * and relaunched.
+     */
+    HIP_ERROR_LAUNCH_FAILED                  = 719,
+
+    /**
+     * This error indicates that the number of blocks launched per grid for a kernel that was
+     * launched via either ::hipLaunchCooperativeKernel or ::hipLaunchCooperativeKernelMultiDevice
+     * exceeds the maximum number of blocks as allowed by ::hipOccupancyMaxActiveBlocksPerMultiprocessor
+     * or ::hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags times the number of multiprocessors
+     * as specified by the device attribute ::HIP_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT.
+     */
+    HIP_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE   = 720,
+
+    /**
+     * This error indicates that the attempted operation is not permitted.
+     */
+    HIP_ERROR_NOT_PERMITTED                  = 800,
+
+    /**
+     * This error indicates that the attempted operation is not supported
+     * on the current system or device.
+     */
+    HIP_ERROR_NOT_SUPPORTED                  = 801,
+
+    /**
+     * This error indicates that the system is not yet ready to start any HIP
+     * work.  To continue using HIP, verify the system configuration is in a
+     * valid state and all required driver daemons are actively running.
+     * More information about this error can be found in the system specific
+     * user guide.
+     */
+    HIP_ERROR_SYSTEM_NOT_READY               = 802,
+
+    /**
+     * This error indicates that there is a mismatch between the versions of
+     * the display driver and the HIP driver. Refer to the compatibility documentation
+     * for supported versions.
+     */
+    HIP_ERROR_SYSTEM_DRIVER_MISMATCH         = 803,
+
+    /**
+     * This error indicates that the system was upgraded to run with forward compatibility
+     * but the visible hardware detected by HIP does not support this configuration.
+     * Refer to the compatibility documentation for the supported hardware matrix or ensure
+     * that only supported hardware is visible during initialization via the HIP_VISIBLE_DEVICES
+     * environment variable.
+     */
+    HIP_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE = 804,
+
+    /**
+     * This error indicates that the operation is not permitted when
+     * the stream is capturing.
+     */
+    HIP_ERROR_STREAM_CAPTURE_UNSUPPORTED     = 900,
+
+    /**
+     * This error indicates that the current capture sequence on the stream
+     * has been invalidated due to a previous error.
+     */
+    HIP_ERROR_STREAM_CAPTURE_INVALIDATED     = 901,
+
+    /**
+     * This error indicates that the operation would have resulted in a merge
+     * of two independent capture sequences.
+     */
+    HIP_ERROR_STREAM_CAPTURE_MERGE           = 902,
+
+    /**
+     * This error indicates that the capture was not initiated in this stream.
+     */
+    HIP_ERROR_STREAM_CAPTURE_UNMATCHED       = 903,
+
+    /**
+     * This error indicates that the capture sequence contains a fork that was
+     * not joined to the primary stream.
+     */
+    HIP_ERROR_STREAM_CAPTURE_UNJOINED        = 904,
+
+    /**
+     * This error indicates that a dependency would have been created which
+     * crosses the capture sequence boundary. Only implicit in-stream ordering
+     * dependencies are allowed to cross the boundary.
+     */
+    HIP_ERROR_STREAM_CAPTURE_ISOLATION       = 905,
+
+    /**
+     * This error indicates a disallowed implicit dependency on a current capture
+     * sequence from HIPStreamLegacy.
+     */
+    HIP_ERROR_STREAM_CAPTURE_IMPLICIT        = 906,
+
+    /**
+     * This error indicates that the operation is not permitted on an event which
+     * was last recorded in a capturing stream.
+     */
+    HIP_ERROR_CAPTURED_EVENT                 = 907,
+
+    /**
+     * A stream capture sequence not initiated with the ::HIP_STREAM_CAPTURE_MODE_RELAXED
+     * argument to ::HIPStreamBeginCapture was passed to ::hipStreamEndCapture in a
+     * different thread.
+     */
+    HIP_ERROR_STREAM_CAPTURE_WRONG_THREAD    = 908,
+
+    /**
+     * This indicates that an unknown internal error has occurred.
+     */
+    HIP_ERROR_UNKNOWN                        = 999
+} HIPresult;
+
+/**
+ * Online compiler and linker options
+ */
+typedef enum HIPjit_option_enum
+{
+    /**
+     * Max number of registers that a thread may use.\n
+     * Option type: unsigned int\n
+     * Applies to: compiler only
+     */
+    HIP_JIT_MAX_REGISTERS = 0,
+
+    /**
+     * IN: Specifies minimum number of threads per block to target compilation
+     * for\n
+     * OUT: Returns the number of threads the compiler actually targeted.
+     * This restricts the resource utilization of the compiler (e.g. max
+     * registers) such that a block with the given number of threads should be
+     * able to launch based on register limitations. Note, this option does not
+     * currently take into account any other resource limitations, such as
+     * shared memory utilization.\n
+     * Cannot be combined with ::HIP_JIT_TARGET.\n
+     * Option type: unsigned int\n
+     * Applies to: compiler only
+     */
+    HIP_JIT_THREADS_PER_BLOCK,
+
+    /**
+     * Overwrites the option value with the total wall clock time, in
+     * milliseconds, spent in the compiler and linker\n
+     * Option type: float\n
+     * Applies to: compiler and linker
+     */
+    HIP_JIT_WALL_TIME,
+
+    /**
+     * Pointer to a buffer in which to print any log messages
+     * that are informational in nature (the buffer size is specified via
+     * option ::HIP_JIT_INFO_LOG_BUFFER_SIZE_BYTES)\n
+     * Option type: char *\n
+     * Applies to: compiler and linker
+     */
+    HIP_JIT_INFO_LOG_BUFFER,
+
+    /**
+     * IN: Log buffer size in bytes.  Log messages will be capped at this size
+     * (including null terminator)\n
+     * OUT: Amount of log buffer filled with messages\n
+     * Option type: unsigned int\n
+     * Applies to: compiler and linker
+     */
+    HIP_JIT_INFO_LOG_BUFFER_SIZE_BYTES,
+
+    /**
+     * Pointer to a buffer in which to print any log messages that
+     * reflect errors (the buffer size is specified via option
+     * ::HIP_JIT_ERROR_LOG_BUFFER_SIZE_BYTES)\n
+     * Option type: char *\n
+     * Applies to: compiler and linker
+     */
+    HIP_JIT_ERROR_LOG_BUFFER,
+
+    /**
+     * IN: Log buffer size in bytes.  Log messages will be capped at this size
+     * (including null terminator)\n
+     * OUT: Amount of log buffer filled with messages\n
+     * Option type: unsigned int\n
+     * Applies to: compiler and linker
+     */
+    HIP_JIT_ERROR_LOG_BUFFER_SIZE_BYTES,
+
+    /**
+     * Level of optimizations to apply to generated code (0 - 4), with 4
+     * being the default and highest level of optimizations.\n
+     * Option type: unsigned int\n
+     * Applies to: compiler only
+     */
+    HIP_JIT_OPTIMIZATION_LEVEL,
+
+    /**
+     * No option value required. Determines the target based on the current
+     * attached context (default)\n
+     * Option type: No option value needed\n
+     * Applies to: compiler and linker
+     */
+    HIP_JIT_TARGET_FROM_HIPCONTEXT,
+
+    /**
+     * Target is chosen based on supplied ::HIPjit_target.  Cannot be
+     * combined with ::HIP_JIT_THREADS_PER_BLOCK.\n
+     * Option type: unsigned int for enumerated type ::HIPjit_target\n
+     * Applies to: compiler and linker
+     */
+    HIP_JIT_TARGET,
+
+    /**
+     * Specifies choice of fallback strategy if matching HIPbin is not found.
+     * Choice is based on supplied ::HIPjit_fallback.  This option cannot be
+     * used with HIPLink* APIs as the linker requires exact matches.\n
+     * Option type: unsigned int for enumerated type ::HIPjit_fallback\n
+     * Applies to: compiler only
+     */
+    HIP_JIT_FALLBACK_STRATEGY,
+
+    /**
+     * Specifies whether to create debug information in output (-g)
+     * (0: false, default)\n
+     * Option type: int\n
+     * Applies to: compiler and linker
+     */
+    HIP_JIT_GENERATE_DEBUG_INFO,
+
+    /**
+     * Generate verbose log messages (0: false, default)\n
+     * Option type: int\n
+     * Applies to: compiler and linker
+     */
+    HIP_JIT_LOG_VERBOSE,
+
+    /**
+     * Generate line number information (-lineinfo) (0: false, default)\n
+     * Option type: int\n
+     * Applies to: compiler only
+     */
+    HIP_JIT_GENERATE_LINE_INFO,
+
+    /**
+     * Specifies whether to enable caching explicitly (-dlcm) \n
+     * Choice is based on supplied ::HIPjit_cacheMode_enum.\n
+     * Option type: unsigned int for enumerated type ::HIPjit_cacheMode_enum\n
+     * Applies to: compiler only
+     */
+    HIP_JIT_CACHE_MODE,
+
+    /**
+     * The below jit options are used for internal purposes only, in this version of HIP
+     */
+    HIP_JIT_NEW_SM3X_OPT,
+    HIP_JIT_FAST_COMPILE,
+
+    /**
+     * Array of device symbol names that will be relocated to the corresponding
+     * host addresses stored in ::HIP_JIT_GLOBAL_SYMBOL_ADDRESSES.\n
+     * Must contain ::HIP_JIT_GLOBAL_SYMBOL_COUNT entries.\n
+     * When loading a device module, driver will relocate all encountered
+     * unresolved symbols to the host addresses.\n
+     * It is only allowed to register symbols that correspond to unresolved
+     * global variables.\n
+     * It is illegal to register the same device symbol at multiple addresses.\n
+     * Option type: const char **\n
+     * Applies to: dynamic linker only
+     */
+    HIP_JIT_GLOBAL_SYMBOL_NAMES,
+
+    /**
+     * Array of host addresses that will be used to relocate corresponding
+     * device symbols stored in ::HIP_JIT_GLOBAL_SYMBOL_NAMES.\n
+     * Must contain ::HIP_JIT_GLOBAL_SYMBOL_COUNT entries.\n
+     * Option type: void **\n
+     * Applies to: dynamic linker only
+     */
+    HIP_JIT_GLOBAL_SYMBOL_ADDRESSES,
+
+    /**
+     * Number of entries in ::HIP_JIT_GLOBAL_SYMBOL_NAMES and
+     * ::HIP_JIT_GLOBAL_SYMBOL_ADDRESSES arrays.\n
+     * Option type: unsigned int\n
+     * Applies to: dynamic linker only
+     */
+    HIP_JIT_GLOBAL_SYMBOL_COUNT,
+
+    HIP_JIT_NUM_OPTIONS
+
+} HIPjit_option;
+
+
+/**
+ * Device properties
+ */
+typedef enum HIPdevice_attribute_enum {
+    
+    HIP_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0,              /**< Maximum number of threads per block */
+    HIP_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 1,                    /**< Maximum block dimension X */
+    HIP_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 2,                    /**< Maximum block dimension Y */
+    HIP_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 3,                    /**< Maximum block dimension Z */
+    HIP_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 4,                     /**< Maximum grid dimension X */
+    HIP_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = 5,                     /**< Maximum grid dimension Y */
+    HIP_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = 6,                     /**< Maximum grid dimension Z */
+    HIP_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 7,        /**< Maximum shared memory available per block in bytes */
+    HIP_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK = 7,            /**< Deprecated, use HIP_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK */
+    HIP_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN = 7, /**< Maximum optin shared memory per block */
+    HIP_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = 8,              /**< Memory available on device for __constant__ variables in a HIP C kernel in bytes */
+    HIP_DEVICE_ATTRIBUTE_WARP_SIZE = 9,                         /**< Warp size in threads */
+    HIP_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 10,           /**< Maximum number of 32-bit registers available per block */
+    HIP_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK = 10,               /**< Deprecated, use HIP_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK */
+    HIP_DEVICE_ATTRIBUTE_CLOCK_RATE = 11,                        /**< Typical clock frequency in kilohertz */
+    HIP_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE = 12,                 /**< Peak memory clock frequency in kilohertz */
+    HIP_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH = 13,           /**< Global memory bus width in bits */
+    HIP_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 14,              /**< Number of multiprocessors on device */
+    HIP_DEVICE_ATTRIBUTE_COMPUTE_MODE = 15,                      /**< Compute mode (See ::HIPcomputemode for details) */
+    HIP_DEVICE_ATTRIBUTE_L2_CACHE_SIZE = 16,                     /**< Size of L2 cache in bytes */
+    HIP_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 17,    /**< Maximum resident threads per multiprocessor */
+    HIP_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 18,          /**< Major compute capability version number */
+    HIP_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 19,          /**< Minor compute capability version number */
+    HIP_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 20,                /**< Device can possibly execute multiple kernels concurrently */
+    HIP_DEVICE_ATTRIBUTE_PCI_BUS_ID = 21,                        /**< PCI bus ID of the device */
+    HIP_DEVICE_ATTRIBUTE_PCI_DEVICE_ID = 22,                     /**< PCI device ID of the device */
+    HIP_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID = 22,                     /**< PCI domain ID of the device */
+    HIP_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR = 23,  /**< Maximum shared memory available per multiprocessor in bytes */
+    HIP_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD = 24,                    /**< Device is on a multi-GPU board */
+    HIP_DEVICE_ATTRIBUTE_INTEGRATED = 25,                        /**< Device is integrated with host memory */
+    HIP_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH = 26,                /**< Device supports launching cooperative kernels via ::hipLaunchCooperativeKernel */
+    HIP_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH = 27,   /**< Device can participate in cooperative kernels launched via ::hipLaunchCooperativeKernelMultiDevice */
+    HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH = 28,           /**< Maximum 1D texture width */
+    HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH = 29,           /**< Maximum 2D texture width */
+    HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT = 30,          /**< Maximum 2D texture height */
+    HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH = 31,           /**< Maximum 3D texture width */
+    HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT = 32,          /**< Maximum 3D texture height */
+    HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH = 33,           /**< Maximum 3D texture depth */
+    
+    HIP_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 37,                 /**< Alignment requirement for textures */
+    HIP_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT = 38,           /**< Pitch alignment requirement for textures */
+    HIP_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = 39,               /**< Specifies whether there is a run time limit on kernels */
+    HIP_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 40,               /**< Device can map host memory into HIP address space */
+    HIP_DEVICE_ATTRIBUTE_ECC_ENABLED = 41,                       /**< Device has ECC support enabled */
+    
+    HIP_DEVICE_ATTRIBUTE_MANAGED_MEMORY = 47,                    /**< Device can allocate managed memory on this system */
+    HIP_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST = 48, /**< The host can directly access managed memory on the device without migration. */
+    HIP_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS = 49,         /**< Device can coherently access managed memory concurrently with the CPU */
+    HIP_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS = 50,            /**< Device supports coherently accessing pageable memory without calling HIPHostRegister on it */
+    HIP_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES = 51, /**< Device accesses pageable memory via the host's page tables. */
+    HIP_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR = 52,     /**< ::HIP_STREAM_WAIT_VALUE_NOR is supported. */
+    
+    
+    // HIP_DEVICE_ATTRIBUTE_MAX_PITCH = ,                         /**< Maximum pitch in bytes allowed by memory copies */
+    // HIP_DEVICE_ATTRIBUTE_GPU_OVERLAP = ,                       /**< Device can possibly copy memory and execute a kernel concurrently. Deprecated. Use instead HIP_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT. */
+    // 
+    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH = ,   /**< Maximum 2D layered texture width */
+    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT = ,  /**< Maximum 2D layered texture height */
+    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS = ,  /**< Maximum layers in a 2D layered texture */
+    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH = ,     /**< Deprecated, use HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH */
+    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT = ,    /**< Deprecated, use HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT */
+    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES = , /**< Deprecated, use HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS */
+    // HIP_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT =,                 /**< Alignment requirement for surfaces */
+    // HIP_DEVICE_ATTRIBUTE_TCC_DRIVER = ,                        /**< Device is using TCC driver model */
+    // HIP_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT = ,                /**< Number of asynchronous engines */
+    // HIP_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING = ,                /**< Device shares a unified address space with the host */
+    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH = ,   /**< Maximum 1D layered texture width */
+    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS = ,  /**< Maximum layers in a 1D layered texture */
+    // HIP_DEVICE_ATTRIBUTE_CAN_TEX2D_GATHER = ,                  /**< Deprecated, do not use. */
+    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH = ,    /**< Maximum 2D texture width if HIP_ARRAY3D_TEXTURE_GATHER is set */
+    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT = ,   /**< Maximum 2D texture height if HIP_ARRAY3D_TEXTURE_GATHER is set */
+    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE = , /**< Alternate maximum 3D texture width */
+    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE = ,/**< Alternate maximum 3D texture height */
+    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE = , /**< Alternate maximum 3D texture depth */
+    // 
+    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH = ,      /**< Maximum cubemap texture width/height */
+    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH = ,  /**< Maximum cubemap layered texture width/height */
+    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS = , /**< Maximum layers in a cubemap layered texture */
+    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH = ,           /**< Maximum 1D surface width */
+    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH = ,           /**< Maximum 2D surface width */
+    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT = ,          /**< Maximum 2D surface height */
+    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH = ,           /**< Maximum 3D surface width */
+    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT = ,          /**< Maximum 3D surface height */
+    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH = ,           /**< Maximum 3D surface depth */
+    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH = ,   /**< Maximum 1D layered surface width */
+    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS = ,  /**< Maximum layers in a 1D layered surface */
+    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH = ,   /**< Maximum 2D layered surface width */
+    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT = ,  /**< Maximum 2D layered surface height */
+    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS = ,  /**< Maximum layers in a 2D layered surface */
+    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH = ,      /**< Maximum cubemap surface width */
+    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH = ,  /**< Maximum cubemap layered surface width */
+    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS = , /**< Maximum layers in a cubemap layered surface */
+    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH = ,    /**< Maximum 1D linear texture width */
+    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH = ,    /**< Maximum 2D linear texture width */
+    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT = ,   /**< Maximum 2D linear texture height */
+    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH = ,    /**< Maximum 2D linear texture pitch in bytes */
+    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH = , /**< Maximum mipmapped 2D texture width */
+    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT = ,/**< Maximum mipmapped 2D texture height */
+    // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH = , /**< Maximum mipmapped 1D texture width */
+    // HIP_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED = ,       /**< Device supports stream priorities */
+    // HIP_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED = ,         /**< Device supports caching globals in L1 */
+    // HIP_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED = ,          /**< Device supports caching locals in L1 */
+    // HIP_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR = ,  /**< Maximum number of 32-bit registers available per multiprocessor */
+    // HIP_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID = ,           /**< Unique id for a group of devices on the same multi-GPU board */
+    // HIP_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED = ,       /**< Link between the device and the host supports native atomic operations (this is a placeholder attribute, and is not supported on any current hardware)*/
+    // HIP_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO = ,  /**< Ratio of single precision performance (in floating-point operations per second) to double precision performance */
+    // HIP_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED = ,      /**< Device supports compute preemption. */
+    // HIP_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM = , /**< Device can access host registered memory at the same virtual address as the CPU */
+    // HIP_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS = ,            /**< ::hipStreamBatchMemOp and related APIs are supported. */
+    // HIP_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS = ,     /**< 64-bit operations are supported in ::hipStreamBatchMemOp and related APIs. */
+    // HIP_DEVICE_ATTRIBUTE_CAN_FLUSH_REMOTE_WRITES = ,           /**< Both the ::HIP_STREAM_WAIT_VALUE_FLUSH flag and the ::HIP_STREAM_MEM_OP_FLUSH_REMOTE_WRITES MemOp are supported on the device. See \ref HIP_MEMOP for additional details. */
+    // HIP_DEVICE_ATTRIBUTE_HOST_REGISTER_SUPPORTED = ,           /**< Device supports host memory registration via ::HIPHostRegister. */
+    // HIP_DEVICE_ATTRIBUTE_MAX
+} HIPdevice_attribute;
+
+/**
+ * Function cache configurations: preference between shared memory and L1 cache size
+ */
+typedef enum HIPfunc_cache_enum {
+    HIP_FUNC_CACHE_PREFER_NONE    = 0x00, /**< no preference for shared memory or L1 (default) */
+    HIP_FUNC_CACHE_PREFER_SHARED  = 0x01, /**< prefer larger shared memory and smaller L1 cache */
+    HIP_FUNC_CACHE_PREFER_L1      = 0x02, /**< prefer larger L1 cache and smaller shared memory */
+    HIP_FUNC_CACHE_PREFER_EQUAL   = 0x03  /**< prefer equal sized L1 cache and shared memory */
+} HIPfunc_cache;
+
+/**
+ * Shared memory configurations: selects the shared memory bank width
+ */
+typedef enum HIPsharedconfig_enum {
+    HIP_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE    = 0x00, /**< set default shared memory bank size */
+    HIP_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE  = 0x01, /**< set shared memory bank width to four bytes */
+    HIP_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE = 0x02  /**< set shared memory bank width to eight bytes */
+} HIPsharedconfig;
+
+/**
+ * Function properties (attribute selector in the HIP_HIPFUNCGETATTRIBUTE / HIP_HIPFUNCSETATTRIBUTE signatures)
+ */
+typedef enum HIPfunction_attribute_enum {
+    /**
+     * The maximum number of threads per block, beyond which a launch of the
+     * function would fail. This number depends on both the function and the
+     * device on which the function is currently loaded.
+     */
+    HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0,
+
+    /**
+     * The size in bytes of statically-allocated shared memory required by
+     * this function. This does not include dynamically-allocated shared
+     * memory requested by the user at runtime.
+     */
+    HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES = 1,
+
+    /**
+     * The size in bytes of user-allocated constant memory required by this
+     * function.
+     */
+    HIP_FUNC_ATTRIBUTE_CONST_SIZE_BYTES = 2,
+
+    /**
+     * The size in bytes of local memory used by each thread of this function.
+     */
+    HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES = 3,
+
+    /**
+     * The number of registers used by each thread of this function.
+     */
+    HIP_FUNC_ATTRIBUTE_NUM_REGS = 4,
+
+    /**
+     * The PTX virtual architecture version for which the function was
+     * compiled. This value is the major PTX version * 10 + the minor PTX
+     * version, so a PTX version 1.3 function would return the value 13.
+     * Note that this may return the undefined value of 0 for cubins
+     * compiled prior to HIP 3.0.
+     */
+    HIP_FUNC_ATTRIBUTE_PTX_VERSION = 5,
+
+    /**
+     * The binary architecture version for which the function was compiled.
+     * This value is the major binary version * 10 + the minor binary version,
+     * so a binary version 1.3 function would return the value 13. Note that
+     * this will return a value of 10 for legacy cubins that do not have a
+     * properly-encoded binary architecture version.
+     */
+    HIP_FUNC_ATTRIBUTE_BINARY_VERSION = 6,
+
+    /**
+     * The attribute to indicate whether the function has been compiled with
+     * user specified option "-Xptxas --dlcm=ca" set.
+     */
+    HIP_FUNC_ATTRIBUTE_CACHE_MODE_CA = 7,
+
+    /**
+     * The maximum size in bytes of dynamically-allocated shared memory that can be used by
+     * this function. If the user-specified dynamic shared memory size is larger than this
+     * value, the launch will fail.
+     * See ::hipFuncSetAttribute
+     */
+    HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES = 8,
+
+    /**
+     * On devices where the L1 cache and shared memory use the same hardware resources,
+     * this sets the shared memory carveout preference, in percent of the total shared memory.
+     * Refer to ::HIP_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR.
+     * This is only a hint, and the driver can choose a different ratio if required to execute the function.
+     * See ::hipFuncSetAttribute
+     */
+    HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT = 9,
+
+    HIP_FUNC_ATTRIBUTE_MAX /**< implicit value 10: one past the last attribute listed above */
+} HIPfunction_attribute;
+
+/**
+ * Context creation flags
+ */
+typedef enum HIPctx_flags_enum {
+    HIP_CTX_SCHED_AUTO          = 0x00, /**< Automatic scheduling */
+    HIP_CTX_SCHED_SPIN          = 0x01, /**< Set spin as default scheduling */
+    HIP_CTX_SCHED_YIELD         = 0x02, /**< Set yield as default scheduling */
+    HIP_CTX_SCHED_BLOCKING_SYNC = 0x04, /**< Set blocking synchronization as default scheduling */
+    HIP_CTX_BLOCKING_SYNC       = 0x04, /**< Set blocking synchronization as default scheduling
+                                         *  \deprecated This flag was deprecated as of HIP 4.0
+                                         *  and was replaced with ::HIP_CTX_SCHED_BLOCKING_SYNC. */
+    HIP_CTX_SCHED_MASK          = 0x07, /**< Bitwise OR of the scheduling flags above (0x01 | 0x02 | 0x04) */
+    HIP_CTX_MAP_HOST            = 0x08, /**< Support mapped pinned allocations */
+    HIP_CTX_LMEM_RESIZE_TO_MAX  = 0x10, /**< Keep local memory allocation after launch */
+    HIP_CTX_FLAGS_MASK          = 0x1f  /**< Bitwise OR of every flag bit in this enum (0x07 | 0x08 | 0x10) */
+} HIPctx_flags;
+
+/**
+ * Stream creation flags (NOTE(review): presumably the unsigned int argument of HIP_HIPSTREAMCREATE - confirm)
+ */
+typedef enum HIPstream_flags_enum {
+    HIP_STREAM_DEFAULT      = 0x0, /**< Default stream flag */
+    HIP_STREAM_NON_BLOCKING = 0x1  /**< Stream does not synchronize with stream 0 (the NULL stream) */
+} HIPstream_flags;
+
+/**
+ * Event creation flags (NOTE(review): presumably the unsigned int argument of HIP_HIPEVENTCREATE - confirm)
+ */
+typedef enum HIPevent_flags_enum {
+    HIP_EVENT_DEFAULT        = 0x0, /**< Default event flag */
+    HIP_EVENT_BLOCKING_SYNC  = 0x1, /**< Event uses blocking synchronization */
+    HIP_EVENT_DISABLE_TIMING = 0x2, /**< Event will not record timing data */
+    HIP_EVENT_INTERPROCESS   = 0x4  /**< Event is suitable for interprocess use. HIP_EVENT_DISABLE_TIMING must be set */
+} HIPevent_flags;
+
+typedef enum HIPjitInputType_enum /* input kind taken by the HIP_HIPLINKADDDATA signature */
+{
+    /**
+     * Compiled device-class-specific device code\n
+     * Applicable options: none
+     */
+    HIP_JIT_INPUT_HIPBIN = 0,
+
+    /**
+     * PTX source code\n
+     * Applicable options: PTX compiler options
+     */
+    HIP_JIT_INPUT_PTX,
+
+    /**
+     * Bundle of multiple cubins and/or PTX of some device code\n
+     * Applicable options: PTX compiler options, ::HIP_JIT_FALLBACK_STRATEGY
+     */
+    HIP_JIT_INPUT_FATBINARY,
+
+    /**
+     * Host object with embedded device code\n
+     * Applicable options: PTX compiler options, ::HIP_JIT_FALLBACK_STRATEGY
+     */
+    HIP_JIT_INPUT_OBJECT,
+
+    /**
+     * Archive of host objects with embedded device code\n
+     * Applicable options: PTX compiler options, ::HIP_JIT_FALLBACK_STRATEGY
+     */
+    HIP_JIT_INPUT_LIBRARY,
+
+    HIP_JIT_NUM_INPUT_TYPES /**< implicit value 5: the number of input types listed above */
+} HIPjitInputType;
+
+#ifdef _WIN32
+#define HIPAPI __stdcall /* Windows builds: function pointers use the __stdcall convention */
+#else
+#define HIPAPI /* everywhere else: expands to nothing */
+#endif // _WIN32
+
+#define HIP_API_CALL HIPAPI /* applied to every HIP_* function-pointer typedef below */
+
+typedef HIPresult (HIP_API_CALL *HIP_HIPCTXCREATE)              (HIPcontext *, unsigned int, HIPdevice);
+typedef HIPresult (HIP_API_CALL *HIP_HIPCTXDESTROY)             (HIPcontext);
+typedef HIPresult (HIP_API_CALL *HIP_HIPCTXGETCACHECONFIG)      (HIPfunc_cache *);
+typedef HIPresult (HIP_API_CALL *HIP_HIPCTXGETCURRENT)          (HIPcontext *);
+typedef HIPresult (HIP_API_CALL *HIP_HIPCTXGETSHAREDMEMCONFIG)  (HIPsharedconfig *);
+typedef HIPresult (HIP_API_CALL *HIP_HIPCTXPOPCURRENT)          (HIPcontext *);
+typedef HIPresult (HIP_API_CALL *HIP_HIPCTXPUSHCURRENT)         (HIPcontext);
+typedef HIPresult (HIP_API_CALL *HIP_HIPCTXSETCACHECONFIG)      (HIPfunc_cache);
+typedef HIPresult (HIP_API_CALL *HIP_HIPCTXSETCURRENT)          (HIPcontext);
+typedef HIPresult (HIP_API_CALL *HIP_HIPCTXSETSHAREDMEMCONFIG)  (HIPsharedconfig);
+typedef HIPresult (HIP_API_CALL *HIP_HIPCTXSYNCHRONIZE)         (void); /* explicit (void): a real prototype; () would leave the parameter list unspecified in C */
+typedef HIPresult (HIP_API_CALL *HIP_HIPDEVICEGETATTRIBUTE)     (int *, HIPdevice_attribute, HIPdevice);
+typedef HIPresult (HIP_API_CALL *HIP_HIPDEVICEGETCOUNT)         (int *);
+typedef HIPresult (HIP_API_CALL *HIP_HIPDEVICEGET)              (HIPdevice *, int);
+typedef HIPresult (HIP_API_CALL *HIP_HIPDEVICEGETNAME)          (char *, int, HIPdevice);
+typedef HIPresult (HIP_API_CALL *HIP_HIPDEVICETOTALMEM)         (size_t *, HIPdevice);
+typedef HIPresult (HIP_API_CALL *HIP_HIPDRIVERGETVERSION)       (int *);
+typedef HIPresult (HIP_API_CALL *HIP_HIPEVENTCREATE)            (HIPevent *, unsigned int);
+typedef HIPresult (HIP_API_CALL *HIP_HIPEVENTDESTROY)           (HIPevent);
+typedef HIPresult (HIP_API_CALL *HIP_HIPEVENTELAPSEDTIME)       (float *, HIPevent, HIPevent);
+typedef HIPresult (HIP_API_CALL *HIP_HIPEVENTQUERY)             (HIPevent);
+typedef HIPresult (HIP_API_CALL *HIP_HIPEVENTRECORD)            (HIPevent, HIPstream);
+typedef HIPresult (HIP_API_CALL *HIP_HIPEVENTSYNCHRONIZE)       (HIPevent);
+typedef HIPresult (HIP_API_CALL *HIP_HIPFUNCGETATTRIBUTE)       (int *, HIPfunction_attribute, HIPfunction);
+typedef HIPresult (HIP_API_CALL *HIP_HIPFUNCSETATTRIBUTE)       (HIPfunction, HIPfunction_attribute, int);
+typedef HIPresult (HIP_API_CALL *HIP_HIPFUNCSETCACHECONFIG)     (HIPfunction, HIPfunc_cache);
+typedef HIPresult (HIP_API_CALL *HIP_HIPFUNCSETSHAREDMEMCONFIG) (HIPfunction, HIPsharedconfig);
+typedef HIPresult (HIP_API_CALL *HIP_HIPGETERRORNAME)           (HIPresult, const char **);
+typedef HIPresult (HIP_API_CALL *HIP_HIPGETERRORSTRING)         (HIPresult, const char **);
+typedef HIPresult (HIP_API_CALL *HIP_HIPINIT)                   (unsigned int);
+typedef HIPresult (HIP_API_CALL *HIP_HIPLAUNCHKERNEL)           (HIPfunction, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, HIPstream, void **, void **);
+typedef HIPresult (HIP_API_CALL *HIP_HIPMEMALLOC)               (HIPdeviceptr *, size_t);
+typedef HIPresult (HIP_API_CALL *HIP_HIPMEMALLOCHOST)           (void **, size_t);
+typedef HIPresult (HIP_API_CALL *HIP_HIPMEMCPYDTOD)             (HIPdeviceptr, HIPdeviceptr, size_t);
+typedef HIPresult (HIP_API_CALL *HIP_HIPMEMCPYDTOH)             (void *, HIPdeviceptr, size_t);
+typedef HIPresult (HIP_API_CALL *HIP_HIPMEMCPYHTOD)             (HIPdeviceptr, const void *, size_t);
+typedef HIPresult (HIP_API_CALL *HIP_HIPMEMFREE)                (HIPdeviceptr);
+typedef HIPresult (HIP_API_CALL *HIP_HIPMEMFREEHOST)            (void *);
+typedef HIPresult (HIP_API_CALL *HIP_HIPMEMGETINFO)             (size_t *, size_t *);
+typedef HIPresult (HIP_API_CALL *HIP_HIPMEMSETD32)              (HIPdeviceptr, unsigned int, size_t);
+typedef HIPresult (HIP_API_CALL *HIP_HIPMEMSETD8)               (HIPdeviceptr, unsigned char, size_t);
+typedef HIPresult (HIP_API_CALL *HIP_HIPMODULEGETFUNCTION)      (HIPfunction *, HIPmodule, const char *);
+typedef HIPresult (HIP_API_CALL *HIP_HIPMODULEGETGLOBAL)        (HIPdeviceptr *, size_t *, HIPmodule, const char *);
+typedef HIPresult (HIP_API_CALL *HIP_HIPMODULELOAD)             (HIPmodule *, const char *);
+typedef HIPresult (HIP_API_CALL *HIP_HIPMODULELOADDATA)         (HIPmodule *, const void *);
+typedef HIPresult (HIP_API_CALL *HIP_HIPMODULELOADDATAEX)       (HIPmodule *, const void *, unsigned int, HIPjit_option *, void **);
+typedef HIPresult (HIP_API_CALL *HIP_HIPMODULEUNLOAD)           (HIPmodule);
+typedef HIPresult (HIP_API_CALL *HIP_HIPPROFILERSTART)          (void);
+typedef HIPresult (HIP_API_CALL *HIP_HIPPROFILERSTOP)           (void);
+typedef HIPresult (HIP_API_CALL *HIP_HIPSTREAMCREATE)           (HIPstream *, unsigned int);
+typedef HIPresult (HIP_API_CALL *HIP_HIPSTREAMDESTROY)          (HIPstream);
+typedef HIPresult (HIP_API_CALL *HIP_HIPSTREAMSYNCHRONIZE)      (HIPstream);
+typedef HIPresult (HIP_API_CALL *HIP_HIPSTREAMWAITEVENT)        (HIPstream, HIPevent, unsigned int);
+typedef HIPresult (HIP_API_CALL *HIP_HIPLINKCREATE)             (unsigned int, HIPjit_option *, void **, HIPlinkState *);
+typedef HIPresult (HIP_API_CALL *HIP_HIPLINKADDDATA)            (HIPlinkState, HIPjitInputType, void *, size_t, const char *, unsigned int, HIPjit_option *, void **);
+typedef HIPresult (HIP_API_CALL *HIP_HIPLINKDESTROY)            (HIPlinkState);
+typedef HIPresult (HIP_API_CALL *HIP_HIPLINKCOMPLETE)           (HIPlinkState, void **, size_t *);
+
+typedef struct hc_hip_lib // one function-pointer member per HIP_* typedef declared above
+{
+  hc_dynlib_t lib; // NOTE(review): presumably the handle of the dynamically loaded HIP library - confirm
+
+  HIP_HIPCTXCREATE              hipCtxCreate;
+  HIP_HIPCTXDESTROY             hipCtxDestroy;
+  HIP_HIPCTXGETCACHECONFIG      hipCtxGetCacheConfig;
+  HIP_HIPCTXGETCURRENT          hipCtxGetCurrent;
+  HIP_HIPCTXGETSHAREDMEMCONFIG  hipCtxGetSharedMemConfig;
+  HIP_HIPCTXPOPCURRENT          hipCtxPopCurrent;
+  HIP_HIPCTXPUSHCURRENT         hipCtxPushCurrent;
+  HIP_HIPCTXSETCACHECONFIG      hipCtxSetCacheConfig;
+  HIP_HIPCTXSETCURRENT          hipCtxSetCurrent;
+  HIP_HIPCTXSETSHAREDMEMCONFIG  hipCtxSetSharedMemConfig;
+  HIP_HIPCTXSYNCHRONIZE         hipCtxSynchronize;
+  HIP_HIPDEVICEGETATTRIBUTE     hipDeviceGetAttribute;
+  HIP_HIPDEVICEGETCOUNT         hipDeviceGetCount;
+  HIP_HIPDEVICEGET              hipDeviceGet;
+  HIP_HIPDEVICEGETNAME          hipDeviceGetName;
+  HIP_HIPDEVICETOTALMEM         hipDeviceTotalMem;
+  HIP_HIPDRIVERGETVERSION       hipDriverGetVersion;
+  HIP_HIPEVENTCREATE            hipEventCreate;
+  HIP_HIPEVENTDESTROY           hipEventDestroy;
+  HIP_HIPEVENTELAPSEDTIME       hipEventElapsedTime;
+  HIP_HIPEVENTQUERY             hipEventQuery;
+  HIP_HIPEVENTRECORD            hipEventRecord;
+  HIP_HIPEVENTSYNCHRONIZE       hipEventSynchronize;
+  HIP_HIPFUNCGETATTRIBUTE       hipFuncGetAttribute;
+  HIP_HIPFUNCSETATTRIBUTE       hipFuncSetAttribute;
+  HIP_HIPFUNCSETCACHECONFIG     hipFuncSetCacheConfig;
+  HIP_HIPFUNCSETSHAREDMEMCONFIG hipFuncSetSharedMemConfig;
+  HIP_HIPGETERRORNAME           hipGetErrorName;
+  HIP_HIPGETERRORSTRING         hipGetErrorString;
+  HIP_HIPINIT                   hipInit;
+  HIP_HIPLAUNCHKERNEL           hipLaunchKernel;
+  HIP_HIPMEMALLOC               hipMemAlloc;
+  HIP_HIPMEMALLOCHOST           hipMemAllocHost;
+  HIP_HIPMEMCPYDTOD             hipMemcpyDtoD;
+  HIP_HIPMEMCPYDTOH             hipMemcpyDtoH;
+  HIP_HIPMEMCPYHTOD             hipMemcpyHtoD;
+  HIP_HIPMEMFREE                hipMemFree;
+  HIP_HIPMEMFREEHOST            hipMemFreeHost;
+  HIP_HIPMEMGETINFO             hipMemGetInfo;
+  HIP_HIPMEMSETD32              hipMemsetD32;
+  HIP_HIPMEMSETD8               hipMemsetD8;
+  HIP_HIPMODULEGETFUNCTION      hipModuleGetFunction;
+  HIP_HIPMODULEGETGLOBAL        hipModuleGetGlobal;
+  HIP_HIPMODULELOAD             hipModuleLoad;
+  HIP_HIPMODULELOADDATA         hipModuleLoadData;
+  HIP_HIPMODULELOADDATAEX       hipModuleLoadDataEx;
+  HIP_HIPMODULEUNLOAD           hipModuleUnload;
+  HIP_HIPPROFILERSTART          hipProfilerStart;
+  HIP_HIPPROFILERSTOP           hipProfilerStop;
+  HIP_HIPSTREAMCREATE           hipStreamCreate;
+  HIP_HIPSTREAMDESTROY          hipStreamDestroy;
+  HIP_HIPSTREAMSYNCHRONIZE      hipStreamSynchronize;
+  HIP_HIPSTREAMWAITEVENT        hipStreamWaitEvent;
+  HIP_HIPLINKCREATE             hipLinkCreate;
+  HIP_HIPLINKADDDATA            hipLinkAddData;
+  HIP_HIPLINKDESTROY            hipLinkDestroy;
+  HIP_HIPLINKCOMPLETE           hipLinkComplete;
+
+} hc_hip_lib_t;
+
+typedef hc_hip_lib_t HIP_PTR; // alias for the function-pointer table type above
+
+#endif // _EXT_HIP_H
\ No newline at end of file
diff --git a/include/ext_hiprtc.h b/include/ext_hiprtc.h
new file mode 100644
index 000000000..cd1be6c4b
--- /dev/null
+++ b/include/ext_hiprtc.h
@@ -0,0 +1,87 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+#ifndef _EXT_HIPRTC_H
+#define _EXT_HIPRTC_H
+
+/**
+ * from hiprtc.h (/opt/rocm/hip/include/hip/amd_detail/hiprtc.h)
+ */
+
+/**
+ * \ingroup error
+ * \brief   The enumerated type hiprtcResult defines API call result codes.
+ *          HIPRTC API functions return hiprtcResult to indicate the call
+ *          result.
+ */
+typedef enum {
+  HIPRTC_SUCCESS = 0, // the only code not named as an error; all codes below are HIPRTC_ERROR_*
+  HIPRTC_ERROR_OUT_OF_MEMORY = 1,
+  HIPRTC_ERROR_PROGRAM_CREATION_FAILURE = 2,
+  HIPRTC_ERROR_INVALID_INPUT = 3,
+  HIPRTC_ERROR_INVALID_PROGRAM = 4,
+  HIPRTC_ERROR_INVALID_OPTION = 5,
+  HIPRTC_ERROR_COMPILATION = 6,
+  HIPRTC_ERROR_BUILTIN_OPERATION_FAILURE = 7,
+  HIPRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION = 8,
+  HIPRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION = 9,
+  HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID = 10,
+  HIPRTC_ERROR_INTERNAL_ERROR = 11
+} hiprtcResult;
+
+/**
+ * \ingroup compilation
+ * \brief   hiprtcProgram is the unit of compilation, and an opaque handle for
+ *          a program.
+ *
+ * To compile a HIP program string, an instance of hiprtcProgram must be
+ * created first with ::hiprtcCreateProgram, then compiled with
+ * ::hiprtcCompileProgram.
+ */
+typedef struct _hiprtcProgram *hiprtcProgram;
+
+#ifdef _WIN32
+#define HIPRTCAPI __stdcall /* Windows builds: function pointers use the __stdcall convention */
+#else
+#define HIPRTCAPI /* everywhere else: expands to nothing */
+#endif // _WIN32
+
+#define HIPRTC_API_CALL HIPRTCAPI /* applied to every HIPRTC_* function-pointer typedef below */
+
+typedef hiprtcResult  (HIPRTC_API_CALL *HIPRTC_HIPRTCADDNAMEEXPRESSION)  (hiprtcProgram, const char * const);
+typedef hiprtcResult  (HIPRTC_API_CALL *HIPRTC_HIPRTCCOMPILEPROGRAM)     (hiprtcProgram, int, const char * const *);
+typedef hiprtcResult  (HIPRTC_API_CALL *HIPRTC_HIPRTCCREATEPROGRAM)      (hiprtcProgram *, const char *, const char *, int, const char * const *, const char * const *);
+typedef hiprtcResult  (HIPRTC_API_CALL *HIPRTC_HIPRTCDESTROYPROGRAM)     (hiprtcProgram *);
+typedef hiprtcResult  (HIPRTC_API_CALL *HIPRTC_HIPRTCGETLOWEREDNAME)     (hiprtcProgram, const char * const, const char **);
+typedef hiprtcResult  (HIPRTC_API_CALL *HIPRTC_HIPRTCGETPTX)             (hiprtcProgram, char *); /* type of the hiprtcGetCode member of hc_hiprtc_lib */
+typedef hiprtcResult  (HIPRTC_API_CALL *HIPRTC_HIPRTCGETPTXSIZE)         (hiprtcProgram, size_t *); /* type of the hiprtcGetCodeSize member of hc_hiprtc_lib */
+typedef hiprtcResult  (HIPRTC_API_CALL *HIPRTC_HIPRTCGETPROGRAMLOG)      (hiprtcProgram, char *);
+typedef hiprtcResult  (HIPRTC_API_CALL *HIPRTC_HIPRTCGETPROGRAMLOGSIZE)  (hiprtcProgram, size_t *);
+typedef const char * (HIPRTC_API_CALL *HIPRTC_HIPRTCGETERRORSTRING)      (hiprtcResult);
+typedef hiprtcResult  (HIPRTC_API_CALL *HIPRTC_HIPRTCVERSION)            (int *, int *);
+
+typedef struct hc_hiprtc_lib // one function-pointer member per HIPRTC_* typedef declared above
+{
+  hc_dynlib_t lib; // NOTE(review): presumably the handle of the dynamically loaded HIPRTC library - confirm
+
+  HIPRTC_HIPRTCADDNAMEEXPRESSION  hiprtcAddNameExpression;
+  HIPRTC_HIPRTCCOMPILEPROGRAM     hiprtcCompileProgram;
+  HIPRTC_HIPRTCCREATEPROGRAM      hiprtcCreateProgram;
+  HIPRTC_HIPRTCDESTROYPROGRAM     hiprtcDestroyProgram;
+  HIPRTC_HIPRTCGETLOWEREDNAME     hiprtcGetLoweredName;
+  HIPRTC_HIPRTCGETPTX             hiprtcGetCode;     // typedef name says PTX, member name says Code
+  HIPRTC_HIPRTCGETPTXSIZE         hiprtcGetCodeSize; // typedef name says PTXSIZE, member name says CodeSize
+  HIPRTC_HIPRTCGETPROGRAMLOG      hiprtcGetProgramLog;
+  HIPRTC_HIPRTCGETPROGRAMLOGSIZE  hiprtcGetProgramLogSize;
+  HIPRTC_HIPRTCGETERRORSTRING     hiprtcGetErrorString;
+  HIPRTC_HIPRTCVERSION            hiprtcVersion;
+
+} hc_hiprtc_lib_t;
+
+typedef hc_hiprtc_lib_t HIPRTC_PTR; // alias for the function-pointer table type above
+
+int hiprtc_make_options_array_from_string (char *string, char **options); // NOTE(review): defined elsewhere; presumably fills options[] from string and returns a count - confirm
+
+#endif // _EXT_HIPRTC_H
diff --git a/include/types.h b/include/types.h
index 33877c11d..f68d3050c 100644
--- a/include/types.h
+++ b/include/types.h
@@ -184,6 +184,7 @@ typedef enum vendor_id
   VENDOR_ID_NV            = (1U << 5),
   VENDOR_ID_POCL          = (1U << 6),
   VENDOR_ID_AMD_USE_INTEL = (1U << 7),
+  VENDOR_ID_AMD_USE_HIP   = (1U << 8),
   VENDOR_ID_GENERIC       = (1U << 31)
 
 } vendor_id_t;
@@ -645,6 +646,7 @@ typedef enum user_options_defaults
   MARKOV_THRESHOLD         = 0,
   NONCE_ERROR_CORRECTIONS  = 8,
   BACKEND_IGNORE_CUDA      = false,
+  BACKEND_IGNORE_HIP       = false,
   BACKEND_IGNORE_OPENCL    = false,
   BACKEND_INFO             = false,
   BACKEND_VECTOR_WIDTH     = 0,
@@ -695,113 +697,114 @@ typedef enum user_options_map
   IDX_ATTACK_MODE               = 'a',
   IDX_BACKEND_DEVICES           = 'd',
   IDX_BACKEND_IGNORE_CUDA       = 0xff01,
-  IDX_BACKEND_IGNORE_OPENCL     = 0xff02,
+  IDX_BACKEND_IGNORE_HIP        = 0xff02,
+  IDX_BACKEND_IGNORE_OPENCL     = 0xff03,
   IDX_BACKEND_INFO              = 'I',
-  IDX_BACKEND_VECTOR_WIDTH      = 0xff03,
-  IDX_BENCHMARK_ALL             = 0xff04,
+  IDX_BACKEND_VECTOR_WIDTH      = 0xff04,
+  IDX_BENCHMARK_ALL             = 0xff05,
   IDX_BENCHMARK                 = 'b',
-  IDX_BITMAP_MAX                = 0xff05,
-  IDX_BITMAP_MIN                = 0xff06,
+  IDX_BITMAP_MAX                = 0xff06,
+  IDX_BITMAP_MIN                = 0xff07,
   #ifdef WITH_BRAIN
   IDX_BRAIN_CLIENT              = 'z',
-  IDX_BRAIN_CLIENT_FEATURES     = 0xff07,
-  IDX_BRAIN_HOST                = 0xff08,
-  IDX_BRAIN_PASSWORD            = 0xff09,
-  IDX_BRAIN_PORT                = 0xff0a,
-  IDX_BRAIN_SERVER              = 0xff0b,
-  IDX_BRAIN_SERVER_TIMER        = 0xff0c,
-  IDX_BRAIN_SESSION             = 0xff0d,
-  IDX_BRAIN_SESSION_WHITELIST   = 0xff0e,
+  IDX_BRAIN_CLIENT_FEATURES     = 0xff08,
+  IDX_BRAIN_HOST                = 0xff09,
+  IDX_BRAIN_PASSWORD            = 0xff0a,
+  IDX_BRAIN_PORT                = 0xff0b,
+  IDX_BRAIN_SERVER              = 0xff0c,
+  IDX_BRAIN_SERVER_TIMER        = 0xff0d,
+  IDX_BRAIN_SESSION             = 0xff0e,
+  IDX_BRAIN_SESSION_WHITELIST   = 0xff0f,
   #endif
-  IDX_CPU_AFFINITY              = 0xff0f,
+  IDX_CPU_AFFINITY              = 0xff10,
   IDX_CUSTOM_CHARSET_1          = '1',
   IDX_CUSTOM_CHARSET_2          = '2',
   IDX_CUSTOM_CHARSET_3          = '3',
   IDX_CUSTOM_CHARSET_4          = '4',
-  IDX_DEBUG_FILE                = 0xff10,
-  IDX_DEBUG_MODE                = 0xff11,
-  IDX_ENCODING_FROM             = 0xff12,
-  IDX_ENCODING_TO               = 0xff13,
-  IDX_HASH_INFO                 = 0xff14,
-  IDX_FORCE                     = 0xff15,
-  IDX_HWMON_DISABLE             = 0xff16,
-  IDX_HWMON_TEMP_ABORT          = 0xff17,
+  IDX_DEBUG_FILE                = 0xff11,
+  IDX_DEBUG_MODE                = 0xff12,
+  IDX_ENCODING_FROM             = 0xff13,
+  IDX_ENCODING_TO               = 0xff14,
+  IDX_HASH_INFO                 = 0xff15,
+  IDX_FORCE                     = 0xff16,
+  IDX_HWMON_DISABLE             = 0xff17,
+  IDX_HWMON_TEMP_ABORT          = 0xff18,
   IDX_HASH_MODE                 = 'm',
-  IDX_HCCAPX_MESSAGE_PAIR       = 0xff18,
+  IDX_HCCAPX_MESSAGE_PAIR       = 0xff19,
   IDX_HELP                      = 'h',
-  IDX_HEX_CHARSET               = 0xff19,
-  IDX_HEX_SALT                  = 0xff1a,
-  IDX_HEX_WORDLIST              = 0xff1b,
-  IDX_HOOK_THREADS              = 0xff1c,
-  IDX_IDENTIFY                  = 0xff1d,
+  IDX_HEX_CHARSET               = 0xff1a,
+  IDX_HEX_SALT                  = 0xff1b,
+  IDX_HEX_WORDLIST              = 0xff1c,
+  IDX_HOOK_THREADS              = 0xff1d,
+  IDX_IDENTIFY                  = 0xff1e,
   IDX_INCREMENT                 = 'i',
-  IDX_INCREMENT_MAX             = 0xff1e,
-  IDX_INCREMENT_MIN             = 0xff1f,
-  IDX_INDUCTION_DIR             = 0xff20,
-  IDX_KEEP_GUESSING             = 0xff21,
+  IDX_INCREMENT_MAX             = 0xff1f,
+  IDX_INCREMENT_MIN             = 0xff20,
+  IDX_INDUCTION_DIR             = 0xff21,
+  IDX_KEEP_GUESSING             = 0xff22,
   IDX_KERNEL_ACCEL              = 'n',
   IDX_KERNEL_LOOPS              = 'u',
   IDX_KERNEL_THREADS            = 'T',
-  IDX_KEYBOARD_LAYOUT_MAPPING   = 0xff22,
-  IDX_KEYSPACE                  = 0xff23,
-  IDX_LEFT                      = 0xff24,
+  IDX_KEYBOARD_LAYOUT_MAPPING   = 0xff23,
+  IDX_KEYSPACE                  = 0xff24,
+  IDX_LEFT                      = 0xff25,
   IDX_LIMIT                     = 'l',
-  IDX_LOGFILE_DISABLE           = 0xff25,
-  IDX_LOOPBACK                  = 0xff26,
-  IDX_MACHINE_READABLE          = 0xff27,
-  IDX_MARKOV_CLASSIC            = 0xff28,
-  IDX_MARKOV_DISABLE            = 0xff29,
-  IDX_MARKOV_HCSTAT2            = 0xff2a,
-  IDX_MARKOV_INVERSE            = 0xff2b,
+  IDX_LOGFILE_DISABLE           = 0xff26,
+  IDX_LOOPBACK                  = 0xff27,
+  IDX_MACHINE_READABLE          = 0xff28,
+  IDX_MARKOV_CLASSIC            = 0xff29,
+  IDX_MARKOV_DISABLE            = 0xff2a,
+  IDX_MARKOV_HCSTAT2            = 0xff2b,
+  IDX_MARKOV_INVERSE            = 0xff2c,
   IDX_MARKOV_THRESHOLD          = 't',
-  IDX_NONCE_ERROR_CORRECTIONS   = 0xff2c,
+  IDX_NONCE_ERROR_CORRECTIONS   = 0xff2d,
   IDX_OPENCL_DEVICE_TYPES       = 'D',
   IDX_OPTIMIZED_KERNEL_ENABLE   = 'O',
-  IDX_OUTFILE_AUTOHEX_DISABLE   = 0xff2d,
-  IDX_OUTFILE_CHECK_DIR         = 0xff2e,
-  IDX_OUTFILE_CHECK_TIMER       = 0xff2f,
-  IDX_OUTFILE_FORMAT            = 0xff30,
+  IDX_OUTFILE_AUTOHEX_DISABLE   = 0xff2e,
+  IDX_OUTFILE_CHECK_DIR         = 0xff2f,
+  IDX_OUTFILE_CHECK_TIMER       = 0xff30,
+  IDX_OUTFILE_FORMAT            = 0xff31,
   IDX_OUTFILE                   = 'o',
-  IDX_POTFILE_DISABLE           = 0xff31,
-  IDX_POTFILE_PATH              = 0xff32,
-  IDX_PROGRESS_ONLY             = 0xff33,
-  IDX_QUIET                     = 0xff34,
-  IDX_REMOVE                    = 0xff35,
-  IDX_REMOVE_TIMER              = 0xff36,
-  IDX_RESTORE                   = 0xff37,
-  IDX_RESTORE_DISABLE           = 0xff38,
-  IDX_RESTORE_FILE_PATH         = 0xff39,
+  IDX_POTFILE_DISABLE           = 0xff32,
+  IDX_POTFILE_PATH              = 0xff33,
+  IDX_PROGRESS_ONLY             = 0xff34,
+  IDX_QUIET                     = 0xff35,
+  IDX_REMOVE                    = 0xff36,
+  IDX_REMOVE_TIMER              = 0xff37,
+  IDX_RESTORE                   = 0xff38,
+  IDX_RESTORE_DISABLE           = 0xff39,
+  IDX_RESTORE_FILE_PATH         = 0xff3a,
   IDX_RP_FILE                   = 'r',
-  IDX_RP_GEN_FUNC_MAX           = 0xff3a,
-  IDX_RP_GEN_FUNC_MIN           = 0xff3b,
+  IDX_RP_GEN_FUNC_MAX           = 0xff3b,
+  IDX_RP_GEN_FUNC_MIN           = 0xff3c,
   IDX_RP_GEN                    = 'g',
-  IDX_RP_GEN_SEED               = 0xff3c,
+  IDX_RP_GEN_SEED               = 0xff3d,
   IDX_RULE_BUF_L                = 'j',
   IDX_RULE_BUF_R                = 'k',
-  IDX_RUNTIME                   = 0xff3d,
-  IDX_SCRYPT_TMTO               = 0xff3e,
+  IDX_RUNTIME                   = 0xff3e,
+  IDX_SCRYPT_TMTO               = 0xff3f,
   IDX_SEGMENT_SIZE              = 'c',
-  IDX_SELF_TEST_DISABLE         = 0xff3f,
+  IDX_SELF_TEST_DISABLE         = 0xff40,
   IDX_SEPARATOR                 = 'p',
-  IDX_SESSION                   = 0xff40,
-  IDX_SHOW                      = 0xff41,
+  IDX_SESSION                   = 0xff41,
+  IDX_SHOW                      = 0xff42,
   IDX_SKIP                      = 's',
   IDX_SLOW_CANDIDATES           = 'S',
-  IDX_SPEED_ONLY                = 0xff42,
-  IDX_SPIN_DAMP                 = 0xff43,
-  IDX_STATUS                    = 0xff44,
-  IDX_STATUS_JSON               = 0xff45,
-  IDX_STATUS_TIMER              = 0xff46,
-  IDX_STDOUT_FLAG               = 0xff47,
-  IDX_STDIN_TIMEOUT_ABORT       = 0xff48,
-  IDX_TRUECRYPT_KEYFILES        = 0xff49,
-  IDX_USERNAME                  = 0xff4a,
-  IDX_VERACRYPT_KEYFILES        = 0xff4b,
-  IDX_VERACRYPT_PIM_START       = 0xff4c,
-  IDX_VERACRYPT_PIM_STOP        = 0xff4d,
+  IDX_SPEED_ONLY                = 0xff43,
+  IDX_SPIN_DAMP                 = 0xff44,
+  IDX_STATUS                    = 0xff45,
+  IDX_STATUS_JSON               = 0xff46,
+  IDX_STATUS_TIMER              = 0xff47,
+  IDX_STDOUT_FLAG               = 0xff48,
+  IDX_STDIN_TIMEOUT_ABORT       = 0xff49,
+  IDX_TRUECRYPT_KEYFILES        = 0xff4a,
+  IDX_USERNAME                  = 0xff4b,
+  IDX_VERACRYPT_KEYFILES        = 0xff4c,
+  IDX_VERACRYPT_PIM_START       = 0xff4d,
+  IDX_VERACRYPT_PIM_STOP        = 0xff4e,
   IDX_VERSION_LOWER             = 'v',
   IDX_VERSION                   = 'V',
-  IDX_WORDLIST_AUTOHEX_DISABLE  = 0xff4e,
+  IDX_WORDLIST_AUTOHEX_DISABLE  = 0xff4f,
   IDX_WORKLOAD_PROFILE          = 'w',
 
 } user_options_map_t;
@@ -1077,7 +1080,10 @@ typedef struct hc_fp
 } HCFILE;
 
 #include "ext_nvrtc.h"
+#include "ext_hiprtc.h"
+
 #include "ext_cuda.h"
+#include "ext_hip.h"
 #include "ext_OpenCL.h"
 
 typedef struct hc_device_param
@@ -1478,6 +1484,86 @@ typedef struct hc_device_param
   CUdeviceptr       cuda_d_st_salts_buf;
   CUdeviceptr       cuda_d_st_esalts_buf;
 
+  // API: hip
+
+  bool              is_hip;
+
+  int               hip_warp_size;
+
+  HIPdevice         hip_device;
+  HIPcontext        hip_context;
+  HIPstream         hip_stream;
+
+  HIPevent          hip_event1;
+  HIPevent          hip_event2;
+
+  HIPmodule         hip_module;
+  HIPmodule         hip_module_shared;
+  HIPmodule         hip_module_mp;
+  HIPmodule         hip_module_amp;
+
+  HIPfunction       hip_function1;
+  HIPfunction       hip_function12;
+  HIPfunction       hip_function2p;
+  HIPfunction       hip_function2;
+  HIPfunction       hip_function2e;
+  HIPfunction       hip_function23;
+  HIPfunction       hip_function3;
+  HIPfunction       hip_function4;
+  HIPfunction       hip_function_init2;
+  HIPfunction       hip_function_loop2p;
+  HIPfunction       hip_function_loop2;
+  HIPfunction       hip_function_mp;
+  HIPfunction       hip_function_mp_l;
+  HIPfunction       hip_function_mp_r;
+  HIPfunction       hip_function_amp;
+  HIPfunction       hip_function_tm;
+  HIPfunction       hip_function_memset;
+  HIPfunction       hip_function_atinit;
+  HIPfunction       hip_function_utf8toutf16le;
+  HIPfunction       hip_function_decompress;
+  HIPfunction       hip_function_aux1;
+  HIPfunction       hip_function_aux2;
+  HIPfunction       hip_function_aux3;
+  HIPfunction       hip_function_aux4;
+
+  HIPdeviceptr      hip_d_pws_buf;
+  HIPdeviceptr      hip_d_pws_amp_buf;
+  HIPdeviceptr      hip_d_pws_comp_buf;
+  HIPdeviceptr      hip_d_pws_idx;
+  HIPdeviceptr      hip_d_rules;
+  HIPdeviceptr      hip_d_rules_c;
+  HIPdeviceptr      hip_d_combs;
+  HIPdeviceptr      hip_d_combs_c;
+  HIPdeviceptr      hip_d_bfs;
+  HIPdeviceptr      hip_d_bfs_c;
+  HIPdeviceptr      hip_d_tm_c;
+  HIPdeviceptr      hip_d_bitmap_s1_a;
+  HIPdeviceptr      hip_d_bitmap_s1_b;
+  HIPdeviceptr      hip_d_bitmap_s1_c;
+  HIPdeviceptr      hip_d_bitmap_s1_d;
+  HIPdeviceptr      hip_d_bitmap_s2_a;
+  HIPdeviceptr      hip_d_bitmap_s2_b;
+  HIPdeviceptr      hip_d_bitmap_s2_c;
+  HIPdeviceptr      hip_d_bitmap_s2_d;
+  HIPdeviceptr      hip_d_plain_bufs;
+  HIPdeviceptr      hip_d_digests_buf;
+  HIPdeviceptr      hip_d_digests_shown;
+  HIPdeviceptr      hip_d_salt_bufs;
+  HIPdeviceptr      hip_d_esalt_bufs;
+  HIPdeviceptr      hip_d_tmps;
+  HIPdeviceptr      hip_d_hooks;
+  HIPdeviceptr      hip_d_result;
+  HIPdeviceptr      hip_d_extra0_buf;
+  HIPdeviceptr      hip_d_extra1_buf;
+  HIPdeviceptr      hip_d_extra2_buf;
+  HIPdeviceptr      hip_d_extra3_buf;
+  HIPdeviceptr      hip_d_root_css_buf;
+  HIPdeviceptr      hip_d_markov_css_buf;
+  HIPdeviceptr      hip_d_st_digests_buf;
+  HIPdeviceptr      hip_d_st_salts_buf;
+  HIPdeviceptr      hip_d_st_esalts_buf;
+
   // API: opencl
 
   bool              is_opencl;
@@ -1569,18 +1655,25 @@ typedef struct backend_ctx
 {
   bool                enabled;
 
-  void               *ocl;
   void               *cuda;
+  void               *hip;
+  void               *ocl;
+
   void               *nvrtc;
+  void               *hiprtc;
 
   int                 backend_device_from_cuda[DEVICES_MAX];                              // from cuda device index to backend device index
+  int                 backend_device_from_hip[DEVICES_MAX];                               // from hip device index to backend device index
   int                 backend_device_from_opencl[DEVICES_MAX];                            // from opencl device index to backend device index
   int                 backend_device_from_opencl_platform[CL_PLATFORMS_MAX][DEVICES_MAX]; // from opencl device index to backend device index (by platform)
 
   int                 backend_devices_cnt;
   int                 backend_devices_active;
+
   int                 cuda_devices_cnt;
   int                 cuda_devices_active;
+  int                 hip_devices_cnt;
+  int                 hip_devices_active;
   int                 opencl_devices_cnt;
   int                 opencl_devices_active;
 
@@ -1614,6 +1707,14 @@ typedef struct backend_ctx
   int                 nvrtc_driver_version;
   int                 cuda_driver_version;
 
+  // hip
+
+  int                 rc_hip_init;
+  int                 rc_hiprtc_init;
+
+  int                 hiprtc_driver_version;
+  int                 hip_driver_version;
+
   // opencl
 
   cl_platform_id     *opencl_platforms;
@@ -2014,6 +2115,7 @@ typedef struct user_options
   bool         markov_disable;
   bool         markov_inverse;
   bool         backend_ignore_cuda;
+  bool         backend_ignore_hip;
   bool         backend_ignore_opencl;
   bool         backend_info;
   bool         optimized_kernel_enable;
diff --git a/src/Makefile b/src/Makefile
index d7d8d2915..e1ff6a534 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -6,7 +6,7 @@
 SHARED                  ?= 0
 DEBUG                   := 0
 PRODUCTION              := 0
-PRODUCTION_VERSION      := v6.2.2
+PRODUCTION_VERSION      := v6.2.3
 ENABLE_CUBIN            ?= 1
 ENABLE_BRAIN            ?= 1
 ENABLE_UNRAR            ?= 1
@@ -360,7 +360,7 @@ EMU_OBJS_ALL            += emu_inc_rp emu_inc_rp_optimized
 EMU_OBJS_ALL            += emu_inc_hash_md4 emu_inc_hash_md5 emu_inc_hash_ripemd160 emu_inc_hash_sha1 emu_inc_hash_sha256 emu_inc_hash_sha384 emu_inc_hash_sha512 emu_inc_hash_streebog256 emu_inc_hash_streebog512 emu_inc_ecc_secp256k1
 EMU_OBJS_ALL            += emu_inc_cipher_aes emu_inc_cipher_camellia emu_inc_cipher_des emu_inc_cipher_kuznyechik emu_inc_cipher_serpent emu_inc_cipher_twofish
 
-OBJS_ALL                := affinity autotune backend benchmark bitmap bitops combinator common convert cpt cpu_crc32 debugfile dictstat dispatch dynloader event ext_ADL ext_cuda ext_nvapi ext_nvml ext_nvrtc ext_OpenCL ext_sysfs_amdgpu ext_sysfs_cpu ext_iokit ext_lzma filehandling folder hashcat hashes hlfmt hwmon induct interface keyboard_layout locking logfile loopback memory monitor mpsp outfile_check outfile pidfile potfile restore rp rp_cpu selftest slow_candidates shared status stdout straight terminal thread timer tuningdb usage user_options wordlist $(EMU_OBJS_ALL)
+OBJS_ALL                := affinity autotune backend benchmark bitmap bitops combinator common convert cpt cpu_crc32 debugfile dictstat dispatch dynloader event ext_ADL ext_cuda ext_hip ext_nvapi ext_nvml ext_nvrtc ext_hiprtc ext_OpenCL ext_sysfs_amdgpu ext_sysfs_cpu ext_iokit ext_lzma filehandling folder hashcat hashes hlfmt hwmon induct interface keyboard_layout locking logfile loopback memory monitor mpsp outfile_check outfile pidfile potfile restore rp rp_cpu selftest slow_candidates shared status stdout straight terminal thread timer tuningdb usage user_options wordlist $(EMU_OBJS_ALL)
 
 ifeq ($(ENABLE_BRAIN),1)
 OBJS_ALL                += brain
diff --git a/src/autotune.c b/src/autotune.c
index 19230d540..254442811 100644
--- a/src/autotune.c
+++ b/src/autotune.c
@@ -157,8 +157,9 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
 
     const u32 kernel_power_max = device_param->hardware_power * kernel_accel_max;
 
-    int CL_rc;
     int CU_rc;
+    int HIP_rc;
+    int CL_rc;
 
     if (device_param->is_cuda == true)
     {
@@ -167,6 +168,13 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
       if (CU_rc == -1) return -1;
     }
 
+    if (device_param->is_hip == true)
+    {
+      HIP_rc = run_hip_kernel_atinit (hashcat_ctx, device_param, device_param->hip_d_pws_buf, kernel_power_max);
+
+      if (HIP_rc == -1) return -1;
+    }
+
     if (device_param->is_opencl == true)
     {
       CL_rc = run_opencl_kernel_atinit (hashcat_ctx, device_param, device_param->opencl_d_pws_buf, kernel_power_max);
@@ -190,6 +198,13 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
             if (CU_rc == -1) return -1;
           }
 
+          if (device_param->is_hip == true)
+          {
+            HIP_rc = hc_hipMemcpyDtoD (hashcat_ctx, device_param->hip_d_rules_c, device_param->hip_d_rules, MIN (kernel_loops_max, KERNEL_RULES) * sizeof (kernel_rule_t));
+
+            if (HIP_rc == -1) return -1;
+          }
+
           if (device_param->is_opencl == true)
           {
             CL_rc = hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_rules, device_param->opencl_d_rules_c, 0, 0, MIN (kernel_loops_max, KERNEL_RULES) * sizeof (kernel_rule_t), 0, NULL, NULL);
@@ -383,6 +398,27 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
     if (CU_rc == -1) return -1;
   }
 
+  if (device_param->is_hip == true)
+  {
+    int HIP_rc;
+
+    HIP_rc = run_hip_kernel_memset (hashcat_ctx, device_param, device_param->hip_d_pws_buf, 0, device_param->size_pws);
+
+    if (HIP_rc == -1) return -1;
+
+    HIP_rc = run_hip_kernel_memset (hashcat_ctx, device_param, device_param->hip_d_plain_bufs, 0, device_param->size_plains);
+
+    if (HIP_rc == -1) return -1;
+
+    HIP_rc = run_hip_kernel_memset (hashcat_ctx, device_param, device_param->hip_d_digests_shown, 0, device_param->size_shown);
+
+    if (HIP_rc == -1) return -1;
+
+    HIP_rc = run_hip_kernel_memset (hashcat_ctx, device_param, device_param->hip_d_result, 0, device_param->size_results);
+
+    if (HIP_rc == -1) return -1;
+  }
+
   if (device_param->is_opencl == true)
   {
     int CL_rc;
@@ -451,9 +487,12 @@ HC_API_CALL void *thread_autotune (void *p)
 
   if (device_param->is_cuda == true)
   {
-    const int rc_cuCtxSetCurrent = hc_cuCtxSetCurrent (hashcat_ctx, device_param->cuda_context);
+    if (hc_cuCtxPushCurrent (hashcat_ctx, device_param->cuda_context) == -1) return NULL;
+  }
 
-    if (rc_cuCtxSetCurrent == -1) return NULL;
+  if (device_param->is_hip == true)
+  {
+    if (hc_hipCtxPushCurrent (hashcat_ctx, device_param->hip_context) == -1) return NULL;
   }
 
   const int rc_autotune = autotune (hashcat_ctx, device_param);
@@ -463,5 +502,15 @@ HC_API_CALL void *thread_autotune (void *p)
     // we should do something here, tell hashcat main that autotune failed to abort
   }
 
+  if (device_param->is_cuda == true)
+  {
+    if (hc_cuCtxPopCurrent (hashcat_ctx, &device_param->cuda_context) == -1) return NULL;
+  }
+
+  if (device_param->is_hip == true)
+  {
+    if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1) return NULL;
+  }
+
   return NULL;
 }
diff --git a/src/backend.c b/src/backend.c
index 20ec98a0e..4caff74ce 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -55,6 +55,10 @@ static bool is_same_device (const hc_device_param_t *src, const hc_device_param_
 
   if ((src->is_cuda == true) && (dst->is_cuda == true)) return false;
 
+  // HIP can't have aliases
+
+  if ((src->is_hip == true) && (dst->is_hip == true)) return false;
+
   // But OpenCL can have aliases
 
   if ((src->is_opencl == true) && (dst->is_opencl == true))
@@ -119,6 +123,10 @@ static int backend_ctx_find_alias_devices (hashcat_ctx_t *hashcat_ctx)
 
       if (alias_device->is_cuda == true) continue;
 
+      // this lets HIP devices survive over OpenCL
+
+      if (alias_device->is_hip == true) continue;
+
         // this lets native OpenCL runtime survive over generic OpenCL runtime
 
       if (alias_device->opencl_device_type & CL_DEVICE_TYPE_CPU)
@@ -153,6 +161,7 @@ static bool is_same_device_type (const hc_device_param_t *src, const hc_device_p
   if (strcmp (src->device_name, dst->device_name) != 0) return false;
 
   if (src->is_cuda   != dst->is_cuda)   return false;
+  if (src->is_hip    != dst->is_hip)    return false;
   if (src->is_opencl != dst->is_opencl) return false;
 
   if (strcmp (src->device_name, dst->device_name) != 0) return false;
@@ -960,6 +969,213 @@ int hc_nvrtcVersion (hashcat_ctx_t *hashcat_ctx, int *major, int *minor)
   return 0;
 }
 
+// HIPRTC
+
+int hiprtc_init (hashcat_ctx_t *hashcat_ctx)
+{
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
+  // Sets up the HIPRTC binding: clears the HIPRTC_PTR table, opens the library, then loads the entry points listed below.
+  HIPRTC_PTR *hiprtc = (HIPRTC_PTR *) backend_ctx->hiprtc;
+
+  memset (hiprtc, 0, sizeof (HIPRTC_PTR)); // clear the table before anything is loaded into it
+  // the Windows, macOS and Cygwin branches only carry "fixme" placeholder names; the last branch names libamdhip64
+  #if   defined (_WIN)
+  hiprtc->lib = hc_dlopen ("fixme.dll");
+  #elif defined (__APPLE__)
+  hiprtc->lib = hc_dlopen ("fixme.dylib");
+  #elif defined (__CYGWIN__)
+  hiprtc->lib = hc_dlopen ("fixme.dll");
+  #else
+  hiprtc->lib = hc_dlopen ("libamdhip64.so");
+  // second attempt with the ".4" versioned name when the plain name could not be opened
+  if (hiprtc->lib == NULL) hiprtc->lib = hc_dlopen ("libamdhip64.so.4");
+  #endif
+
+  if (hiprtc->lib == NULL) return -1; // no library handle was obtained
+  // NOTE(review): HC_LOAD_FUNC is defined elsewhere; presumably it resolves each symbol from hiprtc->lib and may return early - confirm
+  HC_LOAD_FUNC (hiprtc, hiprtcAddNameExpression,  HIPRTC_HIPRTCADDNAMEEXPRESSION, HIPRTC, 1);
+  HC_LOAD_FUNC (hiprtc, hiprtcCompileProgram,     HIPRTC_HIPRTCCOMPILEPROGRAM,    HIPRTC, 1);
+  HC_LOAD_FUNC (hiprtc, hiprtcCreateProgram,      HIPRTC_HIPRTCCREATEPROGRAM,     HIPRTC, 1);
+  HC_LOAD_FUNC (hiprtc, hiprtcDestroyProgram,     HIPRTC_HIPRTCDESTROYPROGRAM,    HIPRTC, 1);
+  HC_LOAD_FUNC (hiprtc, hiprtcGetLoweredName,     HIPRTC_HIPRTCGETLOWEREDNAME,    HIPRTC, 1);
+  HC_LOAD_FUNC (hiprtc, hiprtcGetCode,            HIPRTC_HIPRTCGETPTX,            HIPRTC, 1); // type name still says GETPTX
+  HC_LOAD_FUNC (hiprtc, hiprtcGetCodeSize,        HIPRTC_HIPRTCGETPTXSIZE,        HIPRTC, 1); // type name still says GETPTXSIZE
+  HC_LOAD_FUNC (hiprtc, hiprtcGetProgramLog,      HIPRTC_HIPRTCGETPROGRAMLOG,     HIPRTC, 1);
+  HC_LOAD_FUNC (hiprtc, hiprtcGetProgramLogSize,  HIPRTC_HIPRTCGETPROGRAMLOGSIZE, HIPRTC, 1);
+  HC_LOAD_FUNC (hiprtc, hiprtcGetErrorString,     HIPRTC_HIPRTCGETERRORSTRING,    HIPRTC, 1);
+  HC_LOAD_FUNC (hiprtc, hiprtcVersion,            HIPRTC_HIPRTCVERSION,           HIPRTC, 1);
+
+  return 0;
+}
+
+void hiprtc_close (hashcat_ctx_t *hashcat_ctx)
+{
+  // Releases the HIPRTC binding stored in backend_ctx->hiprtc: closes the
+  // library handle when one is set, hands the table to hcfree and resets
+  // the backend_ctx pointer to NULL. Does nothing if no table is present.
+
+  backend_ctx_t *ctx = hashcat_ctx->backend_ctx;
+
+  HIPRTC_PTR *rtc = (HIPRTC_PTR *) ctx->hiprtc;
+
+  if (rtc == NULL) return;
+
+  if (rtc->lib != NULL) hc_dlclose (rtc->lib);
+
+  hcfree (ctx->hiprtc);
+
+  ctx->hiprtc = NULL;
+}
+
+int hc_hiprtcCreateProgram (hashcat_ctx_t *hashcat_ctx, hiprtcProgram *prog, const char *src, const char *name, int numHeaders, const char * const *headers, const char * const *includeNames)
+{
+  // Calls hiprtcCreateProgram through the HIPRTC_PTR table kept in backend_ctx.
+  // Returns 0 on HIPRTC_SUCCESS; any other result is logged and mapped to -1.
+
+  HIPRTC_PTR *rtc = (HIPRTC_PTR *) hashcat_ctx->backend_ctx->hiprtc;
+
+  const hiprtcResult status = rtc->hiprtcCreateProgram (prog, src, name, numHeaders, headers, includeNames);
+
+  if (status == HIPRTC_SUCCESS) return 0;
+
+  // failure path: log the text hiprtcGetErrorString gives for this result
+
+  event_log_error (hashcat_ctx, "hiprtcCreateProgram(): %s", rtc->hiprtcGetErrorString (status));
+
+  return -1;
+}
+
+int hc_hiprtcDestroyProgram (hashcat_ctx_t *hashcat_ctx, hiprtcProgram *prog)
+{
+  // Calls hiprtcDestroyProgram through the HIPRTC_PTR table kept in backend_ctx.
+  // Returns 0 on HIPRTC_SUCCESS; any other result is logged and mapped to -1.
+
+  HIPRTC_PTR *rtc = (HIPRTC_PTR *) hashcat_ctx->backend_ctx->hiprtc;
+
+  const hiprtcResult status = rtc->hiprtcDestroyProgram (prog);
+
+  if (status == HIPRTC_SUCCESS) return 0;
+
+  // failure path: log the text hiprtcGetErrorString gives for this result
+
+  event_log_error (hashcat_ctx, "hiprtcDestroyProgram(): %s", rtc->hiprtcGetErrorString (status));
+
+  return -1;
+}
+
+int hc_hiprtcCompileProgram (hashcat_ctx_t *hashcat_ctx, hiprtcProgram prog, int numOptions, const char * const *options)
+{
+  // Wrapper for hiprtcCompileProgram: 0 on HIPRTC_SUCCESS, otherwise logs the error string and returns -1.
+
+  HIPRTC_PTR *rtc = (HIPRTC_PTR *) hashcat_ctx->backend_ctx->hiprtc;
+
+  // compiled out: would print every option string before it is passed along
+  #if 0
+  for(int i =0; i< numOptions; i++)
+    printf("Option_%d = %s\n", i, options[i]);
+  #endif
+
+  const hiprtcResult status = rtc->hiprtcCompileProgram (prog, numOptions, options);
+
+  if (status == HIPRTC_SUCCESS) return 0;
+
+  // failure path: log the text hiprtcGetErrorString gives for this result
+
+  event_log_error (hashcat_ctx, "hiprtcCompileProgram(): %s", rtc->hiprtcGetErrorString (status));
+
+  return -1;
+}
+
+int hc_hiprtcGetProgramLogSize (hashcat_ctx_t *hashcat_ctx, hiprtcProgram prog, size_t *logSizeRet)
+{
+  // Calls hiprtcGetProgramLogSize through the HIPRTC_PTR table kept in backend_ctx.
+  // Returns 0 on HIPRTC_SUCCESS; any other result is logged and mapped to -1.
+
+  HIPRTC_PTR *rtc = (HIPRTC_PTR *) hashcat_ctx->backend_ctx->hiprtc;
+
+  const hiprtcResult status = rtc->hiprtcGetProgramLogSize (prog, logSizeRet);
+
+  if (status == HIPRTC_SUCCESS) return 0;
+
+  // failure path: log the text hiprtcGetErrorString gives for this result
+
+  event_log_error (hashcat_ctx, "hiprtcGetProgramLogSize(): %s", rtc->hiprtcGetErrorString (status));
+
+  return -1;
+}
+
+int hc_hiprtcGetProgramLog (hashcat_ctx_t *hashcat_ctx, hiprtcProgram prog, char *log)
+{
+  // Calls hiprtcGetProgramLog through the HIPRTC_PTR table kept in backend_ctx.
+  // Returns 0 on HIPRTC_SUCCESS; any other result is logged and mapped to -1.
+
+  HIPRTC_PTR *rtc = (HIPRTC_PTR *) hashcat_ctx->backend_ctx->hiprtc;
+
+  const hiprtcResult status = rtc->hiprtcGetProgramLog (prog, log);
+
+  if (status == HIPRTC_SUCCESS) return 0;
+
+  // failure path: log the text hiprtcGetErrorString gives for this result
+
+  event_log_error (hashcat_ctx, "hiprtcGetProgramLog(): %s", rtc->hiprtcGetErrorString (status));
+
+  return -1;
+}
+
+int hc_hiprtcGetCodeSize (hashcat_ctx_t *hashcat_ctx, hiprtcProgram prog, size_t *ptxSizeRet)
+{
+  // Calls hiprtcGetCodeSize through the HIPRTC_PTR table kept in backend_ctx.
+  // Returns 0 on HIPRTC_SUCCESS; any other result is logged and mapped to -1.
+
+  HIPRTC_PTR *rtc = (HIPRTC_PTR *) hashcat_ctx->backend_ctx->hiprtc;
+
+  const hiprtcResult status = rtc->hiprtcGetCodeSize (prog, ptxSizeRet);
+
+  if (status == HIPRTC_SUCCESS) return 0;
+
+  // failure path: log the text hiprtcGetErrorString gives for this result
+
+  event_log_error (hashcat_ctx, "hiprtcGetCodeSize(): %s", rtc->hiprtcGetErrorString (status));
+
+  return -1;
+}
+
+int hc_hiprtcGetCode (hashcat_ctx_t *hashcat_ctx, hiprtcProgram prog, char *ptx)
+{
+  // Calls hiprtcGetCode through the HIPRTC_PTR table kept in backend_ctx.
+  // Returns 0 on HIPRTC_SUCCESS; any other result is logged and mapped to -1.
+
+  HIPRTC_PTR *rtc = (HIPRTC_PTR *) hashcat_ctx->backend_ctx->hiprtc;
+
+  const hiprtcResult status = rtc->hiprtcGetCode (prog, ptx);
+
+  if (status == HIPRTC_SUCCESS) return 0;
+
+  // failure path: log the text hiprtcGetErrorString gives for this result
+
+  event_log_error (hashcat_ctx, "hiprtcGetCode(): %s", rtc->hiprtcGetErrorString (status));
+
+  return -1;
+}
+
+int hc_hiprtcVersion (hashcat_ctx_t *hashcat_ctx, int *major, int *minor)
+{
+  // Calls hiprtcVersion through the HIPRTC_PTR table kept in backend_ctx.
+  // Returns 0 on HIPRTC_SUCCESS; any other result is logged and mapped to -1.
+
+  HIPRTC_PTR *rtc = (HIPRTC_PTR *) hashcat_ctx->backend_ctx->hiprtc;
+
+  const hiprtcResult status = rtc->hiprtcVersion (major, minor);
+
+  if (status == HIPRTC_SUCCESS) return 0;
+
+  // failure path: log the text hiprtcGetErrorString gives for this result
+
+  event_log_error (hashcat_ctx, "hiprtcVersion(): %s", rtc->hiprtcGetErrorString (status));
+
+  return -1;
+}
+
 // CUDA
 
 int cuda_init (hashcat_ctx_t *hashcat_ctx)
@@ -2167,6 +2383,1220 @@ int hc_cuLinkComplete (hashcat_ctx_t *hashcat_ctx, CUlinkState state, void **cub
   return 0;
 }
 
+// HIP
+
+int hip_init (hashcat_ctx_t *hashcat_ctx)
+{
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
+  // Sets up the HIP binding: clears the HIP_PTR table, opens the library, then loads every entry point in the table below.
+  HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
+
+  memset (hip, 0, sizeof (HIP_PTR)); // clear the table before anything is loaded into it
+  // the Windows, macOS and Cygwin branches only carry "fixme" placeholder names; the last branch names libamdhip64
+  #if   defined (_WIN)
+  hip->lib = hc_dlopen ("fixme.dll");
+  #elif defined (__APPLE__)
+  hip->lib = hc_dlopen ("fixme.dylib");
+  #elif defined (__CYGWIN__)
+  hip->lib = hc_dlopen ("fixme.dll");
+  #else
+  hip->lib = hc_dlopen ("libamdhip64.so");
+
+  //TODO: grab the 4 from the major RT version
+  if (hip->lib == NULL) hip->lib = hc_dlopen ("libamdhip64.so.4.2.40200");
+  #endif
+
+  if (hip->lib == NULL) return -1; // no library handle was obtained
+
+  // loader used below: stores library symbol "hipname" in ptr->name; if missing, noerr == 1 logs an error and returns -1, noerr == -1 ignores it, any other value warns and returns 0
+  #define HC_LOAD_FUNC_HIP(ptr,name,hipname,type,libname,noerr) \
+    do { \
+      ptr->name = (type) hc_dlsym ((ptr)->lib, #hipname); \
+      if ((noerr) != -1) { \
+        if (!(ptr)->name) { \
+          if ((noerr) == 1) { \
+            event_log_error (hashcat_ctx, "%s is missing from %s shared library.", #name, #libname); \
+            return -1; \
+          } \
+          if ((noerr) != 1) { \
+            event_log_warning (hashcat_ctx, "%s is missing from %s shared library.", #name, #libname); \
+            return 0; \
+          } \
+        } \
+      } \
+    } while (0)
+
+  // table member (2nd argument) and exported symbol (3rd argument) differ for some entries, e.g.
+  // hipDeviceGetCount -> hipGetDeviceCount, hipEventCreate -> hipEventCreateWithFlags, hipLaunchKernel -> hipModuleLaunchKernel, hipMemAlloc -> hipMalloc
+  // this mapping should be re-checked against the HIP headers for each new HIP release
+
+  HC_LOAD_FUNC_HIP (hip, hipCtxCreate,              hipCtxCreate,               HIP_HIPCTXCREATE,               HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipCtxDestroy,             hipCtxDestroy,              HIP_HIPCTXDESTROY,              HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipCtxGetCacheConfig,      hipCtxGetCacheConfig,       HIP_HIPCTXGETCACHECONFIG,       HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipCtxGetCurrent,          hipCtxGetCurrent,           HIP_HIPCTXGETCURRENT,           HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipCtxGetSharedMemConfig,  hipCtxGetSharedMemConfig,   HIP_HIPCTXGETSHAREDMEMCONFIG,   HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipCtxPopCurrent,          hipCtxPopCurrent,           HIP_HIPCTXPOPCURRENT,           HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipCtxPushCurrent,         hipCtxPushCurrent,          HIP_HIPCTXPUSHCURRENT,          HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipCtxSetCacheConfig,      hipCtxSetCacheConfig,       HIP_HIPCTXSETCACHECONFIG,       HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipCtxSetCurrent,          hipCtxSetCurrent,           HIP_HIPCTXSETCURRENT,           HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipCtxSetSharedMemConfig,  hipCtxSetSharedMemConfig,   HIP_HIPCTXSETSHAREDMEMCONFIG,   HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipCtxSynchronize,         hipCtxSynchronize,          HIP_HIPCTXSYNCHRONIZE,          HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipDeviceGetAttribute,     hipDeviceGetAttribute,      HIP_HIPDEVICEGETATTRIBUTE,      HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipDeviceGetCount,         hipGetDeviceCount,          HIP_HIPDEVICEGETCOUNT,          HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipDeviceGet,              hipDeviceGet,               HIP_HIPDEVICEGET,               HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipDeviceGetName,          hipDeviceGetName,           HIP_HIPDEVICEGETNAME,           HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipDeviceTotalMem,         hipDeviceTotalMem,          HIP_HIPDEVICETOTALMEM,          HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipDriverGetVersion,       hipDriverGetVersion,        HIP_HIPDRIVERGETVERSION,        HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipEventCreate,            hipEventCreateWithFlags,    HIP_HIPEVENTCREATE,             HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipEventDestroy,           hipEventDestroy,            HIP_HIPEVENTDESTROY,            HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipEventElapsedTime,       hipEventElapsedTime,        HIP_HIPEVENTELAPSEDTIME,        HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipEventQuery,             hipEventQuery,              HIP_HIPEVENTQUERY,              HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipEventRecord,            hipEventRecord,             HIP_HIPEVENTRECORD,             HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipEventSynchronize,       hipEventSynchronize,        HIP_HIPEVENTSYNCHRONIZE,        HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipFuncGetAttribute,       hipFuncGetAttribute,        HIP_HIPFUNCGETATTRIBUTE,        HIP, 1);
+  //HC_LOAD_FUNC_HIP (hip, hipFuncSetAttribute,       hipFuncSetAttribute,        HIP_HIPFUNCSETATTRIBUTE,        HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipFuncSetCacheConfig,     hipFuncSetCacheConfig,      HIP_HIPFUNCSETCACHECONFIG,      HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipFuncSetSharedMemConfig, hipFuncSetSharedMemConfig,  HIP_HIPFUNCSETSHAREDMEMCONFIG,  HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipGetErrorName,           hipGetErrorName,            HIP_HIPGETERRORNAME,            HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipGetErrorString,         hipGetErrorString,          HIP_HIPGETERRORSTRING,          HIP, 1); // NOTE(review): the hc_hip* wrappers call this as (err, &str) -> status; confirm the exported symbol has that shape
+  HC_LOAD_FUNC_HIP (hip, hipInit,                   hipInit,                    HIP_HIPINIT,                    HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipLaunchKernel,           hipModuleLaunchKernel,      HIP_HIPLAUNCHKERNEL,            HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipMemAlloc,               hipMalloc,                  HIP_HIPMEMALLOC,                HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipMemAllocHost,           hipMemAllocHost,            HIP_HIPMEMALLOCHOST,            HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipMemcpyDtoD,             hipMemcpyDtoD,              HIP_HIPMEMCPYDTOD,              HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipMemcpyDtoH,             hipMemcpyDtoH,              HIP_HIPMEMCPYDTOH,              HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipMemcpyHtoD,             hipMemcpyHtoD,              HIP_HIPMEMCPYHTOD,              HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipMemFree,                hipFree,                    HIP_HIPMEMFREE,                 HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipMemFreeHost,            hipFreeHost,                HIP_HIPMEMFREEHOST,             HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipMemGetInfo,             hipMemGetInfo,              HIP_HIPMEMGETINFO,              HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipMemsetD32,              hipMemsetD32,               HIP_HIPMEMSETD32,               HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipMemsetD8,               hipMemsetD8,                HIP_HIPMEMSETD8,                HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipModuleGetFunction,      hipModuleGetFunction,       HIP_HIPMODULEGETFUNCTION,       HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipModuleGetGlobal,        hipModuleGetGlobal,         HIP_HIPMODULEGETGLOBAL,         HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipModuleLoad,             hipModuleLoad,              HIP_HIPMODULELOAD,              HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipModuleLoadData,         hipModuleLoadData,          HIP_HIPMODULELOADDATA,          HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipModuleLoadDataEx,       hipModuleLoadDataEx,        HIP_HIPMODULELOADDATAEX,        HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipModuleUnload,           hipModuleUnload,            HIP_HIPMODULEUNLOAD,            HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipProfilerStart,          hipProfilerStart,           HIP_HIPPROFILERSTART,           HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipProfilerStop,           hipProfilerStop,            HIP_HIPPROFILERSTOP,            HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipStreamCreate,           hipStreamCreate,            HIP_HIPSTREAMCREATE,            HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipStreamDestroy,          hipStreamDestroy,           HIP_HIPSTREAMDESTROY,           HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipStreamSynchronize,      hipStreamSynchronize,       HIP_HIPSTREAMSYNCHRONIZE,       HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipStreamWaitEvent,        hipStreamWaitEvent,         HIP_HIPSTREAMWAITEVENT,         HIP, 1);
+  #if defined (WITH_CUBINX)
+  HC_LOAD_FUNC_HIP (hip, hipLinkCreate,             hipLinkCreate,              HIP_HIPLINKCREATE,              HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipLinkAddData,            hipLinkAddData,             HIP_HIPLINKADDDATA,             HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipLinkDestroy,            hipLinkDestroy,             HIP_HIPLINKDESTROY,             HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipLinkComplete,           hipLinkComplete,            HIP_HIPLINKCOMPLETE,            HIP, 1);
+  #endif
+
+  return 0;
+}
+
+void hip_close (hashcat_ctx_t *hashcat_ctx)
+{
+  // Releases the HIP binding stored in backend_ctx->hip: closes the
+  // library handle when one is set, hands the table to hcfree and resets
+  // the backend_ctx pointer to NULL. Does nothing if no table is present.
+
+  backend_ctx_t *ctx = hashcat_ctx->backend_ctx;
+
+  HIP_PTR *api = (HIP_PTR *) ctx->hip;
+
+  if (api == NULL) return;
+
+  if (api->lib != NULL) hc_dlclose (api->lib);
+
+  hcfree (ctx->hip);
+
+  ctx->hip = NULL;
+}
+
+int hc_hipInit (hashcat_ctx_t *hashcat_ctx, unsigned int Flags)
+{
+  // Calls hipInit through the HIP_PTR table kept in backend_ctx.
+  // Returns 0 on HIP_SUCCESS; any other result is logged and mapped to -1.
+
+  HIP_PTR *api = (HIP_PTR *) hashcat_ctx->backend_ctx->hip;
+
+  const HIPresult status = api->hipInit (Flags);
+
+  if (status == HIP_SUCCESS) return 0;
+
+  // log the error text when the lookup reports HIP_SUCCESS, else the raw code
+
+  const char *text = NULL;
+
+  if (api->hipGetErrorString (status, &text) == HIP_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "hipInit(): %s", text);
+  }
+  else
+  {
+    event_log_error (hashcat_ctx, "hipInit(): %d", status);
+  }
+
+  return -1;
+}
+
+int hc_hipDeviceGetAttribute (hashcat_ctx_t *hashcat_ctx, int *pi, HIPdevice_attribute attrib, HIPdevice dev)
+{
+  // Calls hipDeviceGetAttribute through the HIP_PTR table kept in backend_ctx.
+  // Returns 0 on HIP_SUCCESS; any other result is logged and mapped to -1.
+
+  HIP_PTR *api = (HIP_PTR *) hashcat_ctx->backend_ctx->hip;
+
+  const HIPresult status = api->hipDeviceGetAttribute (pi, attrib, dev);
+
+  if (status == HIP_SUCCESS) return 0;
+
+  // log the error text when the lookup reports HIP_SUCCESS, else the raw code
+
+  const char *text = NULL;
+
+  if (api->hipGetErrorString (status, &text) == HIP_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "hipDeviceGetAttribute(): %s", text);
+  }
+  else
+  {
+    event_log_error (hashcat_ctx, "hipDeviceGetAttribute(): %d", status);
+  }
+
+  return -1;
+}
+
+int hc_hipDeviceGetCount (hashcat_ctx_t *hashcat_ctx, int *count)
+{
+  // Calls the hipDeviceGetCount entry of the HIP_PTR table kept in backend_ctx.
+  // Returns 0 on HIP_SUCCESS; any other result is logged and mapped to -1.
+
+  HIP_PTR *api = (HIP_PTR *) hashcat_ctx->backend_ctx->hip;
+
+  const HIPresult status = api->hipDeviceGetCount (count);
+
+  if (status == HIP_SUCCESS) return 0;
+
+  // log the error text when the lookup reports HIP_SUCCESS, else the raw code
+
+  const char *text = NULL;
+
+  if (api->hipGetErrorString (status, &text) == HIP_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "hipDeviceGetCount(): %s", text);
+  }
+  else
+  {
+    event_log_error (hashcat_ctx, "hipDeviceGetCount(): %d", status);
+  }
+
+  return -1;
+}
+
+int hc_hipDeviceGet (hashcat_ctx_t *hashcat_ctx, HIPdevice* device, int ordinal)
+{
+  // Calls hipDeviceGet through the HIP_PTR table kept in backend_ctx.
+  // Returns 0 on HIP_SUCCESS; any other result is logged and mapped to -1.
+
+  HIP_PTR *api = (HIP_PTR *) hashcat_ctx->backend_ctx->hip;
+
+  const HIPresult status = api->hipDeviceGet (device, ordinal);
+
+  if (status == HIP_SUCCESS) return 0;
+
+  // log the error text when the lookup reports HIP_SUCCESS, else the raw code
+
+  const char *text = NULL;
+
+  if (api->hipGetErrorString (status, &text) == HIP_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "hipDeviceGet(): %s", text);
+  }
+  else
+  {
+    event_log_error (hashcat_ctx, "hipDeviceGet(): %d", status);
+  }
+
+  return -1;
+}
+
+int hc_hipDeviceGetName (hashcat_ctx_t *hashcat_ctx, char *name, int len, HIPdevice dev)
+{
+  // Calls hipDeviceGetName through the HIP_PTR table kept in backend_ctx.
+  // Returns 0 on HIP_SUCCESS; any other result is logged and mapped to -1.
+
+  HIP_PTR *api = (HIP_PTR *) hashcat_ctx->backend_ctx->hip;
+
+  const HIPresult status = api->hipDeviceGetName (name, len, dev);
+
+  if (status == HIP_SUCCESS) return 0;
+
+  // log the error text when the lookup reports HIP_SUCCESS, else the raw code
+
+  const char *text = NULL;
+
+  if (api->hipGetErrorString (status, &text) == HIP_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "hipDeviceGetName(): %s", text);
+  }
+  else
+  {
+    event_log_error (hashcat_ctx, "hipDeviceGetName(): %d", status);
+  }
+
+  return -1;
+}
+
+int hc_hipDeviceTotalMem (hashcat_ctx_t *hashcat_ctx, size_t *bytes, HIPdevice dev)
+{
+  // Calls hipDeviceTotalMem through the HIP_PTR table kept in backend_ctx.
+  // Returns 0 on HIP_SUCCESS; any other result is logged and mapped to -1.
+
+  HIP_PTR *api = (HIP_PTR *) hashcat_ctx->backend_ctx->hip;
+
+  const HIPresult status = api->hipDeviceTotalMem (bytes, dev);
+
+  if (status == HIP_SUCCESS) return 0;
+
+  // log the error text when the lookup reports HIP_SUCCESS, else the raw code
+
+  const char *text = NULL;
+
+  if (api->hipGetErrorString (status, &text) == HIP_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "hipDeviceTotalMem(): %s", text);
+  }
+  else
+  {
+    event_log_error (hashcat_ctx, "hipDeviceTotalMem(): %d", status);
+  }
+
+  return -1;
+}
+
+int hc_hipDriverGetVersion (hashcat_ctx_t *hashcat_ctx, int *driverVersion)
+{
+  // Calls hipDriverGetVersion through the HIP_PTR table kept in backend_ctx.
+  // Returns 0 on HIP_SUCCESS; any other result is logged and mapped to -1.
+
+  HIP_PTR *api = (HIP_PTR *) hashcat_ctx->backend_ctx->hip;
+
+  const HIPresult status = api->hipDriverGetVersion (driverVersion);
+
+  if (status == HIP_SUCCESS) return 0;
+
+  // log the error text when the lookup reports HIP_SUCCESS, else the raw code
+
+  const char *text = NULL;
+
+  if (api->hipGetErrorString (status, &text) == HIP_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "hipDriverGetVersion(): %s", text);
+  }
+  else
+  {
+    event_log_error (hashcat_ctx, "hipDriverGetVersion(): %d", status);
+  }
+
+  return -1;
+}
+
+int hc_hipCtxCreate (hashcat_ctx_t *hashcat_ctx, HIPcontext *pctx, unsigned int flags, HIPdevice dev)
+{
+  // Calls hipCtxCreate through the HIP_PTR table kept in backend_ctx.
+  // Returns 0 on HIP_SUCCESS; any other result is logged and mapped to -1.
+
+  HIP_PTR *api = (HIP_PTR *) hashcat_ctx->backend_ctx->hip;
+
+  const HIPresult status = api->hipCtxCreate (pctx, flags, dev);
+
+  if (status == HIP_SUCCESS) return 0;
+
+  // log the error text when the lookup reports HIP_SUCCESS, else the raw code
+
+  const char *text = NULL;
+
+  if (api->hipGetErrorString (status, &text) == HIP_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "hipCtxCreate(): %s", text);
+  }
+  else
+  {
+    event_log_error (hashcat_ctx, "hipCtxCreate(): %d", status);
+  }
+
+  return -1;
+}
+
+int hc_hipCtxDestroy (hashcat_ctx_t *hashcat_ctx, HIPcontext ctx)
+{
+  // Calls hipCtxDestroy through the HIP function table in
+  // backend_ctx->hip, passing the caller's argument through unchanged.
+  // Returns 0 on HIP_SUCCESS; otherwise logs the failure and returns -1.
+  backend_ctx_t *bctx = hashcat_ctx->backend_ctx;
+  HIP_PTR *hip = (HIP_PTR *) bctx->hip;
+
+  const HIPresult rc = hip->hipCtxDestroy (ctx);
+
+  if (rc == HIP_SUCCESS) return 0;
+
+  // Log the error text if hipGetErrorString yields one, else the raw code.
+  const char *msg = NULL;
+
+  if (hip->hipGetErrorString (rc, &msg) != HIP_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "hipCtxDestroy(): %d", rc);
+  }
+  else
+  {
+    event_log_error (hashcat_ctx, "hipCtxDestroy(): %s", msg);
+  }
+
+  return -1;
+}
+
+int hc_hipModuleLoadDataEx (hashcat_ctx_t *hashcat_ctx, HIPmodule *module, const void *image, unsigned int numOptions, HIPjit_option *options, void **optionValues)
+{
+  // Calls hipModuleLoadDataEx through the HIP function table in
+  // backend_ctx->hip, passing the caller's arguments through unchanged.
+  // Returns 0 on HIP_SUCCESS; otherwise logs the failure and returns -1.
+  backend_ctx_t *bctx = hashcat_ctx->backend_ctx;
+  HIP_PTR *hip = (HIP_PTR *) bctx->hip;
+
+  const HIPresult rc = hip->hipModuleLoadDataEx (module, image, numOptions, options, optionValues);
+
+  if (rc == HIP_SUCCESS) return 0;
+
+  // Log the error text if hipGetErrorString yields one, else the raw code.
+  const char *msg = NULL;
+
+  if (hip->hipGetErrorString (rc, &msg) != HIP_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "hipModuleLoadDataEx(): %d", rc);
+  }
+  else
+  {
+    event_log_error (hashcat_ctx, "hipModuleLoadDataEx(): %s", msg);
+  }
+
+  return -1;
+}
+
+int hc_hipModuleUnload (hashcat_ctx_t *hashcat_ctx, HIPmodule hmod)
+{
+  // Calls hipModuleUnload through the HIP function table in
+  // backend_ctx->hip, passing the caller's argument through unchanged.
+  // Returns 0 on HIP_SUCCESS; otherwise logs the failure and returns -1.
+  backend_ctx_t *bctx = hashcat_ctx->backend_ctx;
+  HIP_PTR *hip = (HIP_PTR *) bctx->hip;
+
+  const HIPresult rc = hip->hipModuleUnload (hmod);
+
+  if (rc == HIP_SUCCESS) return 0;
+
+  // Log the error text if hipGetErrorString yields one, else the raw code.
+  const char *msg = NULL;
+
+  if (hip->hipGetErrorString (rc, &msg) != HIP_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "hipModuleUnload(): %d", rc);
+  }
+  else
+  {
+    event_log_error (hashcat_ctx, "hipModuleUnload(): %s", msg);
+  }
+
+  return -1;
+}
+
+int hc_hipCtxSetCurrent (hashcat_ctx_t *hashcat_ctx, HIPcontext ctx)
+{
+  // Calls hipCtxSetCurrent through the HIP function table in
+  // backend_ctx->hip, passing the caller's argument through unchanged.
+  // Returns 0 on HIP_SUCCESS; otherwise logs the failure and returns -1.
+  backend_ctx_t *bctx = hashcat_ctx->backend_ctx;
+  HIP_PTR *hip = (HIP_PTR *) bctx->hip;
+
+  const HIPresult rc = hip->hipCtxSetCurrent (ctx);
+
+  if (rc == HIP_SUCCESS) return 0;
+
+  // Log the error text if hipGetErrorString yields one, else the raw code.
+  const char *msg = NULL;
+
+  if (hip->hipGetErrorString (rc, &msg) != HIP_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "hipCtxSetCurrent(): %d", rc);
+  }
+  else
+  {
+    event_log_error (hashcat_ctx, "hipCtxSetCurrent(): %s", msg);
+  }
+
+  return -1;
+}
+
+int hc_hipMemAlloc (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr *dptr, size_t bytesize)
+{
+  // Calls hipMemAlloc through the HIP function table in
+  // backend_ctx->hip, passing the caller's arguments through unchanged.
+  // Returns 0 on HIP_SUCCESS; otherwise logs the failure and returns -1.
+  backend_ctx_t *bctx = hashcat_ctx->backend_ctx;
+  HIP_PTR *hip = (HIP_PTR *) bctx->hip;
+
+  const HIPresult rc = hip->hipMemAlloc (dptr, bytesize);
+
+  if (rc == HIP_SUCCESS) return 0;
+
+  // Log the error text if hipGetErrorString yields one, else the raw code.
+  const char *msg = NULL;
+
+  if (hip->hipGetErrorString (rc, &msg) != HIP_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "hipMemAlloc(): %d", rc);
+  }
+  else
+  {
+    event_log_error (hashcat_ctx, "hipMemAlloc(): %s", msg);
+  }
+
+  return -1;
+}
+
+int hc_hipMemFree (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dptr)
+{
+  // Calls hipMemFree through the HIP function table in
+  // backend_ctx->hip, passing the caller's argument through unchanged.
+  // Returns 0 on HIP_SUCCESS; otherwise logs the failure and returns -1.
+  backend_ctx_t *bctx = hashcat_ctx->backend_ctx;
+  HIP_PTR *hip = (HIP_PTR *) bctx->hip;
+
+  const HIPresult rc = hip->hipMemFree (dptr);
+
+  if (rc == HIP_SUCCESS) return 0;
+
+  // Log the error text if hipGetErrorString yields one, else the raw code.
+  const char *msg = NULL;
+
+  if (hip->hipGetErrorString (rc, &msg) != HIP_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "hipMemFree(): %d", rc);
+  }
+  else
+  {
+    event_log_error (hashcat_ctx, "hipMemFree(): %s", msg);
+  }
+
+  return -1;
+}
+
+int hc_hipMemcpyDtoH (hashcat_ctx_t *hashcat_ctx, void *dstHost, HIPdeviceptr srcDevice, size_t ByteCount)
+{
+  // Calls hipMemcpyDtoH through the HIP function table in
+  // backend_ctx->hip, passing the caller's arguments through unchanged.
+  // Returns 0 on HIP_SUCCESS; otherwise logs the failure and returns -1.
+  backend_ctx_t *bctx = hashcat_ctx->backend_ctx;
+  HIP_PTR *hip = (HIP_PTR *) bctx->hip;
+
+  const HIPresult rc = hip->hipMemcpyDtoH (dstHost, srcDevice, ByteCount);
+
+  if (rc == HIP_SUCCESS) return 0;
+
+  // Log the error text if hipGetErrorString yields one, else the raw code.
+  const char *msg = NULL;
+
+  if (hip->hipGetErrorString (rc, &msg) != HIP_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "hipMemcpyDtoH(): %d", rc);
+  }
+  else
+  {
+    event_log_error (hashcat_ctx, "hipMemcpyDtoH(): %s", msg);
+  }
+
+  return -1;
+}
+
+int hc_hipMemcpyDtoD (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dstDevice, HIPdeviceptr srcDevice, size_t ByteCount)
+{
+  // Calls hipMemcpyDtoD through the HIP function table in
+  // backend_ctx->hip, passing the caller's arguments through unchanged.
+  // Returns 0 on HIP_SUCCESS; otherwise logs the failure and returns -1.
+  backend_ctx_t *bctx = hashcat_ctx->backend_ctx;
+  HIP_PTR *hip = (HIP_PTR *) bctx->hip;
+
+  const HIPresult rc = hip->hipMemcpyDtoD (dstDevice, srcDevice, ByteCount);
+
+  if (rc == HIP_SUCCESS) return 0;
+
+  // Log the error text if hipGetErrorString yields one, else the raw code.
+  const char *msg = NULL;
+
+  if (hip->hipGetErrorString (rc, &msg) != HIP_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "hipMemcpyDtoD(): %d", rc);
+  }
+  else
+  {
+    event_log_error (hashcat_ctx, "hipMemcpyDtoD(): %s", msg);
+  }
+
+  return -1;
+}
+
+int hc_hipMemcpyHtoD (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dstDevice, const void *srcHost, size_t ByteCount)
+{
+  // Calls hipMemcpyHtoD through the HIP function table in
+  // backend_ctx->hip, passing the caller's arguments through unchanged.
+  // Returns 0 on HIP_SUCCESS; otherwise logs the failure and returns -1.
+  backend_ctx_t *bctx = hashcat_ctx->backend_ctx;
+  HIP_PTR *hip = (HIP_PTR *) bctx->hip;
+
+  const HIPresult rc = hip->hipMemcpyHtoD (dstDevice, srcHost, ByteCount);
+
+  if (rc == HIP_SUCCESS) return 0;
+
+  // Log the error text if hipGetErrorString yields one, else the raw code.
+  const char *msg = NULL;
+
+  if (hip->hipGetErrorString (rc, &msg) != HIP_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "hipMemcpyHtoD(): %d", rc);
+  }
+  else
+  {
+    event_log_error (hashcat_ctx, "hipMemcpyHtoD(): %s", msg);
+  }
+
+  return -1;
+}
+
+int hc_hipModuleGetFunction (hashcat_ctx_t *hashcat_ctx, HIPfunction *hfunc, HIPmodule hmod, const char *name)
+{
+  // Calls hipModuleGetFunction through the HIP function table in
+  // backend_ctx->hip, passing the caller's arguments through unchanged.
+  // Returns 0 on HIP_SUCCESS; otherwise logs the failure and returns -1.
+  backend_ctx_t *bctx = hashcat_ctx->backend_ctx;
+  HIP_PTR *hip = (HIP_PTR *) bctx->hip;
+
+  const HIPresult rc = hip->hipModuleGetFunction (hfunc, hmod, name);
+
+  if (rc == HIP_SUCCESS) return 0;
+
+  // Log the error text if hipGetErrorString yields one, else the raw code.
+  const char *msg = NULL;
+
+  if (hip->hipGetErrorString (rc, &msg) != HIP_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "hipModuleGetFunction(): %d", rc);
+  }
+  else
+  {
+    event_log_error (hashcat_ctx, "hipModuleGetFunction(): %s", msg);
+  }
+
+  return -1;
+}
+
+int hc_hipModuleGetGlobal (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr *dptr, size_t *bytes, HIPmodule hmod, const char *name)
+{
+  // Calls hipModuleGetGlobal through the HIP function table in
+  // backend_ctx->hip, passing the caller's arguments through unchanged.
+  // Returns 0 on HIP_SUCCESS; otherwise logs the failure and returns -1.
+  backend_ctx_t *bctx = hashcat_ctx->backend_ctx;
+  HIP_PTR *hip = (HIP_PTR *) bctx->hip;
+
+  const HIPresult rc = hip->hipModuleGetGlobal (dptr, bytes, hmod, name);
+
+  if (rc == HIP_SUCCESS) return 0;
+
+  // Log the error text if hipGetErrorString yields one, else the raw code.
+  const char *msg = NULL;
+
+  if (hip->hipGetErrorString (rc, &msg) != HIP_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "hipModuleGetGlobal(): %d", rc);
+  }
+  else
+  {
+    event_log_error (hashcat_ctx, "hipModuleGetGlobal(): %s", msg);
+  }
+
+  return -1;
+}
+
+int hc_hipMemGetInfo (hashcat_ctx_t *hashcat_ctx, size_t *free, size_t *total)
+{
+  // Calls hipMemGetInfo through the HIP function table in
+  // backend_ctx->hip, passing the caller's arguments through unchanged.
+  // Returns 0 on HIP_SUCCESS; otherwise logs the failure and returns -1.
+  backend_ctx_t *bctx = hashcat_ctx->backend_ctx;
+  HIP_PTR *hip = (HIP_PTR *) bctx->hip;
+
+  const HIPresult rc = hip->hipMemGetInfo (free, total);
+
+  if (rc == HIP_SUCCESS) return 0;
+
+  // Log the error text if hipGetErrorString yields one, else the raw code.
+  const char *msg = NULL;
+
+  if (hip->hipGetErrorString (rc, &msg) != HIP_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "hipMemGetInfo(): %d", rc);
+  }
+  else
+  {
+    event_log_error (hashcat_ctx, "hipMemGetInfo(): %s", msg);
+  }
+
+  return -1;
+}
+
+int hc_hipFuncGetAttribute (hashcat_ctx_t *hashcat_ctx, int *pi, HIPfunction_attribute attrib, HIPfunction hfunc)
+{
+  // Calls hipFuncGetAttribute through the HIP function table in
+  // backend_ctx->hip, passing the caller's arguments through unchanged.
+  // Returns 0 on HIP_SUCCESS; otherwise logs the failure and returns -1.
+  backend_ctx_t *bctx = hashcat_ctx->backend_ctx;
+  HIP_PTR *hip = (HIP_PTR *) bctx->hip;
+
+  const HIPresult rc = hip->hipFuncGetAttribute (pi, attrib, hfunc);
+
+  if (rc == HIP_SUCCESS) return 0;
+
+  // Log the error text if hipGetErrorString yields one, else the raw code.
+  const char *msg = NULL;
+
+  if (hip->hipGetErrorString (rc, &msg) != HIP_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "hipFuncGetAttribute(): %d", rc);
+  }
+  else
+  {
+    event_log_error (hashcat_ctx, "hipFuncGetAttribute(): %s", msg);
+  }
+
+  return -1;
+}
+
+/*
+
+// ATTENTION: hipFuncSetAttribute maps to cudaFuncSetAttribute, not to cuFuncSetAttribute!
+
+int hc_hipFuncSetAttribute (hashcat_ctx_t *hashcat_ctx, HIPfunction hfunc, HIPfunction_attribute attrib, int value)
+{
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
+
+  HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
+
+  const HIPresult HIP_err = hip->hipFuncSetAttribute (hfunc, attrib, value);
+
+  if (HIP_err != HIP_SUCCESS)
+  {
+    const char *pStr = NULL;
+
+    if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS)
+    {
+      event_log_error (hashcat_ctx, "hipFuncSetAttribute(): %s", pStr);
+    }
+    else
+    {
+      event_log_error (hashcat_ctx, "hipFuncSetAttribute(): %d", HIP_err);
+    }
+
+    return -1;
+  }
+
+  return 0;
+}
+*/
+
+int hc_hipStreamCreate (hashcat_ctx_t *hashcat_ctx, HIPstream *phStream, unsigned int Flags)
+{
+  // Calls hipStreamCreate through the HIP function table in
+  // backend_ctx->hip, passing the caller's arguments through unchanged.
+  // Returns 0 on HIP_SUCCESS; otherwise logs the failure and returns -1.
+  backend_ctx_t *bctx = hashcat_ctx->backend_ctx;
+  HIP_PTR *hip = (HIP_PTR *) bctx->hip;
+
+  const HIPresult rc = hip->hipStreamCreate (phStream, Flags);
+
+  if (rc == HIP_SUCCESS) return 0;
+
+  // Log the error text if hipGetErrorString yields one, else the raw code.
+  const char *msg = NULL;
+
+  if (hip->hipGetErrorString (rc, &msg) != HIP_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "hipStreamCreate(): %d", rc);
+  }
+  else
+  {
+    event_log_error (hashcat_ctx, "hipStreamCreate(): %s", msg);
+  }
+
+  return -1;
+}
+
+int hc_hipStreamDestroy (hashcat_ctx_t *hashcat_ctx, HIPstream hStream)
+{
+  // Calls hipStreamDestroy through the HIP function table in
+  // backend_ctx->hip, passing the caller's argument through unchanged.
+  // Returns 0 on HIP_SUCCESS; otherwise logs the failure and returns -1.
+  backend_ctx_t *bctx = hashcat_ctx->backend_ctx;
+  HIP_PTR *hip = (HIP_PTR *) bctx->hip;
+
+  const HIPresult rc = hip->hipStreamDestroy (hStream);
+
+  if (rc == HIP_SUCCESS) return 0;
+
+  // Log the error text if hipGetErrorString yields one, else the raw code.
+  const char *msg = NULL;
+
+  if (hip->hipGetErrorString (rc, &msg) != HIP_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "hipStreamDestroy(): %d", rc);
+  }
+  else
+  {
+    event_log_error (hashcat_ctx, "hipStreamDestroy(): %s", msg);
+  }
+
+  return -1;
+}
+
+int hc_hipStreamSynchronize (hashcat_ctx_t *hashcat_ctx, HIPstream hStream)
+{
+  // Calls hipStreamSynchronize through the HIP function table in
+  // backend_ctx->hip, passing the caller's argument through unchanged.
+  // Returns 0 on HIP_SUCCESS; otherwise logs the failure and returns -1.
+  backend_ctx_t *bctx = hashcat_ctx->backend_ctx;
+  HIP_PTR *hip = (HIP_PTR *) bctx->hip;
+
+  const HIPresult rc = hip->hipStreamSynchronize (hStream);
+
+  if (rc == HIP_SUCCESS) return 0;
+
+  // Log the error text if hipGetErrorString yields one, else the raw code.
+  const char *msg = NULL;
+
+  if (hip->hipGetErrorString (rc, &msg) != HIP_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "hipStreamSynchronize(): %d", rc);
+  }
+  else
+  {
+    event_log_error (hashcat_ctx, "hipStreamSynchronize(): %s", msg);
+  }
+
+  return -1;
+}
+
+int hc_hipLaunchKernel (hashcat_ctx_t *hashcat_ctx, HIPfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, HIPstream hStream, void **kernelParams, void **extra)
+{
+  // Calls hipLaunchKernel through the HIP function table in
+  // backend_ctx->hip, passing the caller's arguments through unchanged.
+  // Returns 0 on HIP_SUCCESS; otherwise logs the failure and returns -1.
+  backend_ctx_t *bctx = hashcat_ctx->backend_ctx;
+  HIP_PTR *hip = (HIP_PTR *) bctx->hip;
+
+  const HIPresult rc = hip->hipLaunchKernel (f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams, extra);
+
+  if (rc == HIP_SUCCESS) return 0;
+
+  // Log the error text if hipGetErrorString yields one, else the raw code.
+  const char *msg = NULL;
+
+  if (hip->hipGetErrorString (rc, &msg) != HIP_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "hipLaunchKernel(): %d", rc);
+  }
+  else
+  {
+    event_log_error (hashcat_ctx, "hipLaunchKernel(): %s", msg);
+  }
+
+  return -1;
+}
+
+int hc_hipCtxSynchronize (hashcat_ctx_t *hashcat_ctx)
+{
+  // Calls hipCtxSynchronize (which takes no arguments) through the HIP
+  // function table in backend_ctx->hip. Returns 0 on HIP_SUCCESS;
+  // otherwise logs the failure and returns -1.
+  backend_ctx_t *bctx = hashcat_ctx->backend_ctx;
+  HIP_PTR *hip = (HIP_PTR *) bctx->hip;
+
+  const HIPresult rc = hip->hipCtxSynchronize ();
+
+  if (rc == HIP_SUCCESS) return 0;
+
+  // Log the error text if hipGetErrorString yields one, else the raw code.
+  const char *msg = NULL;
+
+  if (hip->hipGetErrorString (rc, &msg) != HIP_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "hipCtxSynchronize(): %d", rc);
+  }
+  else
+  {
+    event_log_error (hashcat_ctx, "hipCtxSynchronize(): %s", msg);
+  }
+
+  return -1;
+}
+
+int hc_hipEventCreate (hashcat_ctx_t *hashcat_ctx, HIPevent *phEvent, unsigned int Flags)
+{
+  // Calls hipEventCreate through the HIP function table in
+  // backend_ctx->hip, passing the caller's arguments through unchanged.
+  // Returns 0 on HIP_SUCCESS; otherwise logs the failure and returns -1.
+  backend_ctx_t *bctx = hashcat_ctx->backend_ctx;
+  HIP_PTR *hip = (HIP_PTR *) bctx->hip;
+
+  const HIPresult rc = hip->hipEventCreate (phEvent, Flags);
+
+  if (rc == HIP_SUCCESS) return 0;
+
+  // Log the error text if hipGetErrorString yields one, else the raw code.
+  const char *msg = NULL;
+
+  if (hip->hipGetErrorString (rc, &msg) != HIP_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "hipEventCreate(): %d", rc);
+  }
+  else
+  {
+    event_log_error (hashcat_ctx, "hipEventCreate(): %s", msg);
+  }
+
+  return -1;
+}
+
+int hc_hipEventDestroy (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent)
+{
+  // Calls hipEventDestroy through the HIP function table in
+  // backend_ctx->hip, passing the caller's argument through unchanged.
+  // Returns 0 on HIP_SUCCESS; otherwise logs the failure and returns -1.
+  backend_ctx_t *bctx = hashcat_ctx->backend_ctx;
+  HIP_PTR *hip = (HIP_PTR *) bctx->hip;
+
+  const HIPresult rc = hip->hipEventDestroy (hEvent);
+
+  if (rc == HIP_SUCCESS) return 0;
+
+  // Log the error text if hipGetErrorString yields one, else the raw code.
+  const char *msg = NULL;
+
+  if (hip->hipGetErrorString (rc, &msg) != HIP_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "hipEventDestroy(): %d", rc);
+  }
+  else
+  {
+    event_log_error (hashcat_ctx, "hipEventDestroy(): %s", msg);
+  }
+
+  return -1;
+}
+
+int hc_hipEventElapsedTime (hashcat_ctx_t *hashcat_ctx, float *pMilliseconds, HIPevent hStart, HIPevent hEnd)
+{
+  // Calls hipEventElapsedTime through the HIP function table in
+  // backend_ctx->hip, passing the caller's arguments through unchanged.
+  // Returns 0 on HIP_SUCCESS; otherwise logs the failure and returns -1.
+  backend_ctx_t *bctx = hashcat_ctx->backend_ctx;
+  HIP_PTR *hip = (HIP_PTR *) bctx->hip;
+
+  const HIPresult rc = hip->hipEventElapsedTime (pMilliseconds, hStart, hEnd);
+
+  if (rc == HIP_SUCCESS) return 0;
+
+  // Log the error text if hipGetErrorString yields one, else the raw code.
+  const char *msg = NULL;
+
+  if (hip->hipGetErrorString (rc, &msg) != HIP_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "hipEventElapsedTime(): %d", rc);
+  }
+  else
+  {
+    event_log_error (hashcat_ctx, "hipEventElapsedTime(): %s", msg);
+  }
+
+  return -1;
+}
+
+int hc_hipEventQuery (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent)
+{
+  // Calls hipEventQuery through the HIP function table in backend_ctx->hip.
+  // Returns 0 on HIP_SUCCESS; every other status, whatever its meaning,
+  // is logged as an error and reported to the caller as -1.
+  backend_ctx_t *bctx = hashcat_ctx->backend_ctx;
+  HIP_PTR *hip = (HIP_PTR *) bctx->hip;
+
+  const HIPresult rc = hip->hipEventQuery (hEvent);
+
+  if (rc == HIP_SUCCESS) return 0;
+
+  // Log the error text if hipGetErrorString yields one, else the raw code.
+  const char *msg = NULL;
+
+  if (hip->hipGetErrorString (rc, &msg) != HIP_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "hipEventQuery(): %d", rc);
+  }
+  else
+  {
+    event_log_error (hashcat_ctx, "hipEventQuery(): %s", msg);
+  }
+
+  return -1;
+}
+
+int hc_hipEventRecord (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent, HIPstream hStream)
+{
+  // Calls hipEventRecord through the HIP function table in
+  // backend_ctx->hip, passing the caller's arguments through unchanged.
+  // Returns 0 on HIP_SUCCESS; otherwise logs the failure and returns -1.
+  backend_ctx_t *bctx = hashcat_ctx->backend_ctx;
+  HIP_PTR *hip = (HIP_PTR *) bctx->hip;
+
+  const HIPresult rc = hip->hipEventRecord (hEvent, hStream);
+
+  if (rc == HIP_SUCCESS) return 0;
+
+  // Log the error text if hipGetErrorString yields one, else the raw code.
+  const char *msg = NULL;
+
+  if (hip->hipGetErrorString (rc, &msg) != HIP_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "hipEventRecord(): %d", rc);
+  }
+  else
+  {
+    event_log_error (hashcat_ctx, "hipEventRecord(): %s", msg);
+  }
+
+  return -1;
+}
+
+int hc_hipEventSynchronize (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent)
+{
+  // Calls hipEventSynchronize through the HIP function table in
+  // backend_ctx->hip, passing the caller's argument through unchanged.
+  // Returns 0 on HIP_SUCCESS; otherwise logs the failure and returns -1.
+  backend_ctx_t *bctx = hashcat_ctx->backend_ctx;
+  HIP_PTR *hip = (HIP_PTR *) bctx->hip;
+
+  const HIPresult rc = hip->hipEventSynchronize (hEvent);
+
+  if (rc == HIP_SUCCESS) return 0;
+
+  // Log the error text if hipGetErrorString yields one, else the raw code.
+  const char *msg = NULL;
+
+  if (hip->hipGetErrorString (rc, &msg) != HIP_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "hipEventSynchronize(): %d", rc);
+  }
+  else
+  {
+    event_log_error (hashcat_ctx, "hipEventSynchronize(): %s", msg);
+  }
+
+  return -1;
+}
+
+int hc_hipCtxSetCacheConfig (hashcat_ctx_t *hashcat_ctx, HIPfunc_cache config)
+{
+  // Calls hipCtxSetCacheConfig through the HIP function table in
+  // backend_ctx->hip, passing the caller's argument through unchanged.
+  // Returns 0 on HIP_SUCCESS; otherwise logs the failure and returns -1.
+  backend_ctx_t *bctx = hashcat_ctx->backend_ctx;
+  HIP_PTR *hip = (HIP_PTR *) bctx->hip;
+
+  const HIPresult rc = hip->hipCtxSetCacheConfig (config);
+
+  if (rc == HIP_SUCCESS) return 0;
+
+  // Log the error text if hipGetErrorString yields one, else the raw code.
+  const char *msg = NULL;
+
+  if (hip->hipGetErrorString (rc, &msg) != HIP_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "hipCtxSetCacheConfig(): %d", rc);
+  }
+  else
+  {
+    event_log_error (hashcat_ctx, "hipCtxSetCacheConfig(): %s", msg);
+  }
+
+  return -1;
+}
+
+int hc_hipCtxPushCurrent (hashcat_ctx_t *hashcat_ctx, HIPcontext ctx)
+{
+  // Calls hipCtxPushCurrent through the HIP function table in
+  // backend_ctx->hip, passing the caller's argument through unchanged.
+  // Returns 0 on HIP_SUCCESS; otherwise logs the failure and returns -1.
+  backend_ctx_t *bctx = hashcat_ctx->backend_ctx;
+  HIP_PTR *hip = (HIP_PTR *) bctx->hip;
+
+  const HIPresult rc = hip->hipCtxPushCurrent (ctx);
+
+  if (rc == HIP_SUCCESS) return 0;
+
+  // Log the error text if hipGetErrorString yields one, else the raw code.
+  const char *msg = NULL;
+
+  if (hip->hipGetErrorString (rc, &msg) != HIP_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "hipCtxPushCurrent(): %d", rc);
+  }
+  else
+  {
+    event_log_error (hashcat_ctx, "hipCtxPushCurrent(): %s", msg);
+  }
+
+  return -1;
+}
+
+int hc_hipCtxPopCurrent (hashcat_ctx_t *hashcat_ctx, HIPcontext *pctx)
+{
+  // Calls hipCtxPopCurrent through the HIP function table in
+  // backend_ctx->hip, passing the caller's argument through unchanged.
+  // Returns 0 on HIP_SUCCESS; otherwise logs the failure and returns -1.
+  backend_ctx_t *bctx = hashcat_ctx->backend_ctx;
+  HIP_PTR *hip = (HIP_PTR *) bctx->hip;
+
+  const HIPresult rc = hip->hipCtxPopCurrent (pctx);
+
+  if (rc == HIP_SUCCESS) return 0;
+
+  // Log the error text if hipGetErrorString yields one, else the raw code.
+  const char *msg = NULL;
+
+  if (hip->hipGetErrorString (rc, &msg) != HIP_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "hipCtxPopCurrent(): %d", rc);
+  }
+  else
+  {
+    event_log_error (hashcat_ctx, "hipCtxPopCurrent(): %s", msg);
+  }
+
+  return -1;
+}
+
+int hc_hipLinkCreate (hashcat_ctx_t *hashcat_ctx, unsigned int numOptions, HIPjit_option *options, void **optionValues, HIPlinkState *stateOut)
+{
+  // Calls hipLinkCreate through the HIP function table in
+  // backend_ctx->hip, passing the caller's arguments through unchanged.
+  // Returns 0 on HIP_SUCCESS; otherwise logs the failure and returns -1.
+  backend_ctx_t *bctx = hashcat_ctx->backend_ctx;
+  HIP_PTR *hip = (HIP_PTR *) bctx->hip;
+
+  const HIPresult rc = hip->hipLinkCreate (numOptions, options, optionValues, stateOut);
+
+  if (rc == HIP_SUCCESS) return 0;
+
+  // Log the error text if hipGetErrorString yields one, else the raw code.
+  const char *msg = NULL;
+
+  if (hip->hipGetErrorString (rc, &msg) != HIP_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "hipLinkCreate(): %d", rc);
+  }
+  else
+  {
+    event_log_error (hashcat_ctx, "hipLinkCreate(): %s", msg);
+  }
+
+  return -1;
+}
+
+int hc_hipLinkAddData (hashcat_ctx_t *hashcat_ctx, HIPlinkState state, HIPjitInputType type, void *data, size_t size, const char *name, unsigned int numOptions, HIPjit_option *options, void **optionValues)
+{
+  // Calls hipLinkAddData through the HIP function table in
+  // backend_ctx->hip, passing the caller's arguments through unchanged.
+  // Returns 0 on HIP_SUCCESS; otherwise logs the failure and returns -1.
+  backend_ctx_t *bctx = hashcat_ctx->backend_ctx;
+  HIP_PTR *hip = (HIP_PTR *) bctx->hip;
+
+  const HIPresult rc = hip->hipLinkAddData (state, type, data, size, name, numOptions, options, optionValues);
+
+  if (rc == HIP_SUCCESS) return 0;
+
+  // Log the error text if hipGetErrorString yields one, else the raw code.
+  const char *msg = NULL;
+
+  if (hip->hipGetErrorString (rc, &msg) != HIP_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "hipLinkAddData(): %d", rc);
+  }
+  else
+  {
+    event_log_error (hashcat_ctx, "hipLinkAddData(): %s", msg);
+  }
+
+  return -1;
+}
+
+int hc_hipLinkDestroy (hashcat_ctx_t *hashcat_ctx, HIPlinkState state)
+{
+  // Calls hipLinkDestroy through the HIP function table in
+  // backend_ctx->hip, passing the caller's argument through unchanged.
+  // Returns 0 on HIP_SUCCESS; otherwise logs the failure and returns -1.
+  backend_ctx_t *bctx = hashcat_ctx->backend_ctx;
+  HIP_PTR *hip = (HIP_PTR *) bctx->hip;
+
+  const HIPresult rc = hip->hipLinkDestroy (state);
+
+  if (rc == HIP_SUCCESS) return 0;
+
+  // Log the error text if hipGetErrorString yields one, else the raw code.
+  const char *msg = NULL;
+
+  if (hip->hipGetErrorString (rc, &msg) != HIP_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "hipLinkDestroy(): %d", rc);
+  }
+  else
+  {
+    event_log_error (hashcat_ctx, "hipLinkDestroy(): %s", msg);
+  }
+
+  return -1;
+}
+
+int hc_hipLinkComplete (hashcat_ctx_t *hashcat_ctx, HIPlinkState state, void **hipbinOut, size_t *sizeOut)
+{
+  // Calls hipLinkComplete through the HIP function table in
+  // backend_ctx->hip, passing the caller's arguments through unchanged.
+  // Returns 0 on HIP_SUCCESS; otherwise logs the failure and returns -1.
+  backend_ctx_t *bctx = hashcat_ctx->backend_ctx;
+  HIP_PTR *hip = (HIP_PTR *) bctx->hip;
+
+  const HIPresult rc = hip->hipLinkComplete (state, hipbinOut, sizeOut);
+
+  if (rc == HIP_SUCCESS) return 0;
+
+  // Log the error text if hipGetErrorString yields one, else the raw code.
+  const char *msg = NULL;
+
+  if (hip->hipGetErrorString (rc, &msg) != HIP_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "hipLinkComplete(): %d", rc);
+  }
+  else
+  {
+    event_log_error (hashcat_ctx, "hipLinkComplete(): %s", msg);
+  }
+
+  return -1;
+}
+
 // OpenCL
 
 int ocl_init (hashcat_ctx_t *hashcat_ctx)
@@ -2916,6 +4346,15 @@ int gidd_to_pw_t (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, c
     if (hc_cuCtxPopCurrent (hashcat_ctx, &device_param->cuda_context) == -1) return -1;
   }
 
+  if (device_param->is_hip == true)
+  {
+    if (hc_hipCtxPushCurrent (hashcat_ctx, device_param->hip_context) == -1) return -1;
+
+    if (hc_hipMemcpyDtoH (hashcat_ctx, &pw_idx, device_param->hip_d_pws_idx + (gidd * sizeof (pw_idx_t)), sizeof (pw_idx_t)) == -1) return -1;
+
+    if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1) return -1;
+  }
+
   if (device_param->is_opencl == true)
   {
     if (hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_idx, CL_TRUE, gidd * sizeof (pw_idx_t), sizeof (pw_idx_t), &pw_idx, 0, NULL, NULL) == -1) return -1;
@@ -2937,6 +4376,18 @@ int gidd_to_pw_t (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, c
     }
   }
 
+  if (device_param->is_hip == true)
+  {
+    if (cnt > 0)
+    {
+      if (hc_hipCtxPushCurrent (hashcat_ctx, device_param->hip_context) == -1) return -1;
+
+      if (hc_hipMemcpyDtoH (hashcat_ctx,pw->i, device_param->hip_d_pws_comp_buf + (off * sizeof (u32)), cnt * sizeof (u32)) == -1) return -1;
+
+      if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1) return -1;
+    }
+  }
+
   if (device_param->is_opencl == true)
   {
     if (cnt > 0)
@@ -2986,6 +4437,11 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
             if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_tm_c, size_tm) == -1) return -1;
           }
 
+          if (device_param->is_hip == true)
+          {
+            if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_tm_c, size_tm) == -1) return -1;
+          }
+
           if (device_param->is_opencl == true)
           {
             if (run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_tm_c, size_tm) == -1) return -1;
@@ -2998,6 +4454,11 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
             if (hc_cuMemcpyDtoD (hashcat_ctx, device_param->cuda_d_bfs_c, device_param->cuda_d_tm_c, size_tm) == -1) return -1;
           }
 
+          if (device_param->is_hip == true)
+          {
+            if (hc_hipMemcpyDtoD (hashcat_ctx, device_param->hip_d_bfs_c, device_param->hip_d_tm_c, size_tm) == -1) return -1;
+          }
+
           if (device_param->is_opencl == true)
           {
             if (hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_tm_c, device_param->opencl_d_bfs_c, 0, 0, size_tm, 0, NULL, NULL) == -1) return -1;
@@ -3059,6 +4520,11 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
         if (hc_cuMemcpyDtoD (hashcat_ctx, device_param->cuda_d_pws_buf, device_param->cuda_d_pws_amp_buf, pws_cnt * sizeof (pw_t)) == -1) return -1;
       }
 
+      if (device_param->is_hip == true)
+      {
+        if (hc_hipMemcpyDtoD (hashcat_ctx, device_param->hip_d_pws_buf, device_param->hip_d_pws_amp_buf, pws_cnt * sizeof (pw_t)) == -1) return -1;
+      }
+
       if (device_param->is_opencl == true)
       {
         if (hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_amp_buf, device_param->opencl_d_pws_buf, 0, 0, pws_cnt * sizeof (pw_t), 0, NULL, NULL) == -1) return -1;
@@ -3079,6 +4545,11 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
           if (run_cuda_kernel_utf8toutf16le (hashcat_ctx, device_param, device_param->cuda_d_pws_buf, pws_cnt) == -1) return -1;
         }
 
+        if (device_param->is_hip == true)
+        {
+          if (run_hip_kernel_utf8toutf16le (hashcat_ctx, device_param, device_param->hip_d_pws_buf, pws_cnt) == -1) return -1;
+        }
+
         if (device_param->is_opencl == true)
         {
           if (run_opencl_kernel_utf8toutf16le (hashcat_ctx, device_param, device_param->opencl_d_pws_buf, pws_cnt) == -1) return -1;
@@ -3096,6 +4567,11 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
           if (hc_cuMemcpyDtoH (hashcat_ctx, device_param->hooks_buf, device_param->cuda_d_hooks, pws_cnt * hashconfig->hook_size) == -1) return -1;
         }
 
+        if (device_param->is_hip == true)
+        {
+          if (hc_hipMemcpyDtoH (hashcat_ctx, device_param->hooks_buf, device_param->hip_d_hooks, pws_cnt * hashconfig->hook_size) == -1) return -1;
+        }
+
         if (device_param->is_opencl == true)
         {
           if (hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, pws_cnt * hashconfig->hook_size, device_param->hooks_buf, 0, NULL, NULL) == -1) return -1;
@@ -3145,6 +4621,11 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
           if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_hooks, device_param->hooks_buf, pws_cnt * hashconfig->hook_size) == -1) return -1;
         }
 
+        if (device_param->is_hip == true)
+        {
+          if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_hooks, device_param->hooks_buf, pws_cnt * hashconfig->hook_size) == -1) return -1;
+        }
+
         if (device_param->is_opencl == true)
         {
           if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, pws_cnt * hashconfig->hook_size, device_param->hooks_buf, 0, NULL, NULL) == -1) return -1;
@@ -3234,6 +4715,11 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
               if (hc_cuMemcpyDtoH (hashcat_ctx, device_param->hooks_buf, device_param->cuda_d_hooks, pws_cnt * hashconfig->hook_size) == -1) return -1;
             }
 
+            if (device_param->is_hip == true)
+            {
+              if (hc_hipMemcpyDtoH (hashcat_ctx, device_param->hooks_buf, device_param->hip_d_hooks, pws_cnt * hashconfig->hook_size) == -1) return -1;
+            }
+
             if (device_param->is_opencl == true)
             {
               if (hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, pws_cnt * hashconfig->hook_size, device_param->hooks_buf, 0, NULL, NULL) == -1) return -1;
@@ -3283,6 +4769,11 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
               if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_hooks, device_param->hooks_buf, pws_cnt * hashconfig->hook_size) == -1) return -1;
             }
 
+            if (device_param->is_hip == true)
+            {
+              if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_hooks, device_param->hooks_buf, pws_cnt * hashconfig->hook_size) == -1) return -1;
+            }
+
             if (device_param->is_opencl == true)
             {
               if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, pws_cnt * hashconfig->hook_size, device_param->hooks_buf, 0, NULL, NULL) == -1) return -1;
@@ -3390,6 +4881,11 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
       if (run_cuda_kernel_bzero   (hashcat_ctx, device_param, device_param->cuda_d_tmps,   device_param->size_tmps) == -1) return -1;
     }
 
+    if (device_param->is_hip == true)
+    {
+      if (run_hip_kernel_bzero    (hashcat_ctx, device_param, device_param->hip_d_tmps,    device_param->size_tmps) == -1) return -1;
+    }
+
     if (device_param->is_opencl == true)
     {
       if (run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_tmps, device_param->size_tmps) == -1) return -1;
@@ -3403,6 +4899,11 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
         if (run_cuda_kernel_bzero   (hashcat_ctx, device_param, device_param->cuda_d_hooks,   pws_cnt * hashconfig->hook_size) == -1) return -1;
       }
 
+      if (device_param->is_hip == true)
+      {
+        if (run_hip_kernel_bzero    (hashcat_ctx, device_param, device_param->hip_d_hooks,    pws_cnt * hashconfig->hook_size) == -1) return -1;
+      }
+
       if (device_param->is_opencl == true)
       {
         if (run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_hooks, pws_cnt * hashconfig->hook_size) == -1) return -1;
@@ -3557,6 +5058,99 @@ int run_cuda_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device
   return run_cuda_kernel_memset (hashcat_ctx, device_param, buf, 0, size);
 }
 
+int run_hip_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, HIPdeviceptr buf, const u64 num)
+{
+  // Launches the atinit kernel with `buf` and `num` as its arguments on the
+  // device's HIP stream, then synchronizes that stream. Returns 0 on
+  // success, -1 if the launch or the synchronization fails.
+
+  const u64 threads = device_param->kernel_wgs_atinit;
+  const u64 blocks  = CEILDIV (num, threads); // grid size (CEILDIV presumably rounds up)
+
+  // The kernel receives the original element count, not the block count.
+  device_param->kernel_params_atinit[0]       = (void *) &buf;
+  device_param->kernel_params_atinit_buf64[1] = num;
+
+  if (hc_hipLaunchKernel (hashcat_ctx, device_param->hip_function_atinit, blocks, 1, 1, threads, 1, 1, 0, device_param->hip_stream, device_param->kernel_params_atinit, NULL) == -1) return -1;
+
+  if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1;
+
+  return 0;
+}
+
+int run_hip_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, HIPdeviceptr buf, const u64 num)
+{
+  // Launches the utf8toutf16le kernel with `buf` and `num` as its arguments
+  // on the device's HIP stream, then synchronizes that stream. Returns 0 on
+  // success, -1 if the launch or the synchronization fails.
+
+  const u64 threads = device_param->kernel_wgs_utf8toutf16le;
+  const u64 blocks  = CEILDIV (num, threads); // grid size (CEILDIV presumably rounds up)
+
+  // The kernel receives the original element count, not the block count.
+  device_param->kernel_params_utf8toutf16le[0]       = (void *) &buf;
+  device_param->kernel_params_utf8toutf16le_buf64[1] = num;
+
+  if (hc_hipLaunchKernel (hashcat_ctx, device_param->hip_function_utf8toutf16le, blocks, 1, 1, threads, 1, 1, 0, device_param->hip_stream, device_param->kernel_params_utf8toutf16le, NULL) == -1) return -1;
+
+  if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1;
+
+  return 0;
+}
+
+int run_hip_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, HIPdeviceptr buf, const u32 value, const u64 size)
+{
+  // Fills `size` bytes at `buf` with the 32-bit pattern `value`.
+  //
+  // The byte count is split into whole 16-byte units, which are handed to
+  // the memset kernel, and a remainder of up to 15 bytes, which is copied
+  // from a small host-side pattern buffer.
+  //
+  // Returns 0 on success and -1 as soon as one of the HIP calls fails.
+
+  const u64 units16 = size / 16;
+  const u64 tail    = size % 16;
+
+  if (units16 > 0)
+  {
+    const u64 threads = device_param->kernel_wgs_memset;
+    const u64 blocks  = CEILDIV (units16, threads);
+
+    // Stage the kernel arguments: target buffer, fill value, unit count.
+    // NOTE(review): presumably kernel_params_memset[1] and [2] point at the
+    // buf32/buf64 slots written here - that wiring is not visible in this hunk.
+    device_param->kernel_params_memset[0]       = (void *) &buf;
+    device_param->kernel_params_memset_buf32[1] = value;
+    device_param->kernel_params_memset_buf64[2] = units16;
+
+    HIPfunction kernel = device_param->hip_function_memset;
+
+    if (hc_hipLaunchKernel (hashcat_ctx, kernel, blocks, 1, 1, threads, 1, 1, 0, device_param->hip_stream, device_param->kernel_params_memset, NULL) == -1) return -1;
+
+    // Synchronize the stream before the tail is handled.
+    if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1;
+  }
+
+  if (tail > 0)
+  {
+    // Four copies of `value` give 16 bytes of pattern, enough for any tail.
+    const u32 pattern[4] = { value, value, value, value };
+
+    // The tail starts directly behind the last whole unit, so the device
+    // pointer is offset by hand. This is apparently allowed, see:
+    // https://devtalk.nvidia.com/default/topic/761515/how-to-copy-to-device-memory-with-offset-/
+
+    if (hc_hipMemcpyHtoD (hashcat_ctx, buf + (units16 * 16), pattern, tail) == -1) return -1;
+  }
+
+  return 0;
+}
+
+int run_hip_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, HIPdeviceptr buf, const u64 size)
+{ // Zero-fill helper for `size` bytes at `buf`; returns the result of run_hip_kernel_memset unchanged.
+  return run_hip_kernel_memset (hashcat_ctx, device_param, buf, 0, size); // fill value 0
+}
+
 int run_opencl_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 num)
 {
   u64 num_elements = num;
@@ -3860,6 +5454,105 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
     }
   }
 
+  if (device_param->is_hip == true)
+  {
+    HIPfunction hip_function = NULL;
+
+    if (device_param->is_hip == true)
+    {
+      switch (kern_run)
+      {
+        case KERN_RUN_1:      hip_function = device_param->hip_function1;       break;
+        case KERN_RUN_12:     hip_function = device_param->hip_function12;      break;
+        case KERN_RUN_2P:     hip_function = device_param->hip_function2p;      break;
+        case KERN_RUN_2:      hip_function = device_param->hip_function2;       break;
+        case KERN_RUN_2E:     hip_function = device_param->hip_function2e;      break;
+        case KERN_RUN_23:     hip_function = device_param->hip_function23;      break;
+        case KERN_RUN_3:      hip_function = device_param->hip_function3;       break;
+        case KERN_RUN_4:      hip_function = device_param->hip_function4;       break;
+        case KERN_RUN_INIT2:  hip_function = device_param->hip_function_init2;  break;
+        case KERN_RUN_LOOP2P: hip_function = device_param->hip_function_loop2p; break;
+        case KERN_RUN_LOOP2:  hip_function = device_param->hip_function_loop2;  break;
+        case KERN_RUN_AUX1:   hip_function = device_param->hip_function_aux1;   break;
+        case KERN_RUN_AUX2:   hip_function = device_param->hip_function_aux2;   break;
+        case KERN_RUN_AUX3:   hip_function = device_param->hip_function_aux3;   break;
+        case KERN_RUN_AUX4:   hip_function = device_param->hip_function_aux4;   break;
+      }
+
+      //if (hc_hipFuncSetAttribute (hashcat_ctx, hip_function, HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES, dynamic_shared_mem) == -1) return -1;
+    }
+
+    if (kernel_threads == 0) kernel_threads = 1;
+
+    num_elements = CEILDIV (num_elements, kernel_threads);
+
+    if (kern_run == KERN_RUN_1)
+    {
+      if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_INIT)
+      {
+        num_elements = CEILDIV (num_elements, device_param->vector_width);
+      }
+    }
+    else if (kern_run == KERN_RUN_2)
+    {
+      if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_LOOP)
+      {
+        num_elements = CEILDIV (num_elements, device_param->vector_width);
+      }
+    }
+    else if (kern_run == KERN_RUN_3)
+    {
+      if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_COMP)
+      {
+        num_elements = CEILDIV (num_elements, device_param->vector_width);
+      }
+    }
+    else if (kern_run == KERN_RUN_INIT2)
+    {
+      if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_INIT2)
+      {
+        num_elements = CEILDIV (num_elements, device_param->vector_width);
+      }
+    }
+    else if (kern_run == KERN_RUN_LOOP2)
+    {
+      if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_LOOP2)
+      {
+        num_elements = CEILDIV (num_elements, device_param->vector_width);
+      }
+    }
+
+    if (hc_hipEventRecord (hashcat_ctx, device_param->hip_event1, device_param->hip_stream) == -1) return -1;
+
+    if (hc_hipLaunchKernel (hashcat_ctx, hip_function, num_elements, 1, 1, kernel_threads, 1, 1, dynamic_shared_mem, device_param->hip_stream, device_param->kernel_params, NULL) == -1) return -1;
+
+    if (hc_hipEventRecord (hashcat_ctx, device_param->hip_event2, device_param->hip_stream) == -1) return -1;
+
+    if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1;
+
+    if (hc_hipEventSynchronize (hashcat_ctx, device_param->hip_event2) == -1) return -1;
+
+    float exec_ms;
+
+    if (hc_hipEventElapsedTime (hashcat_ctx, &exec_ms, device_param->hip_event1, device_param->hip_event2) == -1) return -1;
+
+    if (event_update)
+    {
+      u32 exec_pos = device_param->exec_pos;
+
+      device_param->exec_msec[exec_pos] = exec_ms;
+
+      exec_pos++;
+
+      if (exec_pos == EXEC_CACHE)
+      {
+        exec_pos = 0;
+      }
+
+      device_param->exec_pos = exec_pos;
+    }
+  }
+
   if (device_param->is_opencl == true)
   {
     cl_kernel opencl_kernel = NULL;
@@ -4089,6 +5782,32 @@ int run_kernel_mp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
     if (hc_cuStreamSynchronize (hashcat_ctx, device_param->cuda_stream) == -1) return -1;
   }
 
+  if (device_param->is_hip == true)
+  {
+    HIPfunction hip_function = NULL;
+
+    void **hip_args = NULL;
+
+    switch (kern_run)
+    {
+      case KERN_RUN_MP:   hip_function = device_param->hip_function_mp;
+                          hip_args     = device_param->kernel_params_mp;
+                          break;
+      case KERN_RUN_MP_R: hip_function = device_param->hip_function_mp_r;
+                          hip_args     = device_param->kernel_params_mp_r;
+                          break;
+      case KERN_RUN_MP_L: hip_function = device_param->hip_function_mp_l;
+                          hip_args     = device_param->kernel_params_mp_l;
+                          break;
+    }
+
+    num_elements = CEILDIV (num_elements, kernel_threads);
+
+    if (hc_hipLaunchKernel (hashcat_ctx, hip_function, num_elements, 1, 1, kernel_threads, 1, 1, 0, device_param->hip_stream, hip_args, NULL) == -1) return -1;
+
+    if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1;
+  }
+
   if (device_param->is_opencl == true)
   {
     cl_kernel opencl_kernel = NULL;
@@ -4156,6 +5875,15 @@ int run_kernel_tm (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
     if (hc_cuStreamSynchronize (hashcat_ctx, device_param->cuda_stream) == -1) return -1;
   }
 
+  if (device_param->is_hip == true)
+  {
+    HIPfunction hip_function = device_param->hip_function_tm;
+
+    if (hc_hipLaunchKernel (hashcat_ctx, hip_function, num_elements / kernel_threads, 1, 1, kernel_threads, 1, 1, 0, device_param->hip_stream, device_param->kernel_params_tm, NULL) == -1) return -1;
+
+    if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1;
+  }
+
   if (device_param->is_opencl == true)
   {
     cl_kernel cuda_kernel = device_param->opencl_kernel_tm;
@@ -4192,6 +5920,17 @@ int run_kernel_amp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
     if (hc_cuStreamSynchronize (hashcat_ctx, device_param->cuda_stream) == -1) return -1;
   }
 
+  if (device_param->is_hip == true)
+  {
+    num_elements = CEILDIV (num_elements, kernel_threads);
+
+    HIPfunction hip_function = device_param->hip_function_amp;
+
+    if (hc_hipLaunchKernel (hashcat_ctx, hip_function, num_elements, 1, 1, kernel_threads, 1, 1, 0, device_param->hip_stream, device_param->kernel_params_amp, NULL) == -1) return -1;
+
+    if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1;
+  }
+
   if (device_param->is_opencl == true)
   {
     num_elements = round_up_multiple_64 (num_elements, kernel_threads);
@@ -4232,6 +5971,17 @@ int run_kernel_decompress (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device
     if (hc_cuStreamSynchronize (hashcat_ctx, device_param->cuda_stream) == -1) return -1;
   }
 
+  if (device_param->is_hip == true)
+  {
+    num_elements = CEILDIV (num_elements, kernel_threads);
+
+    HIPfunction hip_function = device_param->hip_function_decompress;
+
+    if (hc_hipLaunchKernel (hashcat_ctx, hip_function, num_elements, 1, 1, kernel_threads, 1, 1, 0, device_param->hip_stream, device_param->kernel_params_decompress, NULL) == -1) return -1;
+
+    if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1;
+  }
+
   if (device_param->is_opencl == true)
   {
     num_elements = round_up_multiple_64 (num_elements, kernel_threads);
@@ -4290,6 +6040,20 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const
       }
     }
 
+    if (device_param->is_hip == true)
+    {
+      if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_pws_idx, device_param->pws_idx, pws_cnt * sizeof (pw_idx_t)) == -1) return -1;
+
+      const pw_idx_t *pw_idx = device_param->pws_idx + pws_cnt;
+
+      const u32 off = pw_idx->off;
+
+      if (off)
+      {
+        if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_pws_comp_buf, device_param->pws_comp, off * sizeof (u32)) == -1) return -1;
+      }
+    }
+
     if (device_param->is_opencl == true)
     {
       if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_idx, CL_TRUE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL) == -1) return -1;
@@ -4324,6 +6088,20 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const
         }
       }
 
+      if (device_param->is_hip == true)
+      {
+        if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_pws_idx, device_param->pws_idx, pws_cnt * sizeof (pw_idx_t)) == -1) return -1;
+
+        const pw_idx_t *pw_idx = device_param->pws_idx + pws_cnt;
+
+        const u32 off = pw_idx->off;
+
+        if (off)
+        {
+          if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_pws_comp_buf, device_param->pws_comp, off * sizeof (u32)) == -1) return -1;
+        }
+      }
+
       if (device_param->is_opencl == true)
       {
         if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_idx, CL_TRUE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL) == -1) return -1;
@@ -4392,6 +6170,20 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const
           }
         }
 
+        if (device_param->is_hip == true)
+        {
+          if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_pws_idx, device_param->pws_idx, pws_cnt * sizeof (pw_idx_t)) == -1) return -1;
+
+          const pw_idx_t *pw_idx = device_param->pws_idx + pws_cnt;
+
+          const u32 off = pw_idx->off;
+
+          if (off)
+          {
+            if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_pws_comp_buf, device_param->pws_comp, off * sizeof (u32)) == -1) return -1;
+          }
+        }
+
         if (device_param->is_opencl == true)
         {
           if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_idx, CL_TRUE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL) == -1) return -1;
@@ -4426,6 +6218,20 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const
             }
           }
 
+          if (device_param->is_hip == true)
+          {
+            if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_pws_idx, device_param->pws_idx, pws_cnt * sizeof (pw_idx_t)) == -1) return -1;
+
+            const pw_idx_t *pw_idx = device_param->pws_idx + pws_cnt;
+
+            const u32 off = pw_idx->off;
+
+            if (off)
+            {
+              if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_pws_comp_buf, device_param->pws_comp, off * sizeof (u32)) == -1) return -1;
+            }
+          }
+
           if (device_param->is_opencl == true)
           {
             if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_idx, CL_TRUE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL) == -1) return -1;
@@ -4458,6 +6264,20 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const
             }
           }
 
+          if (device_param->is_hip == true)
+          {
+            if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_pws_idx, device_param->pws_idx, pws_cnt * sizeof (pw_idx_t)) == -1) return -1;
+
+            const pw_idx_t *pw_idx = device_param->pws_idx + pws_cnt;
+
+            const u32 off = pw_idx->off;
+
+            if (off)
+            {
+              if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_pws_comp_buf, device_param->pws_comp, off * sizeof (u32)) == -1) return -1;
+            }
+          }
+
           if (device_param->is_opencl == true)
           {
             if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_idx, CL_TRUE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL) == -1) return -1;
@@ -4679,6 +6499,11 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co
             if (hc_cuMemcpyDtoD (hashcat_ctx, device_param->cuda_d_rules_c, device_param->cuda_d_rules + (innerloop_pos * sizeof (kernel_rule_t)), innerloop_left * sizeof (kernel_rule_t)) == -1) return -1;
           }
 
+          if (device_param->is_hip == true)
+          {
+            if (hc_hipMemcpyDtoD (hashcat_ctx, device_param->hip_d_rules_c, device_param->hip_d_rules + (innerloop_pos * sizeof (kernel_rule_t)), innerloop_left * sizeof (kernel_rule_t)) == -1) return -1;
+          }
+
           if (device_param->is_opencl == true)
           {
             if (hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_rules, device_param->opencl_d_rules_c, innerloop_pos * sizeof (kernel_rule_t), 0, innerloop_left * sizeof (kernel_rule_t), 0, NULL, NULL) == -1) return -1;
@@ -4799,6 +6624,11 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co
                 if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_combs_c, device_param->combs_buf, innerloop_left * sizeof (pw_t)) == -1) return -1;
               }
 
+              if (device_param->is_hip == true)
+              {
+                if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_combs_c, device_param->combs_buf, innerloop_left * sizeof (pw_t)) == -1) return -1;
+              }
+
               if (device_param->is_opencl == true)
               {
                 if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_combs_c, CL_TRUE, 0, innerloop_left * sizeof (pw_t), device_param->combs_buf, 0, NULL, NULL) == -1) return -1;
@@ -4817,6 +6647,11 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co
                 if (hc_cuMemcpyDtoD (hashcat_ctx, device_param->cuda_d_combs_c, device_param->cuda_d_combs, innerloop_left * sizeof (pw_t)) == -1) return -1;
               }
 
+              if (device_param->is_hip == true)
+              {
+                if (hc_hipMemcpyDtoD (hashcat_ctx, device_param->hip_d_combs_c, device_param->hip_d_combs, innerloop_left * sizeof (pw_t)) == -1) return -1;
+              }
+
               if (device_param->is_opencl == true)
               {
                 if (hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_combs, device_param->opencl_d_combs_c, 0, 0, innerloop_left * sizeof (pw_t), 0, NULL, NULL) == -1) return -1;
@@ -4835,6 +6670,11 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co
                 if (hc_cuMemcpyDtoD (hashcat_ctx, device_param->cuda_d_combs_c, device_param->cuda_d_combs, innerloop_left * sizeof (pw_t)) == -1) return -1;
               }
 
+              if (device_param->is_hip == true)
+              {
+                if (hc_hipMemcpyDtoD (hashcat_ctx, device_param->hip_d_combs_c, device_param->hip_d_combs, innerloop_left * sizeof (pw_t)) == -1) return -1;
+              }
+
               if (device_param->is_opencl == true)
               {
                 if (hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_combs, device_param->opencl_d_combs_c, 0, 0, innerloop_left * sizeof (pw_t), 0, NULL, NULL) == -1) return -1;
@@ -4956,6 +6796,11 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co
                 if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_combs_c, device_param->combs_buf, innerloop_left * sizeof (pw_t)) == -1) return -1;
               }
 
+              if (device_param->is_hip == true)
+              {
+                if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_combs_c, device_param->combs_buf, innerloop_left * sizeof (pw_t)) == -1) return -1;
+              }
+
               if (device_param->is_opencl == true)
               {
                 if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_combs_c, CL_TRUE, 0, innerloop_left * sizeof (pw_t), device_param->combs_buf, 0, NULL, NULL) == -1) return -1;
@@ -4974,6 +6819,11 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co
                 if (hc_cuMemcpyDtoD (hashcat_ctx, device_param->cuda_d_combs_c, device_param->cuda_d_combs, innerloop_left * sizeof (pw_t)) == -1) return -1;
               }
 
+              if (device_param->is_hip == true)
+              {
+                if (hc_hipMemcpyDtoD (hashcat_ctx, device_param->hip_d_combs_c, device_param->hip_d_combs, innerloop_left * sizeof (pw_t)) == -1) return -1;
+              }
+
               if (device_param->is_opencl == true)
               {
                 if (hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_combs, device_param->opencl_d_combs_c, 0, 0, innerloop_left * sizeof (pw_t), 0, NULL, NULL) == -1) return -1;
@@ -4994,6 +6844,11 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co
             if (hc_cuMemcpyDtoD (hashcat_ctx, device_param->cuda_d_bfs_c, device_param->cuda_d_bfs, innerloop_left * sizeof (bf_t)) == -1) return -1;
           }
 
+          if (device_param->is_hip == true)
+          {
+            if (hc_hipMemcpyDtoD (hashcat_ctx, device_param->hip_d_bfs_c, device_param->hip_d_bfs, innerloop_left * sizeof (bf_t)) == -1) return -1;
+          }
+
           if (device_param->is_opencl == true)
           {
             if (hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bfs, device_param->opencl_d_bfs_c, 0, 0, innerloop_left * sizeof (bf_t), 0, NULL, NULL) == -1) return -1;
@@ -5278,6 +7133,99 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx)
     }
   }
 
+  /**
+   * Load and map HIP library calls, then init HIP
+   */
+
+  int rc_hip_init = -1;
+
+  if (user_options->backend_ignore_hip == false)
+  {
+    HIP_PTR *hip = (HIP_PTR *) hcmalloc (sizeof (HIP_PTR));
+
+    backend_ctx->hip = hip;
+
+    rc_hip_init = hip_init (hashcat_ctx);
+
+    if (rc_hip_init == -1)
+    {
+      backend_ctx->rc_hip_init = rc_hip_init;
+
+      hip_close (hashcat_ctx);
+    }
+
+    /**
+     * Load and map HIPRTC library calls
+     */
+
+    HIPRTC_PTR *hiprtc = (HIPRTC_PTR *) hcmalloc (sizeof (HIPRTC_PTR));
+
+    backend_ctx->hiprtc = hiprtc;
+
+    int rc_hiprtc_init = hiprtc_init (hashcat_ctx);
+
+    if (rc_hiprtc_init == -1)
+    {
+      backend_ctx->rc_hiprtc_init = rc_hiprtc_init;
+
+      hiprtc_close (hashcat_ctx);
+    }
+
+    /**
+     * Check if both HIP and HIPRTC were loaded successfully
+     */
+
+    if ((rc_hip_init == 0) && (rc_hiprtc_init == 0))
+    {
+      // hiprtc version
+
+      int hiprtc_major = 0;
+      int hiprtc_minor = 0;
+
+      if (hc_hiprtcVersion (hashcat_ctx, &hiprtc_major, &hiprtc_minor) == -1) return -1;
+
+      int hiprtc_driver_version = (hiprtc_major * 1000) + (hiprtc_minor * 10);
+
+      backend_ctx->hiprtc_driver_version = hiprtc_driver_version;
+
+      if (hiprtc_driver_version < 9000)
+      {
+        event_log_error (hashcat_ctx, "Outdated AMD HIPRTC driver version '%d' detected!", hiprtc_driver_version);
+
+        event_log_warning (hashcat_ctx, "See hashcat.net for officially supported AMD HIP versions.");
+        event_log_warning (hashcat_ctx, NULL);
+
+        return -1;
+      }
+
+      // hip version
+
+      int hip_driver_version = 10000;
+
+      //if (hc_hipDriverGetVersion (hashcat_ctx, &hip_driver_version) == -1) return -1;
+
+      backend_ctx->hip_driver_version = hip_driver_version;
+
+      if (hip_driver_version < 9000)
+      {
+        event_log_error (hashcat_ctx, "Outdated AMD HIP driver version '%d' detected!", hip_driver_version);
+
+        event_log_warning (hashcat_ctx, "See hashcat.net for officially supported AMD HIP versions.");
+        event_log_warning (hashcat_ctx, NULL);
+
+        return -1;
+      }
+    }
+    else
+    {
+      rc_hip_init  = -1;
+      rc_hiprtc_init = -1;
+
+      hip_close  (hashcat_ctx);
+      hiprtc_close (hashcat_ctx);
+    }
+  }
+
   /**
    * Load and map OpenCL library calls
    */
@@ -5301,11 +7249,11 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx)
      * return if both CUDA and OpenCL initialization failed
      */
 
-    if ((rc_cuda_init == -1) && (rc_ocl_init == -1))
+    if ((rc_cuda_init == -1) && (rc_hip_init == -1) && (rc_ocl_init == -1))
     {
-      event_log_error (hashcat_ctx, "ATTENTION! No OpenCL or CUDA installation found.");
+      event_log_error (hashcat_ctx, "ATTENTION! No OpenCL, HIP or CUDA installation found.");
 
-      event_log_warning (hashcat_ctx, "You are probably missing the CUDA or OpenCL runtime installation.");
+      event_log_warning (hashcat_ctx, "You are probably missing the CUDA, HIP or OpenCL runtime installation.");
       event_log_warning (hashcat_ctx, NULL);
 
       #if defined (__linux__)
@@ -5366,6 +7314,18 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx)
     }
   }
 
+  /**
+   * HIP API: init
+   */
+
+  if (backend_ctx->hip)
+  {
+    if (hc_hipInit (hashcat_ctx, 0) == -1)
+    {
+      hip_close (hashcat_ctx);
+    }
+  }
+
   /**
    * OpenCL API: init
    */
@@ -5614,11 +7574,11 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx)
    * Final checks
    */
 
-  if ((backend_ctx->cuda == NULL) && (backend_ctx->ocl == NULL))
+  if ((backend_ctx->cuda == NULL) && (backend_ctx->hip == NULL) && (backend_ctx->ocl == NULL))
   {
-    event_log_error (hashcat_ctx, "ATTENTION! No OpenCL-compatible or CUDA-compatible platform found.");
+    event_log_error (hashcat_ctx, "ATTENTION! No OpenCL-compatible, HIP-compatible or CUDA-compatible platform found.");
 
-    event_log_warning (hashcat_ctx, "You are probably missing the OpenCL or CUDA runtime installation.");
+    event_log_warning (hashcat_ctx, "You are probably missing the OpenCL, CUDA or HIP runtime installation.");
     event_log_warning (hashcat_ctx, NULL);
 
     #if defined (__linux__)
@@ -5664,9 +7624,12 @@ void backend_ctx_destroy (hashcat_ctx_t *hashcat_ctx)
     hcfree (backend_ctx->opencl_platforms_version);
   }
 
-  nvrtc_close (hashcat_ctx);
-  cuda_close  (hashcat_ctx);
-  ocl_close   (hashcat_ctx);
+  nvrtc_close  (hashcat_ctx);
+  hiprtc_close (hashcat_ctx);
+
+  cuda_close   (hashcat_ctx);
+  hip_close    (hashcat_ctx);
+  ocl_close    (hashcat_ctx);
 
   memset (backend_ctx, 0, sizeof (backend_ctx_t));
 }
@@ -5689,6 +7652,8 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
   int backend_devices_idx = 0;
 
+  // CUDA
+
   int cuda_devices_cnt    = 0;
   int cuda_devices_active = 0;
 
@@ -5725,10 +7690,11 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
       device_param->cuda_device = cuda_device;
 
-      device_param->is_cuda = true;
-
+      device_param->is_cuda   = true;
+      device_param->is_hip    = false;
       device_param->is_opencl = false;
 
+
       device_param->use_opencl12 = false;
       device_param->use_opencl20 = false;
       device_param->use_opencl21 = false;
@@ -6013,7 +7979,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
         continue;
       }
 
-      if (hc_cuCtxSetCurrent (hashcat_ctx, cuda_context) == -1)
+      if (hc_cuCtxPushCurrent (hashcat_ctx, cuda_context) == -1)
       {
         device_param->skipped = true;
         continue;
@@ -6030,6 +7996,12 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
       device_param->device_available_mem = (u64) free;
 
+      if (hc_cuCtxPopCurrent (hashcat_ctx, &cuda_context) == -1)
+      {
+        device_param->skipped = true;
+        continue;
+      }
+
       if (hc_cuCtxDestroy (hashcat_ctx, cuda_context) == -1)
       {
         device_param->skipped = true;
@@ -6047,6 +8019,378 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
   backend_ctx->cuda_devices_cnt     = cuda_devices_cnt;
   backend_ctx->cuda_devices_active  = cuda_devices_active;
 
+  // HIP
+
+  int hip_devices_cnt    = 0;
+  int hip_devices_active = 0;
+
+  if (backend_ctx->hip)
+  {
+    // device count
+
+    if (hc_hipDeviceGetCount (hashcat_ctx, &hip_devices_cnt) == -1)
+    {
+      hip_close (hashcat_ctx);
+    }
+
+    backend_ctx->hip_devices_cnt = hip_devices_cnt;
+
+    // device specific
+
+    for (int hip_devices_idx = 0; hip_devices_idx < hip_devices_cnt; hip_devices_idx++, backend_devices_idx++)
+    {
+      const u32 device_id = backend_devices_idx;
+
+      hc_device_param_t *device_param = &devices_param[backend_devices_idx];
+
+      device_param->device_id = device_id;
+
+      backend_ctx->backend_device_from_hip[hip_devices_idx] = backend_devices_idx;
+
+      HIPdevice hip_device;
+
+      if (hc_hipDeviceGet (hashcat_ctx, &hip_device, hip_devices_idx) == -1)
+      {
+        device_param->skipped = true;
+        continue;
+      }
+
+      device_param->hip_device = hip_device;
+
+      device_param->is_cuda   = false;
+      device_param->is_hip    = true;
+      device_param->is_opencl = false;
+
+      device_param->use_opencl12 = false;
+      device_param->use_opencl20 = false;
+      device_param->use_opencl21 = false;
+
+      // device_name
+
+      char *device_name = (char *) hcmalloc (HCBUFSIZ_TINY);
+
+      if (hc_hipDeviceGetName (hashcat_ctx, device_name, HCBUFSIZ_TINY, hip_device) == -1)
+      {
+        device_param->skipped = true;
+        hcfree (device_name);
+        continue;
+      }
+
+      device_param->device_name = device_name;
+
+      hc_string_trim_leading (device_name);
+
+      hc_string_trim_trailing (device_name);
+
+      // device_processors
+
+      int device_processors = 0;
+
+      if (hc_hipDeviceGetAttribute (hashcat_ctx, &device_processors, HIP_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, hip_device) == -1)
+      {
+        device_param->skipped = true;
+        continue;
+      }
+
+      device_param->device_processors = device_processors;
+
+      // device_global_mem, device_maxmem_alloc, device_available_mem
+
+      size_t bytes = 0;
+
+      if (hc_hipDeviceTotalMem (hashcat_ctx, &bytes, hip_device) == -1)
+      {
+        device_param->skipped = true;
+        continue;
+      }
+
+      device_param->device_global_mem = (u64) bytes;
+
+      device_param->device_maxmem_alloc = (u64) bytes;
+
+      device_param->device_available_mem = 0;
+
+      // warp size
+
+      int hip_warp_size = 0;
+
+      if (hc_hipDeviceGetAttribute (hashcat_ctx, &hip_warp_size, HIP_DEVICE_ATTRIBUTE_WARP_SIZE, hip_device) == -1)
+      {
+        device_param->skipped = true;
+        continue;
+      }
+
+      device_param->hip_warp_size = hip_warp_size;
+
+      // sm_minor, sm_major
+
+      int sm_major = 0;
+      int sm_minor = 0;
+
+      if (hc_hipDeviceGetAttribute (hashcat_ctx, &sm_major, HIP_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, hip_device) == -1)
+      {
+        device_param->skipped = true;
+        continue;
+      }
+
+      if (hc_hipDeviceGetAttribute (hashcat_ctx, &sm_minor, HIP_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, hip_device) == -1)
+      {
+        device_param->skipped = true;
+        continue;
+      }
+
+      device_param->sm_major = sm_major;
+      device_param->sm_minor = sm_minor;
+
+      // device_maxworkgroup_size
+
+      int device_maxworkgroup_size = 0;
+
+      if (hc_hipDeviceGetAttribute (hashcat_ctx, &device_maxworkgroup_size, HIP_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, hip_device) == -1)
+      {
+        device_param->skipped = true;
+        continue;
+      }
+
+      device_param->device_maxworkgroup_size = device_maxworkgroup_size;
+
+      // max_clock_frequency
+
+      int device_maxclock_frequency = 0;
+
+      if (hc_hipDeviceGetAttribute (hashcat_ctx, &device_maxclock_frequency, HIP_DEVICE_ATTRIBUTE_CLOCK_RATE, hip_device) == -1)
+      {
+        device_param->skipped = true;
+        continue;
+      }
+
+      device_param->device_maxclock_frequency = device_maxclock_frequency / 1000;
+
+      // pcie_bus, pcie_device, pcie_function
+
+      int pci_domain_id_nv  = 0;
+      int pci_bus_id_nv     = 0;
+      int pci_slot_id_nv    = 0;
+
+      if (hc_hipDeviceGetAttribute (hashcat_ctx, &pci_domain_id_nv, HIP_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID, hip_device) == -1)
+      {
+        device_param->skipped = true;
+        continue;
+      }
+
+      if (hc_hipDeviceGetAttribute (hashcat_ctx, &pci_bus_id_nv, HIP_DEVICE_ATTRIBUTE_PCI_BUS_ID, hip_device) == -1)
+      {
+        device_param->skipped = true;
+        continue;
+      }
+
+      if (hc_hipDeviceGetAttribute (hashcat_ctx, &pci_slot_id_nv, HIP_DEVICE_ATTRIBUTE_PCI_DEVICE_ID, hip_device) == -1)
+      {
+        device_param->skipped = true;
+        continue;
+      }
+
+      device_param->pcie_domain   = (u8) (pci_domain_id_nv);
+      device_param->pcie_bus      = (u8) (pci_bus_id_nv);
+      device_param->pcie_device   = (u8) (pci_slot_id_nv >> 3);
+      device_param->pcie_function = (u8) (pci_slot_id_nv & 7);
+
+      // kernel_exec_timeout
+
+      int kernel_exec_timeout = 0;
+
+      if (hc_hipDeviceGetAttribute (hashcat_ctx, &kernel_exec_timeout, HIP_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT, hip_device) == -1)
+      {
+        device_param->skipped = true;
+        continue;
+      }
+
+      device_param->kernel_exec_timeout = kernel_exec_timeout;
+
+      // max_shared_memory_per_block
+
+      int max_shared_memory_per_block = 0;
+
+      if (hc_hipDeviceGetAttribute (hashcat_ctx, &max_shared_memory_per_block, HIP_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK, hip_device) == -1)
+      {
+        device_param->skipped = true;
+        continue;
+      }
+
+      if (max_shared_memory_per_block < 32768)
+      {
+        event_log_error (hashcat_ctx, "* Device #%u: This device's shared buffer size is too small.", device_id + 1);
+
+        device_param->skipped = true;
+      }
+
+      device_param->device_local_mem_size = max_shared_memory_per_block;
+
+      // device_max_constant_buffer_size
+
+      int device_max_constant_buffer_size = 0;
+
+      if (hc_hipDeviceGetAttribute (hashcat_ctx, &device_max_constant_buffer_size, HIP_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY, hip_device) == -1)
+      {
+        device_param->skipped = true;
+        continue;
+      }
+
+      // TODO: broken on HIP? The value queried above is discarded and forced to 64 KiB on the next statement
+
+      device_max_constant_buffer_size = 65536;
+
+      if (device_max_constant_buffer_size < 65536)
+      {
+        event_log_error (hashcat_ctx, "* Device #%u: This device's local mem size is too small.", device_id + 1);
+
+        device_param->skipped = true;
+      }
+
+      // some attributes have to be hardcoded values because they are used for instance in the build options
+
+      device_param->device_local_mem_type     = CL_LOCAL;
+      device_param->opencl_device_type        = CL_DEVICE_TYPE_GPU;
+      device_param->opencl_device_vendor_id   = VENDOR_ID_AMD_USE_HIP;
+      device_param->opencl_platform_vendor_id = VENDOR_ID_AMD_USE_HIP;
+
+      // or in the cached kernel checksum
+
+      device_param->opencl_device_version     = "";
+      device_param->opencl_driver_version     = "";
+
+      // or just to make sure they are not NULL
+
+      device_param->opencl_device_vendor     = "";
+      device_param->opencl_device_c_version  = "";
+
+      // skipped
+
+      if ((backend_ctx->backend_devices_filter & (1ULL << device_id)) == 0)
+      {
+        device_param->skipped = true;
+      }
+
+      #if !defined (__APPLE__)
+      if ((backend_ctx->opencl_device_types_filter & CL_DEVICE_TYPE_GPU) == 0)
+      {
+        device_param->skipped = true;
+      }
+      #endif
+
+      if ((device_param->opencl_platform_vendor_id == VENDOR_ID_AMD_USE_HIP) && (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP))
+      {
+         need_adl = true;
+
+         #if defined (__linux__)
+         need_sysfs_amdgpu = true;
+         #endif
+      }
+
+      // CPU burning loop damper
+      // Value is given as number between 0-100
+      // By default 8%
+      // in theory not needed with HIP
+
+      device_param->spin_damp = (double) user_options->spin_damp / 100;
+
+      // common driver check
+
+      if (device_param->skipped == false)
+      {
+        if ((user_options->force == false) && (user_options->backend_info == false))
+        {
+          // NOTE(review): inherited from the CUDA path ("HIPDA" presumably meant "CUDA"): it cannot query the nvidia driver version, therefore no driver checks here
+          // IF needed, could be retrieved using nvmlSystemGetDriverVersion()
+
+          if (device_param->sm_major < 5)
+          {
+            if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: This hardware has outdated HIPDA compute capability (%u.%u).", device_id + 1, device_param->sm_major, device_param->sm_minor);
+            if (user_options->quiet == false) event_log_warning (hashcat_ctx, "             For modern OpenCL performance, upgrade to hardware that supports");
+            if (user_options->quiet == false) event_log_warning (hashcat_ctx, "             HIPDA compute capability version 5.0 (Maxwell) or higher.");
+          }
+
+          if (device_param->kernel_exec_timeout != 0)
+          {
+            if (user_options->quiet == false) event_log_advice (hashcat_ctx, "* Device #%u: WARNING! Kernel exec timeout is not disabled.", device_id + 1);
+            if (user_options->quiet == false) event_log_advice (hashcat_ctx, "             This may cause \"CL_OUT_OF_RESOURCES\" or related errors.");
+            if (user_options->quiet == false) event_log_advice (hashcat_ctx, "             To disable the timeout, see: https://hashcat.net/q/timeoutpatch");
+          }
+        }
+
+        // activate device moved below, at end
+      }
+
+      // instruction set
+
+      // bcrypt optimization?
+      //const int rc_cuCtxSetCacheConfig = hc_hipCtxSetCacheConfig (hashcat_ctx, HIP_FUNC_CACHE_PREFER_SHARED);
+      //
+      //if (rc_cuCtxSetCacheConfig == -1) return -1;
+
+      // const int sm = (device_param->sm_major * 10) + device_param->sm_minor;
+
+      device_param->has_add   = false;
+      device_param->has_addc  = false;
+      device_param->has_sub   = false;
+      device_param->has_subc  = false;
+      device_param->has_bfe   = false;
+      device_param->has_lop3  = false;
+      device_param->has_mov64 = false;
+      device_param->has_prmt  = false;
+
+      // device_available_mem
+
+      HIPcontext hip_context;
+
+      if (hc_hipCtxCreate (hashcat_ctx, &hip_context, HIP_CTX_SCHED_BLOCKING_SYNC, device_param->hip_device) == -1)
+      {
+        device_param->skipped = true;
+        continue;
+      }
+
+      if (hc_hipCtxPushCurrent (hashcat_ctx, hip_context) == -1)
+      {
+        device_param->skipped = true;
+        continue;
+      }
+
+      size_t free  = 0;
+      size_t total = 0;
+
+      if (hc_hipMemGetInfo (hashcat_ctx, &free, &total) == -1)
+      {
+        device_param->skipped = true;
+        continue;
+      }
+
+      device_param->device_available_mem = (u64) free;
+
+      if (hc_hipCtxPopCurrent (hashcat_ctx, &hip_context) == -1)
+      {
+        device_param->skipped = true;
+        continue;
+      }
+
+      if (hc_hipCtxDestroy (hashcat_ctx, hip_context) == -1)
+      {
+        device_param->skipped = true;
+        continue;
+      }
+
+      /**
+       * activate device
+       */
+
+      if (device_param->skipped == false) hip_devices_active++;
+    }
+  }
+
+  backend_ctx->hip_devices_cnt     = hip_devices_cnt;
+  backend_ctx->hip_devices_active  = hip_devices_active;
+
+  // OCL
+
   int opencl_devices_cnt    = 0;
   int opencl_devices_active = 0;
 
@@ -6087,8 +8431,8 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
         //device_param->opencl_platform = opencl_platform;
 
-        device_param->is_cuda = false;
-
+        device_param->is_cuda   = false;
+        device_param->is_hip    = false;
         device_param->is_opencl = true;
 
         // store opencl platform i
@@ -6829,7 +9173,6 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
             device_param->spin_damp = (double) user_options->spin_damp / 100;
 
-
             if (user_options->stdout_flag == false)
             {
               // recommend CUDA
@@ -7065,12 +9408,12 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
   // all devices combined go into backend_* variables
 
-  backend_ctx->backend_devices_cnt    = cuda_devices_cnt    + opencl_devices_cnt;
-  backend_ctx->backend_devices_active = cuda_devices_active + opencl_devices_active;
+  backend_ctx->backend_devices_cnt    = cuda_devices_cnt    + hip_devices_cnt    + opencl_devices_cnt;
+  backend_ctx->backend_devices_active = cuda_devices_active + hip_devices_active + opencl_devices_active;
 
   // find duplicate devices
 
-  //if ((cuda_devices_cnt > 0) && (opencl_devices_cnt > 0))
+  //if ((cuda_devices_cnt > 0) && (hip_devices_cnt > 0) && (opencl_devices_cnt > 0))
   //{
     // using force here enables both devices, which is the worst possible outcome
     // many users force by default, so this is not a good idea
@@ -7171,6 +9514,31 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
     */
   }
 
+  if (backend_ctx->hip)
+  {
+    // TODO HIP?
+    // Maybe all devices supported by hip have these instructions guaranteed?
+
+    for (int backend_devices_cnt = 0; backend_devices_cnt < backend_ctx->backend_devices_cnt; backend_devices_cnt++)
+    {
+      hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_cnt];
+
+      if (device_param->is_hip == false) continue;
+
+      device_param->has_vadd     = true;
+      device_param->has_vaddc    = true;
+      device_param->has_vadd_co  = true;
+      device_param->has_vaddc_co = true;
+      device_param->has_vsub     = true;
+      device_param->has_vsubb    = true;
+      device_param->has_vsub_co  = true;
+      device_param->has_vsubb_co = true;
+      device_param->has_vadd3    = true;
+      device_param->has_vbfe     = true;
+      device_param->has_vperm    = true;
+    }
+  }
+
   if (backend_ctx->ocl)
   {
     for (int backend_devices_cnt = 0; backend_devices_cnt < backend_ctx->backend_devices_cnt; backend_devices_cnt++)
@@ -7449,6 +9817,8 @@ void backend_ctx_devices_destroy (hashcat_ctx_t *hashcat_ctx)
   backend_ctx->backend_devices_active = 0;
   backend_ctx->cuda_devices_cnt       = 0;
   backend_ctx->cuda_devices_active    = 0;
+  backend_ctx->hip_devices_cnt        = 0;
+  backend_ctx->hip_devices_active     = 0;
   backend_ctx->opencl_devices_cnt     = 0;
   backend_ctx->opencl_devices_active  = 0;
 
@@ -7657,6 +10027,66 @@ static int get_cuda_kernel_dynamic_local_mem_size (hashcat_ctx_t *hashcat_ctx, C
   return 0;
 }
 
+static int get_hip_kernel_wgs (hashcat_ctx_t *hashcat_ctx, HIPfunction function, u32 *result)
+{
+  // Reads HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK of `function`; *result is written only on success (0), -1 on failure.
+  int threads_limit;
+
+  const int rc = hc_hipFuncGetAttribute (hashcat_ctx, &threads_limit, HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, function);
+
+  if (rc != -1) *result = (u32) threads_limit;
+  return (rc == -1) ? -1 : 0;
+}
+
+static int get_hip_kernel_local_mem_size (hashcat_ctx_t *hashcat_ctx, HIPfunction function, u64 *result)
+{
+  // Reads HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES of `function`; *result is written only on success (0), -1 on failure.
+  int local_bytes;
+
+  const int rc = hc_hipFuncGetAttribute (hashcat_ctx, &local_bytes, HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, function);
+
+  if (rc != -1) *result = (u64) local_bytes;
+  return (rc == -1) ? -1 : 0;
+}
+
+/*
+not supported because there's no cuFuncSetAttribute equivalent
+
+static int get_hip_kernel_dynamic_local_mem_size (hashcat_ctx_t *hashcat_ctx, HIPfunction function, u64 *result)
+{
+  // AFAIK there's no way to query the maximum value for dynamic shared memory available (because it depends on kernel code).
+  // let's brute force it, therefore workaround the hashcat wrapper of cuFuncSetAttribute()
+
+  #define MAX_ASSUMED_SHARED (1024 * 1024)
+
+  u64 dynamic_shared_size_bytes = 0;
+
+  for (int i = 1; i <= MAX_ASSUMED_SHARED; i++)
+  {
+    backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
+
+    HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
+
+    const HIPresult HIP_err = hip->hipFuncSetAttribute (function, HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES, i);
+
+    if (HIP_err == HIP_SUCCESS)
+    {
+      dynamic_shared_size_bytes = i;
+
+      continue;
+    }
+
+    break;
+  }
+
+  *result = dynamic_shared_size_bytes;
+
+  if (hc_hipFuncSetAttribute (hashcat_ctx, function, HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES, 0) == -1) return -1;
+
+  return 0;
+}
+*/
+
 static int get_opencl_kernel_wgs (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_kernel kernel, u32 *result)
 {
   size_t work_group_size = 0;
@@ -7762,6 +10192,12 @@ static u32 get_kernel_threads (const hc_device_param_t *device_param)
         gpu_prefered_thread_count = 32;
       }
 
+      kernel_threads_max = MIN (kernel_threads_max, gpu_prefered_thread_count);
+    }
+    else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+    {
+      u32 gpu_prefered_thread_count = 64;
+
       kernel_threads_max = MIN (kernel_threads_max, gpu_prefered_thread_count);
     }
   }
@@ -7774,7 +10210,7 @@ static u32 get_kernel_threads (const hc_device_param_t *device_param)
   return kernel_threads;
 }
 
-static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const char *kernel_name, char *source_file, char *cached_file, const char *build_options_buf, const bool cache_disable, cl_program *opencl_program, CUmodule *cuda_module)
+static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const char *kernel_name, char *source_file, char *cached_file, const char *build_options_buf, const bool cache_disable, cl_program *opencl_program, CUmodule *cuda_module, HIPmodule *hip_module)
 {
   const hashconfig_t    *hashconfig    = hashcat_ctx->hashconfig;
   const folder_config_t *folder_config = hashcat_ctx->folder_config;
@@ -8053,6 +10489,246 @@ static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_p
       hcfree (binary);
     }
 
+    if (device_param->is_hip == true)
+    {
+      hiprtcProgram program;
+
+      if (hc_hiprtcCreateProgram (hashcat_ctx, &program, kernel_sources[0], kernel_name, 0, NULL, NULL) == -1) return false;
+
+      char **hiprtc_options = (char **) hccalloc (6 + strlen (build_options_buf) + 1, sizeof (char *)); // ...
+
+      //hiprtc_options[0] = "--restrict";
+      //hiprtc_options[1] = "--device-as-default-execution-space";
+      //hiprtc_options[2] = "--gpu-architecture";
+
+      //hc_asprintf (&hiprtc_options[3], "compute_%d%d", device_param->sm_major, device_param->sm_minor);
+
+      hiprtc_options[0] = "--gpu-max-threads-per-block=64";
+      hiprtc_options[1] = "";
+      hiprtc_options[2] = "";
+      hiprtc_options[3] = "";
+
+      hiprtc_options[4] = "-I";
+      hiprtc_options[5] = folder_config->cpath_real;
+
+      char *hiprtc_options_string = hcstrdup (build_options_buf);
+
+      const int num_options = 6 + hiprtc_make_options_array_from_string (hiprtc_options_string, hiprtc_options + 6);
+
+      const int rc_hiprtcCompileProgram = hc_hiprtcCompileProgram (hashcat_ctx, program, num_options, (const char * const *) hiprtc_options);
+
+      hcfree (hiprtc_options_string);
+      hcfree (hiprtc_options);
+
+      size_t build_log_size = 0;
+
+      hc_hiprtcGetProgramLogSize (hashcat_ctx, program, &build_log_size);
+
+      #if defined (DEBUG)
+      if ((build_log_size > 1) || (rc_hiprtcCompileProgram == -1))
+      #else
+      if (rc_hiprtcCompileProgram == -1)
+      #endif
+      {
+        char *build_log = (char *) hcmalloc (build_log_size + 1);
+
+        if (hc_hiprtcGetProgramLog (hashcat_ctx, program, build_log) == -1)
+        {
+          hcfree (build_log);
+
+          return false;
+        }
+
+        build_log[build_log_size] = 0;
+
+        puts (build_log);
+
+        hcfree (build_log);
+      }
+
+      if (rc_hiprtcCompileProgram == -1)
+      {
+        event_log_error (hashcat_ctx, "* Device #%u: Kernel %s build failed.", device_param->device_id + 1, source_file);
+
+        return false;
+      }
+
+      size_t binary_size = 0;
+
+      if (hc_hiprtcGetCodeSize (hashcat_ctx, program, &binary_size) == -1) return false;
+
+      char *binary = (char *) hcmalloc (binary_size);
+
+      if (hc_hiprtcGetCode (hashcat_ctx, program, binary) == -1) return false;
+
+      if (hc_hiprtcDestroyProgram (hashcat_ctx, &program) == -1) return false;
+
+      #define LOG_SIZE 8192
+
+      char *mod_info_log  = (char *) hcmalloc (LOG_SIZE + 1);
+      char *mod_error_log = (char *) hcmalloc (LOG_SIZE + 1);
+
+      int mod_cnt = 6;
+
+      HIPjit_option mod_opts[7];
+      void *mod_vals[7];
+
+      mod_opts[0] = HIP_JIT_TARGET_FROM_HIPCONTEXT;
+      mod_vals[0] = (void *) 0;
+
+      mod_opts[1] = HIP_JIT_LOG_VERBOSE;
+      mod_vals[1] = (void *) 1;
+
+      mod_opts[2] = HIP_JIT_INFO_LOG_BUFFER;
+      mod_vals[2] = (void *) mod_info_log;
+
+      mod_opts[3] = HIP_JIT_INFO_LOG_BUFFER_SIZE_BYTES;
+      mod_vals[3] = (void *) LOG_SIZE;
+
+      mod_opts[4] = HIP_JIT_ERROR_LOG_BUFFER;
+      mod_vals[4] = (void *) mod_error_log;
+
+      mod_opts[5] = HIP_JIT_ERROR_LOG_BUFFER_SIZE_BYTES;
+      mod_vals[5] = (void *) LOG_SIZE;
+
+      if (hashconfig->opti_type & OPTI_TYPE_REGISTER_LIMIT)
+      {
+        mod_opts[6] = HIP_JIT_MAX_REGISTERS;
+        mod_vals[6] = (void *) 128;
+
+        mod_cnt++;
+      }
+
+      #if defined (WITH_HIPBIN)
+
+      char *jit_info_log  = (char *) hcmalloc (LOG_SIZE + 1);
+      char *jit_error_log = (char *) hcmalloc (LOG_SIZE + 1);
+
+      int jit_cnt = 6;
+
+      HIPjit_option jit_opts[7];
+      void *jit_vals[7];
+
+      jit_opts[0] = HIP_JIT_TARGET_FROM_HIPCONTEXT;
+      jit_vals[0] = (void *) 0;
+
+      jit_opts[1] = HIP_JIT_LOG_VERBOSE;
+      jit_vals[1] = (void *) 1;
+
+      jit_opts[2] = HIP_JIT_INFO_LOG_BUFFER;
+      jit_vals[2] = (void *) jit_info_log;
+
+      jit_opts[3] = HIP_JIT_INFO_LOG_BUFFER_SIZE_BYTES;
+      jit_vals[3] = (void *) LOG_SIZE;
+
+      jit_opts[4] = HIP_JIT_ERROR_LOG_BUFFER;
+      jit_vals[4] = (void *) jit_error_log;
+
+      jit_opts[5] = HIP_JIT_ERROR_LOG_BUFFER_SIZE_BYTES;
+      jit_vals[5] = (void *) LOG_SIZE;
+
+      if (hashconfig->opti_type & OPTI_TYPE_REGISTER_LIMIT)
+      {
+        jit_opts[6] = HIP_JIT_MAX_REGISTERS;
+        jit_vals[6] = (void *) 128;
+
+        jit_cnt++;
+      }
+
+      HIPlinkState state;
+
+      if (hc_cuLinkCreate (hashcat_ctx, jit_cnt, jit_opts, jit_vals, &state) == -1)
+      {
+        event_log_error (hashcat_ctx, "* Device #%u: Kernel %s link failed. Error Log:", device_param->device_id + 1, source_file);
+        event_log_error (hashcat_ctx, "%s", jit_error_log);
+        event_log_error (hashcat_ctx, NULL);
+
+        return false;
+      }
+
+      if (hc_cuLinkAddData (hashcat_ctx, state, HIP_JIT_INPUT_CODE, binary, binary_size, kernel_name, 0, NULL, NULL) == -1)
+      {
+        event_log_error (hashcat_ctx, "* Device #%u: Kernel %s link failed. Error Log:", device_param->device_id + 1, source_file);
+        event_log_error (hashcat_ctx, "%s", jit_error_log);
+        event_log_error (hashcat_ctx, NULL);
+
+        return false;
+      }
+
+      void *cubin = NULL;
+
+      size_t cubin_size = 0;
+
+      if (hc_cuLinkComplete (hashcat_ctx, state, &cubin, &cubin_size) == -1)
+      {
+        event_log_error (hashcat_ctx, "* Device #%u: Kernel %s link failed. Error Log:", device_param->device_id + 1, source_file);
+        event_log_error (hashcat_ctx, "%s", jit_error_log);
+        event_log_error (hashcat_ctx, NULL);
+
+        return false;
+      }
+
+      #if defined (DEBUG)
+      event_log_info (hashcat_ctx, "* Device #%u: Kernel %s link successful. Info Log:", device_param->device_id + 1, source_file);
+      event_log_info (hashcat_ctx, "%s", jit_info_log);
+      event_log_info (hashcat_ctx, NULL);
+      #endif
+
+      if (hc_cuModuleLoadDataEx (hashcat_ctx, hip_module, cubin, mod_cnt, mod_opts, mod_vals) == -1)
+      {
+        event_log_error (hashcat_ctx, "* Device #%u: Kernel %s load failed. Error Log:", device_param->device_id + 1, source_file);
+        event_log_error (hashcat_ctx, "%s", mod_error_log);
+        event_log_error (hashcat_ctx, NULL);
+
+        return false;
+      }
+
+      #if defined (DEBUG)
+      event_log_info (hashcat_ctx, "* Device #%u: Kernel %s load successful. Info Log:", device_param->device_id + 1, source_file);
+      event_log_info (hashcat_ctx, "%s", mod_info_log);
+      event_log_info (hashcat_ctx, NULL);
+      #endif
+
+      if (cache_disable == false)
+      {
+        if (write_kernel_binary (hashcat_ctx, cached_file, cubin, cubin_size) == false) return false;
+      }
+
+      if (hc_hipLinkDestroy (hashcat_ctx, state) == -1) return false;
+
+      hcfree (jit_info_log);
+      hcfree (jit_error_log);
+
+      #else
+
+      if (hc_hipModuleLoadDataEx (hashcat_ctx, hip_module, binary, mod_cnt, mod_opts, mod_vals) == -1)
+      {
+        event_log_error (hashcat_ctx, "* Device #%u: Kernel %s load failed. Error Log:", device_param->device_id + 1, source_file);
+        event_log_error (hashcat_ctx, "%s", mod_error_log);
+        event_log_error (hashcat_ctx, NULL);
+
+        return false;
+      }
+
+      #if defined (DEBUG)
+      event_log_info (hashcat_ctx, "* Device #%u: Kernel %s load successful. Info Log:", device_param->device_id + 1, source_file);
+      event_log_info (hashcat_ctx, "%s", mod_info_log);
+      event_log_info (hashcat_ctx, NULL);
+      #endif
+
+      if (cache_disable == false)
+      {
+        if (write_kernel_binary (hashcat_ctx, cached_file, binary, binary_size) == false) return false;
+      }
+
+      #endif
+
+      hcfree (mod_info_log);
+      hcfree (mod_error_log);
+
+      hcfree (binary);
+    }
+
     if (device_param->is_opencl == true)
     {
       size_t build_log_size = 0;
@@ -8185,6 +10861,63 @@ static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_p
       hcfree (mod_error_log);
     }
 
+    if (device_param->is_hip == true)
+    {
+      #define LOG_SIZE 8192
+
+      char *mod_info_log  = (char *) hcmalloc (LOG_SIZE + 1);
+      char *mod_error_log = (char *) hcmalloc (LOG_SIZE + 1);
+
+      int mod_cnt = 6;
+
+      HIPjit_option mod_opts[7];
+      void *mod_vals[7];
+
+      mod_opts[0] = HIP_JIT_TARGET_FROM_HIPCONTEXT;
+      mod_vals[0] = (void *) 0;
+
+      mod_opts[1] = HIP_JIT_LOG_VERBOSE;
+      mod_vals[1] = (void *) 1;
+
+      mod_opts[2] = HIP_JIT_INFO_LOG_BUFFER;
+      mod_vals[2] = (void *) mod_info_log;
+
+      mod_opts[3] = HIP_JIT_INFO_LOG_BUFFER_SIZE_BYTES;
+      mod_vals[3] = (void *) LOG_SIZE;
+
+      mod_opts[4] = HIP_JIT_ERROR_LOG_BUFFER;
+      mod_vals[4] = (void *) mod_error_log;
+
+      mod_opts[5] = HIP_JIT_ERROR_LOG_BUFFER_SIZE_BYTES;
+      mod_vals[5] = (void *) LOG_SIZE;
+
+      if (hashconfig->opti_type & OPTI_TYPE_REGISTER_LIMIT)
+      {
+        mod_opts[6] = HIP_JIT_MAX_REGISTERS;
+        mod_vals[6] = (void *) 128;
+
+        mod_cnt++;
+      }
+
+      if (hc_hipModuleLoadDataEx (hashcat_ctx, hip_module, kernel_sources[0], mod_cnt, mod_opts, mod_vals) == -1)
+      {
+        event_log_error (hashcat_ctx, "* Device #%u: Kernel %s load failed. Error Log:", device_param->device_id + 1, source_file);
+        event_log_error (hashcat_ctx, "%s", mod_error_log);
+        event_log_error (hashcat_ctx, NULL);
+
+        return false;
+      }
+
+      #if defined (DEBUG)
+      event_log_info (hashcat_ctx, "* Device #%u: Kernel %s load successful. Info Log:", device_param->device_id + 1, source_file);
+      event_log_info (hashcat_ctx, "%s", mod_info_log);
+      event_log_info (hashcat_ctx, NULL);
+      #endif
+
+      hcfree (mod_info_log);
+      hcfree (mod_error_log);
+    }
+
     if (device_param->is_opencl == true)
     {
       if (hc_clCreateProgramWithBinary (hashcat_ctx, device_param->opencl_context, 1, &device_param->opencl_device, kernel_lengths, (const unsigned char **) kernel_sources, NULL, opencl_program) == -1) return false;
@@ -8243,7 +10976,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       if ((unstable_warning == true) && (user_options->force == false))
       {
         event_log_warning (hashcat_ctx, "* Device #%u: Skipping hash-mode %u)", device_id + 1, hashconfig->hash_mode);
-        event_log_warning (hashcat_ctx, "             This is due to a known CUDA/OpenCL runtime/driver issue (not a hashcat issue)");
+        event_log_warning (hashcat_ctx, "             This is due to a known CUDA/HIP/OpenCL runtime/driver issue (not a hashcat issue)");
         event_log_warning (hashcat_ctx, "             You can use --force to override, but do not report related errors.");
 
         device_param->skipped_warning = true;
@@ -8282,6 +11015,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
             vector_width = 1;
           }
 
+          if (device_param->is_hip == true)
+          {
+            // hip does not support this query
+
+            vector_width = 1;
+          }
+
           if (device_param->is_opencl == true)
           {
             if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, sizeof (vector_width), &vector_width, NULL) == -1)
@@ -8300,6 +11040,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
             vector_width = 1;
           }
 
+          if (device_param->is_hip == true)
+          {
+            // hip does not support this query
+
+            vector_width = 1;
+          }
+
           if (device_param->is_opencl == true)
           {
             if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT,  sizeof (vector_width), &vector_width, NULL) == -1)
@@ -8513,6 +11260,10 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
         {
           native_threads = 64;
         }
+        else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+        {
+          native_threads = 64;
+        }
         else
         {
           native_threads = 32;
@@ -8545,6 +11296,27 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
         device_param->skipped = true;
         continue;
       }
+
+      if (hc_cuCtxPushCurrent (hashcat_ctx, device_param->cuda_context) == -1)
+      {
+        device_param->skipped = true;
+        continue;
+      }
+    }
+
+    if (device_param->is_hip == true)
+    {
+      if (hc_hipCtxCreate (hashcat_ctx, &device_param->hip_context, HIP_CTX_SCHED_BLOCKING_SYNC, device_param->hip_device) == -1)
+      {
+        device_param->skipped = true;
+        continue;
+      }
+
+      if (hc_hipCtxPushCurrent (hashcat_ctx, device_param->hip_context) == -1)
+      {
+        device_param->skipped = true;
+        continue;
+      }
     }
 
     if (device_param->is_opencl == true)
@@ -8592,6 +11364,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       }
     }
 
+    /**
+     * create stream for HIP devices
+     */
+
+    if (device_param->is_hip == true)
+    {
+      if (hc_hipStreamCreate (hashcat_ctx, &device_param->hip_stream, HIP_STREAM_DEFAULT) == -1)
+      {
+        device_param->skipped = true;
+        continue;
+      }
+    }
+
     /**
      * create events for CUDA devices
      */
@@ -8611,6 +11396,25 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       }
     }
 
+    /**
+     * create events for HIP devices
+     */
+
+    if (device_param->is_hip == true)
+    {
+      if (hc_hipEventCreate (hashcat_ctx, &device_param->hip_event1, HIP_EVENT_BLOCKING_SYNC) == -1)
+      {
+        device_param->skipped = true;
+        continue;
+      }
+
+      if (hc_hipEventCreate (hashcat_ctx, &device_param->hip_event2, HIP_EVENT_BLOCKING_SYNC) == -1)
+      {
+        device_param->skipped = true;
+        continue;
+      }
+    }
+
     /**
      * create input buffers on device : calculate size of fixed memory buffers
      */
@@ -8726,7 +11530,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
     int build_options_len = 0;
 
-    if (device_param->is_cuda == true)
+    if ((device_param->is_cuda == true) || (device_param->is_hip == true))
     {
       // using a path with a space will break nvrtc_make_options_array_from_string()
       // we add it to options array in a clean way later
@@ -8789,9 +11593,10 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
     char device_name_chksum_amp_mp[HCBUFSIZ_TINY] = { 0 };
 
-    const size_t dnclen_amp_mp = snprintf (device_name_chksum_amp_mp, HCBUFSIZ_TINY, "%d-%d-%d-%u-%s-%s-%s",
+    const size_t dnclen_amp_mp = snprintf (device_name_chksum_amp_mp, HCBUFSIZ_TINY, "%d-%d-%d-%d-%u-%s-%s-%s",
       backend_ctx->comptime,
       backend_ctx->cuda_driver_version,
+      backend_ctx->hip_driver_version,
       device_param->is_opencl,
       device_param->opencl_platform_vendor_id,
       device_param->device_name,
@@ -8861,7 +11666,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
       generate_cached_kernel_shared_filename (folder_config->cache_dir, device_name_chksum_amp_mp, cached_file);
 
-      const bool rc_load_kernel = load_kernel (hashcat_ctx, device_param, "shared_kernel", source_file, cached_file, build_options_buf, cache_disable, &device_param->opencl_program_shared, &device_param->cuda_module_shared);
+      const bool rc_load_kernel = load_kernel (hashcat_ctx, device_param, "shared_kernel", source_file, cached_file, build_options_buf, cache_disable, &device_param->opencl_program_shared, &device_param->cuda_module_shared, &device_param->hip_module_shared);
 
       if (rc_load_kernel == false)
       {
@@ -8928,6 +11733,64 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
         device_param->kernel_preferred_wgs_multiple_utf8toutf16le = device_param->cuda_warp_size;
       }
 
+      if (device_param->is_hip == true)
+      {
+        // GPU memset
+
+        if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_memset, device_param->hip_module_shared, "gpu_memset") == -1) return -1;
+
+        if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_memset, &device_param->kernel_wgs_memset) == -1) return -1;
+
+        if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_memset, &device_param->kernel_local_mem_size_memset) == -1) return -1;
+
+        //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_memset, &device_param->kernel_dynamic_local_mem_size_memset) == -1) return -1;
+
+        device_param->kernel_preferred_wgs_multiple_memset = device_param->hip_warp_size;
+
+        //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_memset, 0, sizeof (cl_mem),   device_param->kernel_params_memset[0]); if (CL_rc == -1) return -1;
+        //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_memset, 1, sizeof (cl_uint),  device_param->kernel_params_memset[1]); if (CL_rc == -1) return -1;
+        //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_memset, 2, sizeof (cl_ulong), device_param->kernel_params_memset[2]); if (CL_rc == -1) return -1;
+
+        // GPU autotune init
+
+        if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_atinit, device_param->hip_module_shared, "gpu_atinit") == -1) return -1;
+
+        if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_atinit, &device_param->kernel_wgs_atinit) == -1) return -1;
+
+        if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_atinit, &device_param->kernel_local_mem_size_atinit) == -1) return -1;
+
+        //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_atinit, &device_param->kernel_dynamic_local_mem_size_atinit) == -1) return -1;
+
+        device_param->kernel_preferred_wgs_multiple_atinit = device_param->hip_warp_size;
+
+        // CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_atinit, 0, sizeof (cl_mem),   device_param->kernel_params_atinit[0]); if (CL_rc == -1) return -1;
+        // CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_atinit, 1, sizeof (cl_ulong), device_param->kernel_params_atinit[1]); if (CL_rc == -1) return -1;
+
+        // GPU decompress
+
+        if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_decompress, device_param->hip_module_shared, "gpu_decompress") == -1) return -1;
+
+        if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_decompress, &device_param->kernel_wgs_decompress) == -1) return -1;
+
+        if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_decompress, &device_param->kernel_local_mem_size_decompress) == -1) return -1;
+
+        //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_decompress, &device_param->kernel_dynamic_local_mem_size_decompress) == -1) return -1;
+
+        device_param->kernel_preferred_wgs_multiple_decompress = device_param->hip_warp_size;
+
+        // GPU utf8 to utf16le conversion
+
+        if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_utf8toutf16le, device_param->hip_module_shared, "gpu_utf8_to_utf16") == -1) return -1;
+
+        if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_utf8toutf16le, &device_param->kernel_wgs_utf8toutf16le) == -1) return -1;
+
+        if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_utf8toutf16le, &device_param->kernel_local_mem_size_utf8toutf16le) == -1) return -1;
+
+        //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_utf8toutf16le, &device_param->kernel_dynamic_local_mem_size_utf8toutf16le) == -1) return -1;
+
+        device_param->kernel_preferred_wgs_multiple_utf8toutf16le = device_param->hip_warp_size;
+      }
+
       if (device_param->is_opencl == true)
       {
         // GPU memset
@@ -9029,9 +11892,10 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
       const u32 extra_value = (user_options->attack_mode == ATTACK_MODE_ASSOCIATION) ? ATTACK_MODE_ASSOCIATION : ATTACK_MODE_NONE;
 
-      const size_t dnclen = snprintf (device_name_chksum, HCBUFSIZ_TINY, "%d-%d-%d-%u-%s-%s-%s-%d-%u-%u-%s",
+      const size_t dnclen = snprintf (device_name_chksum, HCBUFSIZ_TINY, "%d-%d-%d-%d-%u-%s-%s-%s-%d-%u-%u-%s",
         backend_ctx->comptime,
         backend_ctx->cuda_driver_version,
+        backend_ctx->hip_driver_version,
         device_param->is_opencl,
         device_param->opencl_platform_vendor_id,
         device_param->device_name,
@@ -9077,7 +11941,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
        * load kernel
        */
 
-      const bool rc_load_kernel = load_kernel (hashcat_ctx, device_param, "main_kernel", source_file, cached_file, build_options_module_buf, cache_disable, &device_param->opencl_program, &device_param->cuda_module);
+      const bool rc_load_kernel = load_kernel (hashcat_ctx, device_param, "main_kernel", source_file, cached_file, build_options_module_buf, cache_disable, &device_param->opencl_program, &device_param->cuda_module, &device_param->hip_module);
 
       if (rc_load_kernel == false)
       {
@@ -9123,7 +11987,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
         generate_cached_kernel_mp_filename (hashconfig->opti_type, hashconfig->opts_type, folder_config->cache_dir, device_name_chksum_amp_mp, cached_file);
 
-        const bool rc_load_kernel = load_kernel (hashcat_ctx, device_param, "mp_kernel", source_file, cached_file, build_options_buf, cache_disable, &device_param->opencl_program_mp, &device_param->cuda_module_mp);
+        const bool rc_load_kernel = load_kernel (hashcat_ctx, device_param, "mp_kernel", source_file, cached_file, build_options_buf, cache_disable, &device_param->opencl_program_mp, &device_param->cuda_module_mp, &device_param->hip_module_mp);
 
         if (rc_load_kernel == false)
         {
@@ -9172,7 +12036,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
         generate_cached_kernel_amp_filename (user_options_extra->attack_kern, folder_config->cache_dir, device_name_chksum_amp_mp, cached_file);
 
-        const bool rc_load_kernel = load_kernel (hashcat_ctx, device_param, "amp_kernel", source_file, cached_file, build_options_buf, cache_disable, &device_param->opencl_program_amp, &device_param->cuda_module_amp);
+        const bool rc_load_kernel = load_kernel (hashcat_ctx, device_param, "amp_kernel", source_file, cached_file, build_options_buf, cache_disable, &device_param->opencl_program_amp, &device_param->cuda_module_amp, &device_param->hip_module_amp);
 
         if (rc_load_kernel == false)
         {
@@ -9354,6 +12218,116 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       }
     }
 
+    if (device_param->is_hip == true) // HIP backend: allocate and fill this device's buffers; the first failing call returns -1
+    {
+      if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bitmap_s1_a,    bitmap_ctx->bitmap_size) == -1) return -1;
+      if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bitmap_s1_b,    bitmap_ctx->bitmap_size) == -1) return -1;
+      if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bitmap_s1_c,    bitmap_ctx->bitmap_size) == -1) return -1;
+      if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bitmap_s1_d,    bitmap_ctx->bitmap_size) == -1) return -1;
+      if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bitmap_s2_a,    bitmap_ctx->bitmap_size) == -1) return -1;
+      if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bitmap_s2_b,    bitmap_ctx->bitmap_size) == -1) return -1;
+      if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bitmap_s2_c,    bitmap_ctx->bitmap_size) == -1) return -1;
+      if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bitmap_s2_d,    bitmap_ctx->bitmap_size) == -1) return -1;
+      if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_plain_bufs,     size_plains)             == -1) return -1;
+      if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_digests_buf,    size_digests)            == -1) return -1;
+      if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_digests_shown,  size_shown)              == -1) return -1;
+      if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_salt_bufs,      size_salts)              == -1) return -1;
+      if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_result,         size_results)            == -1) return -1;
+      if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_extra0_buf,     size_extra_buffer / 4)   == -1) return -1; // each of the four extra buffers gets a quarter of size_extra_buffer
+      if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_extra1_buf,     size_extra_buffer / 4)   == -1) return -1;
+      if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_extra2_buf,     size_extra_buffer / 4)   == -1) return -1;
+      if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_extra3_buf,     size_extra_buffer / 4)   == -1) return -1;
+      if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_st_digests_buf, size_st_digests)         == -1) return -1; // st_* buffers are allocated unconditionally, filled only under st_hash below
+      if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_st_salts_buf,   size_st_salts)           == -1) return -1;
+
+      if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_bitmap_s1_a, bitmap_ctx->bitmap_s1_a, bitmap_ctx->bitmap_size) == -1) return -1; // host -> device upload of bitmaps, digests and salts
+      if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_bitmap_s1_b, bitmap_ctx->bitmap_s1_b, bitmap_ctx->bitmap_size) == -1) return -1;
+      if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_bitmap_s1_c, bitmap_ctx->bitmap_s1_c, bitmap_ctx->bitmap_size) == -1) return -1;
+      if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_bitmap_s1_d, bitmap_ctx->bitmap_s1_d, bitmap_ctx->bitmap_size) == -1) return -1;
+      if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_bitmap_s2_a, bitmap_ctx->bitmap_s2_a, bitmap_ctx->bitmap_size) == -1) return -1;
+      if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_bitmap_s2_b, bitmap_ctx->bitmap_s2_b, bitmap_ctx->bitmap_size) == -1) return -1;
+      if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_bitmap_s2_c, bitmap_ctx->bitmap_s2_c, bitmap_ctx->bitmap_size) == -1) return -1;
+      if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_bitmap_s2_d, bitmap_ctx->bitmap_s2_d, bitmap_ctx->bitmap_size) == -1) return -1;
+      if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_digests_buf, hashes->digests_buf,     size_digests)            == -1) return -1;
+      if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_salt_bufs,   hashes->salts_buf,       size_salts)              == -1) return -1;
+
+      /**
+       * special buffers: which ones get allocated depends on slow_candidates and attack_kern
+       */
+
+      if (user_options->slow_candidates == true)
+      {
+        if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_rules_c, size_rules_c) == -1) return -1; // the only special buffer created in slow-candidates mode
+      }
+      else
+      {
+        if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT)
+        {
+          if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_rules,   size_rules) == -1) return -1;
+
+          if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
+          {
+            size_t dummy = 0; // out-parameter for hc_hipModuleGetGlobal (symbol size per the HIP API); never read here
+
+            if (hc_hipModuleGetGlobal (hashcat_ctx, &device_param->hip_d_rules_c, &dummy, device_param->hip_module, "generic_constant") == -1) return -1; // rules_c is the module's "generic_constant" symbol, not a separate allocation
+          }
+          else
+          {
+            if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_rules_c, size_rules_c) == -1) return -1;
+          }
+
+          if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_rules, straight_ctx->kernel_rules_buf, size_rules) == -1) return -1;
+        }
+        else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI)
+        {
+          if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_combs,          size_combs)      == -1) return -1;
+          if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_combs_c,        size_combs)      == -1) return -1;
+          if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_root_css_buf,   size_root_css)   == -1) return -1;
+          if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_markov_css_buf, size_markov_css) == -1) return -1;
+        }
+        else if (user_options_extra->attack_kern == ATTACK_KERN_BF)
+        {
+          if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bfs,            size_bfs)        == -1) return -1;
+          if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_root_css_buf,   size_root_css)   == -1) return -1;
+          if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_markov_css_buf, size_markov_css) == -1) return -1;
+
+          if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
+          {
+            size_t dummy = 0; // out-parameter for hc_hipModuleGetGlobal (symbol size per the HIP API); never read here
+
+            if (hc_hipModuleGetGlobal (hashcat_ctx, &device_param->hip_d_bfs_c, &dummy, device_param->hip_module, "generic_constant") == -1) return -1; // bfs_c is the module's "generic_constant" symbol, not a separate allocation
+
+            if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_tm_c,           size_tm)       == -1) return -1; // same hip_d_tm_c allocation as in the else branch
+          }
+          else
+          {
+            if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bfs_c,          size_bfs)      == -1) return -1;
+            if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_tm_c,           size_tm)       == -1) return -1;
+          }
+        }
+      }
+
+      if (size_esalts) // the esalt buffer is created and filled only when size_esalts is non-zero
+      {
+        if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_esalt_bufs, size_esalts) == -1) return -1;
+
+        if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_esalt_bufs, hashes->esalts_buf, size_esalts) == -1) return -1;
+      }
+
+      if (hashconfig->st_hash != NULL) // st_* buffers are uploaded only when hashconfig->st_hash is set
+      {
+        if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_st_digests_buf, hashes->st_digests_buf, size_st_digests) == -1) return -1;
+        if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_st_salts_buf,   hashes->st_salts_buf,   size_st_salts)   == -1) return -1;
+
+        if (size_esalts) // gate is size_esalts, while the allocation itself uses size_st_esalts
+        {
+          if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_st_esalts_buf, size_st_esalts) == -1) return -1;
+
+          if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_st_esalts_buf, hashes->st_esalts_buf, size_st_esalts) == -1) return -1;
+        }
+      }
+    }
+
     if (device_param->is_opencl == true)
     {
       if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   bitmap_ctx->bitmap_size, NULL, &device_param->opencl_d_bitmap_s1_a)    == -1) return -1;
@@ -9488,6 +12462,34 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       device_param->kernel_params[23] = &device_param->cuda_d_extra3_buf;
     }
 
+    if (device_param->is_hip == true) // HIP backend: each non-NULL slot stores the address of a hip_d_* handle field
+    {
+      device_param->kernel_params[ 0] = NULL; // &device_param->hip_d_pws_buf;
+      device_param->kernel_params[ 1] = &device_param->hip_d_rules_c;
+      device_param->kernel_params[ 2] = &device_param->hip_d_combs_c;
+      device_param->kernel_params[ 3] = &device_param->hip_d_bfs_c;
+      device_param->kernel_params[ 4] = NULL; // &device_param->hip_d_tmps;
+      device_param->kernel_params[ 5] = NULL; // &device_param->hip_d_hooks;
+      device_param->kernel_params[ 6] = &device_param->hip_d_bitmap_s1_a;
+      device_param->kernel_params[ 7] = &device_param->hip_d_bitmap_s1_b;
+      device_param->kernel_params[ 8] = &device_param->hip_d_bitmap_s1_c;
+      device_param->kernel_params[ 9] = &device_param->hip_d_bitmap_s1_d;
+      device_param->kernel_params[10] = &device_param->hip_d_bitmap_s2_a;
+      device_param->kernel_params[11] = &device_param->hip_d_bitmap_s2_b;
+      device_param->kernel_params[12] = &device_param->hip_d_bitmap_s2_c;
+      device_param->kernel_params[13] = &device_param->hip_d_bitmap_s2_d;
+      device_param->kernel_params[14] = &device_param->hip_d_plain_bufs;
+      device_param->kernel_params[15] = &device_param->hip_d_digests_buf;
+      device_param->kernel_params[16] = &device_param->hip_d_digests_shown;
+      device_param->kernel_params[17] = &device_param->hip_d_salt_bufs;
+      device_param->kernel_params[18] = &device_param->hip_d_esalt_bufs;
+      device_param->kernel_params[19] = &device_param->hip_d_result;
+      device_param->kernel_params[20] = &device_param->hip_d_extra0_buf;
+      device_param->kernel_params[21] = &device_param->hip_d_extra1_buf;
+      device_param->kernel_params[22] = &device_param->hip_d_extra2_buf;
+      device_param->kernel_params[23] = &device_param->hip_d_extra3_buf;
+    } // slots 0, 4 and 5 are left NULL by this block; the trailing comments name the fields they correspond to
+
     if (device_param->is_opencl == true)
     {
       device_param->kernel_params[ 0] = NULL; // &device_param->opencl_d_pws_buf;
@@ -9549,6 +12551,11 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
           device_param->kernel_params_mp[0] = &device_param->cuda_d_combs;
         }
 
+        if (device_param->is_hip == true) // HIP counterpart of the neighbouring CUDA/OpenCL branches
+        {
+          device_param->kernel_params_mp[0] = &device_param->hip_d_combs;
+        }
+
         if (device_param->is_opencl == true)
         {
           device_param->kernel_params_mp[0] = &device_param->opencl_d_combs;
@@ -9563,6 +12570,11 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
             device_param->kernel_params_mp[0] = &device_param->cuda_d_combs;
           }
 
+          if (device_param->is_hip == true) // HIP counterpart of the neighbouring CUDA/OpenCL branches
+          {
+            device_param->kernel_params_mp[0] = &device_param->hip_d_combs;
+          }
+
           if (device_param->is_opencl == true)
           {
             device_param->kernel_params_mp[0] = &device_param->opencl_d_combs;
@@ -9582,6 +12594,12 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
         device_param->kernel_params_mp[2] = &device_param->cuda_d_markov_css_buf;
       }
 
+      if (device_param->is_hip == true) // HIP: slots 1 and 2 take the addresses of the root and markov css handles
+      {
+        device_param->kernel_params_mp[1] = &device_param->hip_d_root_css_buf;
+        device_param->kernel_params_mp[2] = &device_param->hip_d_markov_css_buf;
+      }
+
       if (device_param->is_opencl == true)
       {
         device_param->kernel_params_mp[1] = &device_param->opencl_d_root_css_buf;
@@ -9606,12 +12624,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       device_param->kernel_params_mp_l[0] = NULL; // (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
                                                   // ? &device_param->opencl_d_pws_buf
                                                   // : &device_param->opencl_d_pws_amp_buf;
+
       if (device_param->is_cuda == true)
       {
         device_param->kernel_params_mp_l[1] = &device_param->cuda_d_root_css_buf;
         device_param->kernel_params_mp_l[2] = &device_param->cuda_d_markov_css_buf;
       }
 
+      if (device_param->is_hip == true) // HIP: only slots 1 and 2 are written here; slot 0 was set to NULL just before the backend branches
+      {
+        device_param->kernel_params_mp_l[1] = &device_param->hip_d_root_css_buf;
+        device_param->kernel_params_mp_l[2] = &device_param->hip_d_markov_css_buf;
+      }
+
       if (device_param->is_opencl == true)
       {
         device_param->kernel_params_mp_l[1] = &device_param->opencl_d_root_css_buf;
@@ -9640,6 +12665,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
         device_param->kernel_params_mp_r[2] = &device_param->cuda_d_markov_css_buf;
       }
 
+      if (device_param->is_hip == true) // HIP: slot 0 references the hip_d_bfs handle, slots 1 and 2 the css handles
+      {
+        device_param->kernel_params_mp_r[0] = &device_param->hip_d_bfs;
+        device_param->kernel_params_mp_r[1] = &device_param->hip_d_root_css_buf;
+        device_param->kernel_params_mp_r[2] = &device_param->hip_d_markov_css_buf;
+      }
+
       if (device_param->is_opencl == true)
       {
         device_param->kernel_params_mp_r[0] = &device_param->opencl_d_bfs;
@@ -9666,6 +12698,15 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
         device_param->kernel_params_amp[4] = &device_param->cuda_d_bfs_c;
       }
 
+      if (device_param->is_hip == true) // HIP: slots 0 and 1 stay NULL here; slots 2..4 reference the rules_c/combs_c/bfs_c handles
+      {
+        device_param->kernel_params_amp[0] = NULL; // &device_param->hip_d_pws_buf;
+        device_param->kernel_params_amp[1] = NULL; // &device_param->hip_d_pws_amp_buf;
+        device_param->kernel_params_amp[2] = &device_param->hip_d_rules_c;
+        device_param->kernel_params_amp[3] = &device_param->hip_d_combs_c;
+        device_param->kernel_params_amp[4] = &device_param->hip_d_bfs_c;
+      }
+
       if (device_param->is_opencl == true)
       {
         device_param->kernel_params_amp[0] = NULL; // &device_param->opencl_d_pws_buf;
@@ -9684,6 +12725,12 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
         device_param->kernel_params_tm[1] = &device_param->cuda_d_tm_c;
       }
 
+      if (device_param->is_hip == true) // HIP: slot 0 references the hip_d_bfs_c handle, slot 1 the hip_d_tm_c handle
+      {
+        device_param->kernel_params_tm[0] = &device_param->hip_d_bfs_c;
+        device_param->kernel_params_tm[1] = &device_param->hip_d_tm_c;
+      }
+
       if (device_param->is_opencl == true)
       {
         device_param->kernel_params_tm[0] = &device_param->opencl_d_bfs_c;
@@ -9719,6 +12766,15 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
                                                         // : &device_param->cuda_d_pws_amp_buf;
     }
 
+    if (device_param->is_hip == true) // HIP: all three decompress slots are initialised to NULL in this block
+    {
+      device_param->kernel_params_decompress[0] = NULL; // &device_param->hip_d_pws_idx;
+      device_param->kernel_params_decompress[1] = NULL; // &device_param->hip_d_pws_comp_buf;
+      device_param->kernel_params_decompress[2] = NULL; // (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
+                                                        // ? &device_param->hip_d_pws_buf
+                                                        // : &device_param->hip_d_pws_amp_buf;
+    }
+
     if (device_param->is_opencl == true)
     {
       device_param->kernel_params_decompress[0] = NULL; // &device_param->opencl_d_pws_idx;
@@ -10336,6 +13392,608 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       }
     }
 
+    if (device_param->is_hip == true)
+    {
+      char kernel_name[64] = { 0 };
+
+      if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
+      {
+        if (hashconfig->opti_type & OPTI_TYPE_SINGLE_HASH)
+        {
+          if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL)
+          {
+            // kernel1
+
+            snprintf (kernel_name, sizeof (kernel_name), "m%05u_s%02d", kern_type, 4);
+
+            if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function1, device_param->hip_module, kernel_name) == -1) return -1;
+
+            if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function1, &device_param->kernel_wgs1) == -1) return -1;
+
+            if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function1, &device_param->kernel_local_mem_size1) == -1) return -1;
+
+            //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function1, &device_param->kernel_dynamic_local_mem_size1) == -1) return -1;
+
+            device_param->kernel_preferred_wgs_multiple1 = device_param->hip_warp_size;
+
+            // kernel2
+
+            snprintf (kernel_name, sizeof (kernel_name), "m%05u_s%02d", kern_type, 8);
+
+            if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function2, device_param->hip_module, kernel_name) == -1) return -1;
+
+            if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function2, &device_param->kernel_wgs2) == -1) return -1;
+
+            if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function2, &device_param->kernel_local_mem_size2) == -1) return -1;
+
+            //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function2, &device_param->kernel_dynamic_local_mem_size2) == -1) return -1;
+
+            device_param->kernel_preferred_wgs_multiple2 = device_param->hip_warp_size;
+
+            // kernel3
+
+            snprintf (kernel_name, sizeof (kernel_name), "m%05u_s%02d", kern_type, 16);
+
+            if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function3, device_param->hip_module, kernel_name) == -1) return -1;
+
+            if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function3, &device_param->kernel_wgs3) == -1) return -1;
+
+            if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function3, &device_param->kernel_local_mem_size3) == -1) return -1;
+
+            //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function3, &device_param->kernel_dynamic_local_mem_size3) == -1) return -1;
+
+            device_param->kernel_preferred_wgs_multiple3 = device_param->hip_warp_size;
+          }
+          else
+          {
+            snprintf (kernel_name, sizeof (kernel_name), "m%05u_sxx", kern_type);
+
+            if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function4, device_param->hip_module, kernel_name) == -1) return -1;
+
+            if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function4, &device_param->kernel_wgs4) == -1) return -1;
+
+            if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function4, &device_param->kernel_local_mem_size4) == -1) return -1;
+
+            //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function4, &device_param->kernel_dynamic_local_mem_size4) == -1) return -1;
+
+            device_param->kernel_preferred_wgs_multiple4 = device_param->hip_warp_size;
+          }
+        }
+        else
+        {
+          if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL)
+          {
+            // kernel1
+
+            snprintf (kernel_name, sizeof (kernel_name), "m%05u_m%02d", kern_type, 4);
+
+            if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function1, device_param->hip_module, kernel_name) == -1) return -1;
+
+            if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function1, &device_param->kernel_wgs1) == -1) return -1;
+
+            if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function1, &device_param->kernel_local_mem_size1) == -1) return -1;
+
+            //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function1, &device_param->kernel_dynamic_local_mem_size1) == -1) return -1;
+
+            device_param->kernel_preferred_wgs_multiple1 = device_param->hip_warp_size;
+
+            // kernel2
+
+            snprintf (kernel_name, sizeof (kernel_name), "m%05u_m%02d", kern_type, 8);
+
+            if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function2, device_param->hip_module, kernel_name) == -1) return -1;
+
+            if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function2, &device_param->kernel_wgs2) == -1) return -1;
+
+            if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function2, &device_param->kernel_local_mem_size2) == -1) return -1;
+
+            //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function2, &device_param->kernel_dynamic_local_mem_size2) == -1) return -1;
+
+            device_param->kernel_preferred_wgs_multiple2 = device_param->hip_warp_size;
+
+            // kernel3
+
+            snprintf (kernel_name, sizeof (kernel_name), "m%05u_m%02d", kern_type, 16);
+
+            if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function3, device_param->hip_module, kernel_name) == -1) return -1;
+
+            if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function3, &device_param->kernel_wgs3) == -1) return -1;
+
+            if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function3, &device_param->kernel_local_mem_size3) == -1) return -1;
+
+            //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function3, &device_param->kernel_dynamic_local_mem_size3) == -1) return -1;
+
+            device_param->kernel_preferred_wgs_multiple3 = device_param->hip_warp_size;
+          }
+          else
+          {
+            snprintf (kernel_name, sizeof (kernel_name), "m%05u_mxx", kern_type);
+
+            if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function4, device_param->hip_module, kernel_name) == -1) return -1;
+
+            if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function4, &device_param->kernel_wgs4) == -1) return -1;
+
+            if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function4, &device_param->kernel_local_mem_size4) == -1) return -1;
+
+            //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function4, &device_param->kernel_dynamic_local_mem_size4) == -1) return -1;
+
+            device_param->kernel_preferred_wgs_multiple4 = device_param->hip_warp_size;
+          }
+        }
+
+        if (user_options->slow_candidates == true)
+        {
+        }
+        else
+        {
+          if (user_options->attack_mode == ATTACK_MODE_BF)
+          {
+            if (hashconfig->opts_type & OPTS_TYPE_TM_KERNEL)
+            {
+              snprintf (kernel_name, sizeof (kernel_name), "m%05u_tm", kern_type);
+
+              if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_tm, device_param->hip_module, kernel_name) == -1) return -1;
+
+              if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_tm, &device_param->kernel_wgs_tm) == -1) return -1;
+
+              if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_tm, &device_param->kernel_local_mem_size_tm) == -1) return -1;
+
+              //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_tm, &device_param->kernel_dynamic_local_mem_size_tm) == -1) return -1;
+
+              device_param->kernel_preferred_wgs_multiple_tm = device_param->hip_warp_size;
+            }
+          }
+        }
+      }
+      else
+      {
+        // kernel1
+
+        snprintf (kernel_name, sizeof (kernel_name), "m%05u_init", kern_type);
+
+        if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function1, device_param->hip_module, kernel_name) == -1) return -1;
+
+        if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function1, &device_param->kernel_wgs1) == -1) return -1;
+
+        if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function1, &device_param->kernel_local_mem_size1) == -1) return -1;
+
+        //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function1, &device_param->kernel_dynamic_local_mem_size1) == -1) return -1;
+
+        device_param->kernel_preferred_wgs_multiple1 = device_param->hip_warp_size;
+
+        // kernel2
+
+        snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop", kern_type);
+
+        if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function2, device_param->hip_module, kernel_name) == -1) return -1;
+
+        if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function2, &device_param->kernel_wgs2) == -1) return -1;
+
+        if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function2, &device_param->kernel_local_mem_size2) == -1) return -1;
+
+        //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function2, &device_param->kernel_dynamic_local_mem_size2) == -1) return -1;
+
+        device_param->kernel_preferred_wgs_multiple2 = device_param->hip_warp_size;
+
+        // kernel3
+
+        snprintf (kernel_name, sizeof (kernel_name), "m%05u_comp", kern_type);
+
+        if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function3, device_param->hip_module, kernel_name) == -1) return -1;
+
+        if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function3, &device_param->kernel_wgs3) == -1) return -1;
+
+        if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function3, &device_param->kernel_local_mem_size3) == -1) return -1;
+
+        //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function3, &device_param->kernel_dynamic_local_mem_size3) == -1) return -1;
+
+        device_param->kernel_preferred_wgs_multiple3 = device_param->hip_warp_size;
+
+        if (hashconfig->opts_type & OPTS_TYPE_LOOP_PREPARE)
+        {
+          // kernel2p
+
+          snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop_prepare", kern_type);
+
+          if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function2p, device_param->hip_module, kernel_name) == -1) return -1;
+
+          if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function2p, &device_param->kernel_wgs2p) == -1) return -1;
+
+          if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function2p, &device_param->kernel_local_mem_size2p) == -1) return -1;
+
+          //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function2p, &device_param->kernel_dynamic_local_mem_size2p) == -1) return -1;
+
+          device_param->kernel_preferred_wgs_multiple2p = device_param->hip_warp_size;
+        }
+
+        if (hashconfig->opts_type & OPTS_TYPE_LOOP_EXTENDED)
+        {
+          // kernel2e
+
+          snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop_extended", kern_type);
+
+          if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function2e, device_param->hip_module, kernel_name) == -1) return -1;
+
+          if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function2e, &device_param->kernel_wgs2e) == -1) return -1;
+
+          if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function2e, &device_param->kernel_local_mem_size2e) == -1) return -1;
+
+          //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function2e, &device_param->kernel_dynamic_local_mem_size2e) == -1) return -1;
+
+          device_param->kernel_preferred_wgs_multiple2e = device_param->hip_warp_size;
+        }
+
+        // kernel12
+
+        if (hashconfig->opts_type & OPTS_TYPE_HOOK12)
+        {
+          snprintf (kernel_name, sizeof (kernel_name), "m%05u_hook12", kern_type);
+
+          if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function12, device_param->hip_module, kernel_name) == -1) return -1;
+
+          if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function12, &device_param->kernel_wgs12) == -1) return -1;
+
+          if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function12, &device_param->kernel_local_mem_size12) == -1) return -1;
+
+          //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function12, &device_param->kernel_dynamic_local_mem_size12) == -1) return -1;
+
+          device_param->kernel_preferred_wgs_multiple12 = device_param->hip_warp_size;
+        }
+
+        // kernel23
+
+        if (hashconfig->opts_type & OPTS_TYPE_HOOK23)
+        {
+          snprintf (kernel_name, sizeof (kernel_name), "m%05u_hook23", kern_type);
+
+          if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function23, device_param->hip_module, kernel_name) == -1) return -1;
+
+          if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function23, &device_param->kernel_wgs23) == -1) return -1;
+
+          if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function23, &device_param->kernel_local_mem_size23) == -1) return -1;
+
+          //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function23, &device_param->kernel_dynamic_local_mem_size23) == -1) return -1;
+
+          device_param->kernel_preferred_wgs_multiple23 = device_param->hip_warp_size;
+        }
+
+        // init2
+
+        if (hashconfig->opts_type & OPTS_TYPE_INIT2)
+        {
+          snprintf (kernel_name, sizeof (kernel_name), "m%05u_init2", kern_type);
+
+          if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_init2, device_param->hip_module, kernel_name) == -1) return -1;
+
+          if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_init2, &device_param->kernel_wgs_init2) == -1) return -1;
+
+          if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_init2, &device_param->kernel_local_mem_size_init2) == -1) return -1;
+
+          //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_init2, &device_param->kernel_dynamic_local_mem_size_init2) == -1) return -1;
+
+          device_param->kernel_preferred_wgs_multiple_init2 = device_param->hip_warp_size;
+        }
+
+        // loop2 prepare
+
+        if (hashconfig->opts_type & OPTS_TYPE_LOOP2_PREPARE)
+        {
+          snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop2_prepare", kern_type);
+
+          if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_loop2p, device_param->hip_module, kernel_name) == -1) return -1;
+
+          if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_loop2p, &device_param->kernel_wgs_loop2p) == -1) return -1;
+
+          if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_loop2p, &device_param->kernel_local_mem_size_loop2p) == -1) return -1;
+
+          //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_loop2p, &device_param->kernel_dynamic_local_mem_size_loop2p) == -1) return -1;
+
+          device_param->kernel_preferred_wgs_multiple_loop2p = device_param->hip_warp_size;
+        }
+
+        // loop2
+
+        if (hashconfig->opts_type & OPTS_TYPE_LOOP2)
+        {
+          snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop2", kern_type);
+
+          if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_loop2, device_param->hip_module, kernel_name) == -1) return -1;
+
+          if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_loop2, &device_param->kernel_wgs_loop2) == -1) return -1;
+
+          if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_loop2, &device_param->kernel_local_mem_size_loop2) == -1) return -1;
+
+          //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_loop2, &device_param->kernel_dynamic_local_mem_size_loop2) == -1) return -1;
+
+          device_param->kernel_preferred_wgs_multiple_loop2 = device_param->hip_warp_size;
+        }
+
+        // aux1
+
+        if (hashconfig->opts_type & OPTS_TYPE_AUX1)
+        {
+          snprintf (kernel_name, sizeof (kernel_name), "m%05u_aux1", kern_type);
+
+          if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_aux1, device_param->hip_module, kernel_name) == -1) return -1;
+
+          if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_aux1, &device_param->kernel_wgs_aux1) == -1) return -1;
+
+          if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_aux1, &device_param->kernel_local_mem_size_aux1) == -1) return -1;
+
+          //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_aux1, &device_param->kernel_dynamic_local_mem_size_aux1) == -1) return -1;
+
+          device_param->kernel_preferred_wgs_multiple_aux1 = device_param->hip_warp_size;
+        }
+
+        // aux2
+
+        if (hashconfig->opts_type & OPTS_TYPE_AUX2)
+        {
+          snprintf (kernel_name, sizeof (kernel_name), "m%05u_aux2", kern_type);
+
+          if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_aux2, device_param->hip_module, kernel_name) == -1) return -1;
+
+          if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_aux2, &device_param->kernel_wgs_aux2) == -1) return -1;
+
+          if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_aux2, &device_param->kernel_local_mem_size_aux2) == -1) return -1;
+
+          //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_aux2, &device_param->kernel_dynamic_local_mem_size_aux2) == -1) return -1;
+
+          device_param->kernel_preferred_wgs_multiple_aux2 = device_param->hip_warp_size;
+        }
+
+        // aux3
+
+        if (hashconfig->opts_type & OPTS_TYPE_AUX3)
+        {
+          snprintf (kernel_name, sizeof (kernel_name), "m%05u_aux3", kern_type);
+
+          if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_aux3, device_param->hip_module, kernel_name) == -1) return -1;
+
+          if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_aux3, &device_param->kernel_wgs_aux3) == -1) return -1;
+
+          if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_aux3, &device_param->kernel_local_mem_size_aux3) == -1) return -1;
+
+          //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_aux3, &device_param->kernel_dynamic_local_mem_size_aux3) == -1) return -1;
+
+          device_param->kernel_preferred_wgs_multiple_aux3 = device_param->hip_warp_size;
+        }
+
+        // aux4
+
+        if (hashconfig->opts_type & OPTS_TYPE_AUX4)
+        {
+          snprintf (kernel_name, sizeof (kernel_name), "m%05u_aux4", kern_type);
+
+          if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_aux4, device_param->hip_module, kernel_name) == -1) return -1;
+
+          if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_aux4, &device_param->kernel_wgs_aux4) == -1) return -1;
+
+          if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_aux4, &device_param->kernel_local_mem_size_aux4) == -1) return -1;
+
+          //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_aux4, &device_param->kernel_dynamic_local_mem_size_aux4) == -1) return -1;
+
+          device_param->kernel_preferred_wgs_multiple_aux4 = device_param->hip_warp_size;
+        }
+      }
+
+      //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 0, sizeof (cl_mem),   device_param->kernel_params_decompress[0]); if (CL_rc == -1) return -1;
+      //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 1, sizeof (cl_mem),   device_param->kernel_params_decompress[1]); if (CL_rc == -1) return -1;
+      //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 2, sizeof (cl_mem),   device_param->kernel_params_decompress[2]); if (CL_rc == -1) return -1;
+      //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 3, sizeof (cl_ulong), device_param->kernel_params_decompress[3]); if (CL_rc == -1) return -1;
+
+      // MP start
+
+      if (user_options->slow_candidates == true)
+      {
+      }
+      else
+      {
+        if (user_options->attack_mode == ATTACK_MODE_BF)
+        {
+          // mp_l
+
+          if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_mp_l, device_param->hip_module_mp, "l_markov") == -1) return -1;
+
+          if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_mp_l, &device_param->kernel_wgs_mp_l) == -1) return -1;
+
+          if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_mp_l, &device_param->kernel_local_mem_size_mp_l) == -1) return -1;
+
+          //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_mp_l, &device_param->kernel_dynamic_local_mem_size_mp_l) == -1) return -1;
+
+          device_param->kernel_preferred_wgs_multiple_mp_l = device_param->hip_warp_size;
+
+          // mp_r
+
+          if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_mp_r, device_param->hip_module_mp, "r_markov") == -1) return -1;
+
+          if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_mp_r, &device_param->kernel_wgs_mp_r) == -1) return -1;
+
+          if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_mp_r, &device_param->kernel_local_mem_size_mp_r) == -1) return -1;
+
+          //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_mp_r, &device_param->kernel_dynamic_local_mem_size_mp_r) == -1) return -1;
+
+          device_param->kernel_preferred_wgs_multiple_mp_r = device_param->hip_warp_size;
+
+          if (user_options->attack_mode == ATTACK_MODE_BF)
+          {
+            if (hashconfig->opts_type & OPTS_TYPE_TM_KERNEL)
+            {
+              //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_tm, 0, sizeof (cl_mem), device_param->kernel_params_tm[0]); if (CL_rc == -1) return -1;
+              //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_tm, 1, sizeof (cl_mem), device_param->kernel_params_tm[1]); if (CL_rc == -1) return -1;
+            }
+          }
+        }
+        else if (user_options->attack_mode == ATTACK_MODE_HYBRID1)
+        {
+          if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_mp, device_param->hip_module_mp, "C_markov") == -1) return -1;
+
+          if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_mp, &device_param->kernel_wgs_mp) == -1) return -1;
+
+          if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_mp, &device_param->kernel_local_mem_size_mp) == -1) return -1;
+
+          //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_mp, &device_param->kernel_dynamic_local_mem_size_mp) == -1) return -1;
+
+          device_param->kernel_preferred_wgs_multiple_mp = device_param->hip_warp_size;
+        }
+        else if (user_options->attack_mode == ATTACK_MODE_HYBRID2)
+        {
+          if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_mp, device_param->hip_module_mp, "C_markov") == -1) return -1;
+
+          if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_mp, &device_param->kernel_wgs_mp) == -1) return -1;
+
+          if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_mp, &device_param->kernel_local_mem_size_mp) == -1) return -1;
+
+          //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_mp, &device_param->kernel_dynamic_local_mem_size_mp) == -1) return -1;
+
+          device_param->kernel_preferred_wgs_multiple_mp = device_param->hip_warp_size;
+        }
+      }
+
+      if (user_options->slow_candidates == true)
+      {
+      }
+      else
+      {
+        if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
+        {
+          // nothing to do
+        }
+        else
+        {
+          if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_amp, device_param->hip_module_amp, "amp") == -1) return -1;
+
+          if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_amp, &device_param->kernel_wgs_amp) == -1) return -1;
+
+          if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_amp, &device_param->kernel_local_mem_size_amp) == -1) return -1;
+
+          //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_amp, &device_param->kernel_dynamic_local_mem_size_amp) == -1) return -1;
+
+          device_param->kernel_preferred_wgs_multiple_amp = device_param->hip_warp_size;
+        }
+
+        /*
+        if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
+        {
+          // nothing to do
+        }
+        else
+        {
+          for (u32 i = 0; i < 5; i++)
+          {
+            //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_amp, i, sizeof (cl_mem), device_param->kernel_params_amp[i]);
+
+            //if (CL_rc == -1) return -1;
+          }
+
+          for (u32 i = 5; i < 6; i++)
+          {
+            //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_amp, i, sizeof (cl_uint), device_param->kernel_params_amp[i]);
+
+            //if (CL_rc == -1) return -1;
+          }
+
+          for (u32 i = 6; i < 7; i++)
+          {
+            //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_amp, i, sizeof (cl_ulong), device_param->kernel_params_amp[i]);
+
+            //if (CL_rc == -1) return -1;
+          }
+        }
+        */
+      }
+
+      // zero some data buffers
+
+      if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_plain_bufs,    device_param->size_plains)  == -1) return -1;
+      if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_digests_shown, device_param->size_shown)   == -1) return -1;
+      if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_result,        device_param->size_results) == -1) return -1;
+
+      /**
+       * special buffers
+       */
+
+      if (user_options->slow_candidates == true)
+      {
+        if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_rules_c, size_rules_c) == -1) return -1;
+      }
+      else
+      {
+        if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT)
+        {
+          if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_rules_c, size_rules_c) == -1) return -1;
+        }
+        else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI)
+        {
+          if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_combs,          size_combs)       == -1) return -1;
+          if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_combs_c,        size_combs)       == -1) return -1;
+          if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_root_css_buf,   size_root_css)    == -1) return -1;
+          if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_markov_css_buf, size_markov_css)  == -1) return -1;
+        }
+        else if (user_options_extra->attack_kern == ATTACK_KERN_BF)
+        {
+          if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_bfs,            size_bfs)         == -1) return -1;
+          if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_bfs_c,          size_bfs)         == -1) return -1;
+          if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_tm_c,           size_tm)          == -1) return -1;
+          if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_root_css_buf,   size_root_css)    == -1) return -1;
+          if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_markov_css_buf, size_markov_css)  == -1) return -1;
+        }
+      }
+
+      if (user_options->slow_candidates == true)
+      {
+      }
+      else
+      {
+        if ((user_options->attack_mode == ATTACK_MODE_HYBRID1) || (user_options->attack_mode == ATTACK_MODE_HYBRID2))
+        {
+          /**
+           * prepare mp
+           */
+
+          if (user_options->attack_mode == ATTACK_MODE_HYBRID1)
+          {
+            device_param->kernel_params_mp_buf32[5] = 0;
+            device_param->kernel_params_mp_buf32[6] = 0;
+            device_param->kernel_params_mp_buf32[7] = 0;
+
+            if (hashconfig->opts_type & OPTS_TYPE_PT_ADD01)     device_param->kernel_params_mp_buf32[5] = full01;
+            if (hashconfig->opts_type & OPTS_TYPE_PT_ADD06)     device_param->kernel_params_mp_buf32[5] = full06;
+            if (hashconfig->opts_type & OPTS_TYPE_PT_ADD80)     device_param->kernel_params_mp_buf32[5] = full80;
+            if (hashconfig->opts_type & OPTS_TYPE_PT_ADDBITS14) device_param->kernel_params_mp_buf32[6] = 1;
+            if (hashconfig->opts_type & OPTS_TYPE_PT_ADDBITS15) device_param->kernel_params_mp_buf32[7] = 1;
+          }
+          else if (user_options->attack_mode == ATTACK_MODE_HYBRID2)
+          {
+            device_param->kernel_params_mp_buf32[5] = 0;
+            device_param->kernel_params_mp_buf32[6] = 0;
+            device_param->kernel_params_mp_buf32[7] = 0;
+          }
+
+          //for (u32 i = 0; i < 3; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp, i, sizeof (cl_mem), device_param->kernel_params_mp[i]); if (CL_rc == -1) return -1; }
+        }
+        else if (user_options->attack_mode == ATTACK_MODE_BF)
+        {
+          /**
+           * prepare mp_r and mp_l
+           */
+
+          device_param->kernel_params_mp_l_buf32[6] = 0;
+          device_param->kernel_params_mp_l_buf32[7] = 0;
+          device_param->kernel_params_mp_l_buf32[8] = 0;
+
+          if (hashconfig->opts_type & OPTS_TYPE_PT_ADD01)     device_param->kernel_params_mp_l_buf32[6] = full01;
+          if (hashconfig->opts_type & OPTS_TYPE_PT_ADD06)     device_param->kernel_params_mp_l_buf32[6] = full06;
+          if (hashconfig->opts_type & OPTS_TYPE_PT_ADD80)     device_param->kernel_params_mp_l_buf32[6] = full80;
+          if (hashconfig->opts_type & OPTS_TYPE_PT_ADDBITS14) device_param->kernel_params_mp_l_buf32[7] = 1;
+          if (hashconfig->opts_type & OPTS_TYPE_PT_ADDBITS15) device_param->kernel_params_mp_l_buf32[8] = 1;
+
+          //for (u32 i = 0; i < 3; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_l, i, sizeof (cl_mem), device_param->kernel_params_mp_l[i]); if (CL_rc == -1) return -1; }
+          //for (u32 i = 0; i < 3; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_r, i, sizeof (cl_mem), device_param->kernel_params_mp_r[i]); if (CL_rc == -1) return -1; }
+        }
+      }
+    }
+
     if (device_param->is_opencl == true)
     {
       // GPU memset
@@ -10961,11 +14619,20 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
         }
         else
         {
+          device_param->kernel_threads_min = MIN (device_param->kernel_threads_min, 64);
           device_param->kernel_threads_max = MIN (device_param->kernel_threads_max, 64);
         }
       }
     }
 
+    // devices flagged VENDOR_ID_AMD_USE_HIP: limit kernel_threads_min/max to at most 64
+
+    if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+    {
+      device_param->kernel_threads_min = MIN (device_param->kernel_threads_min, 64);
+      device_param->kernel_threads_max = MIN (device_param->kernel_threads_max, 64);
+    }
+
     /**
      * now everything that depends on threads and accel, basically dynamic workload
      */
@@ -11208,6 +14875,23 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_hooks,         device_param->size_hooks)    == -1) return -1;
     }
 
+    if (device_param->is_hip == true)
+    {
+      if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_pws_buf,      size_pws)      == -1) return -1;
+      if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_pws_amp_buf,  size_pws_amp)  == -1) return -1;
+      if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_pws_comp_buf, size_pws_comp) == -1) return -1;
+      if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_pws_idx,      size_pws_idx)  == -1) return -1;
+      if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_tmps,         size_tmps)     == -1) return -1;
+      if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_hooks,        size_hooks)    == -1) return -1;
+
+      if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_pws_buf,       device_param->size_pws)      == -1) return -1;
+      if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_pws_amp_buf,   device_param->size_pws_amp)  == -1) return -1;
+      if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_pws_comp_buf,  device_param->size_pws_comp) == -1) return -1;
+      if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_pws_idx,       device_param->size_pws_idx)  == -1) return -1;
+      if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_tmps,          device_param->size_tmps)     == -1) return -1;
+      if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_hooks,         device_param->size_hooks)    == -1) return -1;
+    }
+
     if (device_param->is_opencl == true)
     {
       if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE,  size_pws,      NULL, &device_param->opencl_d_pws_buf)      == -1) return -1;
@@ -11279,6 +14963,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       device_param->kernel_params[ 5] = &device_param->cuda_d_hooks;
     }
 
+    if (device_param->is_hip == true)
+    {
+      device_param->kernel_params[ 0] = &device_param->hip_d_pws_buf;
+      device_param->kernel_params[ 4] = &device_param->hip_d_tmps;
+      device_param->kernel_params[ 5] = &device_param->hip_d_hooks;
+    }
+
     if (device_param->is_opencl == true)
     {
       device_param->kernel_params[ 0] = &device_param->opencl_d_pws_buf;
@@ -11308,6 +14999,15 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
             //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp, 0, sizeof (cl_mem), device_param->kernel_params_mp[0]); if (CL_rc == -1) return -1;
           }
 
+          if (device_param->is_hip == true)
+          {
+            device_param->kernel_params_mp[0] = (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
+                                              ? &device_param->hip_d_pws_buf
+                                              : &device_param->hip_d_pws_amp_buf;
+
+            //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp, 0, sizeof (cl_mem), device_param->kernel_params_mp[0]); if (CL_rc == -1) return -1;
+          }
+
           if (device_param->is_opencl == true)
           {
             device_param->kernel_params_mp[0] = (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
@@ -11330,6 +15030,15 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
           //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_l, 0, sizeof (cl_mem), device_param->kernel_params_mp_l[0]); if (CL_rc == -1) return -1;
         }
 
+        if (device_param->is_hip == true)
+        {
+          device_param->kernel_params_mp_l[0] = (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
+                                              ? &device_param->hip_d_pws_buf
+                                              : &device_param->hip_d_pws_amp_buf;
+
+          //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_l, 0, sizeof (cl_mem), device_param->kernel_params_mp_l[0]); if (CL_rc == -1) return -1;
+        }
+
         if (device_param->is_opencl == true)
         {
           device_param->kernel_params_mp_l[0] = (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
@@ -11355,6 +15064,15 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
           //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_amp, 1, sizeof (cl_mem), device_param->kernel_params_amp[1]); if (CL_rc == -1) return -1;
         }
 
+        if (device_param->is_hip == true)
+        {
+          device_param->kernel_params_amp[0] = &device_param->hip_d_pws_buf;
+          device_param->kernel_params_amp[1] = &device_param->hip_d_pws_amp_buf;
+
+          //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_amp, 0, sizeof (cl_mem), device_param->kernel_params_amp[0]); if (CL_rc == -1) return -1;
+          //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_amp, 1, sizeof (cl_mem), device_param->kernel_params_amp[1]); if (CL_rc == -1) return -1;
+        }
+
         if (device_param->is_opencl == true)
         {
           device_param->kernel_params_amp[0] = &device_param->opencl_d_pws_buf;
@@ -11379,6 +15097,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 2, sizeof (cl_mem), device_param->kernel_params_decompress[2]); if (CL_rc == -1) return -1;
     }
 
+    if (device_param->is_hip == true)
+    {
+      device_param->kernel_params_decompress[0] = &device_param->hip_d_pws_idx;
+      device_param->kernel_params_decompress[1] = &device_param->hip_d_pws_comp_buf;
+      device_param->kernel_params_decompress[2] = (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
+                                                ? &device_param->hip_d_pws_buf
+                                                : &device_param->hip_d_pws_amp_buf;
+
+      //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 0, sizeof (cl_mem), device_param->kernel_params_decompress[0]); if (CL_rc == -1) return -1;
+      //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 1, sizeof (cl_mem), device_param->kernel_params_decompress[1]); if (CL_rc == -1) return -1;
+      //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 2, sizeof (cl_mem), device_param->kernel_params_decompress[2]); if (CL_rc == -1) return -1;
+    }
+
     if (device_param->is_opencl == true)
     {
       device_param->kernel_params_decompress[0] = &device_param->opencl_d_pws_idx;
@@ -11392,6 +15123,26 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       if (hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 2, sizeof (cl_mem), device_param->kernel_params_decompress[2]) == -1) return -1;
     }
 
+    // context
+
+    if (device_param->is_cuda == true)
+    {
+      if (hc_cuCtxPopCurrent (hashcat_ctx, &device_param->cuda_context) == -1)
+      {
+        device_param->skipped = true;
+        continue;
+      }
+    }
+
+    if (device_param->is_hip == true)
+    {
+      if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1)
+      {
+        device_param->skipped = true;
+        continue;
+      }
+    }
+
     hardware_power_all += device_param->hardware_power;
 
     EVENT_DATA (EVENT_BACKEND_DEVICE_INIT_POST, &backend_devices_idx, sizeof (int));
@@ -11546,6 +15297,11 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx)
       device_param->cuda_function_aux3        = NULL;
       device_param->cuda_function_aux4        = NULL;
 
+      device_param->cuda_event1               = NULL;
+      device_param->cuda_event2               = NULL;
+
+      device_param->cuda_stream               = NULL;
+
       device_param->cuda_module               = NULL;
       device_param->cuda_module_mp            = NULL;
       device_param->cuda_module_amp           = NULL;
@@ -11554,6 +15310,132 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx)
       device_param->cuda_context              = NULL;
     }
 
+    if (device_param->is_hip == true)
+    {
+      if (device_param->hip_d_pws_buf)        hc_hipMemFree (hashcat_ctx, device_param->hip_d_pws_buf);
+      if (device_param->hip_d_pws_amp_buf)    hc_hipMemFree (hashcat_ctx, device_param->hip_d_pws_amp_buf);
+      if (device_param->hip_d_pws_comp_buf)   hc_hipMemFree (hashcat_ctx, device_param->hip_d_pws_comp_buf);
+      if (device_param->hip_d_pws_idx)        hc_hipMemFree (hashcat_ctx, device_param->hip_d_pws_idx);
+      if (device_param->hip_d_rules)          hc_hipMemFree (hashcat_ctx, device_param->hip_d_rules);
+      //if (device_param->hip_d_rules_c)        hc_hipMemFree (hashcat_ctx, device_param->hip_d_rules_c);
+      if (device_param->hip_d_combs)          hc_hipMemFree (hashcat_ctx, device_param->hip_d_combs);
+      if (device_param->hip_d_combs_c)        hc_hipMemFree (hashcat_ctx, device_param->hip_d_combs_c);
+      if (device_param->hip_d_bfs)            hc_hipMemFree (hashcat_ctx, device_param->hip_d_bfs);
+      //if (device_param->hip_d_bfs_c)          hc_hipMemFree (hashcat_ctx, device_param->hip_d_bfs_c);
+      if (device_param->hip_d_bitmap_s1_a)    hc_hipMemFree (hashcat_ctx, device_param->hip_d_bitmap_s1_a);
+      if (device_param->hip_d_bitmap_s1_b)    hc_hipMemFree (hashcat_ctx, device_param->hip_d_bitmap_s1_b);
+      if (device_param->hip_d_bitmap_s1_c)    hc_hipMemFree (hashcat_ctx, device_param->hip_d_bitmap_s1_c);
+      if (device_param->hip_d_bitmap_s1_d)    hc_hipMemFree (hashcat_ctx, device_param->hip_d_bitmap_s1_d);
+      if (device_param->hip_d_bitmap_s2_a)    hc_hipMemFree (hashcat_ctx, device_param->hip_d_bitmap_s2_a);
+      if (device_param->hip_d_bitmap_s2_b)    hc_hipMemFree (hashcat_ctx, device_param->hip_d_bitmap_s2_b);
+      if (device_param->hip_d_bitmap_s2_c)    hc_hipMemFree (hashcat_ctx, device_param->hip_d_bitmap_s2_c);
+      if (device_param->hip_d_bitmap_s2_d)    hc_hipMemFree (hashcat_ctx, device_param->hip_d_bitmap_s2_d);
+      if (device_param->hip_d_plain_bufs)     hc_hipMemFree (hashcat_ctx, device_param->hip_d_plain_bufs);
+      if (device_param->hip_d_digests_buf)    hc_hipMemFree (hashcat_ctx, device_param->hip_d_digests_buf);
+      if (device_param->hip_d_digests_shown)  hc_hipMemFree (hashcat_ctx, device_param->hip_d_digests_shown);
+      if (device_param->hip_d_salt_bufs)      hc_hipMemFree (hashcat_ctx, device_param->hip_d_salt_bufs);
+      if (device_param->hip_d_esalt_bufs)     hc_hipMemFree (hashcat_ctx, device_param->hip_d_esalt_bufs);
+      if (device_param->hip_d_tmps)           hc_hipMemFree (hashcat_ctx, device_param->hip_d_tmps);
+      if (device_param->hip_d_hooks)          hc_hipMemFree (hashcat_ctx, device_param->hip_d_hooks);
+      if (device_param->hip_d_result)         hc_hipMemFree (hashcat_ctx, device_param->hip_d_result);
+      if (device_param->hip_d_extra0_buf)     hc_hipMemFree (hashcat_ctx, device_param->hip_d_extra0_buf);
+      if (device_param->hip_d_extra1_buf)     hc_hipMemFree (hashcat_ctx, device_param->hip_d_extra1_buf);
+      if (device_param->hip_d_extra2_buf)     hc_hipMemFree (hashcat_ctx, device_param->hip_d_extra2_buf);
+      if (device_param->hip_d_extra3_buf)     hc_hipMemFree (hashcat_ctx, device_param->hip_d_extra3_buf);
+      if (device_param->hip_d_root_css_buf)   hc_hipMemFree (hashcat_ctx, device_param->hip_d_root_css_buf);
+      if (device_param->hip_d_markov_css_buf) hc_hipMemFree (hashcat_ctx, device_param->hip_d_markov_css_buf);
+      if (device_param->hip_d_tm_c)           hc_hipMemFree (hashcat_ctx, device_param->hip_d_tm_c);
+      if (device_param->hip_d_st_digests_buf) hc_hipMemFree (hashcat_ctx, device_param->hip_d_st_digests_buf);
+      if (device_param->hip_d_st_salts_buf)   hc_hipMemFree (hashcat_ctx, device_param->hip_d_st_salts_buf);
+      if (device_param->hip_d_st_esalts_buf)  hc_hipMemFree (hashcat_ctx, device_param->hip_d_st_esalts_buf);
+
+      if (device_param->hip_event1)           hc_hipEventDestroy (hashcat_ctx, device_param->hip_event1);
+      if (device_param->hip_event2)           hc_hipEventDestroy (hashcat_ctx, device_param->hip_event2);
+
+      if (device_param->hip_stream)           hc_hipStreamDestroy (hashcat_ctx, device_param->hip_stream);
+
+      if (device_param->hip_module)           hc_hipModuleUnload (hashcat_ctx, device_param->hip_module);
+      if (device_param->hip_module_mp)        hc_hipModuleUnload (hashcat_ctx, device_param->hip_module_mp);
+      if (device_param->hip_module_amp)       hc_hipModuleUnload (hashcat_ctx, device_param->hip_module_amp);
+      if (device_param->hip_module_shared)    hc_hipModuleUnload (hashcat_ctx, device_param->hip_module_shared);
+
+      if (device_param->hip_context)          hc_hipCtxDestroy (hashcat_ctx, device_param->hip_context);
+
+      device_param->hip_d_pws_buf            = 0;
+      device_param->hip_d_pws_amp_buf        = 0;
+      device_param->hip_d_pws_comp_buf       = 0;
+      device_param->hip_d_pws_idx            = 0;
+      device_param->hip_d_rules              = 0;
+      device_param->hip_d_rules_c            = 0;
+      device_param->hip_d_combs              = 0;
+      device_param->hip_d_combs_c            = 0;
+      device_param->hip_d_bfs                = 0;
+      device_param->hip_d_bfs_c              = 0;
+      device_param->hip_d_bitmap_s1_a        = 0;
+      device_param->hip_d_bitmap_s1_b        = 0;
+      device_param->hip_d_bitmap_s1_c        = 0;
+      device_param->hip_d_bitmap_s1_d        = 0;
+      device_param->hip_d_bitmap_s2_a        = 0;
+      device_param->hip_d_bitmap_s2_b        = 0;
+      device_param->hip_d_bitmap_s2_c        = 0;
+      device_param->hip_d_bitmap_s2_d        = 0;
+      device_param->hip_d_plain_bufs         = 0;
+      device_param->hip_d_digests_buf        = 0;
+      device_param->hip_d_digests_shown      = 0;
+      device_param->hip_d_salt_bufs          = 0;
+      device_param->hip_d_esalt_bufs         = 0;
+      device_param->hip_d_tmps               = 0;
+      device_param->hip_d_hooks              = 0;
+      device_param->hip_d_result             = 0;
+      device_param->hip_d_extra0_buf         = 0;
+      device_param->hip_d_extra1_buf         = 0;
+      device_param->hip_d_extra2_buf         = 0;
+      device_param->hip_d_extra3_buf         = 0;
+      device_param->hip_d_root_css_buf       = 0;
+      device_param->hip_d_markov_css_buf     = 0;
+      device_param->hip_d_tm_c               = 0;
+      device_param->hip_d_st_digests_buf     = 0;
+      device_param->hip_d_st_salts_buf       = 0;
+      device_param->hip_d_st_esalts_buf      = 0;
+
+      device_param->hip_function1            = NULL;
+      device_param->hip_function12           = NULL;
+      device_param->hip_function2p           = NULL;
+      device_param->hip_function2            = NULL;
+      device_param->hip_function2e           = NULL;
+      device_param->hip_function23           = NULL;
+      device_param->hip_function3            = NULL;
+      device_param->hip_function4            = NULL;
+      device_param->hip_function_init2       = NULL;
+      device_param->hip_function_loop2p      = NULL;
+      device_param->hip_function_loop2       = NULL;
+      device_param->hip_function_mp          = NULL;
+      device_param->hip_function_mp_l        = NULL;
+      device_param->hip_function_mp_r        = NULL;
+      device_param->hip_function_tm          = NULL;
+      device_param->hip_function_amp         = NULL;
+      device_param->hip_function_memset      = NULL;
+      device_param->hip_function_atinit      = NULL;
+      device_param->hip_function_utf8toutf16le = NULL;
+      device_param->hip_function_decompress  = NULL;
+      device_param->hip_function_aux1        = NULL;
+      device_param->hip_function_aux2        = NULL;
+      device_param->hip_function_aux3        = NULL;
+      device_param->hip_function_aux4        = NULL;
+
+      device_param->hip_event1               = NULL;
+      device_param->hip_event2               = NULL;
+
+      device_param->hip_stream               = NULL;
+
+      device_param->hip_module               = NULL;
+      device_param->hip_module_mp            = NULL;
+      device_param->hip_module_amp           = NULL;
+      device_param->hip_module_shared        = NULL;
+
+      device_param->hip_context              = NULL;
+    }
+
     if (device_param->is_opencl == true)
     {
       if (device_param->opencl_d_pws_buf)        hc_clReleaseMemObject (hashcat_ctx, device_param->opencl_d_pws_buf);
@@ -11849,6 +15731,15 @@ int backend_session_update_mp (hashcat_ctx_t *hashcat_ctx)
       if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_markov_css_buf, mask_ctx->markov_css_buf, device_param->size_markov_css) == -1) return -1;
     }
 
+    if (device_param->is_hip == true)
+    {
+      //for (u32 i = 3; i < 4; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp, i, sizeof (cl_ulong), device_param->kernel_params_mp[i]); if (CL_rc == -1) return -1; }
+      //for (u32 i = 4; i < 8; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp, i, sizeof (cl_uint),  device_param->kernel_params_mp[i]); if (CL_rc == -1) return -1; }
+
+      if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_root_css_buf,   mask_ctx->root_css_buf,   device_param->size_root_css)   == -1) return -1;
+      if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_markov_css_buf, mask_ctx->markov_css_buf, device_param->size_markov_css) == -1) return -1;
+    }
+
     if (device_param->is_opencl == true)
     {
       for (u32 i = 3; i < 4; i++) { if (hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp, i, sizeof (cl_ulong), device_param->kernel_params_mp[i]) == -1) return -1; }
@@ -11901,6 +15792,20 @@ int backend_session_update_mp_rl (hashcat_ctx_t *hashcat_ctx, const u32 css_cnt_
       if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_markov_css_buf, mask_ctx->markov_css_buf, device_param->size_markov_css) == -1) return -1;
     }
 
+    if (device_param->is_hip == true)
+    {
+      //for (u32 i = 3; i < 4; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_l, i, sizeof (cl_ulong), device_param->kernel_params_mp_l[i]); if (CL_rc == -1) return -1; }
+      //for (u32 i = 4; i < 8; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_l, i, sizeof (cl_uint),  device_param->kernel_params_mp_l[i]); if (CL_rc == -1) return -1; }
+      //for (u32 i = 9; i < 9; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_l, i, sizeof (cl_ulong), device_param->kernel_params_mp_l[i]); if (CL_rc == -1) return -1; }
+
+      //for (u32 i = 3; i < 4; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_r, i, sizeof (cl_ulong), device_param->kernel_params_mp_r[i]); if (CL_rc == -1) return -1; }
+      //for (u32 i = 4; i < 7; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_r, i, sizeof (cl_uint),  device_param->kernel_params_mp_r[i]); if (CL_rc == -1) return -1; }
+      //for (u32 i = 8; i < 8; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_r, i, sizeof (cl_ulong), device_param->kernel_params_mp_r[i]); if (CL_rc == -1) return -1; }
+
+      if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_root_css_buf,   mask_ctx->root_css_buf,   device_param->size_root_css)   == -1) return -1;
+      if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_markov_css_buf, mask_ctx->markov_css_buf, device_param->size_markov_css) == -1) return -1;
+    }
+
     if (device_param->is_opencl == true)
     {
       for (u32 i = 3; i < 4; i++) { if (hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_l, i, sizeof (cl_ulong), device_param->kernel_params_mp_l[i]) == -1) return -1; }
diff --git a/src/dispatch.c b/src/dispatch.c
index 25c40ea7f..337b25fc2 100644
--- a/src/dispatch.c
+++ b/src/dispatch.c
@@ -347,7 +347,12 @@ HC_API_CALL void *thread_calc_stdin (void *p)
 
   if (device_param->is_cuda == true)
   {
-    if (hc_cuCtxSetCurrent (hashcat_ctx, device_param->cuda_context) == -1) return NULL;
+    if (hc_cuCtxPushCurrent (hashcat_ctx, device_param->cuda_context) == -1) return NULL;
+  }
+
+  if (device_param->is_hip == true)
+  {
+    if (hc_hipCtxPushCurrent (hashcat_ctx, device_param->hip_context) == -1) return NULL;
   }
 
   if (calc_stdin (hashcat_ctx, device_param) == -1)
@@ -357,6 +362,16 @@ HC_API_CALL void *thread_calc_stdin (void *p)
     status_ctx->devices_status = STATUS_ERROR;
   }
 
+  if (device_param->is_cuda == true)
+  {
+    if (hc_cuCtxPopCurrent (hashcat_ctx, &device_param->cuda_context) == -1) return NULL;
+  }
+
+  if (device_param->is_hip == true)
+  {
+    if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1) return NULL;
+  }
+
   return NULL;
 }
 
@@ -1581,7 +1596,12 @@ HC_API_CALL void *thread_calc (void *p)
 
   if (device_param->is_cuda == true)
   {
-    if (hc_cuCtxSetCurrent (hashcat_ctx, device_param->cuda_context) == -1) return NULL;
+    if (hc_cuCtxPushCurrent (hashcat_ctx, device_param->cuda_context) == -1) return NULL;
+  }
+
+  if (device_param->is_hip == true)
+  {
+    if (hc_hipCtxPushCurrent (hashcat_ctx, device_param->hip_context) == -1) return NULL;
   }
 
   if (calc (hashcat_ctx, device_param) == -1)
@@ -1591,5 +1611,15 @@ HC_API_CALL void *thread_calc (void *p)
     status_ctx->devices_status = STATUS_ERROR;
   }
 
+  if (device_param->is_cuda == true)
+  {
+    if (hc_cuCtxPopCurrent (hashcat_ctx, &device_param->cuda_context) == -1) return NULL;
+  }
+
+  if (device_param->is_hip == true)
+  {
+    if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1) return NULL;
+  }
+
   return NULL;
 }
diff --git a/src/ext_hip.c b/src/ext_hip.c
new file mode 100644
index 000000000..72fb2fbfe
--- /dev/null
+++ b/src/ext_hip.c
@@ -0,0 +1,8 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+#include "common.h"
+#include "types.h"
+#include "ext_hip.h"
diff --git a/src/ext_hiprtc.c b/src/ext_hiprtc.c
new file mode 100644
index 000000000..1ec099ae7
--- /dev/null
+++ b/src/ext_hiprtc.c
@@ -0,0 +1,27 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+#include "common.h"
+#include "types.h"
+#include "ext_hiprtc.h"
+
+/* Split `string` in place on spaces; store each token pointer in `options` and return the token count (0 if none). */
+int hiprtc_make_options_array_from_string (char *string, char **options)
+{
+  char *saveptr = NULL;
+
+  int cnt = 0;
+
+  // strtok_r overwrites delimiters in `string` with NUL; `options` (sized by the caller) aliases that buffer.
+  for (char *next = strtok_r (string, " ", &saveptr); next != NULL; next = strtok_r (NULL, " ", &saveptr))
+  {
+    // Test before storing: an empty or all-space string must yield 0 options, not a NULL entry counted as 1.
+    options[cnt] = next;
+
+    cnt++;
+  }
+
+  return cnt;
+}
diff --git a/src/ext_sysfs_cpu.c b/src/ext_sysfs_cpu.c
index ddfd5d4d4..532d284a3 100644
--- a/src/ext_sysfs_cpu.c
+++ b/src/ext_sysfs_cpu.c
@@ -63,7 +63,7 @@ char *hm_SYSFS_CPU_get_syspath_hwmon ()
 
     if (hc_fopen_raw (&fp, path, "rb") == false) continue;
 
-    char buf[16];
+    char buf[32] = { 0 };
 
     const size_t line_len = fgetl (&fp, buf, sizeof (buf));
 
diff --git a/src/hashes.c b/src/hashes.c
index f1ee22334..27aa94370 100644
--- a/src/hashes.c
+++ b/src/hashes.c
@@ -322,6 +322,11 @@ void check_hash (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, pl
       hc_cuMemcpyDtoH (hashcat_ctx, tmps, device_param->cuda_d_tmps + (plain->gidvid * hashconfig->tmp_size), hashconfig->tmp_size);
     }
 
+    if (device_param->is_hip == true)
+    {
+      hc_hipMemcpyDtoH (hashcat_ctx, tmps, device_param->hip_d_tmps + (plain->gidvid * hashconfig->tmp_size), hashconfig->tmp_size);
+    }
+
     if (device_param->is_opencl == true)
     {
       hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_tmps, CL_TRUE, plain->gidvid * hashconfig->tmp_size, hashconfig->tmp_size, tmps, 0, NULL, NULL);
@@ -481,6 +486,7 @@ int check_cracked (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
   u32 num_cracked = 0;
 
   int CU_rc;
+  int HIP_rc;
   int CL_rc;
 
   if (device_param->is_cuda == true)
@@ -490,6 +496,13 @@ int check_cracked (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
     if (CU_rc == -1) return -1;
   }
 
+  if (device_param->is_hip == true)
+  {
+    HIP_rc = hc_hipMemcpyDtoH (hashcat_ctx, &num_cracked, device_param->hip_d_result, sizeof (u32));
+
+    if (HIP_rc == -1) return -1;
+  }
+
   if (device_param->is_opencl == true)
   {
     CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_result, CL_TRUE, 0, sizeof (u32), &num_cracked, 0, NULL, NULL);
@@ -516,6 +529,13 @@ int check_cracked (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
       if (CU_rc == -1) return -1;
     }
 
+    if (device_param->is_hip == true)
+    {
+      HIP_rc = hc_hipMemcpyDtoH (hashcat_ctx, cracked, device_param->hip_d_plain_bufs, num_cracked * sizeof (plain_t));
+
+      if (HIP_rc == -1) return -1;
+    }
+
     if (device_param->is_opencl == true)
     {
       CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_plain_bufs, CL_TRUE, 0, num_cracked * sizeof (plain_t), cracked, 0, NULL, NULL);
@@ -573,6 +593,13 @@ int check_cracked (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
           if (CU_rc == -1) return -1;
         }
 
+        if (device_param->is_hip == true)
+        {
+          HIP_rc = hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_digests_shown + (salt_buf->digests_offset * sizeof (u32)), &hashes->digests_shown_tmp[salt_buf->digests_offset], salt_buf->digests_cnt * sizeof (u32));
+
+          if (HIP_rc == -1) return -1;
+        }
+
         if (device_param->is_opencl == true)
         {
           CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_digests_shown, CL_TRUE, salt_buf->digests_offset * sizeof (u32), salt_buf->digests_cnt * sizeof (u32), &hashes->digests_shown_tmp[salt_buf->digests_offset], 0, NULL, NULL);
@@ -611,6 +638,13 @@ int check_cracked (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
       if (CU_rc == -1) return -1;
     }
 
+    if (device_param->is_hip == true)
+    {
+      HIP_rc = hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_result, &num_cracked, sizeof (u32));
+
+      if (HIP_rc == -1) return -1;
+    }
+
     if (device_param->is_opencl == true)
     {
       CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_result, CL_TRUE, 0, sizeof (u32), &num_cracked, 0, NULL, NULL);
diff --git a/src/hwmon.c b/src/hwmon.c
index bc95f9515..374056d07 100644
--- a/src/hwmon.c
+++ b/src/hwmon.c
@@ -95,11 +95,11 @@ int hm_get_threshold_slowdown_with_devices_idx (hashcat_ctx_t *hashcat_ctx, cons
     }
   }
 
-  if (backend_ctx->devices_param[backend_device_idx].is_opencl == true)
+  if ((backend_ctx->devices_param[backend_device_idx].is_opencl == true) || (backend_ctx->devices_param[backend_device_idx].is_hip == true))
   {
     if (backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_GPU)
     {
-      if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD)
+      if ((backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD) || (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP))
       {
         if (hwmon_ctx->hm_adl)
         {
@@ -176,11 +176,11 @@ int hm_get_threshold_shutdown_with_devices_idx (hashcat_ctx_t *hashcat_ctx, cons
     }
   }
 
-  if (backend_ctx->devices_param[backend_device_idx].is_opencl == true)
+  if ((backend_ctx->devices_param[backend_device_idx].is_opencl == true) || (backend_ctx->devices_param[backend_device_idx].is_hip == true))
   {
     if (backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_GPU)
     {
-      if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD)
+      if ((backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD) || (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP))
       {
         if (hwmon_ctx->hm_adl)
         {
@@ -245,7 +245,7 @@ int hm_get_temperature_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int b
     }
   }
 
-  if (backend_ctx->devices_param[backend_device_idx].is_opencl == true)
+  if ((backend_ctx->devices_param[backend_device_idx].is_opencl == true) || (backend_ctx->devices_param[backend_device_idx].is_hip == true))
   {
     if (backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_CPU)
     {
@@ -313,7 +313,7 @@ int hm_get_temperature_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int b
       }
       #endif
 
-      if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD)
+      if ((backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD) || (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP))
       {
         if (hwmon_ctx->hm_adl)
         {
@@ -401,11 +401,11 @@ int hm_get_fanpolicy_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int bac
     return 1;
   }
 
-  if (backend_ctx->devices_param[backend_device_idx].is_opencl == true)
+  if ((backend_ctx->devices_param[backend_device_idx].is_opencl == true) || (backend_ctx->devices_param[backend_device_idx].is_hip == true))
   {
     if (backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_GPU)
     {
-      if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD)
+      if ((backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD) || (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP))
       {
         if (hwmon_ctx->hm_adl)
         {
@@ -499,11 +499,11 @@ int hm_get_fanspeed_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int back
     }
   }
 
-  if (backend_ctx->devices_param[backend_device_idx].is_opencl == true)
+  if ((backend_ctx->devices_param[backend_device_idx].is_opencl == true) || (backend_ctx->devices_param[backend_device_idx].is_hip == true))
   {
     if (backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_GPU)
     {
-      if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD)
+      if ((backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD) || (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP))
       {
         if (hwmon_ctx->hm_adl)
         {
@@ -609,11 +609,11 @@ int hm_get_buslanes_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int back
     }
   }
 
-  if (backend_ctx->devices_param[backend_device_idx].is_opencl == true)
+  if ((backend_ctx->devices_param[backend_device_idx].is_opencl == true) || (backend_ctx->devices_param[backend_device_idx].is_hip == true))
   {
     if (backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_GPU)
     {
-      if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD)
+      if ((backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD) || (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP))
       {
         if (hwmon_ctx->hm_adl)
         {
@@ -696,11 +696,11 @@ int hm_get_utilization_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int b
     }
   }
 
-  if (backend_ctx->devices_param[backend_device_idx].is_opencl == true)
+  if ((backend_ctx->devices_param[backend_device_idx].is_opencl == true) || (backend_ctx->devices_param[backend_device_idx].is_hip == true))
   {
     if (backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_GPU)
     {
-      if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD)
+      if ((backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD) || (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP))
       {
         if (hwmon_ctx->hm_adl)
         {
@@ -800,11 +800,11 @@ int hm_get_memoryspeed_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int b
     }
   }
 
-  if (backend_ctx->devices_param[backend_device_idx].is_opencl == true)
+  if ((backend_ctx->devices_param[backend_device_idx].is_opencl == true) || (backend_ctx->devices_param[backend_device_idx].is_hip == true))
   {
     if (backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_GPU)
     {
-      if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD)
+      if ((backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD) || (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP))
       {
         if (hwmon_ctx->hm_adl)
         {
@@ -887,11 +887,11 @@ int hm_get_corespeed_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int bac
     }
   }
 
-  if (backend_ctx->devices_param[backend_device_idx].is_opencl == true)
+  if ((backend_ctx->devices_param[backend_device_idx].is_opencl == true) || (backend_ctx->devices_param[backend_device_idx].is_hip == true))
   {
     if (backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_GPU)
     {
-      if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD)
+      if ((backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD) || (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP))
       {
         if (hwmon_ctx->hm_adl)
         {
@@ -1003,11 +1003,11 @@ int hm_get_throttle_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int back
     }
   }
 
-  if (backend_ctx->devices_param[backend_device_idx].is_opencl == true)
+  if ((backend_ctx->devices_param[backend_device_idx].is_opencl == true) || (backend_ctx->devices_param[backend_device_idx].is_hip == true))
   {
     if (backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_GPU)
     {
-      if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD)
+      if ((backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD) || (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP))
       {
       }
 
@@ -1382,11 +1382,11 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
           // nothing to do
         }
 
-        if (device_param->is_opencl == true)
+        if ((device_param->is_opencl == true) || (device_param->is_hip == true))
         {
           if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) continue;
 
-          if (device_param->opencl_device_vendor_id != VENDOR_ID_AMD) continue;
+          if ((device_param->opencl_device_vendor_id != VENDOR_ID_AMD) && (device_param->opencl_device_vendor_id != VENDOR_ID_AMD_USE_HIP)) continue;
 
           for (int i = 0; i < tmp_in; i++)
           {
@@ -1438,7 +1438,7 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
           // nothing to do
         }
 
-        if (device_param->is_opencl == true)
+        if ((device_param->is_opencl == true) || (device_param->is_hip == true))
         {
           const u32 device_id = device_param->device_id;
 
@@ -1485,7 +1485,7 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
           // nothing to do
         }
 
-        if (device_param->is_opencl == true)
+        if ((device_param->is_opencl == true) || (device_param->is_hip == true))
         {
           const u32 device_id = device_param->device_id;
 
@@ -1594,7 +1594,7 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
       }
     }
 
-    if (device_param->is_opencl == true)
+    if ((device_param->is_opencl == true) || (device_param->is_hip == true))
     {
       if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU)
       {
@@ -1655,7 +1655,7 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
         }
         #endif
 
-        if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)
+        if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) || (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP))
         {
           hwmon_ctx->hm_device[backend_devices_idx].adl           = hm_adapters_adl[device_id].adl;
           hwmon_ctx->hm_device[backend_devices_idx].sysfs_amdgpu  = hm_adapters_sysfs_amdgpu[device_id].sysfs_amdgpu;
diff --git a/src/modules/module_01460.c b/src/modules/module_01460.c
index 6cb814ed2..f2952aa36 100644
--- a/src/modules/module_01460.c
+++ b/src/modules/module_01460.c
@@ -59,6 +59,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
     hc_asprintf (&jit_build_options, "-D _unroll");
   }
 
+  // HIP
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+  {
+    hc_asprintf (&jit_build_options, "-D _unroll");
+  }
+
   // ROCM
   if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
   {
diff --git a/src/modules/module_01500.c b/src/modules/module_01500.c
index 274d9541a..ea01dab96 100644
--- a/src/modules/module_01500.c
+++ b/src/modules/module_01500.c
@@ -179,6 +179,14 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
       hc_asprintf (&jit_build_options, "-DDESCRYPT_SALT=%u -D _unroll", hashes->salts_buf[0].salt_buf[0] & 0xfff);
     }
   }
+  // HIP
+  else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+  {
+    if ((user_options->attack_mode == ATTACK_MODE_BF) && (hashes->salts_cnt == 1) && (user_options->slow_candidates == false))
+    {
+      hc_asprintf (&jit_build_options, "-DDESCRYPT_SALT=%u -D _unroll", hashes->salts_buf[0].salt_buf[0] & 0xfff);
+    }
+  }
   else
   {
     if ((user_options->attack_mode == ATTACK_MODE_BF) && (hashes->salts_cnt == 1) && (user_options->slow_candidates == false))
diff --git a/src/modules/module_01700.c b/src/modules/module_01700.c
index 9a7f1d34f..04f2762c1 100644
--- a/src/modules/module_01700.c
+++ b/src/modules/module_01700.c
@@ -58,6 +58,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
     return jit_build_options;
   }
 
+  // HIP
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+  {
+    hc_asprintf (&jit_build_options, "-D _unroll");
+  }
+
   // ROCM
   if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
   {
diff --git a/src/modules/module_01720.c b/src/modules/module_01720.c
index 6833f4405..3fdc77653 100644
--- a/src/modules/module_01720.c
+++ b/src/modules/module_01720.c
@@ -58,6 +58,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
     return jit_build_options;
   }
 
+  // HIP
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+  {
+    hc_asprintf (&jit_build_options, "-D _unroll");
+  }
+
   // ROCM
   if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
   {
diff --git a/src/modules/module_01722.c b/src/modules/module_01722.c
index 4585e2dbb..3264c5f46 100644
--- a/src/modules/module_01722.c
+++ b/src/modules/module_01722.c
@@ -59,6 +59,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
     return jit_build_options;
   }
 
+  // HIP
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+  {
+    hc_asprintf (&jit_build_options, "-D _unroll");
+  }
+
   // ROCM
   if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
   {
diff --git a/src/modules/module_01800.c b/src/modules/module_01800.c
index 1cc1781c2..aefab6e3b 100644
--- a/src/modules/module_01800.c
+++ b/src/modules/module_01800.c
@@ -438,6 +438,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
     return jit_build_options;
   }
 
+  // HIP
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+  {
+    hc_asprintf (&jit_build_options, "-fno-unroll-loops");
+  }
+
   // ROCM
   if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
   {
diff --git a/src/modules/module_03000.c b/src/modules/module_03000.c
index a3373e09b..c9b616ab5 100644
--- a/src/modules/module_03000.c
+++ b/src/modules/module_03000.c
@@ -81,6 +81,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
     hc_asprintf (&jit_build_options, "-D _unroll");
   }
 
+  // HIP
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+  {
+    hc_asprintf (&jit_build_options, "-D _unroll");
+  }
+
   // ROCM
   if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
   {
diff --git a/src/modules/module_05200.c b/src/modules/module_05200.c
index 470411378..6fb3f08d8 100644
--- a/src/modules/module_05200.c
+++ b/src/modules/module_05200.c
@@ -81,6 +81,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
     hc_asprintf (&jit_build_options, "-D _unroll");
   }
 
+  // HIP
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+  {
+    hc_asprintf (&jit_build_options, "-D _unroll");
+  }
+
   // ROCM
   if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
   {
diff --git a/src/modules/module_06211.c b/src/modules/module_06211.c
index b7aa35874..5cb417d26 100644
--- a/src/modules/module_06211.c
+++ b/src/modules/module_06211.c
@@ -83,25 +83,6 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE
   return false;
 }
 
-char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param)
-{
-  char *jit_build_options = NULL;
-
-  // Extra treatment for Apple systems
-  if (device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE)
-  {
-    return jit_build_options;
-  }
-
-  // ROCM
-  if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
-  {
-    hc_asprintf (&jit_build_options, "-D _unroll");
-  }
-
-  return jit_build_options;
-}
-
 bool module_potfile_disable (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
 {
   const bool potfile_disable = true;
@@ -303,7 +284,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_hook23                   = MODULE_DEFAULT;
   module_ctx->module_hook_salt_size           = MODULE_DEFAULT;
   module_ctx->module_hook_size                = MODULE_DEFAULT;
-  module_ctx->module_jit_build_options        = module_jit_build_options;
+  module_ctx->module_jit_build_options        = MODULE_DEFAULT;
   module_ctx->module_jit_cache_disable        = MODULE_DEFAULT;
   module_ctx->module_kernel_accel_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
diff --git a/src/modules/module_06212.c b/src/modules/module_06212.c
index 9ac3487c7..ceb18f192 100644
--- a/src/modules/module_06212.c
+++ b/src/modules/module_06212.c
@@ -83,25 +83,6 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE
   return false;
 }
 
-char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param)
-{
-  char *jit_build_options = NULL;
-
-  // Extra treatment for Apple systems
-  if (device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE)
-  {
-    return jit_build_options;
-  }
-
-  // ROCM
-  if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
-  {
-    hc_asprintf (&jit_build_options, "-D _unroll");
-  }
-
-  return jit_build_options;
-}
-
 bool module_potfile_disable (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
 {
   const bool potfile_disable = true;
@@ -303,7 +284,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_hook23                   = MODULE_DEFAULT;
   module_ctx->module_hook_salt_size           = MODULE_DEFAULT;
   module_ctx->module_hook_size                = MODULE_DEFAULT;
-  module_ctx->module_jit_build_options        = module_jit_build_options;
+  module_ctx->module_jit_build_options        = MODULE_DEFAULT;
   module_ctx->module_jit_cache_disable        = MODULE_DEFAULT;
   module_ctx->module_kernel_accel_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
diff --git a/src/modules/module_06213.c b/src/modules/module_06213.c
index 04430ec31..20323fc62 100644
--- a/src/modules/module_06213.c
+++ b/src/modules/module_06213.c
@@ -83,25 +83,6 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE
   return false;
 }
 
-char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param)
-{
-  char *jit_build_options = NULL;
-
-  // Extra treatment for Apple systems
-  if (device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE)
-  {
-    return jit_build_options;
-  }
-
-  // ROCM
-  if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
-  {
-    hc_asprintf (&jit_build_options, "-D _unroll");
-  }
-
-  return jit_build_options;
-}
-
 bool module_potfile_disable (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
 {
   const bool potfile_disable = true;
@@ -301,7 +282,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_hook23                   = MODULE_DEFAULT;
   module_ctx->module_hook_salt_size           = MODULE_DEFAULT;
   module_ctx->module_hook_size                = MODULE_DEFAULT;
-  module_ctx->module_jit_build_options        = module_jit_build_options;
+  module_ctx->module_jit_build_options        = MODULE_DEFAULT;
   module_ctx->module_jit_cache_disable        = MODULE_DEFAULT;
   module_ctx->module_kernel_accel_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
diff --git a/src/modules/module_06800.c b/src/modules/module_06800.c
index 5a79ca8e7..0f25fa29d 100644
--- a/src/modules/module_06800.c
+++ b/src/modules/module_06800.c
@@ -78,6 +78,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
     hc_asprintf (&jit_build_options, "-D _unroll");
   }
 
+  // HIP
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+  {
+    hc_asprintf (&jit_build_options, "-D _unroll");
+  }
+
   // ROCM
   if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
   {
diff --git a/src/modules/module_07400.c b/src/modules/module_07400.c
index f8ebca33d..3be47f898 100644
--- a/src/modules/module_07400.c
+++ b/src/modules/module_07400.c
@@ -244,6 +244,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
     return jit_build_options;
   }
 
+  // HIP
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+  {
+    hc_asprintf (&jit_build_options, "-D _unroll");
+  }
+
   // ROCM
   if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
   {
diff --git a/src/modules/module_07401.c b/src/modules/module_07401.c
index b96318d8e..ba71bf179 100644
--- a/src/modules/module_07401.c
+++ b/src/modules/module_07401.c
@@ -245,6 +245,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
     return jit_build_options;
   }
 
+  // HIP
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+  {
+    hc_asprintf (&jit_build_options, "-D _unroll");
+  }
+
   // ROCM
   if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
   {
diff --git a/src/modules/module_07500.c b/src/modules/module_07500.c
index 931cc6b47..1681fb4a8 100644
--- a/src/modules/module_07500.c
+++ b/src/modules/module_07500.c
@@ -80,6 +80,17 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
         native_threads = 64;
       }
     }
+    else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+    {
+      if (device_param->device_local_mem_size < 49152)
+      {
+        native_threads = 32;
+      }
+      else
+      {
+        native_threads = 64;
+      }
+    }
     else
     {
       native_threads = 32;
diff --git a/src/modules/module_07900.c b/src/modules/module_07900.c
index 67944eb5a..a51efecd9 100644
--- a/src/modules/module_07900.c
+++ b/src/modules/module_07900.c
@@ -79,6 +79,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
     hc_asprintf (&jit_build_options, "-D _unroll");
   }
 
+  // HIP
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+  {
+    hc_asprintf (&jit_build_options, "-D _unroll");
+  }
+
   // ROCM
   if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
   {
diff --git a/src/modules/module_08200.c b/src/modules/module_08200.c
index c092919a3..617b80972 100644
--- a/src/modules/module_08200.c
+++ b/src/modules/module_08200.c
@@ -20,6 +20,7 @@ static const u32   HASH_CATEGORY  = HASH_CATEGORY_PASSWORD_MANAGER;
 static const char *HASH_NAME      = "1Password, cloudkeychain";
 static const u64   KERN_TYPE      = 8200;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
+                                  | OPTI_TYPE_USES_BITS_64
                                   | OPTI_TYPE_SLOW_HASH_SIMD_LOOP;
 static const u64   OPTS_TYPE      = OPTS_TYPE_PT_GENERATE_LE;
 static const u32   SALT_TYPE      = SALT_TYPE_EMBEDDED;
diff --git a/src/modules/module_08700.c b/src/modules/module_08700.c
index 6f75c6e02..be902d527 100644
--- a/src/modules/module_08700.c
+++ b/src/modules/module_08700.c
@@ -60,6 +60,19 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
+char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param)
+{
+  char *jit_build_options = NULL; // returned as-is (NULL) unless the HIP branch below fills it
+
+  // HIP: the only vendor handled here; it gets "-fno-unroll-loops" as its extra JIT build option
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+  {
+    hc_asprintf (&jit_build_options, "-fno-unroll-loops"); // presumably allocates the string; ownership not visible here - TODO confirm
+  }
+
+  return jit_build_options; // device_param is the only parameter this function reads
+}
+
 int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len)
 {
   u32 *digest = (u32 *) digest_buf;
@@ -179,7 +192,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_hook23                   = MODULE_DEFAULT;
   module_ctx->module_hook_salt_size           = MODULE_DEFAULT;
   module_ctx->module_hook_size                = MODULE_DEFAULT;
-  module_ctx->module_jit_build_options        = MODULE_DEFAULT;
+  module_ctx->module_jit_build_options        = module_jit_build_options;
   module_ctx->module_jit_cache_disable        = MODULE_DEFAULT;
   module_ctx->module_kernel_accel_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
diff --git a/src/modules/module_09600.c b/src/modules/module_09600.c
index abfe9fdee..8ed0888cb 100644
--- a/src/modules/module_09600.c
+++ b/src/modules/module_09600.c
@@ -20,6 +20,7 @@ static const u32   HASH_CATEGORY  = HASH_CATEGORY_DOCUMENTS;
 static const char *HASH_NAME      = "MS Office 2013";
 static const u64   KERN_TYPE      = 9600;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
+                                  | OPTI_TYPE_USES_BITS_64
                                   | OPTI_TYPE_SLOW_HASH_SIMD_LOOP;
 static const u64   OPTS_TYPE      = OPTS_TYPE_PT_GENERATE_LE
                                   | OPTS_TYPE_DEEP_COMP_KERNEL;
@@ -84,6 +85,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
     hc_asprintf (&jit_build_options, "-D _unroll");
   }
 
+  // HIP
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+  {
+    hc_asprintf (&jit_build_options, "-D _unroll");
+  }
+
   // ROCM
   if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
   {
diff --git a/src/modules/module_09700.c b/src/modules/module_09700.c
index f5e2f1138..cad911186 100644
--- a/src/modules/module_09700.c
+++ b/src/modules/module_09700.c
@@ -77,6 +77,10 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
     {
       native_threads = 64;
     }
+    else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+    {
+      native_threads = 64;
+    }
     else
     {
       native_threads = 32;
diff --git a/src/modules/module_09710.c b/src/modules/module_09710.c
index 250bc3863..033f77ea0 100644
--- a/src/modules/module_09710.c
+++ b/src/modules/module_09710.c
@@ -77,6 +77,10 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
     {
       native_threads = 64;
     }
+    else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+    {
+      native_threads = 64;
+    }
     else
     {
       native_threads = 32;
diff --git a/src/modules/module_09720.c b/src/modules/module_09720.c
index 7db204dc6..04e99201f 100644
--- a/src/modules/module_09720.c
+++ b/src/modules/module_09720.c
@@ -58,6 +58,41 @@ static const char *SIGNATURE_OLDOFFICE  = "$oldoffice$";
 static const char *SIGNATURE_OLDOFFICE0 = "$oldoffice$0";
 static const char *SIGNATURE_OLDOFFICE1 = "$oldoffice$1";
 
+char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param)
+{
+  char *jit_build_options = NULL; // filled unconditionally by hc_asprintf at the end of the function
+
+  u32 native_threads = 0; // NOTE(review): stays 0 if the device is neither CPU nor GPU, yielding FIXED_LOCAL_SIZE=0 - confirm this cannot happen
+
+  if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU) // CPU is tested first, so it wins if both type bits are set
+  {
+    native_threads = 1;
+  }
+  else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) // GPU: value is picked per vendor id
+  {
+    if (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK)
+    {
+      native_threads = 8;
+    }
+    else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)
+    {
+      native_threads = 64;
+    }
+    else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+    {
+      native_threads = 64; // HIP uses the same value as the VENDOR_ID_AMD branch above
+    }
+    else
+    {
+      native_threads = 32; // fallback for every other GPU vendor id
+    }
+  }
+
+  hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u -D _unroll", native_threads); // two -D options: FIXED_LOCAL_SIZE=<native_threads> and _unroll
+
+  return jit_build_options; // device_param is the only parameter this function reads
+}
+
 u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
 {
   const u64 esalt_size = (const u64) sizeof (oldoffice01_t);
@@ -273,7 +308,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_hook23                   = MODULE_DEFAULT;
   module_ctx->module_hook_salt_size           = MODULE_DEFAULT;
   module_ctx->module_hook_size                = MODULE_DEFAULT;
-  module_ctx->module_jit_build_options        = MODULE_DEFAULT;
+  module_ctx->module_jit_build_options        = module_jit_build_options;
   module_ctx->module_jit_cache_disable        = MODULE_DEFAULT;
   module_ctx->module_kernel_accel_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
diff --git a/src/modules/module_09800.c b/src/modules/module_09800.c
index 4508fcd5b..2eb7fab05 100644
--- a/src/modules/module_09800.c
+++ b/src/modules/module_09800.c
@@ -79,6 +79,10 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
     {
       native_threads = 64;
     }
+    else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+    {
+      native_threads = 64;
+    }
     else
     {
       native_threads = 32;
diff --git a/src/modules/module_09810.c b/src/modules/module_09810.c
index e1a434cf3..2a1074b2c 100644
--- a/src/modules/module_09810.c
+++ b/src/modules/module_09810.c
@@ -78,6 +78,10 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
     {
       native_threads = 64;
     }
+    else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+    {
+      native_threads = 64;
+    }
     else
     {
       native_threads = 32;
diff --git a/src/modules/module_09820.c b/src/modules/module_09820.c
index f3f7ddee2..ea3dfe22b 100644
--- a/src/modules/module_09820.c
+++ b/src/modules/module_09820.c
@@ -80,6 +80,10 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
     {
       native_threads = 64;
     }
+    else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+    {
+      native_threads = 64;
+    }
     else
     {
       native_threads = 32;
diff --git a/src/modules/module_10100.c b/src/modules/module_10100.c
index 092db5cf3..c5cafc8c9 100644
--- a/src/modules/module_10100.c
+++ b/src/modules/module_10100.c
@@ -21,6 +21,7 @@ static const char *HASH_NAME      = "SipHash";
 static const u64   KERN_TYPE      = 10100;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
                                   | OPTI_TYPE_NOT_ITERATED
+                                  | OPTI_TYPE_USES_BITS_64
                                   | OPTI_TYPE_RAW_HASH;
 static const u64   OPTS_TYPE      = OPTS_TYPE_PT_GENERATE_LE;
 static const u32   SALT_TYPE      = SALT_TYPE_EMBEDDED;
diff --git a/src/modules/module_10400.c b/src/modules/module_10400.c
index c782d9c04..77416f5ce 100644
--- a/src/modules/module_10400.c
+++ b/src/modules/module_10400.c
@@ -84,6 +84,10 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
     {
       native_threads = 64;
     }
+    else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+    {
+      native_threads = 64;
+    }
     else
     {
       native_threads = 32;
diff --git a/src/modules/module_10410.c b/src/modules/module_10410.c
index df1d40d2d..b2c98363f 100644
--- a/src/modules/module_10410.c
+++ b/src/modules/module_10410.c
@@ -85,6 +85,10 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
     {
       native_threads = 64;
     }
+    else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+    {
+      native_threads = 64;
+    }
     else
     {
       native_threads = 32;
diff --git a/src/modules/module_10420.c b/src/modules/module_10420.c
index 6f182a436..23e537bf5 100644
--- a/src/modules/module_10420.c
+++ b/src/modules/module_10420.c
@@ -64,6 +64,41 @@ typedef struct pdf
 
 static const char *SIGNATURE_PDF = "$pdf$";
 
+char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param)
+{
+  char *jit_build_options = NULL; // filled unconditionally by hc_asprintf at the end of the function
+
+  u32 native_threads = 0; // NOTE(review): stays 0 if the device is neither CPU nor GPU, yielding FIXED_LOCAL_SIZE=0 - confirm this cannot happen
+
+  if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU) // CPU is tested first, so it wins if both type bits are set
+  {
+    native_threads = 1;
+  }
+  else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) // GPU: value is picked per vendor id
+  {
+    if (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK)
+    {
+      native_threads = 8;
+    }
+    else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)
+    {
+      native_threads = 64;
+    }
+    else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+    {
+      native_threads = 64; // HIP uses the same value as the VENDOR_ID_AMD branch above
+    }
+    else
+    {
+      native_threads = 32; // fallback for every other GPU vendor id
+    }
+  }
+
+  hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u -D _unroll", native_threads); // two -D options: FIXED_LOCAL_SIZE=<native_threads> and _unroll
+
+  return jit_build_options; // device_param is the only parameter this function reads
+}
+
 u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
 {
   const u64 esalt_size = (const u64) sizeof (pdf_t);
@@ -369,7 +404,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_hook23                   = MODULE_DEFAULT;
   module_ctx->module_hook_salt_size           = MODULE_DEFAULT;
   module_ctx->module_hook_size                = MODULE_DEFAULT;
-  module_ctx->module_jit_build_options        = MODULE_DEFAULT;
+  module_ctx->module_jit_build_options        = module_jit_build_options;
   module_ctx->module_jit_cache_disable        = MODULE_DEFAULT;
   module_ctx->module_kernel_accel_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
diff --git a/src/modules/module_10500.c b/src/modules/module_10500.c
index fbb1af6a3..80a8478ef 100644
--- a/src/modules/module_10500.c
+++ b/src/modules/module_10500.c
@@ -108,6 +108,10 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
     {
       native_threads = 64;
     }
+    else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+    {
+      native_threads = 64;
+    }
     else
     {
       native_threads = 32;
diff --git a/src/modules/module_10700.c b/src/modules/module_10700.c
index 6b6317934..5c801b3c9 100644
--- a/src/modules/module_10700.c
+++ b/src/modules/module_10700.c
@@ -20,6 +20,7 @@ static const u32   HASH_CATEGORY  = HASH_CATEGORY_DOCUMENTS;
 static const char *HASH_NAME      = "PDF 1.7 Level 8 (Acrobat 10 - 11)";
 static const u64   KERN_TYPE      = 10700;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
+                                  | OPTI_TYPE_USES_BITS_64
                                   | OPTI_TYPE_REGISTER_LIMIT;
 static const u64   OPTS_TYPE      = OPTS_TYPE_PT_GENERATE_LE
                                   | OPTS_TYPE_HASH_COPY;
diff --git a/src/modules/module_10800.c b/src/modules/module_10800.c
index 65cff2b7e..1765bddac 100644
--- a/src/modules/module_10800.c
+++ b/src/modules/module_10800.c
@@ -72,6 +72,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
     return jit_build_options;
   }
 
+  // HIP
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+  {
+    hc_asprintf (&jit_build_options, "-D _unroll");
+  }
+
   // ROCM
   if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
   {
diff --git a/src/modules/module_10810.c b/src/modules/module_10810.c
index 797c2cf17..10d1443f4 100644
--- a/src/modules/module_10810.c
+++ b/src/modules/module_10810.c
@@ -72,6 +72,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
     return jit_build_options;
   }
 
+  // HIP
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+  {
+    hc_asprintf (&jit_build_options, "-D _unroll");
+  }
+
   // ROCM
   if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
   {
diff --git a/src/modules/module_10820.c b/src/modules/module_10820.c
index d9b76cc5c..82987fe39 100644
--- a/src/modules/module_10820.c
+++ b/src/modules/module_10820.c
@@ -72,6 +72,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
     return jit_build_options;
   }
 
+  // HIP
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+  {
+    hc_asprintf (&jit_build_options, "-D _unroll");
+  }
+
   // ROCM
   if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
   {
diff --git a/src/modules/module_10830.c b/src/modules/module_10830.c
index 91a70b707..f431762f8 100644
--- a/src/modules/module_10830.c
+++ b/src/modules/module_10830.c
@@ -73,6 +73,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
     return jit_build_options;
   }
 
+  // HIP
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+  {
+    hc_asprintf (&jit_build_options, "-D _unroll");
+  }
+
   // ROCM
   if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
   {
diff --git a/src/modules/module_10840.c b/src/modules/module_10840.c
index 4cbb7db28..f60d3ea13 100644
--- a/src/modules/module_10840.c
+++ b/src/modules/module_10840.c
@@ -73,6 +73,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
     return jit_build_options;
   }
 
+  // HIP
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+  {
+    hc_asprintf (&jit_build_options, "-D _unroll");
+  }
+
   // ROCM
   if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
   {
diff --git a/src/modules/module_10870.c b/src/modules/module_10870.c
index 52a70afac..047c67242 100644
--- a/src/modules/module_10870.c
+++ b/src/modules/module_10870.c
@@ -73,6 +73,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
     return jit_build_options;
   }
 
+  // HIP
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+  {
+    hc_asprintf (&jit_build_options, "-D _unroll");
+  }
+
   // ROCM
   if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
   {
diff --git a/src/modules/module_10900.c b/src/modules/module_10900.c
index b0634ec4e..efde01301 100644
--- a/src/modules/module_10900.c
+++ b/src/modules/module_10900.c
@@ -77,6 +77,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
     hc_asprintf (&jit_build_options, "-D _unroll");
   }
 
+  // HIP
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+  {
+    hc_asprintf (&jit_build_options, "-D _unroll");
+  }
+
   // ROCM
   if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
   {
diff --git a/src/modules/module_11300.c b/src/modules/module_11300.c
index 9cb3ae217..981a0b471 100644
--- a/src/modules/module_11300.c
+++ b/src/modules/module_11300.c
@@ -81,6 +81,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
     hc_asprintf (&jit_build_options, "-D _unroll");
   }
 
+  // HIP
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+  {
+    hc_asprintf (&jit_build_options, "-D _unroll");
+  }
+
   // ROCM
   if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
   {
diff --git a/src/modules/module_11600.c b/src/modules/module_11600.c
index 7694b71f3..25fe732a3 100644
--- a/src/modules/module_11600.c
+++ b/src/modules/module_11600.c
@@ -111,6 +111,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
     return jit_build_options;
   }
 
+  // HIP
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+  {
+    hc_asprintf (&jit_build_options, "-D _unroll");
+  }
+
   // ROCM
   if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
   {
diff --git a/src/modules/module_11700.c b/src/modules/module_11700.c
index eb872087c..ebf903a55 100644
--- a/src/modules/module_11700.c
+++ b/src/modules/module_11700.c
@@ -19,7 +19,8 @@ static const u32   DGST_SIZE      = DGST_SIZE_4_8;
 static const u32   HASH_CATEGORY  = HASH_CATEGORY_RAW_HASH;
 static const char *HASH_NAME      = "GOST R 34.11-2012 (Streebog) 256-bit, big-endian";
 static const u64   KERN_TYPE      = 11700;
-static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE;
+static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
+                                  | OPTI_TYPE_USES_BITS_64;
 static const u64   OPTS_TYPE      = OPTS_TYPE_PT_GENERATE_LE
                                   | OPTS_TYPE_PT_ADD01;
 static const u32   SALT_TYPE      = SALT_TYPE_NONE;
diff --git a/src/modules/module_11750.c b/src/modules/module_11750.c
index 8ef88b63b..f2302db35 100644
--- a/src/modules/module_11750.c
+++ b/src/modules/module_11750.c
@@ -19,7 +19,8 @@ static const u32   DGST_SIZE      = DGST_SIZE_4_8;
 static const u32   HASH_CATEGORY  = HASH_CATEGORY_RAW_HASH_AUTHENTICATED;
 static const char *HASH_NAME      = "HMAC-Streebog-256 (key = $pass), big-endian";
 static const u64   KERN_TYPE      = 11750;
-static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE;
+static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
+                                  | OPTI_TYPE_USES_BITS_64;
 static const u64   OPTS_TYPE      = OPTS_TYPE_PT_GENERATE_LE
                                   | OPTS_TYPE_PT_ADD01;
 static const u32   SALT_TYPE      = SALT_TYPE_GENERIC;
diff --git a/src/modules/module_11760.c b/src/modules/module_11760.c
index d81290f24..5c574e469 100644
--- a/src/modules/module_11760.c
+++ b/src/modules/module_11760.c
@@ -19,7 +19,8 @@ static const u32   DGST_SIZE      = DGST_SIZE_4_8;
 static const u32   HASH_CATEGORY  = HASH_CATEGORY_RAW_HASH_AUTHENTICATED;
 static const char *HASH_NAME      = "HMAC-Streebog-256 (key = $salt), big-endian";
 static const u64   KERN_TYPE      = 11760;
-static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE;
+static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
+                                  | OPTI_TYPE_USES_BITS_64;
 static const u64   OPTS_TYPE      = OPTS_TYPE_PT_GENERATE_LE
                                   | OPTS_TYPE_PT_ADD01;
 static const u32   SALT_TYPE      = SALT_TYPE_GENERIC;
diff --git a/src/modules/module_11800.c b/src/modules/module_11800.c
index 89bd377d9..3c83bc0c0 100644
--- a/src/modules/module_11800.c
+++ b/src/modules/module_11800.c
@@ -19,7 +19,8 @@ static const u32   DGST_SIZE      = DGST_SIZE_4_16;
 static const u32   HASH_CATEGORY  = HASH_CATEGORY_RAW_HASH;
 static const char *HASH_NAME      = "GOST R 34.11-2012 (Streebog) 512-bit, big-endian";
 static const u64   KERN_TYPE      = 11800;
-static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE;
+static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
+                                  | OPTI_TYPE_USES_BITS_64;
 static const u64   OPTS_TYPE      = OPTS_TYPE_PT_GENERATE_LE
                                   | OPTS_TYPE_PT_ADD01;
 static const u32   SALT_TYPE      = SALT_TYPE_NONE;
diff --git a/src/modules/module_11850.c b/src/modules/module_11850.c
index 18a993aa2..6bf1853b4 100644
--- a/src/modules/module_11850.c
+++ b/src/modules/module_11850.c
@@ -19,7 +19,8 @@ static const u32   DGST_SIZE      = DGST_SIZE_4_16;
 static const u32   HASH_CATEGORY  = HASH_CATEGORY_RAW_HASH_AUTHENTICATED;
 static const char *HASH_NAME      = "HMAC-Streebog-512 (key = $pass), big-endian";
 static const u64   KERN_TYPE      = 11850;
-static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE;
+static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
+                                  | OPTI_TYPE_USES_BITS_64;
 static const u64   OPTS_TYPE      = OPTS_TYPE_PT_GENERATE_LE
                                   | OPTS_TYPE_PT_ADD01;
 static const u32   SALT_TYPE      = SALT_TYPE_GENERIC;
diff --git a/src/modules/module_11860.c b/src/modules/module_11860.c
index ad29aa7a5..67a5ff618 100644
--- a/src/modules/module_11860.c
+++ b/src/modules/module_11860.c
@@ -19,7 +19,8 @@ static const u32   DGST_SIZE      = DGST_SIZE_4_16;
 static const u32   HASH_CATEGORY  = HASH_CATEGORY_RAW_HASH_AUTHENTICATED;
 static const char *HASH_NAME      = "HMAC-Streebog-512 (key = $salt), big-endian";
 static const u64   KERN_TYPE      = 11860;
-static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE;
+static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
+                                  | OPTI_TYPE_USES_BITS_64;
 static const u64   OPTS_TYPE      = OPTS_TYPE_PT_GENERATE_LE
                                   | OPTS_TYPE_PT_ADD01;
 static const u32   SALT_TYPE      = SALT_TYPE_GENERIC;
diff --git a/src/modules/module_12200.c b/src/modules/module_12200.c
index 15b6c0c85..3f6b57821 100644
--- a/src/modules/module_12200.c
+++ b/src/modules/module_12200.c
@@ -81,6 +81,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
     hc_asprintf (&jit_build_options, "-D _unroll");
   }
 
+  // HIP
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+  {
+    hc_asprintf (&jit_build_options, "-D _unroll");
+  }
+
   // ROCM
   if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
   {
diff --git a/src/modules/module_12800.c b/src/modules/module_12800.c
index 22658f2c6..cd2099cdc 100644
--- a/src/modules/module_12800.c
+++ b/src/modules/module_12800.c
@@ -69,6 +69,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
     hc_asprintf (&jit_build_options, "-D _unroll");
   }
 
+  // HIP
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+  {
+    hc_asprintf (&jit_build_options, "-D _unroll");
+  }
+
   // ROCM
   if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
   {
diff --git a/src/modules/module_12900.c b/src/modules/module_12900.c
index cabff4977..4c5a9892b 100644
--- a/src/modules/module_12900.c
+++ b/src/modules/module_12900.c
@@ -69,6 +69,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
     hc_asprintf (&jit_build_options, "-D _unroll");
   }
 
+  // HIP
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+  {
+    hc_asprintf (&jit_build_options, "-D _unroll");
+  }
+
   // ROCM
   if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
   {
diff --git a/src/modules/module_13000.c b/src/modules/module_13000.c
index 2d441994a..ab389431d 100644
--- a/src/modules/module_13000.c
+++ b/src/modules/module_13000.c
@@ -75,6 +75,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
     hc_asprintf (&jit_build_options, "-D _unroll");
   }
 
+  // HIP
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+  {
+    hc_asprintf (&jit_build_options, "-D _unroll");
+  }
+
   // ROCM
   if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
   {
diff --git a/src/modules/module_13100.c b/src/modules/module_13100.c
index bab0dbf26..fac5cb24c 100644
--- a/src/modules/module_13100.c
+++ b/src/modules/module_13100.c
@@ -79,6 +79,17 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
         native_threads = 64;
       }
     }
+    else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+    {
+      if (device_param->device_local_mem_size < 49152)
+      {
+        native_threads = 32;
+      }
+      else
+      {
+        native_threads = 64;
+      }
+    }
     else
     {
       native_threads = 32;
diff --git a/src/modules/module_13600.c b/src/modules/module_13600.c
index a7d675138..ec1c4e6ef 100644
--- a/src/modules/module_13600.c
+++ b/src/modules/module_13600.c
@@ -61,7 +61,7 @@ typedef struct zip2
   u32 verify_bytes;
   u32 compress_length;
   u32 data_len;
-  u32 data_buf[2048];
+  u32 data_buf[0x4000000];
   u32 auth_len;
   u32 auth_buf[4];
 
@@ -166,7 +166,7 @@ int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE
                    | TOKEN_ATTR_VERIFY_HEX;
 
   token.len_min[7] = 0;
-  token.len_max[7] = 16384;
+  token.len_max[7] = 0x4000000 * 4 * 2;
   token.sep[7]     = '*';
   token.attr[7]    = TOKEN_ATTR_VERIFY_LENGTH
                    | TOKEN_ATTR_VERIFY_HEX;
diff --git a/src/modules/module_14000.c b/src/modules/module_14000.c
index c6854d6c9..013888bcc 100644
--- a/src/modules/module_14000.c
+++ b/src/modules/module_14000.c
@@ -76,6 +76,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
     hc_asprintf (&jit_build_options, "-D _unroll");
   }
 
+  // HIP
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+  {
+    hc_asprintf (&jit_build_options, "-D _unroll");
+  }
+
   // ROCM
   if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
   {
diff --git a/src/modules/module_14500.c b/src/modules/module_14500.c
index 286117f86..ba4edab17 100644
--- a/src/modules/module_14500.c
+++ b/src/modules/module_14500.c
@@ -113,6 +113,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
     hc_asprintf (&jit_build_options, "-D _unroll");
   }
 
+  // HIP
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+  {
+    hc_asprintf (&jit_build_options, "-D _unroll");
+  }
+
   // ROCM
   if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
   {
diff --git a/src/modules/module_15000.c b/src/modules/module_15000.c
index 481f88cb3..9fae0e769 100644
--- a/src/modules/module_15000.c
+++ b/src/modules/module_15000.c
@@ -69,6 +69,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
     return jit_build_options;
   }
 
+  // HIP
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+  {
+    hc_asprintf (&jit_build_options, "-D _unroll");
+  }
+
   // ROCM
   if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
   {
diff --git a/src/modules/module_15600.c b/src/modules/module_15600.c
index 168609688..c7acdb8d3 100644
--- a/src/modules/module_15600.c
+++ b/src/modules/module_15600.c
@@ -78,6 +78,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
     hc_asprintf (&jit_build_options, "-D _unroll");
   }
 
+  // HIP
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+  {
+    hc_asprintf (&jit_build_options, "-D _unroll");
+  }
+
   // ROCM
   if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
   {
diff --git a/src/modules/module_16200.c b/src/modules/module_16200.c
index 7f1b34959..19cbbac8d 100644
--- a/src/modules/module_16200.c
+++ b/src/modules/module_16200.c
@@ -78,6 +78,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
     hc_asprintf (&jit_build_options, "-D _unroll");
   }
 
+  // HIP
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+  {
+    hc_asprintf (&jit_build_options, "-D _unroll");
+  }
+
   // ROCM
   if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
   {
diff --git a/src/modules/module_16300.c b/src/modules/module_16300.c
index 8ce5e668d..33997b1ed 100644
--- a/src/modules/module_16300.c
+++ b/src/modules/module_16300.c
@@ -79,6 +79,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
     hc_asprintf (&jit_build_options, "-D _unroll");
   }
 
+  // HIP
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+  {
+    hc_asprintf (&jit_build_options, "-D _unroll");
+  }
+
   // ROCM
   if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
   {
diff --git a/src/modules/module_16900.c b/src/modules/module_16900.c
index 5cfe5aeb7..93915b592 100644
--- a/src/modules/module_16900.c
+++ b/src/modules/module_16900.c
@@ -79,6 +79,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
     hc_asprintf (&jit_build_options, "-D _unroll");
   }
 
+  // HIP
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+  {
+    hc_asprintf (&jit_build_options, "-D _unroll");
+  }
+
   // ROCM
   if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
   {
diff --git a/src/modules/module_17200.c b/src/modules/module_17200.c
index fa510ba27..44798110d 100644
--- a/src/modules/module_17200.c
+++ b/src/modules/module_17200.c
@@ -170,6 +170,11 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE
   // it leads to CL_KERNEL_WORK_GROUP_SIZE to return 0 and later we will divide with 0
   // workaround would be to rewrite kernel to use global memory
 
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+  {
+    return true;
+  }
+
   if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)
   {
     return true;
diff --git a/src/modules/module_17220.c b/src/modules/module_17220.c
index 8f1beaf1a..9028040d9 100644
--- a/src/modules/module_17220.c
+++ b/src/modules/module_17220.c
@@ -170,6 +170,11 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE
   // it leads to CL_KERNEL_WORK_GROUP_SIZE to return 0 and later we will divide with 0
   // workaround would be to rewrite kernel to use global memory
 
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+  {
+    return true;
+  }
+
   if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)
   {
     return true;
diff --git a/src/modules/module_17225.c b/src/modules/module_17225.c
index 3b3291d5f..75c376c9e 100644
--- a/src/modules/module_17225.c
+++ b/src/modules/module_17225.c
@@ -170,6 +170,11 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE
   // it leads to CL_KERNEL_WORK_GROUP_SIZE to return 0 and later we will divide with 0
   // workaround would be to rewrite kernel to use global memory
 
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+  {
+    return true;
+  }
+
   if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)
   {
     return true;
diff --git a/src/modules/module_18200.c b/src/modules/module_18200.c
index b95ddab6f..e6596306b 100644
--- a/src/modules/module_18200.c
+++ b/src/modules/module_18200.c
@@ -79,6 +79,17 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
         native_threads = 64;
       }
     }
+    else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+    {
+      if (device_param->device_local_mem_size < 49152)
+      {
+        native_threads = 32;
+      }
+      else
+      {
+        native_threads = 64;
+      }
+    }
     else
     {
       native_threads = 32;
diff --git a/src/modules/module_18300.c b/src/modules/module_18300.c
index 592081296..b58ef35f5 100644
--- a/src/modules/module_18300.c
+++ b/src/modules/module_18300.c
@@ -78,6 +78,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
     hc_asprintf (&jit_build_options, "-D _unroll");
   }
 
+  // HIP
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+  {
+    hc_asprintf (&jit_build_options, "-D _unroll");
+  }
+
   // ROCM
   if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
   {
diff --git a/src/modules/module_18800.c b/src/modules/module_18800.c
index 5bb6132a7..6847edde2 100644
--- a/src/modules/module_18800.c
+++ b/src/modules/module_18800.c
@@ -80,6 +80,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
     hc_asprintf (&jit_build_options, "-D _unroll");
   }
 
+  // HIP
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+  {
+    hc_asprintf (&jit_build_options, "-D _unroll");
+  }
+
   // ROCM
   if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
   {
diff --git a/src/modules/module_19200.c b/src/modules/module_19200.c
index 165b66ec9..3dd7b46f6 100644
--- a/src/modules/module_19200.c
+++ b/src/modules/module_19200.c
@@ -21,7 +21,8 @@ static const u32   DGST_SIZE      = DGST_SIZE_8_8;
 static const u32   HASH_CATEGORY  = HASH_CATEGORY_OS;
 static const char *HASH_NAME      = "QNX /etc/shadow (SHA512)";
 static const u64   KERN_TYPE      = 19200;
-static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE;
+static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
+                                  | OPTI_TYPE_USES_BITS_64;
 static const u64   OPTS_TYPE      = OPTS_TYPE_PT_GENERATE_LE;
 static const u32   SALT_TYPE      = SALT_TYPE_EMBEDDED;
 static const char *ST_PASS        = "hashcat";
diff --git a/src/modules/module_20600.c b/src/modules/module_20600.c
index a7debbc58..e270fde50 100644
--- a/src/modules/module_20600.c
+++ b/src/modules/module_20600.c
@@ -71,6 +71,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
     hc_asprintf (&jit_build_options, "-D _unroll");
   }
 
+  // HIP
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+  {
+    hc_asprintf (&jit_build_options, "-D _unroll");
+  }
+
   // ROCM
   if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
   {
diff --git a/src/modules/module_21000.c b/src/modules/module_21000.c
index e482be5e1..7df98beb4 100644
--- a/src/modules/module_21000.c
+++ b/src/modules/module_21000.c
@@ -72,6 +72,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
     return jit_build_options;
   }
 
+  // HIP
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+  {
+    hc_asprintf (&jit_build_options, "-D _unroll");
+  }
+
   // ROCM
   if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
   {
diff --git a/src/modules/module_21800.c b/src/modules/module_21800.c
index 9ae25490d..a3f3a00c2 100644
--- a/src/modules/module_21800.c
+++ b/src/modules/module_21800.c
@@ -96,6 +96,11 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE
     }
   }
 
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+  {
+    return true;
+  }
+
   // amdgpu-pro-20.50-1234664-ubuntu-20.04 (rocr)
   // test_1620713931/test_report.log:! unhandled return code 255, cmdline : cat test_1620713931/21800_passwords.txt | ./hashcat --quiet --potfile-disable --runtime 400 --hwmon-disable -O -D 2 --backend-vector-width 4 -a 0 -m 21800 test_1620713931/21800_hashes.txt
   // test_1620719578/test_report.log:! unhandled return code 255, cmdline : cat test_1620719578/21800_passwords.txt | ./hashcat --quiet --potfile-disable --runtime 400 --hwmon-disable -D 2 --backend-vector-width 4 -a 0 -m 21800 test_1620719578/21800_hashes.txt
diff --git a/src/modules/module_22100.c b/src/modules/module_22100.c
index cd79bc7f8..47b72d7df 100644
--- a/src/modules/module_22100.c
+++ b/src/modules/module_22100.c
@@ -86,6 +86,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
     hc_asprintf (&jit_build_options, "-D _unroll");
   }
 
+  // HIP
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+  {
+    hc_asprintf (&jit_build_options, "-D _unroll");
+  }
+
   // ROCM
   if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
   {
diff --git a/src/modules/module_22200.c b/src/modules/module_22200.c
index 1f034ef50..5ea525fbe 100644
--- a/src/modules/module_22200.c
+++ b/src/modules/module_22200.c
@@ -72,6 +72,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
     return jit_build_options;
   }
 
+  // HIP
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+  {
+    hc_asprintf (&jit_build_options, "-D _unroll");
+  }
+
   // ROCM
   if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
   {
diff --git a/src/modules/module_22400.c b/src/modules/module_22400.c
index 567dec821..a3ab81101 100644
--- a/src/modules/module_22400.c
+++ b/src/modules/module_22400.c
@@ -96,6 +96,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
     hc_asprintf (&jit_build_options, "-D _unroll");
   }
 
+  // HIP
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+  {
+    hc_asprintf (&jit_build_options, "-D _unroll");
+  }
+
   // ROCM
   if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
   {
diff --git a/src/modules/module_23400.c b/src/modules/module_23400.c
index 4ba636bfa..5921ca9db 100644
--- a/src/modules/module_23400.c
+++ b/src/modules/module_23400.c
@@ -69,6 +69,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
     hc_asprintf (&jit_build_options, "-D _unroll");
   }
 
+  // HIP
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+  {
+    hc_asprintf (&jit_build_options, "-D _unroll");
+  }
+
   // ROCM
   if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
   {
diff --git a/src/modules/module_24200.c b/src/modules/module_24200.c
index b1da70c9a..bd93b36e0 100644
--- a/src/modules/module_24200.c
+++ b/src/modules/module_24200.c
@@ -79,6 +79,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
     hc_asprintf (&jit_build_options, "-D _unroll");
   }
 
+  // HIP
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+  {
+    hc_asprintf (&jit_build_options, "-D _unroll");
+  }
+
   // ROCM
   if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
   {
diff --git a/src/modules/module_25300.c b/src/modules/module_25300.c
index 6bed1017c..e21d55881 100644
--- a/src/modules/module_25300.c
+++ b/src/modules/module_25300.c
@@ -72,6 +72,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
     hc_asprintf (&jit_build_options, "-D _unroll");
   }
 
+  // HIP
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+  {
+    hc_asprintf (&jit_build_options, "-D _unroll");
+  }
+
   // ROCM
   if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
   {
diff --git a/src/modules/module_25400.c b/src/modules/module_25400.c
index 5dbbe8dc3..341837786 100644
--- a/src/modules/module_25400.c
+++ b/src/modules/module_25400.c
@@ -118,6 +118,17 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
         native_threads = 64;
       }
     }
+    else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+    {
+      if (device_param->device_local_mem_size < 49152)
+      {
+        native_threads = 32;
+      }
+      else
+      {
+        native_threads = 64;
+      }
+    }
     else
     {
       native_threads = 32;
diff --git a/src/modules/module_25500.c b/src/modules/module_25500.c
index 10edd7203..e6853e951 100644
--- a/src/modules/module_25500.c
+++ b/src/modules/module_25500.c
@@ -74,6 +74,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
     return jit_build_options;
   }
 
+  // HIP
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+  {
+    hc_asprintf (&jit_build_options, "-D _unroll");
+  }
+
   // ROCM
   if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
   {
diff --git a/src/modules/module_25900.c b/src/modules/module_25900.c
index efe7f39f2..c44a1642b 100644
--- a/src/modules/module_25900.c
+++ b/src/modules/module_25900.c
@@ -81,6 +81,12 @@ char* module_jit_build_options(MAYBE_UNUSED const hashconfig_t *hashconfig, MAYB
     hc_asprintf(&jit_build_options, "-D _unroll");
   }
 
+  // HIP
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+  {
+    hc_asprintf (&jit_build_options, "-D _unroll");
+  }
+
   // ROCM
   if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
   {
diff --git a/src/modules/module_26200.c b/src/modules/module_26200.c
index 63989796e..8b082f2d5 100644
--- a/src/modules/module_26200.c
+++ b/src/modules/module_26200.c
@@ -74,6 +74,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
     return jit_build_options;
   }
 
+  // HIP
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+  {
+    hc_asprintf (&jit_build_options, "-D _unroll");
+  }
+
   // ROCM
   if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
   {
diff --git a/src/modules/module_26600.c b/src/modules/module_26600.c
index 61ccac983..c9e04958c 100644
--- a/src/modules/module_26600.c
+++ b/src/modules/module_26600.c
@@ -74,6 +74,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
     return jit_build_options;
   }
 
+  // HIP
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+  {
+    hc_asprintf (&jit_build_options, "-D _unroll");
+  }
+
   // ROCM
   if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
   {
diff --git a/src/selftest.c b/src/selftest.c
index 422cecc1a..4f8dc1092 100644
--- a/src/selftest.c
+++ b/src/selftest.c
@@ -32,6 +32,13 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
     device_param->kernel_params[18] = &device_param->cuda_d_st_esalts_buf;
   }
 
+  if (device_param->is_hip == true)
+  {
+    device_param->kernel_params[15] = &device_param->hip_d_st_digests_buf;
+    device_param->kernel_params[17] = &device_param->hip_d_st_salts_buf;
+    device_param->kernel_params[18] = &device_param->hip_d_st_esalts_buf;
+  }
+
   if (device_param->is_opencl == true)
   {
     device_param->kernel_params[15] = &device_param->opencl_d_st_digests_buf;
@@ -91,6 +98,11 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
       if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_pws_buf, &pw, 1 * sizeof (pw_t)) == -1) return -1;
     }
 
+    if (device_param->is_hip == true)
+    {
+      if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_pws_buf, &pw, 1 * sizeof (pw_t)) == -1) return -1;
+    }
+
     if (device_param->is_opencl == true)
     {
       if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL) == -1) return -1;
@@ -126,6 +138,11 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
           if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_pws_buf, &pw, 1 * sizeof (pw_t)) == -1) return -1;
         }
 
+        if (device_param->is_hip == true)
+        {
+          if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_pws_buf, &pw, 1 * sizeof (pw_t)) == -1) return -1;
+        }
+
         if (device_param->is_opencl == true)
         {
           if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL) == -1) return -1;
@@ -190,6 +207,13 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
           if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_pws_buf, &pw, 1 * sizeof (pw_t)) == -1) return -1;
         }
 
+        if (device_param->is_hip == true)
+        {
+          if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_combs_c, &comb, 1 * sizeof (pw_t)) == -1) return -1;
+
+          if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_pws_buf, &pw, 1 * sizeof (pw_t)) == -1) return -1;
+        }
+
         if (device_param->is_opencl == true)
         {
           if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_combs_c, CL_TRUE, 0, 1 * sizeof (pw_t), &comb, 0, NULL, NULL) == -1) return -1;
@@ -225,6 +249,11 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
             if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_pws_buf, &pw, 1 * sizeof (pw_t)) == -1) return -1;
           }
 
+          if (device_param->is_hip == true)
+          {
+            if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_pws_buf, &pw, 1 * sizeof (pw_t)) == -1) return -1;
+          }
+
           if (device_param->is_opencl == true)
           {
             if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL) == -1) return -1;
@@ -276,6 +305,11 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
             if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_bfs_c, &bf, 1 * sizeof (bf_t)) == -1) return -1;
           }
 
+          if (device_param->is_hip == true)
+          {
+            if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_bfs_c, &bf, 1 * sizeof (bf_t)) == -1) return -1;
+          }
+
           if (device_param->is_opencl == true)
           {
             if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bfs_c, CL_TRUE, 0, 1 * sizeof (bf_t), &bf, 0, NULL, NULL) == -1) return -1;
@@ -372,6 +406,11 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
             if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_pws_buf, &pw, 1 * sizeof (pw_t)) == -1) return -1;
           }
 
+          if (device_param->is_hip == true)
+          {
+            if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_pws_buf, &pw, 1 * sizeof (pw_t)) == -1) return -1;
+          }
+
           if (device_param->is_opencl == true)
           {
             if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL) == -1) return -1;
@@ -400,6 +439,11 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
         if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_pws_buf, &pw, 1 * sizeof (pw_t)) == -1) return -1;
       }
 
+      if (device_param->is_hip == true)
+      {
+        if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_pws_buf, &pw, 1 * sizeof (pw_t)) == -1) return -1;
+      }
+
       if (device_param->is_opencl == true)
       {
         if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL) == -1) return -1;
@@ -446,6 +490,11 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
         if (run_cuda_kernel_utf8toutf16le (hashcat_ctx, device_param, device_param->cuda_d_pws_buf, 1) == -1) return -1;
       }
 
+      if (device_param->is_hip == true)
+      {
+        if (run_hip_kernel_utf8toutf16le (hashcat_ctx, device_param, device_param->hip_d_pws_buf, 1) == -1) return -1;
+      }
+
       if (device_param->is_opencl == true)
       {
         if (run_opencl_kernel_utf8toutf16le (hashcat_ctx, device_param, device_param->opencl_d_pws_buf, 1) == -1) return -1;
@@ -463,6 +512,11 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
         if (hc_cuMemcpyDtoH (hashcat_ctx, device_param->hooks_buf, device_param->cuda_d_hooks, device_param->size_hooks) == -1) return -1;
       }
 
+      if (device_param->is_hip == true)
+      {
+        if (hc_hipMemcpyDtoH (hashcat_ctx, device_param->hooks_buf, device_param->hip_d_hooks, device_param->size_hooks) == -1) return -1;
+      }
+
       if (device_param->is_opencl == true)
       {
         if (hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL) == -1) return -1;
@@ -475,6 +529,11 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
         if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_hooks, device_param->hooks_buf, device_param->size_hooks) == -1) return -1;
       }
 
+      if (device_param->is_hip == true)
+      {
+        if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_hooks, device_param->hooks_buf, device_param->size_hooks) == -1) return -1;
+      }
+
       if (device_param->is_opencl == true)
       {
         if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL) == -1) return -1;
@@ -526,6 +585,11 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
           if (hc_cuMemcpyDtoH (hashcat_ctx, device_param->hooks_buf, device_param->cuda_d_hooks, device_param->size_hooks) == -1) return -1;
         }
 
+        if (device_param->is_hip == true)
+        {
+          if (hc_hipMemcpyDtoH (hashcat_ctx, device_param->hooks_buf, device_param->hip_d_hooks, device_param->size_hooks) == -1) return -1;
+        }
+
         if (device_param->is_opencl == true)
         {
           if (hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL) == -1) return -1;
@@ -538,6 +602,11 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
           if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_hooks, device_param->hooks_buf, device_param->size_hooks) == -1) return -1;
         }
 
+        if (device_param->is_hip == true)
+        {
+          if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_hooks, device_param->hooks_buf, device_param->size_hooks) == -1) return -1;
+        }
+
         if (device_param->is_opencl == true)
         {
           if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL) == -1) return -1;
@@ -617,6 +686,11 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
     if (hc_cuMemcpyDtoH (hashcat_ctx, &num_cracked, device_param->cuda_d_result, sizeof (u32)) == -1) return -1;
   }
 
+  if (device_param->is_hip == true)
+  {
+    if (hc_hipMemcpyDtoH (hashcat_ctx, &num_cracked, device_param->hip_d_result, sizeof (u32)) == -1) return -1;
+  }
+
   if (device_param->is_opencl == true)
   {
     if (hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_result, CL_TRUE, 0, sizeof (u32), &num_cracked, 0, NULL, NULL) == -1) return -1;
@@ -646,6 +720,20 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
     if (run_cuda_kernel_bzero   (hashcat_ctx, device_param, device_param->cuda_d_result,          device_param->size_results) == -1) return -1;
   }
 
+  if (device_param->is_hip == true)
+  {
+    device_param->kernel_params[15] = &device_param->hip_d_digests_buf;
+    device_param->kernel_params[17] = &device_param->hip_d_salt_bufs;
+    device_param->kernel_params[18] = &device_param->hip_d_esalt_bufs;
+
+    if (run_hip_kernel_bzero   (hashcat_ctx, device_param, device_param->hip_d_pws_buf,         device_param->size_pws)     == -1) return -1;
+    if (run_hip_kernel_bzero   (hashcat_ctx, device_param, device_param->hip_d_tmps,            device_param->size_tmps)    == -1) return -1;
+    if (run_hip_kernel_bzero   (hashcat_ctx, device_param, device_param->hip_d_hooks,           device_param->size_hooks)   == -1) return -1;
+    if (run_hip_kernel_bzero   (hashcat_ctx, device_param, device_param->hip_d_plain_bufs,      device_param->size_plains)  == -1) return -1;
+    if (run_hip_kernel_bzero   (hashcat_ctx, device_param, device_param->hip_d_digests_shown,   device_param->size_shown)   == -1) return -1;
+    if (run_hip_kernel_bzero   (hashcat_ctx, device_param, device_param->hip_d_result,          device_param->size_results) == -1) return -1;
+  }
+
   if (device_param->is_opencl == true)
   {
     device_param->kernel_params[15] = &device_param->opencl_d_digests_buf;
@@ -667,6 +755,11 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
       if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_rules_c, device_param->size_rules_c) == -1) return -1;
     }
 
+    if (device_param->is_hip == true)
+    {
+      if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_rules_c, device_param->size_rules_c) == -1) return -1;
+    }
+
     if (device_param->is_opencl == true)
     {
       if (run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_rules_c, device_param->size_rules_c) == -1) return -1;
@@ -681,6 +774,11 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
         if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_rules_c, device_param->size_rules_c) == -1) return -1;
       }
 
+      if (device_param->is_hip == true)
+      {
+        if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_rules_c, device_param->size_rules_c) == -1) return -1;
+      }
+
       if (device_param->is_opencl == true)
       {
         if (run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_rules_c, device_param->size_rules_c) == -1) return -1;
@@ -693,6 +791,11 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
         if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_combs_c, device_param->size_combs) == -1) return -1;
       }
 
+      if (device_param->is_hip == true)
+      {
+        if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_combs_c, device_param->size_combs) == -1) return -1;
+      }
+
       if (device_param->is_opencl == true)
       {
         if (run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_combs_c, device_param->size_combs) == -1) return -1;
@@ -705,6 +808,11 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
         if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_bfs_c, device_param->size_bfs) == -1) return -1;
       }
 
+      if (device_param->is_hip == true)
+      {
+        if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_bfs_c, device_param->size_bfs) == -1) return -1;
+      }
+
       if (device_param->is_opencl == true)
       {
         if (run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_bfs_c, device_param->size_bfs) == -1) return -1;
@@ -718,15 +826,21 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
   {
     hc_thread_mutex_lock (status_ctx->mux_display);
 
-    if (device_param->is_opencl == true)
-    {
-      event_log_error (hashcat_ctx, "* Device #%u: ATTENTION! OpenCL kernel self-test failed.", device_param->device_id + 1);
-    }
     if (device_param->is_cuda == true)
     {
       event_log_error (hashcat_ctx, "* Device #%u: ATTENTION! CUDA kernel self-test failed.", device_param->device_id + 1);
     }
 
+    if (device_param->is_hip == true)
+    {
+      event_log_error (hashcat_ctx, "* Device #%u: ATTENTION! HIP kernel self-test failed.", device_param->device_id + 1);
+    }
+
+    if (device_param->is_opencl == true)
+    {
+      event_log_error (hashcat_ctx, "* Device #%u: ATTENTION! OpenCL kernel self-test failed.", device_param->device_id + 1);
+    }
+
     event_log_warning (hashcat_ctx, "Your device driver installation is probably broken.");
     event_log_warning (hashcat_ctx, "See also: https://hashcat.net/faq/wrongdriver");
     event_log_warning (hashcat_ctx, NULL);
@@ -761,7 +875,12 @@ HC_API_CALL void *thread_selftest (void *p)
 
   if (device_param->is_cuda == true)
   {
-    if (hc_cuCtxSetCurrent (hashcat_ctx, device_param->cuda_context) == -1) return NULL;
+    if (hc_cuCtxPushCurrent (hashcat_ctx, device_param->cuda_context) == -1) return NULL;
+  }
+
+  if (device_param->is_hip == true)
+  {
+    if (hc_hipCtxPushCurrent (hashcat_ctx, device_param->hip_context) == -1) return NULL;
   }
 
   const int rc_selftest = selftest (hashcat_ctx, device_param);
@@ -782,5 +901,15 @@ HC_API_CALL void *thread_selftest (void *p)
     }
   }
 
+  if (device_param->is_cuda == true)
+  {
+    if (hc_cuCtxPopCurrent (hashcat_ctx, &device_param->cuda_context) == -1) return NULL;
+  }
+
+  if (device_param->is_hip == true)
+  {
+    if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1) return NULL;
+  }
+
   return NULL;
 }
diff --git a/src/terminal.c b/src/terminal.c
index ae186a8b8..54bffed10 100644
--- a/src/terminal.c
+++ b/src/terminal.c
@@ -810,6 +810,54 @@ void backend_info (hashcat_ctx_t *hashcat_ctx)
     }
   }
 
+  if (backend_ctx->hip)
+  {
+    event_log_info (hashcat_ctx, "HIP Info:");
+    event_log_info (hashcat_ctx, "=========");
+    event_log_info (hashcat_ctx, NULL);
+
+    int hip_devices_cnt    = backend_ctx->hip_devices_cnt;
+    int hip_driver_version = backend_ctx->hip_driver_version;
+
+    event_log_info (hashcat_ctx, "HIP.Version.: %d.%d", hip_driver_version / 1000, (hip_driver_version % 100) / 10);
+    event_log_info (hashcat_ctx, NULL);
+
+    for (int hip_devices_idx = 0; hip_devices_idx < hip_devices_cnt; hip_devices_idx++)
+    {
+      const int backend_devices_idx = backend_ctx->backend_device_from_hip[hip_devices_idx];
+
+      const hc_device_param_t *device_param = backend_ctx->devices_param + backend_devices_idx;
+
+      int   device_id                 = device_param->device_id;
+      char *device_name               = device_param->device_name;
+      u32   device_processors         = device_param->device_processors;
+      u32   device_maxclock_frequency = device_param->device_maxclock_frequency;
+      u64   device_available_mem      = device_param->device_available_mem;
+      u64   device_global_mem         = device_param->device_global_mem;
+      u8    pcie_domain               = device_param->pcie_domain;
+      u8    pcie_bus                  = device_param->pcie_bus;
+      u8    pcie_device               = device_param->pcie_device;
+      u8    pcie_function             = device_param->pcie_function;
+
+      if (device_param->device_id_alias_cnt)
+      {
+        event_log_info (hashcat_ctx, "Backend Device ID #%d (Alias: #%d)", device_id + 1, device_param->device_id_alias_buf[0] + 1);
+      }
+      else
+      {
+        event_log_info (hashcat_ctx, "Backend Device ID #%d", device_id + 1);
+      }
+
+      event_log_info (hashcat_ctx, "  Name...........: %s", device_name);
+      event_log_info (hashcat_ctx, "  Processor(s)...: %u", device_processors);
+      event_log_info (hashcat_ctx, "  Clock..........: %u", device_maxclock_frequency);
+      event_log_info (hashcat_ctx, "  Memory.Total...: %" PRIu64 " MB", device_global_mem / 1024 / 1024);
+      event_log_info (hashcat_ctx, "  Memory.Free....: %" PRIu64 " MB", device_available_mem / 1024 / 1024);
+      event_log_info (hashcat_ctx, "  PCI.Addr.BDFe..: %04x:%02x:%02x.%d", (u16) pcie_domain, pcie_bus, pcie_device, pcie_function);
+      event_log_info (hashcat_ctx, NULL);
+    }
+  }
+
   if (backend_ctx->ocl)
   {
     event_log_info (hashcat_ctx, "OpenCL Info:");
@@ -908,6 +956,10 @@ void backend_info_compact (hashcat_ctx_t *hashcat_ctx)
   if (user_options->machine_readable == true) return;
   if (user_options->status_json      == true) return;
 
+  /**
+   * CUDA
+   */
+
   if (backend_ctx->cuda)
   {
     int cuda_devices_cnt    = backend_ctx->cuda_devices_cnt;
@@ -955,6 +1007,61 @@ void backend_info_compact (hashcat_ctx_t *hashcat_ctx)
     event_log_info (hashcat_ctx, NULL);
   }
 
+  /**
+   * HIP: compact summary of the HIP backend, one header line plus one line per HIP device
+   */
+
+  if (backend_ctx->hip)
+  {
+    int hip_devices_cnt    = backend_ctx->hip_devices_cnt;
+    int hip_driver_version = backend_ctx->hip_driver_version; // NOTE(review): printed below as (v / 1000).((v % 100) / 10) - confirm this matches how HIP encodes its version number
+
+    const size_t len = event_log_info (hashcat_ctx, "HIP API (HIP %d.%d)", hip_driver_version / 1000, (hip_driver_version % 100) / 10); // header line; the returned value sizes the underline below
+
+    char line[HCBUFSIZ_TINY] = { 0 };
+
+    memset (line, '=', len); // underline made of len '=' characters; NOTE(review): len is not checked against sizeof (line) here
+
+    line[len] = 0;
+
+    event_log_info (hashcat_ctx, "%s", line);
+
+    for (int hip_devices_idx = 0; hip_devices_idx < hip_devices_cnt; hip_devices_idx++)
+    {
+      const int backend_devices_idx = backend_ctx->backend_device_from_hip[hip_devices_idx]; // translate the HIP device index into an index into devices_param
+
+      const hc_device_param_t *device_param = backend_ctx->devices_param + backend_devices_idx;
+
+      int   device_id            = device_param->device_id; // printed as device_id + 1 below
+      char *device_name          = device_param->device_name;
+      u32   device_processors    = device_param->device_processors;
+      u64   device_global_mem    = device_param->device_global_mem;
+      u64   device_available_mem = device_param->device_available_mem;
+
+      if ((device_param->skipped == false) && (device_param->skipped_warning == false)) // full summary only for devices with neither flag set
+      {
+        event_log_info (hashcat_ctx, "* Device #%u: %s, %" PRIu64 "/%" PRIu64 " MB, %uMCU",
+                  device_id + 1,
+                  device_name,
+                  device_available_mem / 1024 / 1024, // bytes -> MB (available)
+                  device_global_mem    / 1024 / 1024, // bytes -> MB (total)
+                  device_processors);
+      }
+      else
+      {
+        event_log_info (hashcat_ctx, "* Device #%u: %s, skipped", // either flag set: name the device but report it as skipped
+                  device_id + 1,
+                  device_name);
+      }
+    }
+
+    event_log_info (hashcat_ctx, NULL); // NULL format; presumably emits an empty separator line - confirm in event_log_info
+  }
+
+  /**
+   * OpenCL
+   */
+
   if (backend_ctx->ocl)
   {
     cl_uint   opencl_platforms_cnt         = backend_ctx->opencl_platforms_cnt;
diff --git a/src/user_options.c b/src/user_options.c
index eca0244c5..b7e810b14 100644
--- a/src/user_options.c
+++ b/src/user_options.c
@@ -31,6 +31,7 @@ static const struct option long_options[] =
   {"attack-mode",               required_argument, NULL, IDX_ATTACK_MODE},
   {"backend-devices",           required_argument, NULL, IDX_BACKEND_DEVICES},
   {"backend-ignore-cuda",       no_argument,       NULL, IDX_BACKEND_IGNORE_CUDA},
+  {"backend-ignore-hip",        no_argument,       NULL, IDX_BACKEND_IGNORE_HIP},
   {"backend-ignore-opencl",     no_argument,       NULL, IDX_BACKEND_IGNORE_OPENCL},
   {"backend-info",              no_argument,       NULL, IDX_BACKEND_INFO},
   {"backend-vector-width",      required_argument, NULL, IDX_BACKEND_VECTOR_WIDTH},
@@ -162,6 +163,7 @@ int user_options_init (hashcat_ctx_t *hashcat_ctx)
   user_options->autodetect                = AUTODETECT;
   user_options->backend_devices           = NULL;
   user_options->backend_ignore_cuda       = BACKEND_IGNORE_CUDA;
+  user_options->backend_ignore_hip       = BACKEND_IGNORE_HIP;
   user_options->backend_ignore_opencl     = BACKEND_IGNORE_OPENCL;
   user_options->backend_info              = BACKEND_INFO;
   user_options->backend_vector_width      = BACKEND_VECTOR_WIDTH;
@@ -441,6 +443,7 @@ int user_options_getopt (hashcat_ctx_t *hashcat_ctx, int argc, char **argv)
       case IDX_HEX_WORDLIST:              user_options->hex_wordlist              = true;                            break;
       case IDX_CPU_AFFINITY:              user_options->cpu_affinity              = optarg;                          break;
       case IDX_BACKEND_IGNORE_CUDA:       user_options->backend_ignore_cuda       = true;                            break;
+      case IDX_BACKEND_IGNORE_HIP:        user_options->backend_ignore_hip        = true;                            break;
       case IDX_BACKEND_IGNORE_OPENCL:     user_options->backend_ignore_opencl     = true;                            break;
       case IDX_BACKEND_INFO:              user_options->backend_info              = true;                            break;
       case IDX_BACKEND_DEVICES:           user_options->backend_devices           = optarg;                          break;
diff --git a/tools/benchmark_deep.pl b/tools/benchmark_deep.pl
index fc7efad2c..df6777441 100755
--- a/tools/benchmark_deep.pl
+++ b/tools/benchmark_deep.pl
@@ -13,7 +13,7 @@ my $amd_cache        = "~/.AMD";
 my $hashcat_path     = ".";
 my $kernels_cache    = "$hashcat_path/kernels";
 my $hashcat_bin      = "$hashcat_path/hashcat";
-my $device           = 3;
+my $device           = 1;
 my $workload_profile = 3;
 my $runtime          = 24;
 my $sleep_sec        = 12;
diff --git a/tools/package_bin.sh b/tools/package_bin.sh
index e2f3a7659..38e2c82f4 100755
--- a/tools/package_bin.sh
+++ b/tools/package_bin.sh
@@ -6,7 +6,7 @@
 ##
 
 export IN=$HOME/hashcat
-export OUT=$HOME/xy/hashcat-6.2.2
+export OUT=$HOME/xy/hashcat-6.2.3
 
 rm -rf $OUT
 rm -rf $OUT.7z