1
0
mirror of https://github.com/hashcat/hashcat.git synced 2024-11-22 16:18:09 +00:00

Limit max threads per block to 64 to enable offline compiler to make better use if available registers

Fix double free() for hip_event1/hip_event2 and hip_stream causes segfault
Replace hc_cuCtxSetCurrent() with hc_cuCtxPushCurrent() in order to align changes with HIP
Add vector datatype operators (if we decide to use them - currently unused)
This commit is contained in:
Jens Steube 2021-07-12 09:28:26 +02:00
parent 1ab56afb9e
commit 23c3c178bf
10 changed files with 853 additions and 8 deletions

View File

@ -824,6 +824,766 @@ typedef __device_builtin__ struct u64x u64x;
#define make_u32x u32x
#define make_u64x u64x
#elif defined IS_HIP
// seems to work, but slow
/*
#if VECT_SIZE == 2
struct u8x
{
u8 s0;
u8 s1;
inline __device__ u8x (const u8 a, const u8 b) : s0(a), s1(b) { }
inline __device__ u8x (const u8 a) : s0(a), s1(a) { }
inline __device__ u8x (void) : s0(0), s1(0) { }
inline __device__ ~u8x (void) { }
};
struct u16x
{
u16 s0;
u16 s1;
inline __device__ u16x (const u16 a, const u16 b) : s0(a), s1(b) { }
inline __device__ u16x (const u16 a) : s0(a), s1(a) { }
inline __device__ u16x (void) : s0(0), s1(0) { }
inline __device__ ~u16x (void) { }
};
struct u32x
{
u32 s0;
u32 s1;
inline __device__ u32x (const u32 a, const u32 b) : s0(a), s1(b) { }
inline __device__ u32x (const u32 a) : s0(a), s1(a) { }
inline __device__ u32x (void) : s0(0), s1(0) { }
inline __device__ ~u32x (void) { }
};
struct u64x
{
u64 s0;
u64 s1;
inline __device__ u64x (const u64 a, const u64 b) : s0(a), s1(b) { }
inline __device__ u64x (const u64 a) : s0(a), s1(a) { }
inline __device__ u64x (void) : s0(0), s1(0) { }
inline __device__ ~u64x (void) { }
};
inline __device__ bool operator != (const u32x a, const u32 b) { return ((a.s0 != b) && (a.s1 != b)); }
inline __device__ bool operator != (const u32x a, const u32x b) { return ((a.s0 != b.s0) && (a.s1 != b.s1)); }
inline __device__ void operator ^= (u32x &a, const u32 b) { a.s0 ^= b; a.s1 ^= b; }
inline __device__ void operator ^= (u32x &a, const u32x b) { a.s0 ^= b.s0; a.s1 ^= b.s1; }
inline __device__ void operator |= (u32x &a, const u32 b) { a.s0 |= b; a.s1 |= b; }
inline __device__ void operator |= (u32x &a, const u32x b) { a.s0 |= b.s0; a.s1 |= b.s1; }
inline __device__ void operator &= (u32x &a, const u32 b) { a.s0 &= b; a.s1 &= b; }
inline __device__ void operator &= (u32x &a, const u32x b) { a.s0 &= b.s0; a.s1 &= b.s1; }
inline __device__ void operator += (u32x &a, const u32 b) { a.s0 += b; a.s1 += b; }
inline __device__ void operator += (u32x &a, const u32x b) { a.s0 += b.s0; a.s1 += b.s1; }
inline __device__ void operator -= (u32x &a, const u32 b) { a.s0 -= b; a.s1 -= b; }
inline __device__ void operator -= (u32x &a, const u32x b) { a.s0 -= b.s0; a.s1 -= b.s1; }
inline __device__ void operator *= (u32x &a, const u32 b) { a.s0 *= b; a.s1 *= b; }
inline __device__ void operator *= (u32x &a, const u32x b) { a.s0 *= b.s0; a.s1 *= b.s1; }
inline __device__ void operator >>= (u32x &a, const u32 b) { a.s0 >>= b; a.s1 >>= b; }
inline __device__ void operator >>= (u32x &a, const u32x b) { a.s0 >>= b.s0; a.s1 >>= b.s1; }
inline __device__ void operator <<= (u32x &a, const u32 b) { a.s0 <<= b; a.s1 <<= b; }
inline __device__ void operator <<= (u32x &a, const u32x b) { a.s0 <<= b.s0; a.s1 <<= b.s1; }
inline __device__ u32x operator << (const u32x a, const u32 b) { return u32x ((a.s0 << b), (a.s1 << b) ); }
inline __device__ u32x operator << (const u32x a, const u32x b) { return u32x ((a.s0 << b.s0), (a.s1 << b.s1)); }
inline __device__ u32x operator >> (const u32x a, const u32 b) { return u32x ((a.s0 >> b), (a.s1 >> b) ); }
inline __device__ u32x operator >> (const u32x a, const u32x b) { return u32x ((a.s0 >> b.s0), (a.s1 >> b.s1)); }
inline __device__ u32x operator ^ (const u32x a, const u32 b) { return u32x ((a.s0 ^ b), (a.s1 ^ b) ); }
inline __device__ u32x operator ^ (const u32x a, const u32x b) { return u32x ((a.s0 ^ b.s0), (a.s1 ^ b.s1)); }
inline __device__ u32x operator | (const u32x a, const u32 b) { return u32x ((a.s0 | b), (a.s1 | b) ); }
inline __device__ u32x operator | (const u32x a, const u32x b) { return u32x ((a.s0 | b.s0), (a.s1 | b.s1)); }
inline __device__ u32x operator & (const u32x a, const u32 b) { return u32x ((a.s0 & b), (a.s1 & b) ); }
inline __device__ u32x operator & (const u32x a, const u32x b) { return u32x ((a.s0 & b.s0), (a.s1 & b.s1)); }
inline __device__ u32x operator + (const u32x a, const u32 b) { return u32x ((a.s0 + b), (a.s1 + b) ); }
inline __device__ u32x operator + (const u32x a, const u32x b) { return u32x ((a.s0 + b.s0), (a.s1 + b.s1)); }
inline __device__ u32x operator - (const u32x a, const u32 b) { return u32x ((a.s0 - b), (a.s1 - b) ); }
inline __device__ u32x operator - (const u32x a, const u32x b) { return u32x ((a.s0 - b.s0), (a.s1 - b.s1)); }
inline __device__ u32x operator * (const u32x a, const u32 b) { return u32x ((a.s0 * b), (a.s1 * b) ); }
inline __device__ u32x operator * (const u32x a, const u32x b) { return u32x ((a.s0 * b.s0), (a.s1 * b.s1)); }
inline __device__ u32x operator % (const u32x a, const u32 b) { return u32x ((a.s0 % b), (a.s1 % b) ); }
inline __device__ u32x operator % (const u32x a, const u32x b) { return u32x ((a.s0 % b.s0), (a.s1 % b.s1)); }
inline __device__ u32x operator ~ (const u32x a) { return u32x (~a.s0, ~a.s1); }
inline __device__ bool operator != (const u64x a, const u64 b) { return ((a.s0 != b) && (a.s1 != b)); }
inline __device__ bool operator != (const u64x a, const u64x b) { return ((a.s0 != b.s0) && (a.s1 != b.s1)); }
inline __device__ void operator ^= (u64x &a, const u64 b) { a.s0 ^= b; a.s1 ^= b; }
inline __device__ void operator ^= (u64x &a, const u64x b) { a.s0 ^= b.s0; a.s1 ^= b.s1; }
inline __device__ void operator |= (u64x &a, const u64 b) { a.s0 |= b; a.s1 |= b; }
inline __device__ void operator |= (u64x &a, const u64x b) { a.s0 |= b.s0; a.s1 |= b.s1; }
inline __device__ void operator &= (u64x &a, const u64 b) { a.s0 &= b; a.s1 &= b; }
inline __device__ void operator &= (u64x &a, const u64x b) { a.s0 &= b.s0; a.s1 &= b.s1; }
inline __device__ void operator += (u64x &a, const u64 b) { a.s0 += b; a.s1 += b; }
inline __device__ void operator += (u64x &a, const u64x b) { a.s0 += b.s0; a.s1 += b.s1; }
inline __device__ void operator -= (u64x &a, const u64 b) { a.s0 -= b; a.s1 -= b; }
inline __device__ void operator -= (u64x &a, const u64x b) { a.s0 -= b.s0; a.s1 -= b.s1; }
inline __device__ void operator *= (u64x &a, const u64 b) { a.s0 *= b; a.s1 *= b; }
inline __device__ void operator *= (u64x &a, const u64x b) { a.s0 *= b.s0; a.s1 *= b.s1; }
inline __device__ void operator >>= (u64x &a, const u64 b) { a.s0 >>= b; a.s1 >>= b; }
inline __device__ void operator >>= (u64x &a, const u64x b) { a.s0 >>= b.s0; a.s1 >>= b.s1; }
inline __device__ void operator <<= (u64x &a, const u64 b) { a.s0 <<= b; a.s1 <<= b; }
inline __device__ void operator <<= (u64x &a, const u64x b) { a.s0 <<= b.s0; a.s1 <<= b.s1; }
inline __device__ u64x operator << (const u64x a, const u64 b) { return u64x ((a.s0 << b), (a.s1 << b) ); }
inline __device__ u64x operator << (const u64x a, const u64x b) { return u64x ((a.s0 << b.s0), (a.s1 << b.s1)); }
inline __device__ u64x operator >> (const u64x a, const u64 b) { return u64x ((a.s0 >> b), (a.s1 >> b) ); }
inline __device__ u64x operator >> (const u64x a, const u64x b) { return u64x ((a.s0 >> b.s0), (a.s1 >> b.s1)); }
inline __device__ u64x operator ^ (const u64x a, const u64 b) { return u64x ((a.s0 ^ b), (a.s1 ^ b) ); }
inline __device__ u64x operator ^ (const u64x a, const u64x b) { return u64x ((a.s0 ^ b.s0), (a.s1 ^ b.s1)); }
inline __device__ u64x operator | (const u64x a, const u64 b) { return u64x ((a.s0 | b), (a.s1 | b) ); }
inline __device__ u64x operator | (const u64x a, const u64x b) { return u64x ((a.s0 | b.s0), (a.s1 | b.s1)); }
inline __device__ u64x operator & (const u64x a, const u64 b) { return u64x ((a.s0 & b), (a.s1 & b) ); }
inline __device__ u64x operator & (const u64x a, const u64x b) { return u64x ((a.s0 & b.s0), (a.s1 & b.s1)); }
inline __device__ u64x operator + (const u64x a, const u64 b) { return u64x ((a.s0 + b), (a.s1 + b) ); }
inline __device__ u64x operator + (const u64x a, const u64x b) { return u64x ((a.s0 + b.s0), (a.s1 + b.s1)); }
inline __device__ u64x operator - (const u64x a, const u64 b) { return u64x ((a.s0 - b), (a.s1 - b) ); }
inline __device__ u64x operator - (const u64x a, const u64x b) { return u64x ((a.s0 - b.s0), (a.s1 - b.s1)); }
inline __device__ u64x operator * (const u64x a, const u64 b) { return u64x ((a.s0 * b), (a.s1 * b) ); }
inline __device__ u64x operator * (const u64x a, const u64x b) { return u64x ((a.s0 * b.s0), (a.s1 * b.s1)); }
inline __device__ u64x operator % (const u64x a, const u64 b) { return u64x ((a.s0 % b), (a.s1 % b) ); }
inline __device__ u64x operator % (const u64x a, const u64x b) { return u64x ((a.s0 % b.s0), (a.s1 % b.s1)); }
inline __device__ u64x operator ~ (const u64x a) { return u64x (~a.s0, ~a.s1); }
#endif
#if VECT_SIZE == 4
struct u8x
{
u8 s0;
u8 s1;
u8 s2;
u8 s3;
inline __device__ u8x (const u8 a, const u8 b, const u8 c, const u8 d) : s0(a), s1(b), s2(c), s3(d) { }
inline __device__ u8x (const u8 a) : s0(a), s1(a), s2(a), s3(a) { }
inline __device__ u8x (void) : s0(0), s1(0), s2(0), s3(0) { }
inline __device__ ~u8x (void) { }
};
struct u16x
{
u16 s0;
u16 s1;
u16 s2;
u16 s3;
inline __device__ u16x (const u16 a, const u16 b, const u16 c, const u16 d) : s0(a), s1(b), s2(c), s3(d) { }
inline __device__ u16x (const u16 a) : s0(a), s1(a), s2(a), s3(a) { }
inline __device__ u16x (void) : s0(0), s1(0), s2(0), s3(0) { }
inline __device__ ~u16x (void) { }
};
struct u32x
{
u32 s0;
u32 s1;
u32 s2;
u32 s3;
inline __device__ u32x (const u32 a, const u32 b, const u32 c, const u32 d) : s0(a), s1(b), s2(c), s3(d) { }
inline __device__ u32x (const u32 a) : s0(a), s1(a), s2(a), s3(a) { }
inline __device__ u32x (void) : s0(0), s1(0), s2(0), s3(0) { }
inline __device__ ~u32x (void) { }
};
struct u64x
{
u64 s0;
u64 s1;
u64 s2;
u64 s3;
inline __device__ u64x (const u64 a, const u64 b, const u64 c, const u64 d) : s0(a), s1(b), s2(c), s3(d) { }
inline __device__ u64x (const u64 a) : s0(a), s1(a), s2(a), s3(a) { }
inline __device__ u64x (void) : s0(0), s1(0), s2(0), s3(0) { }
inline __device__ ~u64x (void) { }
};
inline __device__ bool operator != (const u32x a, const u32 b) { return ((a.s0 != b) && (a.s1 != b) && (a.s2 != b) && (a.s3 != b) ); }
inline __device__ bool operator != (const u32x a, const u32x b) { return ((a.s0 != b.s0) && (a.s1 != b.s1) && (a.s2 != b.s2) && (a.s3 != b.s3)); }
inline __device__ void operator ^= (u32x &a, const u32 b) { a.s0 ^= b; a.s1 ^= b; a.s2 ^= b; a.s3 ^= b; }
inline __device__ void operator ^= (u32x &a, const u32x b) { a.s0 ^= b.s0; a.s1 ^= b.s1; a.s2 ^= b.s2; a.s3 ^= b.s3; }
inline __device__ void operator |= (u32x &a, const u32 b) { a.s0 |= b; a.s1 |= b; a.s2 |= b; a.s3 |= b; }
inline __device__ void operator |= (u32x &a, const u32x b) { a.s0 |= b.s0; a.s1 |= b.s1; a.s2 |= b.s2; a.s3 |= b.s3; }
inline __device__ void operator &= (u32x &a, const u32 b) { a.s0 &= b; a.s1 &= b; a.s2 &= b; a.s3 &= b; }
inline __device__ void operator &= (u32x &a, const u32x b) { a.s0 &= b.s0; a.s1 &= b.s1; a.s2 &= b.s2; a.s3 &= b.s3; }
inline __device__ void operator += (u32x &a, const u32 b) { a.s0 += b; a.s1 += b; a.s2 += b; a.s3 += b; }
inline __device__ void operator += (u32x &a, const u32x b) { a.s0 += b.s0; a.s1 += b.s1; a.s2 += b.s2; a.s3 += b.s3; }
inline __device__ void operator -= (u32x &a, const u32 b) { a.s0 -= b; a.s1 -= b; a.s2 -= b; a.s3 -= b; }
inline __device__ void operator -= (u32x &a, const u32x b) { a.s0 -= b.s0; a.s1 -= b.s1; a.s2 -= b.s2; a.s3 -= b.s3; }
inline __device__ void operator *= (u32x &a, const u32 b) { a.s0 *= b; a.s1 *= b; a.s2 *= b; a.s3 *= b; }
inline __device__ void operator *= (u32x &a, const u32x b) { a.s0 *= b.s0; a.s1 *= b.s1; a.s2 *= b.s2; a.s3 *= b.s3; }
inline __device__ void operator >>= (u32x &a, const u32 b) { a.s0 >>= b; a.s1 >>= b; a.s2 >>= b; a.s3 >>= b; }
inline __device__ void operator >>= (u32x &a, const u32x b) { a.s0 >>= b.s0; a.s1 >>= b.s1; a.s2 >>= b.s2; a.s3 >>= b.s3; }
inline __device__ void operator <<= (u32x &a, const u32 b) { a.s0 <<= b; a.s1 <<= b; a.s2 <<= b; a.s3 <<= b; }
inline __device__ void operator <<= (u32x &a, const u32x b) { a.s0 <<= b.s0; a.s1 <<= b.s1; a.s2 <<= b.s2; a.s3 <<= b.s3; }
inline __device__ u32x operator << (const u32x a, const u32 b) { return u32x ((a.s0 << b), (a.s1 << b) , (a.s2 << b), (a.s3 << b) ); }
inline __device__ u32x operator << (const u32x a, const u32x b) { return u32x ((a.s0 << b.s0), (a.s1 << b.s1), (a.s2 << b.s2), (a.s3 << b.s3)); }
inline __device__ u32x operator >> (const u32x a, const u32 b) { return u32x ((a.s0 >> b), (a.s1 >> b) , (a.s2 >> b), (a.s3 >> b) ); }
inline __device__ u32x operator >> (const u32x a, const u32x b) { return u32x ((a.s0 >> b.s0), (a.s1 >> b.s1), (a.s2 >> b.s2), (a.s3 >> b.s3)); }
inline __device__ u32x operator ^ (const u32x a, const u32 b) { return u32x ((a.s0 ^ b), (a.s1 ^ b) , (a.s2 ^ b), (a.s3 ^ b) ); }
inline __device__ u32x operator ^ (const u32x a, const u32x b) { return u32x ((a.s0 ^ b.s0), (a.s1 ^ b.s1), (a.s2 ^ b.s2), (a.s3 ^ b.s3)); }
inline __device__ u32x operator | (const u32x a, const u32 b) { return u32x ((a.s0 | b), (a.s1 | b) , (a.s2 | b), (a.s3 | b) ); }
inline __device__ u32x operator | (const u32x a, const u32x b) { return u32x ((a.s0 | b.s0), (a.s1 | b.s1), (a.s2 | b.s2), (a.s3 | b.s3)); }
inline __device__ u32x operator & (const u32x a, const u32 b) { return u32x ((a.s0 & b), (a.s1 & b) , (a.s2 & b), (a.s3 & b) ); }
inline __device__ u32x operator & (const u32x a, const u32x b) { return u32x ((a.s0 & b.s0), (a.s1 & b.s1), (a.s2 & b.s2), (a.s3 & b.s3)); }
inline __device__ u32x operator + (const u32x a, const u32 b) { return u32x ((a.s0 + b), (a.s1 + b) , (a.s2 + b), (a.s3 + b) ); }
inline __device__ u32x operator + (const u32x a, const u32x b) { return u32x ((a.s0 + b.s0), (a.s1 + b.s1), (a.s2 + b.s2), (a.s3 + b.s3)); }
inline __device__ u32x operator - (const u32x a, const u32 b) { return u32x ((a.s0 - b), (a.s1 - b) , (a.s2 - b), (a.s3 - b) ); }
inline __device__ u32x operator - (const u32x a, const u32x b) { return u32x ((a.s0 - b.s0), (a.s1 - b.s1), (a.s2 - b.s2), (a.s3 - b.s3)); }
inline __device__ u32x operator * (const u32x a, const u32 b) { return u32x ((a.s0 * b), (a.s1 * b) , (a.s2 * b), (a.s3 * b) ); }
inline __device__ u32x operator * (const u32x a, const u32x b) { return u32x ((a.s0 * b.s0), (a.s1 * b.s1), (a.s2 * b.s2), (a.s3 * b.s3)); }
inline __device__ u32x operator % (const u32x a, const u32 b) { return u32x ((a.s0 % b), (a.s1 % b) , (a.s2 % b), (a.s3 % b) ); }
inline __device__ u32x operator % (const u32x a, const u32x b) { return u32x ((a.s0 % b.s0), (a.s1 % b.s1), (a.s2 % b.s2), (a.s3 % b.s3)); }
inline __device__ u32x operator ~ (const u32x a) { return u32x (~a.s0, ~a.s1, ~a.s2, ~a.s3); }
inline __device__ bool operator != (const u64x a, const u64 b) { return ((a.s0 != b) && (a.s1 != b) && (a.s2 != b) && (a.s3 != b) ); }
inline __device__ bool operator != (const u64x a, const u64x b) { return ((a.s0 != b.s0) && (a.s1 != b.s1) && (a.s2 != b.s2) && (a.s3 != b.s3)); }
inline __device__ void operator ^= (u64x &a, const u64 b) { a.s0 ^= b; a.s1 ^= b; a.s2 ^= b; a.s3 ^= b; }
inline __device__ void operator ^= (u64x &a, const u64x b) { a.s0 ^= b.s0; a.s1 ^= b.s1; a.s2 ^= b.s2; a.s3 ^= b.s3; }
inline __device__ void operator |= (u64x &a, const u64 b) { a.s0 |= b; a.s1 |= b; a.s2 |= b; a.s3 |= b; }
inline __device__ void operator |= (u64x &a, const u64x b) { a.s0 |= b.s0; a.s1 |= b.s1; a.s2 |= b.s2; a.s3 |= b.s3; }
inline __device__ void operator &= (u64x &a, const u64 b) { a.s0 &= b; a.s1 &= b; a.s2 &= b; a.s3 &= b; }
inline __device__ void operator &= (u64x &a, const u64x b) { a.s0 &= b.s0; a.s1 &= b.s1; a.s2 &= b.s2; a.s3 &= b.s3; }
inline __device__ void operator += (u64x &a, const u64 b) { a.s0 += b; a.s1 += b; a.s2 += b; a.s3 += b; }
inline __device__ void operator += (u64x &a, const u64x b) { a.s0 += b.s0; a.s1 += b.s1; a.s2 += b.s2; a.s3 += b.s3; }
inline __device__ void operator -= (u64x &a, const u64 b) { a.s0 -= b; a.s1 -= b; a.s2 -= b; a.s3 -= b; }
inline __device__ void operator -= (u64x &a, const u64x b) { a.s0 -= b.s0; a.s1 -= b.s1; a.s2 -= b.s2; a.s3 -= b.s3; }
inline __device__ void operator *= (u64x &a, const u64 b) { a.s0 *= b; a.s1 *= b; a.s2 *= b; a.s3 *= b; }
inline __device__ void operator *= (u64x &a, const u64x b) { a.s0 *= b.s0; a.s1 *= b.s1; a.s2 *= b.s2; a.s3 *= b.s3; }
inline __device__ void operator >>= (u64x &a, const u64 b) { a.s0 >>= b; a.s1 >>= b; a.s2 >>= b; a.s3 >>= b; }
inline __device__ void operator >>= (u64x &a, const u64x b) { a.s0 >>= b.s0; a.s1 >>= b.s1; a.s2 >>= b.s2; a.s3 >>= b.s3; }
inline __device__ void operator <<= (u64x &a, const u64 b) { a.s0 <<= b; a.s1 <<= b; a.s2 <<= b; a.s3 <<= b; }
inline __device__ void operator <<= (u64x &a, const u64x b) { a.s0 <<= b.s0; a.s1 <<= b.s1; a.s2 <<= b.s2; a.s3 <<= b.s3; }
inline __device__ u64x operator << (const u64x a, const u64 b) { return u64x ((a.s0 << b), (a.s1 << b) , (a.s2 << b), (a.s3 << b) ); }
inline __device__ u64x operator << (const u64x a, const u64x b) { return u64x ((a.s0 << b.s0), (a.s1 << b.s1), (a.s2 << b.s2), (a.s3 << b.s3)); }
inline __device__ u64x operator >> (const u64x a, const u64 b) { return u64x ((a.s0 >> b), (a.s1 >> b) , (a.s2 >> b), (a.s3 >> b) ); }
inline __device__ u64x operator >> (const u64x a, const u64x b) { return u64x ((a.s0 >> b.s0), (a.s1 >> b.s1), (a.s2 >> b.s2), (a.s3 >> b.s3)); }
inline __device__ u64x operator ^ (const u64x a, const u64 b) { return u64x ((a.s0 ^ b), (a.s1 ^ b) , (a.s2 ^ b), (a.s3 ^ b) ); }
inline __device__ u64x operator ^ (const u64x a, const u64x b) { return u64x ((a.s0 ^ b.s0), (a.s1 ^ b.s1), (a.s2 ^ b.s2), (a.s3 ^ b.s3)); }
inline __device__ u64x operator | (const u64x a, const u64 b) { return u64x ((a.s0 | b), (a.s1 | b) , (a.s2 | b), (a.s3 | b) ); }
inline __device__ u64x operator | (const u64x a, const u64x b) { return u64x ((a.s0 | b.s0), (a.s1 | b.s1), (a.s2 | b.s2), (a.s3 | b.s3)); }
inline __device__ u64x operator & (const u64x a, const u64 b) { return u64x ((a.s0 & b), (a.s1 & b) , (a.s2 & b), (a.s3 & b) ); }
inline __device__ u64x operator & (const u64x a, const u64x b) { return u64x ((a.s0 & b.s0), (a.s1 & b.s1), (a.s2 & b.s2), (a.s3 & b.s3)); }
inline __device__ u64x operator + (const u64x a, const u64 b) { return u64x ((a.s0 + b), (a.s1 + b) , (a.s2 + b), (a.s3 + b) ); }
inline __device__ u64x operator + (const u64x a, const u64x b) { return u64x ((a.s0 + b.s0), (a.s1 + b.s1), (a.s2 + b.s2), (a.s3 + b.s3)); }
inline __device__ u64x operator - (const u64x a, const u64 b) { return u64x ((a.s0 - b), (a.s1 - b) , (a.s2 - b), (a.s3 - b) ); }
inline __device__ u64x operator - (const u64x a, const u64x b) { return u64x ((a.s0 - b.s0), (a.s1 - b.s1), (a.s2 - b.s2), (a.s3 - b.s3)); }
inline __device__ u64x operator * (const u64x a, const u64 b) { return u64x ((a.s0 * b), (a.s1 * b) , (a.s2 * b), (a.s3 * b) ); }
inline __device__ u64x operator * (const u64x a, const u64x b) { return u64x ((a.s0 * b.s0), (a.s1 * b.s1), (a.s2 * b.s2), (a.s3 * b.s3)); }
inline __device__ u64x operator % (const u64x a, const u32 b) { return u64x ((a.s0 % b), (a.s1 % b) , (a.s2 % b), (a.s3 % b) ); }
inline __device__ u64x operator % (const u64x a, const u64x b) { return u64x ((a.s0 % b.s0), (a.s1 % b.s1), (a.s2 % b.s2), (a.s3 % b.s3)); }
inline __device__ u64x operator ~ (const u64x a) { return u64x (~a.s0, ~a.s1, ~a.s2, ~a.s3); }
#endif
#if VECT_SIZE == 8
struct u8x
{
u8 s0;
u8 s1;
u8 s2;
u8 s3;
u8 s4;
u8 s5;
u8 s6;
u8 s7;
inline __device__ u8x (const u8 a, const u8 b, const u8 c, const u8 d, const u8 e, const u8 f, const u8 g, const u8 h) : s0(a), s1(b), s2(c), s3(d), s4(e), s5(f), s6(g), s7(h) { }
inline __device__ u8x (const u8 a) : s0(a), s1(a), s2(a), s3(a), s4(a), s5(a), s6(a), s7(a) { }
inline __device__ u8x (void) : s0(0), s1(0), s2(0), s3(0), s4(0), s5(0), s6(0), s7(0) { }
inline __device__ ~u8x (void) { }
};
struct u16x
{
u16 s0;
u16 s1;
u16 s2;
u16 s3;
u16 s4;
u16 s5;
u16 s6;
u16 s7;
inline __device__ u16x (const u16 a, const u16 b, const u16 c, const u16 d, const u16 e, const u16 f, const u16 g, const u16 h) : s0(a), s1(b), s2(c), s3(d), s4(e), s5(f), s6(g), s7(h) { }
inline __device__ u16x (const u16 a) : s0(a), s1(a), s2(a), s3(a), s4(a), s5(a), s6(a), s7(a) { }
inline __device__ u16x (void) : s0(0), s1(0), s2(0), s3(0), s4(0), s5(0), s6(0), s7(0) { }
inline __device__ ~u16x (void) { }
};
struct u32x
{
u32 s0;
u32 s1;
u32 s2;
u32 s3;
u32 s4;
u32 s5;
u32 s6;
u32 s7;
inline __device__ u32x (const u32 a, const u32 b, const u32 c, const u32 d, const u32 e, const u32 f, const u32 g, const u32 h) : s0(a), s1(b), s2(c), s3(d), s4(e), s5(f), s6(g), s7(h) { }
inline __device__ u32x (const u32 a) : s0(a), s1(a), s2(a), s3(a), s4(a), s5(a), s6(a), s7(a) { }
inline __device__ u32x (void) : s0(0), s1(0), s2(0), s3(0), s4(0), s5(0), s6(0), s7(0) { }
inline __device__ ~u32x (void) { }
};
struct u64x
{
u64 s0;
u64 s1;
u64 s2;
u64 s3;
u64 s4;
u64 s5;
u64 s6;
u64 s7;
inline __device__ u64x (const u64 a, const u64 b, const u64 c, const u64 d, const u64 e, const u64 f, const u64 g, const u64 h) : s0(a), s1(b), s2(c), s3(d), s4(e), s5(f), s6(g), s7(h) { }
inline __device__ u64x (const u64 a) : s0(a), s1(a), s2(a), s3(a), s4(a), s5(a), s6(a), s7(a) { }
inline __device__ u64x (void) : s0(0), s1(0), s2(0), s3(0), s4(0), s5(0), s6(0), s7(0) { }
inline __device__ ~u64x (void) { }
};
inline __device__ bool operator != (const u32x a, const u32 b) { return ((a.s0 != b) && (a.s1 != b) && (a.s2 != b) && (a.s3 != b) && (a.s4 != b) && (a.s5 != b) && (a.s6 != b) && (a.s7 != b) ); }
inline __device__ bool operator != (const u32x a, const u32x b) { return ((a.s0 != b.s0) && (a.s1 != b.s1) && (a.s2 != b.s2) && (a.s3 != b.s3) && (a.s4 != b.s4) && (a.s5 != b.s5) && (a.s6 != b.s6) && (a.s7 != b.s7)); }
inline __device__ void operator ^= (u32x &a, const u32 b) { a.s0 ^= b; a.s1 ^= b; a.s2 ^= b; a.s3 ^= b; a.s4 ^= b; a.s5 ^= b; a.s6 ^= b; a.s7 ^= b; }
inline __device__ void operator ^= (u32x &a, const u32x b) { a.s0 ^= b.s0; a.s1 ^= b.s1; a.s2 ^= b.s2; a.s3 ^= b.s3; a.s4 ^= b.s4; a.s5 ^= b.s5; a.s6 ^= b.s6; a.s7 ^= b.s7; }
inline __device__ void operator |= (u32x &a, const u32 b) { a.s0 |= b; a.s1 |= b; a.s2 |= b; a.s3 |= b; a.s4 |= b; a.s5 |= b; a.s6 |= b; a.s7 |= b; }
inline __device__ void operator |= (u32x &a, const u32x b) { a.s0 |= b.s0; a.s1 |= b.s1; a.s2 |= b.s2; a.s3 |= b.s3; a.s4 |= b.s4; a.s5 |= b.s5; a.s6 |= b.s6; a.s7 |= b.s7; }
inline __device__ void operator &= (u32x &a, const u32 b) { a.s0 &= b; a.s1 &= b; a.s2 &= b; a.s3 &= b; a.s4 &= b; a.s5 &= b; a.s6 &= b; a.s7 &= b; }
inline __device__ void operator &= (u32x &a, const u32x b) { a.s0 &= b.s0; a.s1 &= b.s1; a.s2 &= b.s2; a.s3 &= b.s3; a.s4 &= b.s4; a.s5 &= b.s5; a.s6 &= b.s6; a.s7 &= b.s7; }
inline __device__ void operator += (u32x &a, const u32 b) { a.s0 += b; a.s1 += b; a.s2 += b; a.s3 += b; a.s4 += b; a.s5 += b; a.s6 += b; a.s7 += b; }
inline __device__ void operator += (u32x &a, const u32x b) { a.s0 += b.s0; a.s1 += b.s1; a.s2 += b.s2; a.s3 += b.s3; a.s4 += b.s4; a.s5 += b.s5; a.s6 += b.s6; a.s7 += b.s7; }
inline __device__ void operator -= (u32x &a, const u32 b) { a.s0 -= b; a.s1 -= b; a.s2 -= b; a.s3 -= b; a.s4 -= b; a.s5 -= b; a.s6 -= b; a.s7 -= b; }
inline __device__ void operator -= (u32x &a, const u32x b) { a.s0 -= b.s0; a.s1 -= b.s1; a.s2 -= b.s2; a.s3 -= b.s3; a.s4 -= b.s4; a.s5 -= b.s5; a.s6 -= b.s6; a.s7 -= b.s7; }
inline __device__ void operator *= (u32x &a, const u32 b) { a.s0 *= b; a.s1 *= b; a.s2 *= b; a.s3 *= b; a.s4 *= b; a.s5 *= b; a.s6 *= b; a.s7 *= b; }
inline __device__ void operator *= (u32x &a, const u32x b) { a.s0 *= b.s0; a.s1 *= b.s1; a.s2 *= b.s2; a.s3 *= b.s3; a.s4 *= b.s4; a.s5 *= b.s5; a.s6 *= b.s6; a.s7 *= b.s7; }
inline __device__ void operator >>= (u32x &a, const u32 b) { a.s0 >>= b; a.s1 >>= b; a.s2 >>= b; a.s3 >>= b; a.s4 >>= b; a.s5 >>= b; a.s6 >>= b; a.s7 >>= b; }
inline __device__ void operator >>= (u32x &a, const u32x b) { a.s0 >>= b.s0; a.s1 >>= b.s1; a.s2 >>= b.s2; a.s3 >>= b.s3; a.s4 >>= b.s4; a.s5 >>= b.s5; a.s6 >>= b.s6; a.s7 >>= b.s7; }
inline __device__ void operator <<= (u32x &a, const u32 b) { a.s0 <<= b; a.s1 <<= b; a.s2 <<= b; a.s3 <<= b; a.s4 <<= b; a.s5 <<= b; a.s6 <<= b; a.s7 <<= b; }
inline __device__ void operator <<= (u32x &a, const u32x b) { a.s0 <<= b.s0; a.s1 <<= b.s1; a.s2 <<= b.s2; a.s3 <<= b.s3; a.s4 <<= b.s4; a.s5 <<= b.s5; a.s6 <<= b.s6; a.s7 <<= b.s7; }
inline __device__ u32x operator << (const u32x a, const u32 b) { return u32x ((a.s0 << b), (a.s1 << b) , (a.s2 << b), (a.s3 << b) , (a.s4 << b), (a.s5 << b) , (a.s6 << b), (a.s7 << b) ); }
inline __device__ u32x operator << (const u32x a, const u32x b) { return u32x ((a.s0 << b.s0), (a.s1 << b.s1), (a.s2 << b.s2), (a.s3 << b.s3), (a.s4 << b.s4), (a.s5 << b.s5), (a.s6 << b.s6), (a.s7 << b.s7)); }
inline __device__ u32x operator >> (const u32x a, const u32 b) { return u32x ((a.s0 >> b), (a.s1 >> b) , (a.s2 >> b), (a.s3 >> b) , (a.s4 >> b), (a.s5 >> b) , (a.s6 >> b), (a.s7 >> b) ); }
inline __device__ u32x operator >> (const u32x a, const u32x b) { return u32x ((a.s0 >> b.s0), (a.s1 >> b.s1), (a.s2 >> b.s2), (a.s3 >> b.s3), (a.s4 >> b.s4), (a.s5 >> b.s5), (a.s6 >> b.s6), (a.s7 >> b.s7)); }
inline __device__ u32x operator ^ (const u32x a, const u32 b) { return u32x ((a.s0 ^ b), (a.s1 ^ b) , (a.s2 ^ b), (a.s3 ^ b) , (a.s4 ^ b), (a.s5 ^ b) , (a.s6 ^ b), (a.s7 ^ b) ); }
inline __device__ u32x operator ^ (const u32x a, const u32x b) { return u32x ((a.s0 ^ b.s0), (a.s1 ^ b.s1), (a.s2 ^ b.s2), (a.s3 ^ b.s3), (a.s4 ^ b.s4), (a.s5 ^ b.s5), (a.s6 ^ b.s6), (a.s7 ^ b.s7)); }
inline __device__ u32x operator | (const u32x a, const u32 b) { return u32x ((a.s0 | b), (a.s1 | b) , (a.s2 | b), (a.s3 | b) , (a.s4 | b), (a.s5 | b) , (a.s6 | b), (a.s7 | b) ); }
inline __device__ u32x operator | (const u32x a, const u32x b) { return u32x ((a.s0 | b.s0), (a.s1 | b.s1), (a.s2 | b.s2), (a.s3 | b.s3), (a.s4 | b.s4), (a.s5 | b.s5), (a.s6 | b.s6), (a.s7 | b.s7)); }
inline __device__ u32x operator & (const u32x a, const u32 b) { return u32x ((a.s0 & b), (a.s1 & b) , (a.s2 & b), (a.s3 & b) , (a.s4 & b), (a.s5 & b) , (a.s6 & b), (a.s7 & b) ); }
inline __device__ u32x operator & (const u32x a, const u32x b) { return u32x ((a.s0 & b.s0), (a.s1 & b.s1), (a.s2 & b.s2), (a.s3 & b.s3), (a.s4 & b.s4), (a.s5 & b.s5), (a.s6 & b.s6), (a.s7 & b.s7)); }
inline __device__ u32x operator + (const u32x a, const u32 b) { return u32x ((a.s0 + b), (a.s1 + b) , (a.s2 + b), (a.s3 + b) , (a.s4 + b), (a.s5 + b) , (a.s6 + b), (a.s7 + b) ); }
inline __device__ u32x operator + (const u32x a, const u32x b) { return u32x ((a.s0 + b.s0), (a.s1 + b.s1), (a.s2 + b.s2), (a.s3 + b.s3), (a.s4 + b.s4), (a.s5 + b.s5), (a.s6 + b.s6), (a.s7 + b.s7)); }
inline __device__ u32x operator - (const u32x a, const u32 b) { return u32x ((a.s0 - b), (a.s1 - b) , (a.s2 - b), (a.s3 - b) , (a.s4 - b), (a.s5 - b) , (a.s6 - b), (a.s7 - b) ); }
inline __device__ u32x operator - (const u32x a, const u32x b) { return u32x ((a.s0 - b.s0), (a.s1 - b.s1), (a.s2 - b.s2), (a.s3 - b.s3), (a.s4 - b.s4), (a.s5 - b.s5), (a.s6 - b.s6), (a.s7 - b.s7)); }
inline __device__ u32x operator * (const u32x a, const u32 b) { return u32x ((a.s0 * b), (a.s1 * b) , (a.s2 * b), (a.s3 * b) , (a.s4 * b), (a.s5 * b) , (a.s6 * b), (a.s7 * b) ); }
inline __device__ u32x operator * (const u32x a, const u32x b) { return u32x ((a.s0 * b.s0), (a.s1 * b.s1), (a.s2 * b.s2), (a.s3 * b.s3), (a.s4 * b.s4), (a.s5 * b.s5), (a.s6 * b.s6), (a.s7 * b.s7)); }
inline __device__ u32x operator % (const u32x a, const u32 b) { return u32x ((a.s0 % b), (a.s1 % b) , (a.s2 % b), (a.s3 % b) , (a.s4 % b), (a.s5 % b) , (a.s6 % b), (a.s7 % b) ); }
inline __device__ u32x operator % (const u32x a, const u32x b) { return u32x ((a.s0 % b.s0), (a.s1 % b.s1), (a.s2 % b.s2), (a.s3 % b.s3), (a.s4 % b.s4), (a.s5 % b.s5), (a.s6 % b.s6), (a.s7 % b.s7)); }
inline __device__ u32x operator ~ (const u32x a) { return u32x (~a.s0, ~a.s1, ~a.s2, ~a.s3, ~a.s4, ~a.s5, ~a.s6, ~a.s7); }
inline __device__ bool operator != (const u64x a, const u64 b) { return ((a.s0 != b) && (a.s1 != b) && (a.s2 != b) && (a.s3 != b) && (a.s4 != b) && (a.s5 != b) && (a.s6 != b) && (a.s7 != b) ); }
inline __device__ bool operator != (const u64x a, const u64x b) { return ((a.s0 != b.s0) && (a.s1 != b.s1) && (a.s2 != b.s2) && (a.s3 != b.s3) && (a.s4 != b.s4) && (a.s5 != b.s5) && (a.s6 != b.s6) && (a.s7 != b.s7)); }
inline __device__ void operator ^= (u64x &a, const u64 b) { a.s0 ^= b; a.s1 ^= b; a.s2 ^= b; a.s3 ^= b; a.s4 ^= b; a.s5 ^= b; a.s6 ^= b; a.s7 ^= b; }
inline __device__ void operator ^= (u64x &a, const u64x b) { a.s0 ^= b.s0; a.s1 ^= b.s1; a.s2 ^= b.s2; a.s3 ^= b.s3; a.s4 ^= b.s4; a.s5 ^= b.s5; a.s6 ^= b.s6; a.s7 ^= b.s7; }
inline __device__ void operator |= (u64x &a, const u64 b) { a.s0 |= b; a.s1 |= b; a.s2 |= b; a.s3 |= b; a.s4 |= b; a.s5 |= b; a.s6 |= b; a.s7 |= b; }
inline __device__ void operator |= (u64x &a, const u64x b) { a.s0 |= b.s0; a.s1 |= b.s1; a.s2 |= b.s2; a.s3 |= b.s3; a.s4 |= b.s4; a.s5 |= b.s5; a.s6 |= b.s6; a.s7 |= b.s7; }
inline __device__ void operator &= (u64x &a, const u64 b) { a.s0 &= b; a.s1 &= b; a.s2 &= b; a.s3 &= b; a.s4 &= b; a.s5 &= b; a.s6 &= b; a.s7 &= b; }
inline __device__ void operator &= (u64x &a, const u64x b) { a.s0 &= b.s0; a.s1 &= b.s1; a.s2 &= b.s2; a.s3 &= b.s3; a.s4 &= b.s4; a.s5 &= b.s5; a.s6 &= b.s6; a.s7 &= b.s7; }
inline __device__ void operator += (u64x &a, const u64 b) { a.s0 += b; a.s1 += b; a.s2 += b; a.s3 += b; a.s4 += b; a.s5 += b; a.s6 += b; a.s7 += b; }
inline __device__ void operator += (u64x &a, const u64x b) { a.s0 += b.s0; a.s1 += b.s1; a.s2 += b.s2; a.s3 += b.s3; a.s4 += b.s4; a.s5 += b.s5; a.s6 += b.s6; a.s7 += b.s7; }
inline __device__ void operator -= (u64x &a, const u64 b) { a.s0 -= b; a.s1 -= b; a.s2 -= b; a.s3 -= b; a.s4 -= b; a.s5 -= b; a.s6 -= b; a.s7 -= b; }
inline __device__ void operator -= (u64x &a, const u64x b) { a.s0 -= b.s0; a.s1 -= b.s1; a.s2 -= b.s2; a.s3 -= b.s3; a.s4 -= b.s4; a.s5 -= b.s5; a.s6 -= b.s6; a.s7 -= b.s7; }
inline __device__ void operator *= (u64x &a, const u64 b) { a.s0 *= b; a.s1 *= b; a.s2 *= b; a.s3 *= b; a.s4 *= b; a.s5 *= b; a.s6 *= b; a.s7 *= b; }
inline __device__ void operator *= (u64x &a, const u64x b) { a.s0 *= b.s0; a.s1 *= b.s1; a.s2 *= b.s2; a.s3 *= b.s3; a.s4 *= b.s4; a.s5 *= b.s5; a.s6 *= b.s6; a.s7 *= b.s7; }
inline __device__ void operator >>= (u64x &a, const u64 b) { a.s0 >>= b; a.s1 >>= b; a.s2 >>= b; a.s3 >>= b; a.s4 >>= b; a.s5 >>= b; a.s6 >>= b; a.s7 >>= b; }
inline __device__ void operator >>= (u64x &a, const u64x b) { a.s0 >>= b.s0; a.s1 >>= b.s1; a.s2 >>= b.s2; a.s3 >>= b.s3; a.s4 >>= b.s4; a.s5 >>= b.s5; a.s6 >>= b.s6; a.s7 >>= b.s7; }
inline __device__ void operator <<= (u64x &a, const u64 b) { a.s0 <<= b; a.s1 <<= b; a.s2 <<= b; a.s3 <<= b; a.s4 <<= b; a.s5 <<= b; a.s6 <<= b; a.s7 <<= b; }
inline __device__ void operator <<= (u64x &a, const u64x b) { a.s0 <<= b.s0; a.s1 <<= b.s1; a.s2 <<= b.s2; a.s3 <<= b.s3; a.s4 <<= b.s4; a.s5 <<= b.s5; a.s6 <<= b.s6; a.s7 <<= b.s7; }
inline __device__ u64x operator << (const u64x a, const u64 b) { return u64x ((a.s0 << b), (a.s1 << b) , (a.s2 << b), (a.s3 << b) , (a.s4 << b), (a.s5 << b) , (a.s6 << b), (a.s7 << b) ); }
inline __device__ u64x operator << (const u64x a, const u64x b) { return u64x ((a.s0 << b.s0), (a.s1 << b.s1), (a.s2 << b.s2), (a.s3 << b.s3), (a.s4 << b.s4), (a.s5 << b.s5), (a.s6 << b.s6), (a.s7 << b.s7)); }
inline __device__ u64x operator >> (const u64x a, const u64 b) { return u64x ((a.s0 >> b), (a.s1 >> b) , (a.s2 >> b), (a.s3 >> b) , (a.s4 >> b), (a.s5 >> b) , (a.s6 >> b), (a.s7 >> b) ); }
inline __device__ u64x operator >> (const u64x a, const u64x b) { return u64x ((a.s0 >> b.s0), (a.s1 >> b.s1), (a.s2 >> b.s2), (a.s3 >> b.s3), (a.s4 >> b.s4), (a.s5 >> b.s5), (a.s6 >> b.s6), (a.s7 >> b.s7)); }
inline __device__ u64x operator ^ (const u64x a, const u64 b) { return u64x ((a.s0 ^ b), (a.s1 ^ b) , (a.s2 ^ b), (a.s3 ^ b) , (a.s4 ^ b), (a.s5 ^ b) , (a.s6 ^ b), (a.s7 ^ b) ); }
inline __device__ u64x operator ^ (const u64x a, const u64x b) { return u64x ((a.s0 ^ b.s0), (a.s1 ^ b.s1), (a.s2 ^ b.s2), (a.s3 ^ b.s3), (a.s4 ^ b.s4), (a.s5 ^ b.s5), (a.s6 ^ b.s6), (a.s7 ^ b.s7)); }
inline __device__ u64x operator | (const u64x a, const u64 b) { return u64x ((a.s0 | b), (a.s1 | b) , (a.s2 | b), (a.s3 | b) , (a.s4 | b), (a.s5 | b) , (a.s6 | b), (a.s7 | b) ); }
inline __device__ u64x operator | (const u64x a, const u64x b) { return u64x ((a.s0 | b.s0), (a.s1 | b.s1), (a.s2 | b.s2), (a.s3 | b.s3), (a.s4 | b.s4), (a.s5 | b.s5), (a.s6 | b.s6), (a.s7 | b.s7)); }
inline __device__ u64x operator & (const u64x a, const u64 b) { return u64x ((a.s0 & b), (a.s1 & b) , (a.s2 & b), (a.s3 & b) , (a.s4 & b), (a.s5 & b) , (a.s6 & b), (a.s7 & b) ); }
inline __device__ u64x operator & (const u64x a, const u64x b) { return u64x ((a.s0 & b.s0), (a.s1 & b.s1), (a.s2 & b.s2), (a.s3 & b.s3), (a.s4 & b.s4), (a.s5 & b.s5), (a.s6 & b.s6), (a.s7 & b.s7)); }
inline __device__ u64x operator + (const u64x a, const u64 b) { return u64x ((a.s0 + b), (a.s1 + b) , (a.s2 + b), (a.s3 + b) , (a.s4 + b), (a.s5 + b) , (a.s6 + b), (a.s7 + b) ); }
inline __device__ u64x operator + (const u64x a, const u64x b) { return u64x ((a.s0 + b.s0), (a.s1 + b.s1), (a.s2 + b.s2), (a.s3 + b.s3), (a.s4 + b.s4), (a.s5 + b.s5), (a.s6 + b.s6), (a.s7 + b.s7)); }
inline __device__ u64x operator - (const u64x a, const u64 b) { return u64x ((a.s0 - b), (a.s1 - b) , (a.s2 - b), (a.s3 - b) , (a.s4 - b), (a.s5 - b) , (a.s6 - b), (a.s7 - b) ); }
inline __device__ u64x operator - (const u64x a, const u64x b) { return u64x ((a.s0 - b.s0), (a.s1 - b.s1), (a.s2 - b.s2), (a.s3 - b.s3), (a.s4 - b.s4), (a.s5 - b.s5), (a.s6 - b.s6), (a.s7 - b.s7)); }
inline __device__ u64x operator * (const u64x a, const u64 b) { return u64x ((a.s0 * b), (a.s1 * b) , (a.s2 * b), (a.s3 * b) , (a.s4 * b), (a.s5 * b) , (a.s6 * b), (a.s7 * b) ); }
inline __device__ u64x operator * (const u64x a, const u64x b) { return u64x ((a.s0 * b.s0), (a.s1 * b.s1), (a.s2 * b.s2), (a.s3 * b.s3), (a.s4 * b.s4), (a.s5 * b.s5), (a.s6 * b.s6), (a.s7 * b.s7)); }
inline __device__ u64x operator % (const u64x a, const u64 b) { return u64x ((a.s0 % b), (a.s1 % b) , (a.s2 % b), (a.s3 % b) , (a.s4 % b), (a.s5 % b) , (a.s6 % b), (a.s7 % b) ); }
inline __device__ u64x operator % (const u64x a, const u64x b) { return u64x ((a.s0 % b.s0), (a.s1 % b.s1), (a.s2 % b.s2), (a.s3 % b.s3), (a.s4 % b.s4), (a.s5 % b.s5), (a.s6 % b.s6), (a.s7 % b.s7)); }
inline __device__ u64x operator ~ (const u64x a) { return u64x (~a.s0, ~a.s1, ~a.s2, ~a.s3, ~a.s4, ~a.s5, ~a.s6, ~a.s7); }
#endif
#if VECT_SIZE == 16
struct u8x
{
u8 s0;
u8 s1;
u8 s2;
u8 s3;
u8 s4;
u8 s5;
u8 s6;
u8 s7;
u8 s8;
u8 s9;
u8 sa;
u8 sb;
u8 sc;
u8 sd;
u8 se;
u8 sf;
inline __device__ u8x (const u8 a, const u8 b, const u8 c, const u8 d, const u8 e, const u8 f, const u8 g, const u8 h, const u8 i, const u8 j, const u8 k, const u8 l, const u8 m, const u8 n, const u8 o, const u8 p) : s0(a), s1(b), s2(c), s3(d), s4(e), s5(f), s6(g), s7(h), s8(i), s9(j), sa(k), sb(l), sc(m), sd(n), se(o), sf(p) { }
inline __device__ u8x (const u8 a) : s0(a), s1(a), s2(a), s3(a), s4(a), s5(a), s6(a), s7(a), s8(a), s9(a), sa(a), sb(a), sc(a), sd(a), se(a), sf(a) { }
inline __device__ u8x (void) : s0(0), s1(0), s2(0), s3(0), s4(0), s5(0), s6(0), s7(0), s8(0), s9(0), sa(0), sb(0), sc(0), sd(0), se(0), sf(0) { }
inline __device__ ~u8x (void) { }
};
struct u16x
{
u16 s0;
u16 s1;
u16 s2;
u16 s3;
u16 s4;
u16 s5;
u16 s6;
u16 s7;
u16 s8;
u16 s9;
u16 sa;
u16 sb;
u16 sc;
u16 sd;
u16 se;
u16 sf;
inline __device__ u16x (const u16 a, const u16 b, const u16 c, const u16 d, const u16 e, const u16 f, const u16 g, const u16 h, const u16 i, const u16 j, const u16 k, const u16 l, const u16 m, const u16 n, const u16 o, const u16 p) : s0(a), s1(b), s2(c), s3(d), s4(e), s5(f), s6(g), s7(h), s8(i), s9(j), sa(k), sb(l), sc(m), sd(n), se(o), sf(p) { }
inline __device__ u16x (const u16 a) : s0(a), s1(a), s2(a), s3(a), s4(a), s5(a), s6(a), s7(a), s8(a), s9(a), sa(a), sb(a), sc(a), sd(a), se(a), sf(a) { }
inline __device__ u16x (void) : s0(0), s1(0), s2(0), s3(0), s4(0), s5(0), s6(0), s7(0), s8(0), s9(0), sa(0), sb(0), sc(0), sd(0), se(0), sf(0){ }
inline __device__ ~u16x (void) { }
};
struct u32x
{
u32 s0;
u32 s1;
u32 s2;
u32 s3;
u32 s4;
u32 s5;
u32 s6;
u32 s7;
u32 s8;
u32 s9;
u32 sa;
u32 sb;
u32 sc;
u32 sd;
u32 se;
u32 sf;
inline __device__ u32x (const u32 a, const u32 b, const u32 c, const u32 d, const u32 e, const u32 f, const u32 g, const u32 h, const u32 i, const u32 j, const u32 k, const u32 l, const u32 m, const u32 n, const u32 o, const u32 p) : s0(a), s1(b), s2(c), s3(d), s4(e), s5(f), s6(g), s7(h), s8(i), s9(j), sa(k), sb(l), sc(m), sd(n), se(o), sf(p) { }
inline __device__ u32x (const u32 a) : s0(a), s1(a), s2(a), s3(a), s4(a), s5(a), s6(a), s7(a), s8(a), s9(a), sa(a), sb(a), sc(a), sd(a), se(a), sf(a) { }
inline __device__ u32x (void) : s0(0), s1(0), s2(0), s3(0), s4(0), s5(0), s6(0), s7(0), s8(0), s9(0), sa(0), sb(0), sc(0), sd(0), se(0), sf(0){ }
inline __device__ ~u32x (void) { }
};
struct u64x
{
u64 s0;
u64 s1;
u64 s2;
u64 s3;
u64 s4;
u64 s5;
u64 s6;
u64 s7;
u64 s8;
u64 s9;
u64 sa;
u64 sb;
u64 sc;
u64 sd;
u64 se;
u64 sf;
inline __device__ u64x (const u64 a, const u64 b, const u64 c, const u64 d, const u64 e, const u64 f, const u64 g, const u64 h, const u64 i, const u64 j, const u64 k, const u64 l, const u64 m, const u64 n, const u64 o, const u64 p) : s0(a), s1(b), s2(c), s3(d), s4(e), s5(f), s6(g), s7(h), s8(i), s9(j), sa(k), sb(l), sc(m), sd(n), se(o), sf(p) { }
inline __device__ u64x (const u64 a) : s0(a), s1(a), s2(a), s3(a), s4(a), s5(a), s6(a), s7(a), s8(a), s9(a), sa(a), sb(a), sc(a), sd(a), se(a), sf(a) { }
inline __device__ u64x (void) : s0(0), s1(0), s2(0), s3(0), s4(0), s5(0), s6(0), s7(0), s8(0), s9(0), sa(0), sb(0), sc(0), sd(0), se(0), sf(0) { }
inline __device__ ~u64x (void) { }
};
inline __device__ bool operator != (const u32x a, const u32 b) { return ((a.s0 != b) && (a.s1 != b) && (a.s2 != b) && (a.s3 != b) && (a.s4 != b) && (a.s5 != b) && (a.s6 != b) && (a.s7 != b) && (a.s8 != b) && (a.s9 != b) && (a.sa != b) && (a.sb != b) && (a.sc != b) && (a.sd != b) && (a.se != b) && (a.sf != b) ); }
inline __device__ bool operator != (const u32x a, const u32x b) { return ((a.s0 != b.s0) && (a.s1 != b.s1) && (a.s2 != b.s2) && (a.s3 != b.s3) && (a.s4 != b.s4) && (a.s5 != b.s5) && (a.s6 != b.s6) && (a.s7 != b.s7) && (a.s8 != b.s8) && (a.s9 != b.s9) && (a.sa != b.sa) && (a.sb != b.sb) && (a.sc != b.sc) && (a.sd != b.sd) && (a.se != b.se) && (a.sf != b.sf)); }
inline __device__ void operator ^= (u32x &a, const u32 b) { a.s0 ^= b; a.s1 ^= b; a.s2 ^= b; a.s3 ^= b; a.s4 ^= b; a.s5 ^= b; a.s6 ^= b; a.s7 ^= b; a.s8 ^= b; a.s9 ^= b; a.sa ^= b; a.sb ^= b; a.sc ^= b; a.sd ^= b; a.se ^= b; a.sf ^= b; }
inline __device__ void operator ^= (u32x &a, const u32x b) { a.s0 ^= b.s0; a.s1 ^= b.s1; a.s2 ^= b.s2; a.s3 ^= b.s3; a.s4 ^= b.s4; a.s5 ^= b.s5; a.s6 ^= b.s6; a.s7 ^= b.s7; a.s8 ^= b.s8; a.s9 ^= b.s9; a.sa ^= b.sa; a.sb ^= b.sb; a.sc ^= b.sc; a.sd ^= b.sd; a.se ^= b.se; a.sf ^= b.sf; }
inline __device__ void operator |= (u32x &a, const u32 b) { a.s0 |= b; a.s1 |= b; a.s2 |= b; a.s3 |= b; a.s4 |= b; a.s5 |= b; a.s6 |= b; a.s7 |= b; a.s8 |= b; a.s9 |= b; a.sa |= b; a.sb |= b; a.sc |= b; a.sd |= b; a.se |= b; a.sf |= b; }
inline __device__ void operator |= (u32x &a, const u32x b) { a.s0 |= b.s0; a.s1 |= b.s1; a.s2 |= b.s2; a.s3 |= b.s3; a.s4 |= b.s4; a.s5 |= b.s5; a.s6 |= b.s6; a.s7 |= b.s7; a.s8 |= b.s8; a.s9 |= b.s9; a.sa |= b.sa; a.sb |= b.sb; a.sc |= b.sc; a.sd |= b.sd; a.se |= b.se; a.sf |= b.sf; }
inline __device__ void operator &= (u32x &a, const u32 b) { a.s0 &= b; a.s1 &= b; a.s2 &= b; a.s3 &= b; a.s4 &= b; a.s5 &= b; a.s6 &= b; a.s7 &= b; a.s8 &= b; a.s9 &= b; a.sa &= b; a.sb &= b; a.sc &= b; a.sd &= b; a.se &= b; a.sf &= b; }
inline __device__ void operator &= (u32x &a, const u32x b) { a.s0 &= b.s0; a.s1 &= b.s1; a.s2 &= b.s2; a.s3 &= b.s3; a.s4 &= b.s4; a.s5 &= b.s5; a.s6 &= b.s6; a.s7 &= b.s7; a.s8 &= b.s8; a.s9 &= b.s9; a.sa &= b.sa; a.sb &= b.sb; a.sc &= b.sc; a.sd &= b.sd; a.se &= b.se; a.sf &= b.sf; }
inline __device__ void operator += (u32x &a, const u32 b) { a.s0 += b; a.s1 += b; a.s2 += b; a.s3 += b; a.s4 += b; a.s5 += b; a.s6 += b; a.s7 += b; a.s8 += b; a.s9 += b; a.sa += b; a.sb += b; a.sc += b; a.sd += b; a.se += b; a.sf += b; }
inline __device__ void operator += (u32x &a, const u32x b) { a.s0 += b.s0; a.s1 += b.s1; a.s2 += b.s2; a.s3 += b.s3; a.s4 += b.s4; a.s5 += b.s5; a.s6 += b.s6; a.s7 += b.s7; a.s8 += b.s8; a.s9 += b.s9; a.sa += b.sa; a.sb += b.sb; a.sc += b.sc; a.sd += b.sd; a.se += b.se; a.sf += b.sf; }
inline __device__ void operator -= (u32x &a, const u32 b) { a.s0 -= b; a.s1 -= b; a.s2 -= b; a.s3 -= b; a.s4 -= b; a.s5 -= b; a.s6 -= b; a.s7 -= b; a.s8 -= b; a.s9 -= b; a.sa -= b; a.sb -= b; a.sc -= b; a.sd -= b; a.se -= b; a.sf -= b; }
inline __device__ void operator -= (u32x &a, const u32x b) { a.s0 -= b.s0; a.s1 -= b.s1; a.s2 -= b.s2; a.s3 -= b.s3; a.s4 -= b.s4; a.s5 -= b.s5; a.s6 -= b.s6; a.s7 -= b.s7; a.s8 -= b.s8; a.s9 -= b.s9; a.sa -= b.sa; a.sb -= b.sb; a.sc -= b.sc; a.sd -= b.sd; a.se -= b.se; a.sf -= b.sf; }
inline __device__ void operator *= (u32x &a, const u32 b) { a.s0 *= b; a.s1 *= b; a.s2 *= b; a.s3 *= b; a.s4 *= b; a.s5 *= b; a.s6 *= b; a.s7 *= b; a.s8 *= b; a.s9 *= b; a.sa *= b; a.sb *= b; a.sc *= b; a.sd *= b; a.se *= b; a.sf *= b; }
inline __device__ void operator *= (u32x &a, const u32x b) { a.s0 *= b.s0; a.s1 *= b.s1; a.s2 *= b.s2; a.s3 *= b.s3; a.s4 *= b.s4; a.s5 *= b.s5; a.s6 *= b.s6; a.s7 *= b.s7; a.s8 *= b.s8; a.s9 *= b.s9; a.sa *= b.sa; a.sb *= b.sb; a.sc *= b.sc; a.sd *= b.sd; a.se *= b.se; a.sf *= b.sf; }
inline __device__ void operator >>= (u32x &a, const u32 b) { a.s0 >>= b; a.s1 >>= b; a.s2 >>= b; a.s3 >>= b; a.s4 >>= b; a.s5 >>= b; a.s6 >>= b; a.s7 >>= b; a.s8 >>= b; a.s9 >>= b; a.sa >>= b; a.sb >>= b; a.sc >>= b; a.sd >>= b; a.se >>= b; a.sf >>= b; }
inline __device__ void operator >>= (u32x &a, const u32x b) { a.s0 >>= b.s0; a.s1 >>= b.s1; a.s2 >>= b.s2; a.s3 >>= b.s3; a.s4 >>= b.s4; a.s5 >>= b.s5; a.s6 >>= b.s6; a.s7 >>= b.s7; a.s8 >>= b.s8; a.s9 >>= b.s9; a.sa >>= b.sa; a.sb >>= b.sb; a.sc >>= b.sc; a.sd >>= b.sd; a.se >>= b.se; a.sf >>= b.sf; }
inline __device__ void operator <<= (u32x &a, const u32 b) { a.s0 <<= b; a.s1 <<= b; a.s2 <<= b; a.s3 <<= b; a.s4 <<= b; a.s5 <<= b; a.s6 <<= b; a.s7 <<= b; a.s8 <<= b; a.s9 <<= b; a.sa <<= b; a.sb <<= b; a.sc <<= b; a.sd <<= b; a.se <<= b; a.sf <<= b; }
inline __device__ void operator <<= (u32x &a, const u32x b) { a.s0 <<= b.s0; a.s1 <<= b.s1; a.s2 <<= b.s2; a.s3 <<= b.s3; a.s4 <<= b.s4; a.s5 <<= b.s5; a.s6 <<= b.s6; a.s7 <<= b.s7; a.s8 <<= b.s8; a.s9 <<= b.s9; a.sa <<= b.sa; a.sb <<= b.sb; a.sc <<= b.sc; a.sd <<= b.sd; a.se <<= b.se; a.sf <<= b.sf; }
inline __device__ u32x operator << (const u32x a, const u32 b) { return u32x ((a.s0 << b), (a.s1 << b) , (a.s2 << b), (a.s3 << b) , (a.s4 << b), (a.s5 << b) , (a.s6 << b), (a.s7 << b), (a.s8 << b), (a.s9 << b) , (a.sa << b), (a.sb << b) , (a.sc << b), (a.sd << b) , (a.se << b), (a.sf << b) ); }
inline __device__ u32x operator << (const u32x a, const u32x b) { return u32x ((a.s0 << b.s0), (a.s1 << b.s1), (a.s2 << b.s2), (a.s3 << b.s3), (a.s4 << b.s4), (a.s5 << b.s5), (a.s6 << b.s6), (a.s7 << b.s7), (a.s8 << b.s8), (a.s9 << b.s9), (a.sa << b.sa), (a.sb << b.sb), (a.sc << b.sc), (a.sd << b.sd), (a.se << b.se), (a.sf << b.sf)); }
inline __device__ u32x operator >> (const u32x a, const u32 b) { return u32x ((a.s0 >> b), (a.s1 >> b) , (a.s2 >> b), (a.s3 >> b) , (a.s4 >> b), (a.s5 >> b) , (a.s6 >> b), (a.s7 >> b), (a.s8 >> b), (a.s9 >> b) , (a.sa >> b), (a.sb >> b) , (a.sc >> b), (a.sd >> b) , (a.se >> b), (a.sf >> b) ); }
inline __device__ u32x operator >> (const u32x a, const u32x b) { return u32x ((a.s0 >> b.s0), (a.s1 >> b.s1), (a.s2 >> b.s2), (a.s3 >> b.s3), (a.s4 >> b.s4), (a.s5 >> b.s5), (a.s6 >> b.s6), (a.s7 >> b.s7), (a.s8 >> b.s8), (a.s9 >> b.s9), (a.sa >> b.sa), (a.sb >> b.sb), (a.sc >> b.sc), (a.sd >> b.sd), (a.se >> b.se), (a.sf >> b.sf)); }
inline __device__ u32x operator ^ (const u32x a, const u32 b) { return u32x ((a.s0 ^ b), (a.s1 ^ b) , (a.s2 ^ b), (a.s3 ^ b) , (a.s4 ^ b), (a.s5 ^ b) , (a.s6 ^ b), (a.s7 ^ b), (a.s8 ^ b), (a.s9 ^ b) , (a.sa ^ b), (a.sb ^ b) , (a.sc ^ b), (a.sd ^ b) , (a.se ^ b), (a.sf ^ b) ); }
inline __device__ u32x operator ^ (const u32x a, const u32x b) { return u32x ((a.s0 ^ b.s0), (a.s1 ^ b.s1), (a.s2 ^ b.s2), (a.s3 ^ b.s3), (a.s4 ^ b.s4), (a.s5 ^ b.s5), (a.s6 ^ b.s6), (a.s7 ^ b.s7), (a.s8 ^ b.s8), (a.s9 ^ b.s9), (a.sa ^ b.sa), (a.sb ^ b.sb), (a.sc ^ b.sc), (a.sd ^ b.sd), (a.se ^ b.se), (a.sf ^ b.sf)); }
inline __device__ u32x operator | (const u32x a, const u32 b) { return u32x ((a.s0 | b), (a.s1 | b) , (a.s2 | b), (a.s3 | b) , (a.s4 | b), (a.s5 | b) , (a.s6 | b), (a.s7 | b), (a.s8 | b), (a.s9 | b) , (a.sa | b), (a.sb | b) , (a.sc | b), (a.sd | b) , (a.se | b), (a.sf | b) ); }
inline __device__ u32x operator | (const u32x a, const u32x b) { return u32x ((a.s0 | b.s0), (a.s1 | b.s1), (a.s2 | b.s2), (a.s3 | b.s3), (a.s4 | b.s4), (a.s5 | b.s5), (a.s6 | b.s6), (a.s7 | b.s7), (a.s8 | b.s8), (a.s9 | b.s9), (a.sa | b.sa), (a.sb | b.sb), (a.sc | b.sc), (a.sd | b.sd), (a.se | b.se), (a.sf | b.sf)); }
inline __device__ u32x operator & (const u32x a, const u32 b) { return u32x ((a.s0 & b), (a.s1 & b) , (a.s2 & b), (a.s3 & b) , (a.s4 & b), (a.s5 & b) , (a.s6 & b), (a.s7 & b), (a.s8 & b), (a.s9 & b) , (a.sa & b), (a.sb & b) , (a.sc & b), (a.sd & b) , (a.se & b), (a.sf & b) ); }
inline __device__ u32x operator & (const u32x a, const u32x b) { return u32x ((a.s0 & b.s0), (a.s1 & b.s1), (a.s2 & b.s2), (a.s3 & b.s3), (a.s4 & b.s4), (a.s5 & b.s5), (a.s6 & b.s6), (a.s7 & b.s7), (a.s8 & b.s8), (a.s9 & b.s9), (a.sa & b.sa), (a.sb & b.sb), (a.sc & b.sc), (a.sd & b.sd), (a.se & b.se), (a.sf & b.sf)); }
inline __device__ u32x operator + (const u32x a, const u32 b) { return u32x ((a.s0 + b), (a.s1 + b) , (a.s2 + b), (a.s3 + b) , (a.s4 + b), (a.s5 + b) , (a.s6 + b), (a.s7 + b), (a.s8 + b), (a.s9 + b) , (a.sa + b), (a.sb + b) , (a.sc + b), (a.sd + b) , (a.se + b), (a.sf + b) ); }
inline __device__ u32x operator + (const u32x a, const u32x b) { return u32x ((a.s0 + b.s0), (a.s1 + b.s1), (a.s2 + b.s2), (a.s3 + b.s3), (a.s4 + b.s4), (a.s5 + b.s5), (a.s6 + b.s6), (a.s7 + b.s7), (a.s8 + b.s8), (a.s9 + b.s9), (a.sa + b.sa), (a.sb + b.sb), (a.sc + b.sc), (a.sd + b.sd), (a.se + b.se), (a.sf + b.sf)); }
inline __device__ u32x operator - (const u32x a, const u32 b) { return u32x ((a.s0 - b), (a.s1 - b) , (a.s2 - b), (a.s3 - b) , (a.s4 - b), (a.s5 - b) , (a.s6 - b), (a.s7 - b), (a.s8 - b), (a.s9 - b) , (a.sa - b), (a.sb - b) , (a.sc - b), (a.sd - b) , (a.se - b), (a.sf - b) ); }
inline __device__ u32x operator - (const u32x a, const u32x b) { return u32x ((a.s0 - b.s0), (a.s1 - b.s1), (a.s2 - b.s2), (a.s3 - b.s3), (a.s4 - b.s4), (a.s5 - b.s5), (a.s6 - b.s6), (a.s7 - b.s7), (a.s8 - b.s8), (a.s9 - b.s9), (a.sa - b.sa), (a.sb - b.sb), (a.sc - b.sc), (a.sd - b.sd), (a.se - b.se), (a.sf - b.sf)); }
inline __device__ u32x operator * (const u32x a, const u32 b) { return u32x ((a.s0 * b), (a.s1 * b) , (a.s2 * b), (a.s3 * b) , (a.s4 * b), (a.s5 * b) , (a.s6 * b), (a.s7 * b), (a.s8 * b), (a.s9 * b) , (a.sa * b), (a.sb * b) , (a.sc * b), (a.sd * b) , (a.se * b), (a.sf * b) ); }
inline __device__ u32x operator * (const u32x a, const u32x b) { return u32x ((a.s0 * b.s0), (a.s1 * b.s1), (a.s2 * b.s2), (a.s3 * b.s3), (a.s4 * b.s4), (a.s5 * b.s5), (a.s6 * b.s6), (a.s7 * b.s7), (a.s8 * b.s8), (a.s9 * b.s9), (a.sa * b.sa), (a.sb * b.sb), (a.sc * b.sc), (a.sd * b.sd), (a.se * b.se), (a.sf * b.sf)); }
inline __device__ u32x operator % (const u32x a, const u32 b) { return u32x ((a.s0 % b), (a.s1 % b) , (a.s2 % b), (a.s3 % b) , (a.s4 % b), (a.s5 % b) , (a.s6 % b), (a.s7 % b), (a.s8 % b), (a.s9 % b) , (a.sa % b), (a.sb % b) , (a.sc % b), (a.sd % b) , (a.se % b), (a.sf % b) ); }
inline __device__ u32x operator % (const u32x a, const u32x b) { return u32x ((a.s0 % b.s0), (a.s1 % b.s1), (a.s2 % b.s2), (a.s3 % b.s3), (a.s4 % b.s4), (a.s5 % b.s5), (a.s6 % b.s6), (a.s7 % b.s7), (a.s8 % b.s8), (a.s9 % b.s9), (a.sa % b.sa), (a.sb % b.sb), (a.sc % b.sc), (a.sd % b.sd), (a.se % b.se), (a.sf % b.sf)); }
inline __device__ u32x operator ~ (const u32x a) { return u32x (~a.s0, ~a.s1, ~a.s2, ~a.s3, ~a.s4, ~a.s5, ~a.s6, ~a.s7, ~a.s8, ~a.s9, ~a.sa, ~a.sb, ~a.sc, ~a.sd, ~a.se, ~a.sf); }
inline __device__ bool operator != (const u64x a, const u64 b) { return ((a.s0 != b) && (a.s1 != b) && (a.s2 != b) && (a.s3 != b) && (a.s4 != b) && (a.s5 != b) && (a.s6 != b) && (a.s7 != b) && (a.s8 != b) && (a.s9 != b) && (a.sa != b) && (a.sb != b) && (a.sc != b) && (a.sd != b) && (a.se != b) && (a.sf != b) ); }
inline __device__ bool operator != (const u64x a, const u64x b) { return ((a.s0 != b.s0) && (a.s1 != b.s1) && (a.s2 != b.s2) && (a.s3 != b.s3) && (a.s4 != b.s4) && (a.s5 != b.s5) && (a.s6 != b.s6) && (a.s7 != b.s7) && (a.s8 != b.s8) && (a.s9 != b.s9) && (a.sa != b.sa) && (a.sb != b.sb) && (a.sc != b.sc) && (a.sd != b.sd) && (a.se != b.se) && (a.sf != b.sf)); }
inline __device__ void operator ^= (u64x &a, const u64 b) { a.s0 ^= b; a.s1 ^= b; a.s2 ^= b; a.s3 ^= b; a.s4 ^= b; a.s5 ^= b; a.s6 ^= b; a.s7 ^= b; a.s8 ^= b; a.s9 ^= b; a.sa ^= b; a.sb ^= b; a.sc ^= b; a.sd ^= b; a.se ^= b; a.sf ^= b; }
inline __device__ void operator ^= (u64x &a, const u64x b) { a.s0 ^= b.s0; a.s1 ^= b.s1; a.s2 ^= b.s2; a.s3 ^= b.s3; a.s4 ^= b.s4; a.s5 ^= b.s5; a.s6 ^= b.s6; a.s7 ^= b.s7; a.s8 ^= b.s8; a.s9 ^= b.s9; a.sa ^= b.sa; a.sb ^= b.sb; a.sc ^= b.sc; a.sd ^= b.sd; a.se ^= b.se; a.sf ^= b.sf; }
inline __device__ void operator |= (u64x &a, const u64 b) { a.s0 |= b; a.s1 |= b; a.s2 |= b; a.s3 |= b; a.s4 |= b; a.s5 |= b; a.s6 |= b; a.s7 |= b; a.s8 |= b; a.s9 |= b; a.sa |= b; a.sb |= b; a.sc |= b; a.sd |= b; a.se |= b; a.sf |= b; }
inline __device__ void operator |= (u64x &a, const u64x b) { a.s0 |= b.s0; a.s1 |= b.s1; a.s2 |= b.s2; a.s3 |= b.s3; a.s4 |= b.s4; a.s5 |= b.s5; a.s6 |= b.s6; a.s7 |= b.s7; a.s8 |= b.s8; a.s9 |= b.s9; a.sa |= b.sa; a.sb |= b.sb; a.sc |= b.sc; a.sd |= b.sd; a.se |= b.se; a.sf |= b.sf; }
inline __device__ void operator &= (u64x &a, const u64 b) { a.s0 &= b; a.s1 &= b; a.s2 &= b; a.s3 &= b; a.s4 &= b; a.s5 &= b; a.s6 &= b; a.s7 &= b; a.s8 &= b; a.s9 &= b; a.sa &= b; a.sb &= b; a.sc &= b; a.sd &= b; a.se &= b; a.sf &= b; }
inline __device__ void operator &= (u64x &a, const u64x b) { a.s0 &= b.s0; a.s1 &= b.s1; a.s2 &= b.s2; a.s3 &= b.s3; a.s4 &= b.s4; a.s5 &= b.s5; a.s6 &= b.s6; a.s7 &= b.s7; a.s8 &= b.s8; a.s9 &= b.s9; a.sa &= b.sa; a.sb &= b.sb; a.sc &= b.sc; a.sd &= b.sd; a.se &= b.se; a.sf &= b.sf; }
inline __device__ void operator += (u64x &a, const u64 b) { a.s0 += b; a.s1 += b; a.s2 += b; a.s3 += b; a.s4 += b; a.s5 += b; a.s6 += b; a.s7 += b; a.s8 += b; a.s9 += b; a.sa += b; a.sb += b; a.sc += b; a.sd += b; a.se += b; a.sf += b; }
inline __device__ void operator += (u64x &a, const u64x b) { a.s0 += b.s0; a.s1 += b.s1; a.s2 += b.s2; a.s3 += b.s3; a.s4 += b.s4; a.s5 += b.s5; a.s6 += b.s6; a.s7 += b.s7; a.s8 += b.s8; a.s9 += b.s9; a.sa += b.sa; a.sb += b.sb; a.sc += b.sc; a.sd += b.sd; a.se += b.se; a.sf += b.sf; }
inline __device__ void operator -= (u64x &a, const u64 b) { a.s0 -= b; a.s1 -= b; a.s2 -= b; a.s3 -= b; a.s4 -= b; a.s5 -= b; a.s6 -= b; a.s7 -= b; a.s8 -= b; a.s9 -= b; a.sa -= b; a.sb -= b; a.sc -= b; a.sd -= b; a.se -= b; a.sf -= b; }
inline __device__ void operator -= (u64x &a, const u64x b) { a.s0 -= b.s0; a.s1 -= b.s1; a.s2 -= b.s2; a.s3 -= b.s3; a.s4 -= b.s4; a.s5 -= b.s5; a.s6 -= b.s6; a.s7 -= b.s7; a.s8 -= b.s8; a.s9 -= b.s9; a.sa -= b.sa; a.sb -= b.sb; a.sc -= b.sc; a.sd -= b.sd; a.se -= b.se; a.sf -= b.sf; }
inline __device__ void operator *= (u64x &a, const u64 b) { a.s0 *= b; a.s1 *= b; a.s2 *= b; a.s3 *= b; a.s4 *= b; a.s5 *= b; a.s6 *= b; a.s7 *= b; a.s8 *= b; a.s9 *= b; a.sa *= b; a.sb *= b; a.sc *= b; a.sd *= b; a.se *= b; a.sf *= b; }
inline __device__ void operator *= (u64x &a, const u64x b) { a.s0 *= b.s0; a.s1 *= b.s1; a.s2 *= b.s2; a.s3 *= b.s3; a.s4 *= b.s4; a.s5 *= b.s5; a.s6 *= b.s6; a.s7 *= b.s7; a.s8 *= b.s8; a.s9 *= b.s9; a.sa *= b.sa; a.sb *= b.sb; a.sc *= b.sc; a.sd *= b.sd; a.se *= b.se; a.sf *= b.sf; }
inline __device__ void operator >>= (u64x &a, const u64 b) { a.s0 >>= b; a.s1 >>= b; a.s2 >>= b; a.s3 >>= b; a.s4 >>= b; a.s5 >>= b; a.s6 >>= b; a.s7 >>= b; a.s8 >>= b; a.s9 >>= b; a.sa >>= b; a.sb >>= b; a.sc >>= b; a.sd >>= b; a.se >>= b; a.sf >>= b; }
inline __device__ void operator >>= (u64x &a, const u64x b) { a.s0 >>= b.s0; a.s1 >>= b.s1; a.s2 >>= b.s2; a.s3 >>= b.s3; a.s4 >>= b.s4; a.s5 >>= b.s5; a.s6 >>= b.s6; a.s7 >>= b.s7; a.s8 >>= b.s8; a.s9 >>= b.s9; a.sa >>= b.sa; a.sb >>= b.sb; a.sc >>= b.sc; a.sd >>= b.sd; a.se >>= b.se; a.sf >>= b.sf; }
inline __device__ void operator <<= (u64x &a, const u64 b) { a.s0 <<= b; a.s1 <<= b; a.s2 <<= b; a.s3 <<= b; a.s4 <<= b; a.s5 <<= b; a.s6 <<= b; a.s7 <<= b; a.s8 <<= b; a.s9 <<= b; a.sa <<= b; a.sb <<= b; a.sc <<= b; a.sd <<= b; a.se <<= b; a.sf <<= b; }
inline __device__ void operator <<= (u64x &a, const u64x b) { a.s0 <<= b.s0; a.s1 <<= b.s1; a.s2 <<= b.s2; a.s3 <<= b.s3; a.s4 <<= b.s4; a.s5 <<= b.s5; a.s6 <<= b.s6; a.s7 <<= b.s7; a.s8 <<= b.s8; a.s9 <<= b.s9; a.sa <<= b.sa; a.sb <<= b.sb; a.sc <<= b.sc; a.sd <<= b.sd; a.se <<= b.se; a.sf <<= b.sf; }
inline __device__ u64x operator << (const u64x a, const u64 b) { return u64x ((a.s0 << b), (a.s1 << b) , (a.s2 << b), (a.s3 << b) , (a.s4 << b), (a.s5 << b) , (a.s6 << b), (a.s7 << b), (a.s8 << b), (a.s9 << b) , (a.sa << b), (a.sb << b) , (a.sc << b), (a.sd << b) , (a.se << b), (a.sf << b) ); }
inline __device__ u64x operator << (const u64x a, const u64x b) { return u64x ((a.s0 << b.s0), (a.s1 << b.s1), (a.s2 << b.s2), (a.s3 << b.s3), (a.s4 << b.s4), (a.s5 << b.s5), (a.s6 << b.s6), (a.s7 << b.s7), (a.s8 << b.s8), (a.s9 << b.s9), (a.sa << b.sa), (a.sb << b.sb), (a.sc << b.sc), (a.sd << b.sd), (a.se << b.se), (a.sf << b.sf)); }
inline __device__ u64x operator >> (const u64x a, const u64 b) { return u64x ((a.s0 >> b), (a.s1 >> b) , (a.s2 >> b), (a.s3 >> b) , (a.s4 >> b), (a.s5 >> b) , (a.s6 >> b), (a.s7 >> b), (a.s8 >> b), (a.s9 >> b) , (a.sa >> b), (a.sb >> b) , (a.sc >> b), (a.sd >> b) , (a.se >> b), (a.sf >> b) ); }
inline __device__ u64x operator >> (const u64x a, const u64x b) { return u64x ((a.s0 >> b.s0), (a.s1 >> b.s1), (a.s2 >> b.s2), (a.s3 >> b.s3), (a.s4 >> b.s4), (a.s5 >> b.s5), (a.s6 >> b.s6), (a.s7 >> b.s7), (a.s8 >> b.s8), (a.s9 >> b.s9), (a.sa >> b.sa), (a.sb >> b.sb), (a.sc >> b.sc), (a.sd >> b.sd), (a.se >> b.se), (a.sf >> b.sf)); }
inline __device__ u64x operator ^ (const u64x a, const u64 b) { return u64x ((a.s0 ^ b), (a.s1 ^ b) , (a.s2 ^ b), (a.s3 ^ b) , (a.s4 ^ b), (a.s5 ^ b) , (a.s6 ^ b), (a.s7 ^ b), (a.s8 ^ b), (a.s9 ^ b) , (a.sa ^ b), (a.sb ^ b) , (a.sc ^ b), (a.sd ^ b) , (a.se ^ b), (a.sf ^ b) ); }
inline __device__ u64x operator ^ (const u64x a, const u64x b) { return u64x ((a.s0 ^ b.s0), (a.s1 ^ b.s1), (a.s2 ^ b.s2), (a.s3 ^ b.s3), (a.s4 ^ b.s4), (a.s5 ^ b.s5), (a.s6 ^ b.s6), (a.s7 ^ b.s7), (a.s8 ^ b.s8), (a.s9 ^ b.s9), (a.sa ^ b.sa), (a.sb ^ b.sb), (a.sc ^ b.sc), (a.sd ^ b.sd), (a.se ^ b.se), (a.sf ^ b.sf)); }
inline __device__ u64x operator | (const u64x a, const u64 b) { return u64x ((a.s0 | b), (a.s1 | b) , (a.s2 | b), (a.s3 | b) , (a.s4 | b), (a.s5 | b) , (a.s6 | b), (a.s7 | b), (a.s8 | b), (a.s9 | b) , (a.sa | b), (a.sb | b) , (a.sc | b), (a.sd | b) , (a.se | b), (a.sf | b) ); }
inline __device__ u64x operator | (const u64x a, const u64x b) { return u64x ((a.s0 | b.s0), (a.s1 | b.s1), (a.s2 | b.s2), (a.s3 | b.s3), (a.s4 | b.s4), (a.s5 | b.s5), (a.s6 | b.s6), (a.s7 | b.s7), (a.s8 | b.s8), (a.s9 | b.s9), (a.sa | b.sa), (a.sb | b.sb), (a.sc | b.sc), (a.sd | b.sd), (a.se | b.se), (a.sf | b.sf)); }
inline __device__ u64x operator & (const u64x a, const u64 b) { return u64x ((a.s0 & b), (a.s1 & b) , (a.s2 & b), (a.s3 & b) , (a.s4 & b), (a.s5 & b) , (a.s6 & b), (a.s7 & b), (a.s8 & b), (a.s9 & b) , (a.sa & b), (a.sb & b) , (a.sc & b), (a.sd & b) , (a.se & b), (a.sf & b) ); }
inline __device__ u64x operator & (const u64x a, const u64x b) { return u64x ((a.s0 & b.s0), (a.s1 & b.s1), (a.s2 & b.s2), (a.s3 & b.s3), (a.s4 & b.s4), (a.s5 & b.s5), (a.s6 & b.s6), (a.s7 & b.s7), (a.s8 & b.s8), (a.s9 & b.s9), (a.sa & b.sa), (a.sb & b.sb), (a.sc & b.sc), (a.sd & b.sd), (a.se & b.se), (a.sf & b.sf)); }
inline __device__ u64x operator + (const u64x a, const u64 b) { return u64x ((a.s0 + b), (a.s1 + b) , (a.s2 + b), (a.s3 + b) , (a.s4 + b), (a.s5 + b) , (a.s6 + b), (a.s7 + b), (a.s8 + b), (a.s9 + b) , (a.sa + b), (a.sb + b) , (a.sc + b), (a.sd + b) , (a.se + b), (a.sf + b) ); }
inline __device__ u64x operator + (const u64x a, const u64x b) { return u64x ((a.s0 + b.s0), (a.s1 + b.s1), (a.s2 + b.s2), (a.s3 + b.s3), (a.s4 + b.s4), (a.s5 + b.s5), (a.s6 + b.s6), (a.s7 + b.s7), (a.s8 + b.s8), (a.s9 + b.s9), (a.sa + b.sa), (a.sb + b.sb), (a.sc + b.sc), (a.sd + b.sd), (a.se + b.se), (a.sf + b.sf)); }
inline __device__ u64x operator - (const u64x a, const u64 b) { return u64x ((a.s0 - b), (a.s1 - b) , (a.s2 - b), (a.s3 - b) , (a.s4 - b), (a.s5 - b) , (a.s6 - b), (a.s7 - b), (a.s8 - b), (a.s9 - b) , (a.sa - b), (a.sb - b) , (a.sc - b), (a.sd - b) , (a.se - b), (a.sf - b) ); }
inline __device__ u64x operator - (const u64x a, const u64x b) { return u64x ((a.s0 - b.s0), (a.s1 - b.s1), (a.s2 - b.s2), (a.s3 - b.s3), (a.s4 - b.s4), (a.s5 - b.s5), (a.s6 - b.s6), (a.s7 - b.s7), (a.s8 - b.s8), (a.s9 - b.s9), (a.sa - b.sa), (a.sb - b.sb), (a.sc - b.sc), (a.sd - b.sd), (a.se - b.se), (a.sf - b.sf)); }
inline __device__ u64x operator * (const u64x a, const u64 b) { return u64x ((a.s0 * b), (a.s1 * b) , (a.s2 * b), (a.s3 * b) , (a.s4 * b), (a.s5 * b) , (a.s6 * b), (a.s7 * b), (a.s8 * b), (a.s9 * b) , (a.sa * b), (a.sb * b) , (a.sc * b), (a.sd * b) , (a.se * b), (a.sf * b) ); }
inline __device__ u64x operator * (const u64x a, const u64x b) { return u64x ((a.s0 * b.s0), (a.s1 * b.s1), (a.s2 * b.s2), (a.s3 * b.s3), (a.s4 * b.s4), (a.s5 * b.s5), (a.s6 * b.s6), (a.s7 * b.s7), (a.s8 * b.s8), (a.s9 * b.s9), (a.sa * b.sa), (a.sb * b.sb), (a.sc * b.sc), (a.sd * b.sd), (a.se * b.se), (a.sf * b.sf)); }
inline __device__ u64x operator % (const u64x a, const u64 b) { return u64x ((a.s0 % b), (a.s1 % b) , (a.s2 % b), (a.s3 % b) , (a.s4 % b), (a.s5 % b) , (a.s6 % b), (a.s7 % b), (a.s8 % b), (a.s9 % b) , (a.sa % b), (a.sb % b) , (a.sc % b), (a.sd % b) , (a.se % b), (a.sf % b) ); }
inline __device__ u64x operator % (const u64x a, const u64x b) { return u64x ((a.s0 % b.s0), (a.s1 % b.s1), (a.s2 % b.s2), (a.s3 % b.s3), (a.s4 % b.s4), (a.s5 % b.s5), (a.s6 % b.s6), (a.s7 % b.s7), (a.s8 % b.s8), (a.s9 % b.s9), (a.sa % b.sa), (a.sb % b.sb), (a.sc % b.sc), (a.sd % b.sd), (a.se % b.se), (a.sf % b.sf)); }
inline __device__ u64x operator ~ (const u64x a) { return u64x (~a.s0, ~a.s1, ~a.s2, ~a.s3, ~a.s4, ~a.s5, ~a.s6, ~a.s7, ~a.s8, ~a.s9, ~a.sa, ~a.sb, ~a.sc, ~a.sd, ~a.se, ~a.sf); }
#endif
typedef struct u8x u8x;
typedef struct u16x u16x;
typedef struct u32x u32x;
typedef struct u64x u64x;
#define make_u8x u8x
#define make_u16x u16x
#define make_u32x u32x
#define make_u64x u64x
*/
#else
typedef VTYPE(uchar, VECT_SIZE) u8x;
typedef VTYPE(ushort, VECT_SIZE) u16x;

View File

@ -487,9 +487,7 @@ HC_API_CALL void *thread_autotune (void *p)
if (device_param->is_cuda == true)
{
const int rc_cuCtxSetCurrent = hc_cuCtxSetCurrent (hashcat_ctx, device_param->cuda_context);
if (rc_cuCtxSetCurrent == -1) return NULL;
if (hc_cuCtxPushCurrent (hashcat_ctx, device_param->cuda_context) == -1) return NULL;
}
if (device_param->is_hip == true)
@ -504,6 +502,11 @@ HC_API_CALL void *thread_autotune (void *p)
// we should do something here, tell hashcat main that autotune failed to abort
}
if (device_param->is_cuda == true)
{
if (hc_cuCtxPopCurrent (hashcat_ctx, &device_param->cuda_context) == -1) return NULL;
}
if (device_param->is_hip == true)
{
if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1) return NULL;

View File

@ -7979,7 +7979,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
continue;
}
if (hc_cuCtxSetCurrent (hashcat_ctx, cuda_context) == -1)
if (hc_cuCtxPushCurrent (hashcat_ctx, cuda_context) == -1)
{
device_param->skipped = true;
continue;
@ -7996,6 +7996,12 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
device_param->device_available_mem = (u64) free;
if (hc_cuCtxPopCurrent (hashcat_ctx, &cuda_context) == -1)
{
device_param->skipped = true;
continue;
}
if (hc_cuCtxDestroy (hashcat_ctx, cuda_context) == -1)
{
device_param->skipped = true;
@ -10178,6 +10184,12 @@ static u32 get_kernel_threads (const hc_device_param_t *device_param)
gpu_prefered_thread_count = 32;
}
kernel_threads_max = MIN (kernel_threads_max, gpu_prefered_thread_count);
}
else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
{
u32 gpu_prefered_thread_count = 64;
kernel_threads_max = MIN (kernel_threads_max, gpu_prefered_thread_count);
}
}
@ -10484,8 +10496,9 @@ static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_p
//hc_asprintf (&hiprtc_options[3], "compute_%d%d", device_param->sm_major, device_param->sm_minor);
// TODO HIP
// no -offload-arch= aka --gpu-architecture because hiprtc gets native arch from hip_context
hiprtc_options[0] = "";
hiprtc_options[0] = "--gpu-max-threads-per-block=64";
hiprtc_options[1] = "";
hiprtc_options[2] = "";
hiprtc_options[3] = "";
@ -11242,6 +11255,10 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
{
native_threads = 64;
}
else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
{
native_threads = 64;
}
else
{
native_threads = 32;
@ -11274,6 +11291,12 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
device_param->skipped = true;
continue;
}
if (hc_cuCtxPushCurrent (hashcat_ctx, device_param->cuda_context) == -1)
{
device_param->skipped = true;
continue;
}
}
if (device_param->is_hip == true)
@ -15088,6 +15111,15 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
// context
if (device_param->is_cuda == true)
{
if (hc_cuCtxPopCurrent (hashcat_ctx, &device_param->cuda_context) == -1)
{
device_param->skipped = true;
continue;
}
}
if (device_param->is_hip == true)
{
if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1)
@ -15251,6 +15283,11 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx)
device_param->cuda_function_aux3 = NULL;
device_param->cuda_function_aux4 = NULL;
device_param->cuda_event1 = NULL;
device_param->cuda_event2 = NULL;
device_param->cuda_stream = NULL;
device_param->cuda_module = NULL;
device_param->cuda_module_mp = NULL;
device_param->cuda_module_amp = NULL;
@ -15372,6 +15409,11 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx)
device_param->hip_function_aux3 = NULL;
device_param->hip_function_aux4 = NULL;
device_param->hip_event1 = NULL;
device_param->hip_event2 = NULL;
device_param->hip_stream = NULL;
device_param->hip_module = NULL;
device_param->hip_module_mp = NULL;
device_param->hip_module_amp = NULL;

View File

@ -347,7 +347,7 @@ HC_API_CALL void *thread_calc_stdin (void *p)
if (device_param->is_cuda == true)
{
if (hc_cuCtxSetCurrent (hashcat_ctx, device_param->cuda_context) == -1) return NULL;
if (hc_cuCtxPushCurrent (hashcat_ctx, device_param->cuda_context) == -1) return NULL;
}
if (device_param->is_hip == true)
@ -362,6 +362,11 @@ HC_API_CALL void *thread_calc_stdin (void *p)
status_ctx->devices_status = STATUS_ERROR;
}
if (device_param->is_cuda == true)
{
if (hc_cuCtxPopCurrent (hashcat_ctx, &device_param->cuda_context) == -1) return NULL;
}
if (device_param->is_hip == true)
{
if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1) return NULL;
@ -1591,7 +1596,7 @@ HC_API_CALL void *thread_calc (void *p)
if (device_param->is_cuda == true)
{
if (hc_cuCtxSetCurrent (hashcat_ctx, device_param->cuda_context) == -1) return NULL;
if (hc_cuCtxPushCurrent (hashcat_ctx, device_param->cuda_context) == -1) return NULL;
}
if (device_param->is_hip == true)
@ -1606,6 +1611,11 @@ HC_API_CALL void *thread_calc (void *p)
status_ctx->devices_status = STATUS_ERROR;
}
if (device_param->is_cuda == true)
{
if (hc_cuCtxPopCurrent (hashcat_ctx, &device_param->cuda_context) == -1) return NULL;
}
if (device_param->is_hip == true)
{
if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1) return NULL;

View File

@ -170,6 +170,11 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE
// it leads to CL_KERNEL_WORK_GROUP_SIZE to return 0 and later we will divide with 0
// workaround would be to rewrite kernel to use global memory
if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
{
return true;
}
if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)
{
return true;

View File

@ -170,6 +170,11 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE
// it leads to CL_KERNEL_WORK_GROUP_SIZE to return 0 and later we will divide with 0
// workaround would be to rewrite kernel to use global memory
if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
{
return true;
}
if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)
{
return true;

View File

@ -170,6 +170,11 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE
// it leads to CL_KERNEL_WORK_GROUP_SIZE to return 0 and later we will divide with 0
// workaround would be to rewrite kernel to use global memory
if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
{
return true;
}
if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)
{
return true;

View File

@ -68,6 +68,11 @@ static const char *SIGNATURE_KRB5TGS = "$krb5tgs$17$";
bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hc_device_param_t *device_param)
{
if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
{
return true;
}
// AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241
if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU))
{

View File

@ -68,6 +68,11 @@ static const char *SIGNATURE_KRB5TGS = "$krb5tgs$18$";
bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hc_device_param_t *device_param)
{
if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
{
return true;
}
// AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241
if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU))
{

View File

@ -875,7 +875,7 @@ HC_API_CALL void *thread_selftest (void *p)
if (device_param->is_cuda == true)
{
if (hc_cuCtxSetCurrent (hashcat_ctx, device_param->cuda_context) == -1) return NULL;
if (hc_cuCtxPushCurrent (hashcat_ctx, device_param->cuda_context) == -1) return NULL;
}
if (device_param->is_hip == true)
@ -901,6 +901,11 @@ HC_API_CALL void *thread_selftest (void *p)
}
}
if (device_param->is_cuda == true)
{
if (hc_cuCtxPopCurrent (hashcat_ctx, &device_param->cuda_context) == -1) return NULL;
}
if (device_param->is_hip == true)
{
if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1) return NULL;