
More fixes for IS_UNKNOWN in kernels

jsteube 2016-01-05 22:47:56 +01:00
parent 3979fe6991
commit 8e3a72f911
2 changed files with 111 additions and 107 deletions

View File

@@ -130,7 +130,7 @@ static void memcat16 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4]
   u32 tmp3;
   u32 tmp4;

-  #ifdef IS_AMD
+  #if defined IS_AMD || defined IS_UNKNOWN

   const int offset_minus_4 = 4 - (block_len & 3);

@@ -242,7 +242,7 @@ static void memcat16_x80 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block
   u32 tmp3;
   u32 tmp4;

-  #ifdef IS_AMD
+  #if defined IS_AMD || defined IS_UNKNOWN

   const int offset_minus_4 = 4 - (block_len & 3);

@@ -352,7 +352,7 @@ static void memcat8 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4],
   u32 tmp1;
   u32 tmp2;

-  #ifdef IS_AMD
+  #if defined IS_AMD || defined IS_UNKNOWN

   const int offset_minus_4 = 4 - (block_len & 3);
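Note: the guard widens from IS_AMD to IS_AMD || IS_UNKNOWN because this commit gives IS_UNKNOWN builds a generic amd_bytealign fallback (see the second file below), so these byte-offset concatenation paths now work on unrecognized platforms too. A minimal host-side C sketch of that fallback's semantics; the helper name bytealign_generic is illustrative, not from the source:

#include <stdint.h>
#include <inttypes.h>
#include <stdio.h>

/* Sketch of the generic amd_bytealign fallback this commit adds for
   IS_UNKNOWN: view a:b as one 64-bit value, shift right by (c & 3)
   bytes, and keep the low 32 bits. */
static uint32_t bytealign_generic (uint32_t a, uint32_t b, uint32_t c)
{
  return (uint32_t) (((((uint64_t) a) << 32) | (uint64_t) b) >> ((c & 3) * 8));
}

int main (void)
{
  /* Offset 1 picks the window 0x44556677 out of 0x11223344:55667788. */
  printf ("%08" PRIx32 "\n", bytealign_generic (0x11223344, 0x55667788, 1));

  return 0;
}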

View File

@@ -8,6 +8,36 @@ typedef ushort u16;
 typedef uint   u32;
 typedef ulong  u64;

+#define allx(r) r
+
+/*
+static u32 allx (const u32 r)
+{
+  return r;
+}
+*/
+
+static inline u32 l32_from_64 (u64 a)
+{
+  const u32 r = (uint) (a);
+
+  return r;
+}
+
+static inline u32 h32_from_64 (u64 a)
+{
+  a >>= 32;
+
+  const u32 r = (uint) (a);
+
+  return r;
+}
+
+static inline u64 hl32_to_64 (const u32 a, const u32 b)
+{
+  return as_ulong ((uint2) (b, a));
+}
+
 #ifdef IS_AMD
 static inline u32 swap32 (const u32 v)
 {
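The allx macro and the 64-bit split/join helpers move up here, ahead of the platform-specific sections that use them. A plain-C sketch of what they compute, assuming the usual semantics of OpenCL's as_ulong ((uint2) (b, a)) where the .x component is the low word; names mirror the kernel helpers for illustration only:

#include <stdint.h>
#include <assert.h>

/* Plain-C equivalents of the OpenCL helpers added above; the OpenCL
   version builds the u64 with as_ulong ((uint2) (b, a)), written out
   here as shift/or. */
static uint32_t l32_from_64 (uint64_t a) { return (uint32_t) a; }
static uint32_t h32_from_64 (uint64_t a) { return (uint32_t) (a >> 32); }

static uint64_t hl32_to_64 (uint32_t hi, uint32_t lo)
{
  return ((uint64_t) hi << 32) | lo;
}

int main (void)
{
  const uint64_t x = 0x0123456789abcdefULL;

  /* Splitting and rejoining must round-trip. */
  assert (hl32_to_64 (h32_from_64 (x), l32_from_64 (x)) == x);

  return 0;
}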
@@ -88,9 +118,7 @@ static inline u32 __bfe (const u32 a, const u32 b, const u32 c)
   return r;
 }

 #if CUDA_ARCH >= 350
 static inline u32 amd_bytealign (const u32 a, const u32 b, const u32 c)
 {
   u32 r;
@@ -99,113 +127,31 @@ static inline u32 amd_bytealign (const u32 a, const u32 b, const u32 c)
   return r;
 }
 #else
 static inline u32 amd_bytealign (const u32 a, const u32 b, const u32 c)
 {
   return __byte_perm (b, a, (0x76543210 >> ((c & 3) * 4)) & 0xffff);
 }
+#endif
 #endif

-static inline u32 lut3_2d (const u32 a, const u32 b, const u32 c)
+#ifdef IS_UNKNOWN
+static inline u32 __bfe (const u32 a, const u32 b, const u32 c)
 {
-  u32 r;
-
-  asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
-
-  return r;
+  #define BIT(x)      (1 << (x))
+  #define BIT_MASK(x) (BIT (x) - 1)
+  #define BFE(x,y,z)  (((x) >> (y)) & BIT_MASK (z))
+
+  return BFE (a, b, c);
 }

-static inline u32 lut3_39 (const u32 a, const u32 b, const u32 c)
+static inline u32 amd_bytealign (const u32 a, const u32 b, const u32 c)
 {
-  u32 r;
-
-  asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
-
-  return r;
+  return (u32) (((((u64) a) << 32) | (u64) b) >> ((c & 3) * 8));
 }

-static inline u32 lut3_59 (const u32 a, const u32 b, const u32 c)
-{
-  u32 r;
-
-  asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
-
-  return r;
-}
-
-static inline u32 lut3_96 (const u32 a, const u32 b, const u32 c)
-{
-  u32 r;
-
-  asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
-
-  return r;
-}
-
-static inline u32 lut3_e4 (const u32 a, const u32 b, const u32 c)
-{
-  u32 r;
-
-  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
-
-  return r;
-}
-
-static inline u32 lut3_e8 (const u32 a, const u32 b, const u32 c)
-{
-  u32 r;
-
-  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
-
-  return r;
-}
-
-static inline u32 lut3_ca (const u32 a, const u32 b, const u32 c)
-{
-  u32 r;
-
-  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
-
-  return r;
-}
 #endif

-#define allx(r) r
-
-/*
-static u32 allx (const u32 r)
-{
-  return r;
-}
-*/
-
-static inline u32 l32_from_64 (u64 a)
-{
-  const u32 r = (uint) (a);
-
-  return r;
-}
-
-static inline u32 h32_from_64 (u64 a)
-{
-  a >>= 32;
-
-  const u32 r = (uint) (a);
-
-  return r;
-}
-
-static inline u64 hl32_to_64 (const u32 a, const u32 b)
-{
-  return as_ulong ((uint2) (b, a));
-}
-
 #ifdef IS_AMD
 static inline u32 rotr32 (const u32 a, const u32 n)
 {
   return rotate (a, 32 - n);
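With this hunk, IS_UNKNOWN builds get portable stand-ins for two GPU intrinsics: __bfe becomes a shift-and-mask bit-field extract, and amd_bytealign the 64-bit shift sketched above. A small C check of the extract logic; bfe_generic is an illustrative name, and like the kernel's BIT_MASK macro it assumes a field width below 32:

#include <stdint.h>
#include <assert.h>

/* Portable bit-field extract matching the IS_UNKNOWN __bfe fallback:
   take z bits of x starting at bit position y (z < 32 assumed, as in
   the kernel macro). */
static uint32_t bfe_generic (uint32_t x, uint32_t y, uint32_t z)
{
  return (x >> y) & ((1u << z) - 1);
}

int main (void)
{
  /* Extract 8 bits starting at bit 4 of 0xdeadbeef -> 0xee. */
  assert (bfe_generic (0xdeadbeef, 4, 8) == 0xee);

  return 0;
}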
@@ -234,13 +180,10 @@ static inline u64 rotl64 (const u64 a, const u32 n)
 {
   return rotr64 (a, 64 - n);
 }
 #endif

 #ifdef IS_NV
 #if CUDA_ARCH >= 350
 /*
 this version reduced the number of registers but for some unknown reason the whole kernel become slower.. instruction cache monster?
 static inline u32 rotr32 (const u32 a, const u32 n)
@@ -333,9 +276,7 @@ static inline u64 rotl64 (const u64 a, const u32 n)
 {
   return rotr64 (a, 64 - n);
 }
 #else
 static inline u32 rotr32 (const u32 a, const u32 n)
 {
   return rotate (a, 32 - n);
@@ -353,14 +294,12 @@ static inline u64 rotr64 (const u64 a, const u32 n)
 static inline u64 rotl64 (const u64 a, const u32 n)
 {
-  return rotr64 (a, (u64) 64 - n);
+  return rotate (a, (u64) n);
 }
 #endif
 #endif

 #ifdef IS_UNKNOWN
 static inline u32 rotr32 (const u32 a, const u32 n)
 {
   return rotate (a, 32 - n);
@@ -368,8 +307,7 @@ static inline u32 rotr32 (const u32 a, const u32 n)
 static inline u32 rotl32 (const u32 a, const u32 n)
 {
-  // return rotate (a, n);
-  return (a << n) | (a >> (32 - n));
+  return rotate (a, n);
 }

 static inline u64 rotr64 (const u64 a, const u32 n)
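For IS_UNKNOWN, rotl32 now calls OpenCL's built-in rotate instead of the manual shift-or, which in C is undefined for n == 0 (it shifts right by 32). A C sketch contrasting the removed form with a UB-free rotate idiom; names are illustrative:

#include <stdint.h>
#include <assert.h>

/* The manual form removed by this commit; in C it invokes undefined
   behavior when n == 0 because of the (a >> 32) shift. */
static uint32_t rotl32_shift (uint32_t a, uint32_t n)
{
  return (a << n) | (a >> (32 - n));
}

/* UB-free equivalent of OpenCL rotate (a, n) for 32-bit operands. */
static uint32_t rotl32_safe (uint32_t a, uint32_t n)
{
  n &= 31;

  return (a << n) | (a >> ((32 - n) & 31));
}

int main (void)
{
  for (uint32_t n = 1; n < 32; n++)
  {
    assert (rotl32_shift (0xdeadbeef, n) == rotl32_safe (0xdeadbeef, n));
  }

  return 0;
}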
@@ -379,9 +317,75 @@ static inline u64 rotr64 (const u64 a, const u32 n)
 static inline u64 rotl64 (const u64 a, const u32 n)
 {
-  return rotr64 (a, (u64) 64 - n);
+  return rotate (a, (u64) n);
+}
+#endif
+
+#ifdef IS_NV
+#if CUDA_ARCH >= 500
+static inline u32 lut3_2d (const u32 a, const u32 b, const u32 c)
+{
+  u32 r;
+
+  asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
+
+  return r;
 }
+
+static inline u32 lut3_39 (const u32 a, const u32 b, const u32 c)
+{
+  u32 r;
+
+  asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
+
+  return r;
+}
+
+static inline u32 lut3_59 (const u32 a, const u32 b, const u32 c)
+{
+  u32 r;
+
+  asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
+
+  return r;
+}
+
+static inline u32 lut3_96 (const u32 a, const u32 b, const u32 c)
+{
+  u32 r;
+
+  asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
+
+  return r;
+}
+
+static inline u32 lut3_e4 (const u32 a, const u32 b, const u32 c)
+{
+  u32 r;
+
+  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
+
+  return r;
+}
+
+static inline u32 lut3_e8 (const u32 a, const u32 b, const u32 c)
+{
+  u32 r;
+
+  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
+
+  return r;
+}
+
+static inline u32 lut3_ca (const u32 a, const u32 b, const u32 c)
+{
+  u32 r;
+
+  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
+
+  return r;
+}
+#endif
 #endif

 typedef struct
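The lut3_* wrappers end up behind #ifdef IS_NV and #if CUDA_ARCH >= 500 because the lop3.b32 instruction exists only on Maxwell (sm_50) and later. Its 8-bit immediate is a truth table indexed by the corresponding bits of a, b, and c, so 0x96 computes a ^ b ^ c and 0xca the bitwise select a ? b : c. A C emulation for illustration; the helper name lut3 is not from the source:

#include <stdint.h>
#include <assert.h>

/* Emulation of NVIDIA's lop3.b32: for each bit position, look up the
   result in the 8-bit truth table 'imm', indexed by the bits of a, b, c. */
static uint32_t lut3 (uint32_t a, uint32_t b, uint32_t c, uint32_t imm)
{
  uint32_t r = 0;

  for (int i = 0; i < 32; i++)
  {
    const uint32_t idx = (((a >> i) & 1) << 2)
                       | (((b >> i) & 1) << 1)
                       |  ((c >> i) & 1);

    r |= ((imm >> idx) & 1) << i;
  }

  return r;
}

int main (void)
{
  const uint32_t a = 0x12345678, b = 0x9abcdef0, c = 0x0f0f0f0f;

  /* 0x96 is three-way XOR; 0xca is the bitwise select a ? b : c. */
  assert (lut3 (a, b, c, 0x96) == (a ^ b ^ c));
  assert (lut3 (a, b, c, 0xca) == ((a & b) | (~a & c)));

  return 0;
}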