mirror of
https://github.com/hashcat/hashcat.git
synced 2025-01-10 15:51:10 +00:00
Inline hc_byte_perm in hc_bytealign
This commit is contained in:
parent
98f130e715
commit
9574862a19
@ -1068,76 +1068,22 @@ DECLSPEC u32 hc_bfe_S (const u32 a, const u32 b, const u32 c)
|
||||
|
||||
// Vector byte-align: builds a 16-bit selector from the low two bits of c and
// hands it, together with a and b, to hc_byte_perm.
//
// a, b: the two source vectors passed through to hc_byte_perm.
// c:    byte offset; only (c & 3) influences the result.
//
// Returns whatever hc_byte_perm produces for the computed selector.
// NOTE(review): presumably hc_byte_perm picks one source byte per selector
// nibble (0-3 from a, 4-7 from b) - confirm against its definition.
DECLSPEC u32x hc_bytealign (const u32x a, const u32x b, const int c)
{
  const int c_mod_4 = c & 3;

  const int c_minus_4 = 4 - c_mod_4;

  // Shifting 0x76543210 right by one nibble per step and keeping the low
  // 16 bits yields the selectors 0x7654, 0x6543, 0x5432, 0x4321 for
  // c_mod_4 = 0, 1, 2, 3 respectively.
  const u32x r = hc_byte_perm (a, b, (0x76543210 >> (c_minus_4 * 4)) & 0xffff);

  return r;
}
|
||||
|
||||
// Scalar byte-align: builds a 16-bit selector from the low two bits of c and
// hands it, together with a and b, to hc_byte_perm_S.
//
// a, b: the two source words passed through to hc_byte_perm_S.
// c:    byte offset; only (c & 3) influences the result.
//
// Returns whatever hc_byte_perm_S produces for the computed selector.
// NOTE(review): presumably hc_byte_perm_S picks one source byte per selector
// nibble (0-3 from a, 4-7 from b) - confirm against its definition.
DECLSPEC u32 hc_bytealign_S (const u32 a, const u32 b, const int c)
{
  const int c_mod_4 = c & 3;

  const int c_minus_4 = 4 - c_mod_4;

  // Shifting 0x76543210 right by one nibble per step and keeping the low
  // 16 bits yields the selectors 0x7654, 0x6543, 0x5432, 0x4321 for
  // c_mod_4 = 0, 1, 2, 3 respectively.
  const u32 r = hc_byte_perm_S (a, b, (0x76543210 >> (c_minus_4 * 4)) & 0xffff);

  return r;
}
|
||||
|
Loading…
Reference in New Issue
Block a user