mirror of
https://github.com/hashcat/hashcat.git
synced 2024-11-18 14:18:11 +00:00
dad03e394d
1) SIMD code for all attack-modes: Macro vector_accessible() was not refactored and was missing completely. Had to rename variables rules_cnt, combs_cnt and bfs_cnt into il_cnt, which was a good thing anyway, as with the new SIMD code they all act in the same way. 2) SIMD code for attack-mode 0: With the new SIMD code, apply_rules_vect() has to return u32, not u32x. This has a massive impact on all *_a0 kernels. I've rewritten most of them. Deep testing using test.sh is still required. Some kernels need more fixes: - Some are kind of completely incompatible, like m10400, but they still use the old check_* includes; we should get rid of them as they are no longer necessary now that we have simd.c. - Some have a chance but require additional effort, like m11500. We can use the commented-out "#define NEW_SIMD_CODE" to find them. This change can have a negative impact on -a0 performance for devices that require vectorization. That is mostly CPU devices. New GPUs are all scalar, so they won't get hurt by this. This change also proves that there's no way to efficiently vectorize kernel rules with the new SIMD code, but it enables the addition of rule functions like @ that we have been missing for a long time. This is a TODO.
1253 lines
127 KiB
C
1253 lines
127 KiB
C
/**
|
|
* Authors.....: Jens Steube <jens.steube@gmail.com>
|
|
* magnum <john.magnum@hushmail.com>
|
|
*
|
|
* License.....: MIT
|
|
*/
|
|
|
|
// vliw1
|
|
|
|
#if VECT_SIZE == 1
|
|
|
|
// VECT_SIZE == 1: both operands are single values, so "one lane matches"
// reduces to a plain equality test of a against b.
#define MATCHES_ONE_VV(a,b) \
  ((a) == (b))
|
|
// VECT_SIZE == 1: compares the single value a with the scalar b; the expansion
// is the same equality test as the vector/vector form.
#define MATCHES_ONE_VS(a,b) \
  ((a) == (b))
|
|
|
|
/*
 * COMPARE_S_SIMD (VECT_SIZE == 1): single-hash compare.
 *
 * Compares h0..h3 with search[0..3].  On a full match it addresses
 * hashes_shown[digests_offset + 0]; if atomic_add () on that slot returns 0
 * (presumably: nobody reported this hash before - confirm against
 * atomic_add's contract) it calls mark_hash () for candidate il_pos and sets
 * d_return_buf[lid] = 1.
 *
 * Expands to a braced block.  The expansion site must provide: search,
 * digests_offset, hashes_shown, plains_buf, gid, lid, il_pos, d_return_buf,
 * atomic_add and mark_hash.  Each argument is evaluated exactly once per
 * comparison it takes part in.
 *
 * The definition is one uninterrupted run of backslash-continued lines; any
 * line inserted between them terminates the macro early.
 */
#define COMPARE_S_SIMD(h0,h1,h2,h3) \
{ \
  if (((h0) == search[0]) && ((h1) == search[1]) && ((h2) == search[2]) && ((h3) == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); \
      d_return_buf[lid] = 1; \
    } \
  } \
}
|
|
|
|
/*
 * COMPARE_M_SIMD (VECT_SIZE == 1): multi-hash compare.
 *
 * Packs h0..h3 into a local 4-word digest and passes it to check () together
 * with the bitmaps_buf_s1_a..d / bitmaps_buf_s2_a..d buffers, bitmap_mask and
 * the two shifts.  When check () is non-zero, find_hash () is asked for the
 * digest's position among digests_cnt entries starting at
 * digests_buf[digests_offset]; -1 means "not found".  For a found position,
 * if atomic_add () on hashes_shown[digests_offset + hash_pos] returns 0,
 * mark_hash () is called for candidate il_pos and d_return_buf[lid] is set
 * to 1.
 *
 * Expands to a braced block.  The expansion site must provide every
 * identifier named above plus plains_buf, gid and lid.
 *
 * The definition is one uninterrupted run of backslash-continued lines; any
 * line inserted between them terminates the macro early.  Arguments are
 * parenthesised so arbitrary expressions can be passed.
 */
#define COMPARE_M_SIMD(h0,h1,h2,h3) \
{ \
  const u32 digest_tp0[4] = { (h0), (h1), (h2), (h3) }; \
  if (check (digest_tp0, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int hash_pos = find_hash (digest_tp0, digests_cnt, &digests_buf[digests_offset]); \
    if (hash_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + hash_pos; \
      if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
      { \
        mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); \
        d_return_buf[lid] = 1; \
      } \
    } \
  } \
}
|
|
|
|
#endif
|
|
|
|
// vliw2
|
|
|
|
// True when element offset e, counted from position p, still lies below the
// count c, i.e. p + e < c.
#define vector_accessible(p,c,e) \
  ((c) > ((p) + (e)))
|
|
|
|
#if VECT_SIZE == 2
|
|
|
|
// VECT_SIZE == 2: true when a and b agree in at least one of the components
// .s0 / .s1 (compared position by position).
#define MATCHES_ONE_VV(a,b)   \
  (  ((a).s0 == (b).s0)       \
  || ((a).s1 == (b).s1))
|
|
// VECT_SIZE == 2: true when at least one of a.s0 / a.s1 equals the scalar b.
#define MATCHES_ONE_VS(a,b)   \
  (  ((a).s0 == (b))          \
  || ((a).s1 == (b)))
|
|
|
|
/*
 * COMPARE_S_SIMD_LANE: private per-lane step of COMPARE_S_SIMD.
 *
 * Tests component `lane` of the digest vectors h0..h3 against search[0..3].
 * On a full match it addresses hashes_shown[digests_offset + 0]; only when
 * vector_accessible (il_pos, il_cnt, idx) holds and atomic_add () on that
 * slot returns 0 does it call mark_hash () for candidate il_pos + idx and set
 * d_return_buf[lid] = 1.  `idx` must be the numeric index of `lane`.
 */
#define COMPARE_S_SIMD_LANE(h0,h1,h2,h3,lane,idx) \
  if (((h0).lane == search[0]) && ((h1).lane == search[1]) && ((h2).lane == search[2]) && ((h3).lane == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    if (vector_accessible (il_pos, il_cnt, idx) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + (idx)); \
      d_return_buf[lid] = 1; \
    } \
  }

/*
 * COMPARE_S_SIMD (VECT_SIZE == 2): single-hash compare, lanes .s0 then .s1.
 *
 * Expands to a braced block.  The expansion site must provide: search,
 * digests_offset, hashes_shown, plains_buf, gid, lid, il_pos, il_cnt,
 * d_return_buf, atomic_add, mark_hash and vector_accessible.  Arguments are
 * evaluated once per lane, so they must be free of side effects.
 *
 * Both definitions are uninterrupted runs of backslash-continued lines; any
 * line inserted between them terminates the macro early.
 */
#define COMPARE_S_SIMD(h0,h1,h2,h3) \
{ \
  COMPARE_S_SIMD_LANE (h0, h1, h2, h3, s0, 0) \
  COMPARE_S_SIMD_LANE (h0, h1, h2, h3, s1, 1) \
}
|
|
|
|
/*
 * COMPARE_M_SIMD_LANE: private per-lane step of COMPARE_M_SIMD.
 *
 * Passes the 4-word array `digest_tp` to check () together with the
 * bitmaps_buf_s1_a..d / bitmaps_buf_s2_a..d buffers, bitmap_mask and the two
 * shifts.  When check () is non-zero, find_hash () is asked for the digest's
 * position among digests_cnt entries starting at digests_buf[digests_offset];
 * -1 means "not found".  For a found position, only when
 * vector_accessible (il_pos, il_cnt, idx) holds and atomic_add () on
 * hashes_shown[digests_offset + hash_pos] returns 0 does it call mark_hash ()
 * for candidate il_pos + idx and set d_return_buf[lid] = 1.
 */
#define COMPARE_M_SIMD_LANE(digest_tp,idx) \
  if (check (digest_tp, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int hash_pos = find_hash (digest_tp, digests_cnt, &digests_buf[digests_offset]); \
    if (hash_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + hash_pos; \
      if (vector_accessible (il_pos, il_cnt, idx) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
      { \
        mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + (idx)); \
        d_return_buf[lid] = 1; \
      } \
    } \
  }

/*
 * COMPARE_M_SIMD (VECT_SIZE == 2): multi-hash compare.
 *
 * Splits the vectors h0..h3 into one 4-word digest per lane (all digests are
 * built before any lane is checked), then runs the lane step for lane 0 and
 * lane 1 in order.  Expands to a braced block; the expansion site must
 * provide every identifier the lane step uses plus plains_buf, gid and lid.
 *
 * Arguments are parenthesised before the component selector is applied, so
 * an argument such as `*p` selects a component of the pointed-to vector.
 * Both definitions are uninterrupted runs of backslash-continued lines; any
 * line inserted between them terminates the macro early.
 */
#define COMPARE_M_SIMD(h0,h1,h2,h3) \
{ \
  const u32 digest_tp0[4] = { (h0).s0, (h1).s0, (h2).s0, (h3).s0 }; \
  const u32 digest_tp1[4] = { (h0).s1, (h1).s1, (h2).s1, (h3).s1 }; \
  COMPARE_M_SIMD_LANE (digest_tp0, 0) \
  COMPARE_M_SIMD_LANE (digest_tp1, 1) \
}
|
|
|
|
#endif
|
|
|
|
// vliw4
|
|
|
|
#if VECT_SIZE == 4
|
|
|
|
// VECT_SIZE == 4: true when a and b agree in at least one of the components
// .s0 .. .s3 (compared position by position).
#define MATCHES_ONE_VV(a,b)   \
  (  ((a).s0 == (b).s0)       \
  || ((a).s1 == (b).s1)       \
  || ((a).s2 == (b).s2)       \
  || ((a).s3 == (b).s3))
|
|
// VECT_SIZE == 4: true when at least one of a.s0 .. a.s3 equals the scalar b.
#define MATCHES_ONE_VS(a,b)   \
  (  ((a).s0 == (b))          \
  || ((a).s1 == (b))          \
  || ((a).s2 == (b))          \
  || ((a).s3 == (b)))
|
|
|
|
/*
 * COMPARE_S_SIMD_LANE: private per-lane step of COMPARE_S_SIMD.
 *
 * Tests component `lane` of the digest vectors h0..h3 against search[0..3].
 * On a full match it addresses hashes_shown[digests_offset + 0]; only when
 * vector_accessible (il_pos, il_cnt, idx) holds and atomic_add () on that
 * slot returns 0 does it call mark_hash () for candidate il_pos + idx and set
 * d_return_buf[lid] = 1.  `idx` must be the numeric index of `lane`.
 */
#define COMPARE_S_SIMD_LANE(h0,h1,h2,h3,lane,idx) \
  if (((h0).lane == search[0]) && ((h1).lane == search[1]) && ((h2).lane == search[2]) && ((h3).lane == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    if (vector_accessible (il_pos, il_cnt, idx) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + (idx)); \
      d_return_buf[lid] = 1; \
    } \
  }

/*
 * COMPARE_S_SIMD (VECT_SIZE == 4): single-hash compare, lanes .s0 .. .s3 in
 * ascending order.
 *
 * Expands to a braced block.  The expansion site must provide: search,
 * digests_offset, hashes_shown, plains_buf, gid, lid, il_pos, il_cnt,
 * d_return_buf, atomic_add, mark_hash and vector_accessible.  Arguments are
 * evaluated once per lane, so they must be free of side effects.
 *
 * Both definitions are uninterrupted runs of backslash-continued lines; any
 * line inserted between them terminates the macro early.
 */
#define COMPARE_S_SIMD(h0,h1,h2,h3) \
{ \
  COMPARE_S_SIMD_LANE (h0, h1, h2, h3, s0, 0) \
  COMPARE_S_SIMD_LANE (h0, h1, h2, h3, s1, 1) \
  COMPARE_S_SIMD_LANE (h0, h1, h2, h3, s2, 2) \
  COMPARE_S_SIMD_LANE (h0, h1, h2, h3, s3, 3) \
}
|
|
|
|
/*
 * COMPARE_M_SIMD_LANE: private per-lane step of COMPARE_M_SIMD.
 *
 * Passes the 4-word array `digest_tp` to check () together with the
 * bitmaps_buf_s1_a..d / bitmaps_buf_s2_a..d buffers, bitmap_mask and the two
 * shifts.  When check () is non-zero, find_hash () is asked for the digest's
 * position among digests_cnt entries starting at digests_buf[digests_offset];
 * -1 means "not found".  For a found position, only when
 * vector_accessible (il_pos, il_cnt, idx) holds and atomic_add () on
 * hashes_shown[digests_offset + hash_pos] returns 0 does it call mark_hash ()
 * for candidate il_pos + idx and set d_return_buf[lid] = 1.
 */
#define COMPARE_M_SIMD_LANE(digest_tp,idx) \
  if (check (digest_tp, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int hash_pos = find_hash (digest_tp, digests_cnt, &digests_buf[digests_offset]); \
    if (hash_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + hash_pos; \
      if (vector_accessible (il_pos, il_cnt, idx) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
      { \
        mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + (idx)); \
        d_return_buf[lid] = 1; \
      } \
    } \
  }

/*
 * COMPARE_M_SIMD (VECT_SIZE == 4): multi-hash compare.
 *
 * Splits the vectors h0..h3 into one 4-word digest per lane (all digests are
 * built before any lane is checked), then runs the lane step for lanes 0..3
 * in ascending order.  Expands to a braced block; the expansion site must
 * provide every identifier the lane step uses plus plains_buf, gid and lid.
 *
 * Arguments are parenthesised before the component selector is applied, so
 * an argument such as `*p` selects a component of the pointed-to vector.
 * Both definitions are uninterrupted runs of backslash-continued lines; any
 * line inserted between them terminates the macro early.
 */
#define COMPARE_M_SIMD(h0,h1,h2,h3) \
{ \
  const u32 digest_tp0[4] = { (h0).s0, (h1).s0, (h2).s0, (h3).s0 }; \
  const u32 digest_tp1[4] = { (h0).s1, (h1).s1, (h2).s1, (h3).s1 }; \
  const u32 digest_tp2[4] = { (h0).s2, (h1).s2, (h2).s2, (h3).s2 }; \
  const u32 digest_tp3[4] = { (h0).s3, (h1).s3, (h2).s3, (h3).s3 }; \
  COMPARE_M_SIMD_LANE (digest_tp0, 0) \
  COMPARE_M_SIMD_LANE (digest_tp1, 1) \
  COMPARE_M_SIMD_LANE (digest_tp2, 2) \
  COMPARE_M_SIMD_LANE (digest_tp3, 3) \
}
|
|
|
|
#endif
|
|
|
|
// vliw8
|
|
|
|
#if VECT_SIZE == 8
|
|
|
|
// VECT_SIZE == 8: true when a and b agree in at least one of the components
// .s0 .. .s7 (compared position by position).
#define MATCHES_ONE_VV(a,b)   \
  (  ((a).s0 == (b).s0)       \
  || ((a).s1 == (b).s1)       \
  || ((a).s2 == (b).s2)       \
  || ((a).s3 == (b).s3)       \
  || ((a).s4 == (b).s4)       \
  || ((a).s5 == (b).s5)       \
  || ((a).s6 == (b).s6)       \
  || ((a).s7 == (b).s7))
|
|
// VECT_SIZE == 8: true when at least one of a.s0 .. a.s7 equals the scalar b.
#define MATCHES_ONE_VS(a,b)   \
  (  ((a).s0 == (b))          \
  || ((a).s1 == (b))          \
  || ((a).s2 == (b))          \
  || ((a).s3 == (b))          \
  || ((a).s4 == (b))          \
  || ((a).s5 == (b))          \
  || ((a).s6 == (b))          \
  || ((a).s7 == (b)))
|
|
|
|
/*
 * COMPARE_S_SIMD_LANE: private per-lane step of COMPARE_S_SIMD.
 *
 * Tests component `lane` of the digest vectors h0..h3 against search[0..3].
 * On a full match it addresses hashes_shown[digests_offset + 0]; only when
 * vector_accessible (il_pos, il_cnt, idx) holds and atomic_add () on that
 * slot returns 0 does it call mark_hash () for candidate il_pos + idx and set
 * d_return_buf[lid] = 1.  `idx` must be the numeric index of `lane`.
 */
#define COMPARE_S_SIMD_LANE(h0,h1,h2,h3,lane,idx) \
  if (((h0).lane == search[0]) && ((h1).lane == search[1]) && ((h2).lane == search[2]) && ((h3).lane == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    if (vector_accessible (il_pos, il_cnt, idx) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + (idx)); \
      d_return_buf[lid] = 1; \
    } \
  }

/*
 * COMPARE_S_SIMD (VECT_SIZE == 8): single-hash compare, lanes .s0 .. .s7 in
 * ascending order.
 *
 * Expands to a braced block.  The expansion site must provide: search,
 * digests_offset, hashes_shown, plains_buf, gid, lid, il_pos, il_cnt,
 * d_return_buf, atomic_add, mark_hash and vector_accessible.  Arguments are
 * evaluated once per lane, so they must be free of side effects.
 *
 * Both definitions are uninterrupted runs of backslash-continued lines; any
 * line inserted between them terminates the macro early.
 */
#define COMPARE_S_SIMD(h0,h1,h2,h3) \
{ \
  COMPARE_S_SIMD_LANE (h0, h1, h2, h3, s0, 0) \
  COMPARE_S_SIMD_LANE (h0, h1, h2, h3, s1, 1) \
  COMPARE_S_SIMD_LANE (h0, h1, h2, h3, s2, 2) \
  COMPARE_S_SIMD_LANE (h0, h1, h2, h3, s3, 3) \
  COMPARE_S_SIMD_LANE (h0, h1, h2, h3, s4, 4) \
  COMPARE_S_SIMD_LANE (h0, h1, h2, h3, s5, 5) \
  COMPARE_S_SIMD_LANE (h0, h1, h2, h3, s6, 6) \
  COMPARE_S_SIMD_LANE (h0, h1, h2, h3, s7, 7) \
}
|
|
|
|
/*
 * COMPARE_M_SIMD_LANE: private per-lane step of COMPARE_M_SIMD.
 *
 * Passes the 4-word array `digest_tp` to check () together with the
 * bitmaps_buf_s1_a..d / bitmaps_buf_s2_a..d buffers, bitmap_mask and the two
 * shifts.  When check () is non-zero, find_hash () is asked for the digest's
 * position among digests_cnt entries starting at digests_buf[digests_offset];
 * -1 means "not found".  For a found position, only when
 * vector_accessible (il_pos, il_cnt, idx) holds and atomic_add () on
 * hashes_shown[digests_offset + hash_pos] returns 0 does it call mark_hash ()
 * for candidate il_pos + idx and set d_return_buf[lid] = 1.
 */
#define COMPARE_M_SIMD_LANE(digest_tp,idx) \
  if (check (digest_tp, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int hash_pos = find_hash (digest_tp, digests_cnt, &digests_buf[digests_offset]); \
    if (hash_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + hash_pos; \
      if (vector_accessible (il_pos, il_cnt, idx) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
      { \
        mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + (idx)); \
        d_return_buf[lid] = 1; \
      } \
    } \
  }

/*
 * COMPARE_M_SIMD (VECT_SIZE == 8): multi-hash compare.
 *
 * Splits the vectors h0..h3 into one 4-word digest per lane (all digests are
 * built before any lane is checked), then runs the lane step for lanes 0..7
 * in ascending order.  Expands to a braced block; the expansion site must
 * provide every identifier the lane step uses plus plains_buf, gid and lid.
 *
 * Arguments are parenthesised before the component selector is applied, so
 * an argument such as `*p` selects a component of the pointed-to vector.
 * Both definitions are uninterrupted runs of backslash-continued lines; any
 * line inserted between them terminates the macro early.
 */
#define COMPARE_M_SIMD(h0,h1,h2,h3) \
{ \
  const u32 digest_tp0[4] = { (h0).s0, (h1).s0, (h2).s0, (h3).s0 }; \
  const u32 digest_tp1[4] = { (h0).s1, (h1).s1, (h2).s1, (h3).s1 }; \
  const u32 digest_tp2[4] = { (h0).s2, (h1).s2, (h2).s2, (h3).s2 }; \
  const u32 digest_tp3[4] = { (h0).s3, (h1).s3, (h2).s3, (h3).s3 }; \
  const u32 digest_tp4[4] = { (h0).s4, (h1).s4, (h2).s4, (h3).s4 }; \
  const u32 digest_tp5[4] = { (h0).s5, (h1).s5, (h2).s5, (h3).s5 }; \
  const u32 digest_tp6[4] = { (h0).s6, (h1).s6, (h2).s6, (h3).s6 }; \
  const u32 digest_tp7[4] = { (h0).s7, (h1).s7, (h2).s7, (h3).s7 }; \
  COMPARE_M_SIMD_LANE (digest_tp0, 0) \
  COMPARE_M_SIMD_LANE (digest_tp1, 1) \
  COMPARE_M_SIMD_LANE (digest_tp2, 2) \
  COMPARE_M_SIMD_LANE (digest_tp3, 3) \
  COMPARE_M_SIMD_LANE (digest_tp4, 4) \
  COMPARE_M_SIMD_LANE (digest_tp5, 5) \
  COMPARE_M_SIMD_LANE (digest_tp6, 6) \
  COMPARE_M_SIMD_LANE (digest_tp7, 7) \
}
|
|
|
|
#endif
|
|
|
|
// vliw16
|
|
|
|
#if VECT_SIZE == 16
|
|
|
|
// VECT_SIZE == 16: true when a and b agree in at least one of the components
// .s0 .. .s9, .sa .. .sf (compared position by position).
#define MATCHES_ONE_VV(a,b)   \
  (  ((a).s0 == (b).s0)       \
  || ((a).s1 == (b).s1)       \
  || ((a).s2 == (b).s2)       \
  || ((a).s3 == (b).s3)       \
  || ((a).s4 == (b).s4)       \
  || ((a).s5 == (b).s5)       \
  || ((a).s6 == (b).s6)       \
  || ((a).s7 == (b).s7)       \
  || ((a).s8 == (b).s8)       \
  || ((a).s9 == (b).s9)       \
  || ((a).sa == (b).sa)       \
  || ((a).sb == (b).sb)       \
  || ((a).sc == (b).sc)       \
  || ((a).sd == (b).sd)       \
  || ((a).se == (b).se)       \
  || ((a).sf == (b).sf))
|
|
// VECT_SIZE == 16: true when at least one of a.s0 .. a.s9, a.sa .. a.sf equals
// the scalar b.
#define MATCHES_ONE_VS(a,b)   \
  (  ((a).s0 == (b))          \
  || ((a).s1 == (b))          \
  || ((a).s2 == (b))          \
  || ((a).s3 == (b))          \
  || ((a).s4 == (b))          \
  || ((a).s5 == (b))          \
  || ((a).s6 == (b))          \
  || ((a).s7 == (b))          \
  || ((a).s8 == (b))          \
  || ((a).s9 == (b))          \
  || ((a).sa == (b))          \
  || ((a).sb == (b))          \
  || ((a).sc == (b))          \
  || ((a).sd == (b))          \
  || ((a).se == (b))          \
  || ((a).sf == (b)))
|
|
|
|
/*
 * COMPARE_S_SIMD_LANE: private per-lane step of COMPARE_S_SIMD.
 *
 * Tests component `lane` of the digest vectors h0..h3 against search[0..3].
 * On a full match it addresses hashes_shown[digests_offset + 0]; only when
 * vector_accessible (il_pos, il_cnt, idx) holds and atomic_add () on that
 * slot returns 0 does it call mark_hash () for candidate il_pos + idx and set
 * d_return_buf[lid] = 1.  `idx` must be the numeric index of `lane`
 * (components .sa .. .sf correspond to 10 .. 15).
 */
#define COMPARE_S_SIMD_LANE(h0,h1,h2,h3,lane,idx) \
  if (((h0).lane == search[0]) && ((h1).lane == search[1]) && ((h2).lane == search[2]) && ((h3).lane == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    if (vector_accessible (il_pos, il_cnt, idx) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + (idx)); \
      d_return_buf[lid] = 1; \
    } \
  }

/*
 * COMPARE_S_SIMD (VECT_SIZE == 16): single-hash compare, lanes .s0 .. .sf in
 * ascending order.
 *
 * Expands to a braced block.  The expansion site must provide: search,
 * digests_offset, hashes_shown, plains_buf, gid, lid, il_pos, il_cnt,
 * d_return_buf, atomic_add, mark_hash and vector_accessible.  Arguments are
 * evaluated once per lane, so they must be free of side effects.
 *
 * Both definitions are uninterrupted runs of backslash-continued lines; any
 * line inserted between them terminates the macro early.
 */
#define COMPARE_S_SIMD(h0,h1,h2,h3) \
{ \
  COMPARE_S_SIMD_LANE (h0, h1, h2, h3, s0,  0) \
  COMPARE_S_SIMD_LANE (h0, h1, h2, h3, s1,  1) \
  COMPARE_S_SIMD_LANE (h0, h1, h2, h3, s2,  2) \
  COMPARE_S_SIMD_LANE (h0, h1, h2, h3, s3,  3) \
  COMPARE_S_SIMD_LANE (h0, h1, h2, h3, s4,  4) \
  COMPARE_S_SIMD_LANE (h0, h1, h2, h3, s5,  5) \
  COMPARE_S_SIMD_LANE (h0, h1, h2, h3, s6,  6) \
  COMPARE_S_SIMD_LANE (h0, h1, h2, h3, s7,  7) \
  COMPARE_S_SIMD_LANE (h0, h1, h2, h3, s8,  8) \
  COMPARE_S_SIMD_LANE (h0, h1, h2, h3, s9,  9) \
  COMPARE_S_SIMD_LANE (h0, h1, h2, h3, sa, 10) \
  COMPARE_S_SIMD_LANE (h0, h1, h2, h3, sb, 11) \
  COMPARE_S_SIMD_LANE (h0, h1, h2, h3, sc, 12) \
  COMPARE_S_SIMD_LANE (h0, h1, h2, h3, sd, 13) \
  COMPARE_S_SIMD_LANE (h0, h1, h2, h3, se, 14) \
  COMPARE_S_SIMD_LANE (h0, h1, h2, h3, sf, 15) \
}
|
|
|
|
#define COMPARE_M_SIMD(h0,h1,h2,h3) \
|
|
{ \
|
|
const u32 digest_tp00[4] = { h0.s0, h1.s0, h2.s0, h3.s0 }; \
|
|
const u32 digest_tp01[4] = { h0.s1, h1.s1, h2.s1, h3.s1 }; \
|
|
const u32 digest_tp02[4] = { h0.s2, h1.s2, h2.s2, h3.s2 }; \
|
|
const u32 digest_tp03[4] = { h0.s3, h1.s3, h2.s3, h3.s3 }; \
|
|
const u32 digest_tp04[4] = { h0.s4, h1.s4, h2.s4, h3.s4 }; \
|
|
const u32 digest_tp05[4] = { h0.s5, h1.s5, h2.s5, h3.s5 }; \
|
|
const u32 digest_tp06[4] = { h0.s6, h1.s6, h2.s6, h3.s6 }; \
|
|
const u32 digest_tp07[4] = { h0.s7, h1.s7, h2.s7, h3.s7 }; \
|
|
const u32 digest_tp08[4] = { h0.s8, h1.s8, h2.s8, h3.s8 }; \
|
|
const u32 digest_tp09[4] = { h0.s9, h1.s9, h2.s9, h3.s9 }; \
|
|
const u32 digest_tp10[4] = { h0.sa, h1.sa, h2.sa, h3.sa }; \
|
|
const u32 digest_tp11[4] = { h0.sb, h1.sb, h2.sb, h3.sb }; \
|
|
const u32 digest_tp12[4] = { h0.sc, h1.sc, h2.sc, h3.sc }; \
|
|
const u32 digest_tp13[4] = { h0.sd, h1.sd, h2.sd, h3.sd }; \
|
|
const u32 digest_tp14[4] = { h0.se, h1.se, h2.se, h3.se }; \
|
|
const u32 digest_tp15[4] = { h0.sf, h1.sf, h2.sf, h3.sf }; \
|
|
\
|
|
if (check (digest_tp00, \
|
|
bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
|
|
bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
|
|
bitmap_mask, \
|
|
bitmap_shift1, \
|
|
bitmap_shift2)) \
|
|
{ \
|
|
int hash_pos = find_hash (digest_tp00, digests_cnt, &digests_buf[digests_offset]); \
|
|
\
|
|
if (hash_pos != -1) \
|
|
{ \
|
|
const u32 final_hash_pos = digests_offset + hash_pos; \
|
|
\
|
|
if (vector_accessible (il_pos, il_cnt, 0) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
|
|
{ \
|
|
mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 0); \
|
|
\
|
|
d_return_buf[lid] = 1; \
|
|
} \
|
|
} \
|
|
} \
|
|
\
|
|
if (check (digest_tp01, \
|
|
bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
|
|
bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
|
|
bitmap_mask, \
|
|
bitmap_shift1, \
|
|
bitmap_shift2)) \
|
|
{ \
|
|
int hash_pos = find_hash (digest_tp01, digests_cnt, &digests_buf[digests_offset]); \
|
|
\
|
|
if (hash_pos != -1) \
|
|
{ \
|
|
const u32 final_hash_pos = digests_offset + hash_pos; \
|
|
\
|
|
if (vector_accessible (il_pos, il_cnt, 1) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
|
|
{ \
|
|
mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 1); \
|
|
\
|
|
d_return_buf[lid] = 1; \
|
|
} \
|
|
} \
|
|
} \
|
|
\
|
|
if (check (digest_tp02, \
|
|
bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
|
|
bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
|
|
bitmap_mask, \
|
|
bitmap_shift1, \
|
|
bitmap_shift2)) \
|
|
{ \
|
|
int hash_pos = find_hash (digest_tp02, digests_cnt, &digests_buf[digests_offset]); \
|
|
\
|
|
if (hash_pos != -1) \
|
|
{ \
|
|
const u32 final_hash_pos = digests_offset + hash_pos; \
|
|
\
|
|
if (vector_accessible (il_pos, il_cnt, 2) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
|
|
{ \
|
|
mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 2); \
|
|
\
|
|
d_return_buf[lid] = 1; \
|
|
} \
|
|
} \
|
|
} \
|
|
\
|
|
if (check (digest_tp03, \
|
|
bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
|
|
bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
|
|
bitmap_mask, \
|
|
bitmap_shift1, \
|
|
bitmap_shift2)) \
|
|
{ \
|
|
int hash_pos = find_hash (digest_tp03, digests_cnt, &digests_buf[digests_offset]); \
|
|
\
|
|
if (hash_pos != -1) \
|
|
{ \
|
|
const u32 final_hash_pos = digests_offset + hash_pos; \
|
|
\
|
|
if (vector_accessible (il_pos, il_cnt, 3) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
|
|
{ \
|
|
mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 3); \
|
|
\
|
|
d_return_buf[lid] = 1; \
|
|
} \
|
|
} \
|
|
} \
|
|
\
|
|
if (check (digest_tp04, \
|
|
bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
|
|
bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
|
|
bitmap_mask, \
|
|
bitmap_shift1, \
|
|
bitmap_shift2)) \
|
|
{ \
|
|
int hash_pos = find_hash (digest_tp04, digests_cnt, &digests_buf[digests_offset]); \
|
|
\
|
|
if (hash_pos != -1) \
|
|
{ \
|
|
const u32 final_hash_pos = digests_offset + hash_pos; \
|
|
\
|
|
if (vector_accessible (il_pos, il_cnt, 4) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
|
|
{ \
|
|
mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 4); \
|
|
\
|
|
d_return_buf[lid] = 1; \
|
|
} \
|
|
} \
|
|
} \
|
|
\
|
|
if (check (digest_tp05, \
|
|
bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
|
|
bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
|
|
bitmap_mask, \
|
|
bitmap_shift1, \
|
|
bitmap_shift2)) \
|
|
{ \
|
|
int hash_pos = find_hash (digest_tp05, digests_cnt, &digests_buf[digests_offset]); \
|
|
\
|
|
if (hash_pos != -1) \
|
|
{ \
|
|
const u32 final_hash_pos = digests_offset + hash_pos; \
|
|
\
|
|
if (vector_accessible (il_pos, il_cnt, 5) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
|
|
{ \
|
|
mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 5); \
|
|
\
|
|
d_return_buf[lid] = 1; \
|
|
} \
|
|
} \
|
|
} \
|
|
\
|
|
if (check (digest_tp06, \
|
|
bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
|
|
bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
|
|
bitmap_mask, \
|
|
bitmap_shift1, \
|
|
bitmap_shift2)) \
|
|
{ \
|
|
int hash_pos = find_hash (digest_tp06, digests_cnt, &digests_buf[digests_offset]); \
|
|
\
|
|
if (hash_pos != -1) \
|
|
{ \
|
|
const u32 final_hash_pos = digests_offset + hash_pos; \
|
|
\
|
|
if (vector_accessible (il_pos, il_cnt, 6) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
|
|
{ \
|
|
mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 6); \
|
|
\
|
|
d_return_buf[lid] = 1; \
|
|
} \
|
|
} \
|
|
} \
|
|
\
|
|
if (check (digest_tp07, \
|
|
bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
|
|
bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
|
|
bitmap_mask, \
|
|
bitmap_shift1, \
|
|
bitmap_shift2)) \
|
|
{ \
|
|
int hash_pos = find_hash (digest_tp07, digests_cnt, &digests_buf[digests_offset]); \
|
|
\
|
|
if (hash_pos != -1) \
|
|
{ \
|
|
const u32 final_hash_pos = digests_offset + hash_pos; \
|
|
\
|
|
if (vector_accessible (il_pos, il_cnt, 7) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
|
|
{ \
|
|
mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 7); \
|
|
\
|
|
d_return_buf[lid] = 1; \
|
|
} \
|
|
} \
|
|
} \
|
|
\
|
|
if (check (digest_tp08, \
|
|
bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
|
|
bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
|
|
bitmap_mask, \
|
|
bitmap_shift1, \
|
|
bitmap_shift2)) \
|
|
{ \
|
|
int hash_pos = find_hash (digest_tp08, digests_cnt, &digests_buf[digests_offset]); \
|
|
\
|
|
if (hash_pos != -1) \
|
|
{ \
|
|
const u32 final_hash_pos = digests_offset + hash_pos; \
|
|
\
|
|
if (vector_accessible (il_pos, il_cnt, 8) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
|
|
{ \
|
|
mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 8); \
|
|
\
|
|
d_return_buf[lid] = 1; \
|
|
} \
|
|
} \
|
|
} \
|
|
\
|
|
if (check (digest_tp09, \
|
|
bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
|
|
bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
|
|
bitmap_mask, \
|
|
bitmap_shift1, \
|
|
bitmap_shift2)) \
|
|
{ \
|
|
int hash_pos = find_hash (digest_tp09, digests_cnt, &digests_buf[digests_offset]); \
|
|
\
|
|
if (hash_pos != -1) \
|
|
{ \
|
|
const u32 final_hash_pos = digests_offset + hash_pos; \
|
|
\
|
|
if (vector_accessible (il_pos, il_cnt, 9) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
|
|
{ \
|
|
mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 9); \
|
|
\
|
|
d_return_buf[lid] = 1; \
|
|
} \
|
|
} \
|
|
} \
|
|
\
|
|
if (check (digest_tp10, \
|
|
bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
|
|
bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
|
|
bitmap_mask, \
|
|
bitmap_shift1, \
|
|
bitmap_shift2)) \
|
|
{ \
|
|
int hash_pos = find_hash (digest_tp10, digests_cnt, &digests_buf[digests_offset]); \
|
|
\
|
|
if (hash_pos != -1) \
|
|
{ \
|
|
const u32 final_hash_pos = digests_offset + hash_pos; \
|
|
\
|
|
if (vector_accessible (il_pos, il_cnt, 10) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
|
|
{ \
|
|
mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 10); \
|
|
\
|
|
d_return_buf[lid] = 1; \
|
|
} \
|
|
} \
|
|
} \
|
|
\
|
|
if (check (digest_tp11, \
|
|
bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
|
|
bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
|
|
bitmap_mask, \
|
|
bitmap_shift1, \
|
|
bitmap_shift2)) \
|
|
{ \
|
|
int hash_pos = find_hash (digest_tp11, digests_cnt, &digests_buf[digests_offset]); \
|
|
\
|
|
if (hash_pos != -1) \
|
|
{ \
|
|
const u32 final_hash_pos = digests_offset + hash_pos; \
|
|
\
|
|
if (vector_accessible (il_pos, il_cnt, 11) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
|
|
{ \
|
|
mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 11); \
|
|
\
|
|
d_return_buf[lid] = 1; \
|
|
} \
|
|
} \
|
|
} \
|
|
\
|
|
if (check (digest_tp12, \
|
|
bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
|
|
bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
|
|
bitmap_mask, \
|
|
bitmap_shift1, \
|
|
bitmap_shift2)) \
|
|
{ \
|
|
int hash_pos = find_hash (digest_tp12, digests_cnt, &digests_buf[digests_offset]); \
|
|
\
|
|
if (hash_pos != -1) \
|
|
{ \
|
|
const u32 final_hash_pos = digests_offset + hash_pos; \
|
|
\
|
|
if (vector_accessible (il_pos, il_cnt, 12) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
|
|
{ \
|
|
mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 12); \
|
|
\
|
|
d_return_buf[lid] = 1; \
|
|
} \
|
|
} \
|
|
} \
|
|
\
|
|
if (check (digest_tp13, \
|
|
bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
|
|
bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
|
|
bitmap_mask, \
|
|
bitmap_shift1, \
|
|
bitmap_shift2)) \
|
|
{ \
|
|
int hash_pos = find_hash (digest_tp13, digests_cnt, &digests_buf[digests_offset]); \
|
|
\
|
|
if (hash_pos != -1) \
|
|
{ \
|
|
const u32 final_hash_pos = digests_offset + hash_pos; \
|
|
\
|
|
if (vector_accessible (il_pos, il_cnt, 13) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
|
|
{ \
|
|
mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 13); \
|
|
\
|
|
d_return_buf[lid] = 1; \
|
|
} \
|
|
} \
|
|
} \
|
|
\
|
|
if (check (digest_tp14, \
|
|
bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
|
|
bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
|
|
bitmap_mask, \
|
|
bitmap_shift1, \
|
|
bitmap_shift2)) \
|
|
{ \
|
|
int hash_pos = find_hash (digest_tp14, digests_cnt, &digests_buf[digests_offset]); \
|
|
\
|
|
if (hash_pos != -1) \
|
|
{ \
|
|
const u32 final_hash_pos = digests_offset + hash_pos; \
|
|
\
|
|
if (vector_accessible (il_pos, il_cnt, 14) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
|
|
{ \
|
|
mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 14); \
|
|
\
|
|
d_return_buf[lid] = 1; \
|
|
} \
|
|
} \
|
|
} \
|
|
\
|
|
if (check (digest_tp15, \
|
|
bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
|
|
bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
|
|
bitmap_mask, \
|
|
bitmap_shift1, \
|
|
bitmap_shift2)) \
|
|
{ \
|
|
int hash_pos = find_hash (digest_tp15, digests_cnt, &digests_buf[digests_offset]); \
|
|
\
|
|
if (hash_pos != -1) \
|
|
{ \
|
|
const u32 final_hash_pos = digests_offset + hash_pos; \
|
|
\
|
|
if (vector_accessible (il_pos, il_cnt, 15) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
|
|
{ \
|
|
mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 15); \
|
|
\
|
|
d_return_buf[lid] = 1; \
|
|
} \
|
|
} \
|
|
} \
|
|
}
|
|
|
|
#endif
|
|
|
|
// logical negation of MATCHES_ONE_VV for the same two operands
#define MATCHES_NONE_VV(a,b) (!(MATCHES_ONE_VV ((a), (b))))
|
|
// logical negation of MATCHES_ONE_VS for the same two operands
#define MATCHES_NONE_VS(a,b) (!(MATCHES_ONE_VS ((a), (b))))
|
|
|
|
// attack-mode 0
|
|
|
|
/**
 * Gather the `i` member of VECT_SIZE consecutive bf_t entries into one vector.
 *
 * Lane k of the result is bfs_buf[il_pos + k].i for k = 0 .. VECT_SIZE - 1.
 * Nothing here checks that those entries exist; all of them are read
 * unconditionally.
 *
 * VECT_SIZE has to be 1, 2, 4, 8 or 16. For any other value none of the
 * branches declares the result, so the function fails to compile.
 */
static inline u32x ix_create_bft (__global bf_t *bfs_buf, const u32 il_pos)
{
  // source value for result lane k (helper is local to this function)
  #define BFT_LANE(k) (bfs_buf[il_pos + (k)].i)

  #if VECT_SIZE == 1
  const u32x bf_lanes = (u32x) (BFT_LANE (0));
  #elif VECT_SIZE == 2
  const u32x bf_lanes = (u32x) (BFT_LANE (0), BFT_LANE (1));
  #elif VECT_SIZE == 4
  const u32x bf_lanes = (u32x) (BFT_LANE (0), BFT_LANE (1), BFT_LANE (2), BFT_LANE (3));
  #elif VECT_SIZE == 8
  const u32x bf_lanes = (u32x) (BFT_LANE (0), BFT_LANE (1), BFT_LANE (2), BFT_LANE (3),
                                BFT_LANE (4), BFT_LANE (5), BFT_LANE (6), BFT_LANE (7));
  #elif VECT_SIZE == 16
  const u32x bf_lanes = (u32x) (BFT_LANE (0),  BFT_LANE (1),  BFT_LANE (2),  BFT_LANE (3),
                                BFT_LANE (4),  BFT_LANE (5),  BFT_LANE (6),  BFT_LANE (7),
                                BFT_LANE (8),  BFT_LANE (9),  BFT_LANE (10), BFT_LANE (11),
                                BFT_LANE (12), BFT_LANE (13), BFT_LANE (14), BFT_LANE (15));
  #endif

  #undef BFT_LANE

  return bf_lanes;
}
|
|
|
|
// attack-mode 1
|
|
|
|
/**
 * Gather the `pw_len` member of VECT_SIZE consecutive comb_t entries into one
 * vector.
 *
 * Lane k of the result is combs_buf[il_pos + k].pw_len for
 * k = 0 .. VECT_SIZE - 1. All of those entries are read unconditionally; no
 * bounds check is performed here.
 *
 * VECT_SIZE has to be 1, 2, 4, 8 or 16. For any other value none of the
 * branches declares the result, so the function fails to compile.
 */
static inline u32x pwlenx_create_combt (__global comb_t *combs_buf, const u32 il_pos)
{
  // source value for result lane k (helper is local to this function)
  #define COMBT_PWLEN_LANE(k) (combs_buf[il_pos + (k)].pw_len)

  #if VECT_SIZE == 1
  const u32x len_lanes = (u32x) (COMBT_PWLEN_LANE (0));
  #elif VECT_SIZE == 2
  const u32x len_lanes = (u32x) (COMBT_PWLEN_LANE (0), COMBT_PWLEN_LANE (1));
  #elif VECT_SIZE == 4
  const u32x len_lanes = (u32x) (COMBT_PWLEN_LANE (0), COMBT_PWLEN_LANE (1),
                                 COMBT_PWLEN_LANE (2), COMBT_PWLEN_LANE (3));
  #elif VECT_SIZE == 8
  const u32x len_lanes = (u32x) (COMBT_PWLEN_LANE (0), COMBT_PWLEN_LANE (1),
                                 COMBT_PWLEN_LANE (2), COMBT_PWLEN_LANE (3),
                                 COMBT_PWLEN_LANE (4), COMBT_PWLEN_LANE (5),
                                 COMBT_PWLEN_LANE (6), COMBT_PWLEN_LANE (7));
  #elif VECT_SIZE == 16
  const u32x len_lanes = (u32x) (COMBT_PWLEN_LANE (0),  COMBT_PWLEN_LANE (1),
                                 COMBT_PWLEN_LANE (2),  COMBT_PWLEN_LANE (3),
                                 COMBT_PWLEN_LANE (4),  COMBT_PWLEN_LANE (5),
                                 COMBT_PWLEN_LANE (6),  COMBT_PWLEN_LANE (7),
                                 COMBT_PWLEN_LANE (8),  COMBT_PWLEN_LANE (9),
                                 COMBT_PWLEN_LANE (10), COMBT_PWLEN_LANE (11),
                                 COMBT_PWLEN_LANE (12), COMBT_PWLEN_LANE (13),
                                 COMBT_PWLEN_LANE (14), COMBT_PWLEN_LANE (15));
  #endif

  #undef COMBT_PWLEN_LANE

  return len_lanes;
}
|
|
|
|
/**
 * Gather element `idx` of the `i` array from VECT_SIZE consecutive comb_t
 * entries into one vector.
 *
 * Lane k of the result is combs_buf[il_pos + k].i[idx] for
 * k = 0 .. VECT_SIZE - 1. Neither the entry range nor idx is validated here;
 * every lane is read unconditionally.
 *
 * VECT_SIZE has to be 1, 2, 4, 8 or 16. For any other value none of the
 * branches declares the result, so the function fails to compile.
 */
static inline u32x ix_create_combt (__global comb_t *combs_buf, const u32 il_pos, const int idx)
{
  // source value for result lane k (helper is local to this function)
  #define COMBT_IX_LANE(k) (combs_buf[il_pos + (k)].i[idx])

  #if VECT_SIZE == 1
  const u32x word_lanes = (u32x) (COMBT_IX_LANE (0));
  #elif VECT_SIZE == 2
  const u32x word_lanes = (u32x) (COMBT_IX_LANE (0), COMBT_IX_LANE (1));
  #elif VECT_SIZE == 4
  const u32x word_lanes = (u32x) (COMBT_IX_LANE (0), COMBT_IX_LANE (1),
                                  COMBT_IX_LANE (2), COMBT_IX_LANE (3));
  #elif VECT_SIZE == 8
  const u32x word_lanes = (u32x) (COMBT_IX_LANE (0), COMBT_IX_LANE (1),
                                  COMBT_IX_LANE (2), COMBT_IX_LANE (3),
                                  COMBT_IX_LANE (4), COMBT_IX_LANE (5),
                                  COMBT_IX_LANE (6), COMBT_IX_LANE (7));
  #elif VECT_SIZE == 16
  const u32x word_lanes = (u32x) (COMBT_IX_LANE (0),  COMBT_IX_LANE (1),
                                  COMBT_IX_LANE (2),  COMBT_IX_LANE (3),
                                  COMBT_IX_LANE (4),  COMBT_IX_LANE (5),
                                  COMBT_IX_LANE (6),  COMBT_IX_LANE (7),
                                  COMBT_IX_LANE (8),  COMBT_IX_LANE (9),
                                  COMBT_IX_LANE (10), COMBT_IX_LANE (11),
                                  COMBT_IX_LANE (12), COMBT_IX_LANE (13),
                                  COMBT_IX_LANE (14), COMBT_IX_LANE (15));
  #endif

  #undef COMBT_IX_LANE

  return word_lanes;
}
|
|
|
|
/**
 * packv (arr, var, gid, idx)
 *
 * Expands to a u32x expression whose lane k is
 *
 *   (arr)[((gid) * N) + k].var[(idx)]      with N == VECT_SIZE, k = 0 .. N - 1
 *
 * `var` is pasted in as a member name. `arr`, `gid` and `idx` are expanded
 * once per lane, so they should be free of side effects.
 *
 * Only VECT_SIZE values 1, 2, 4, 8 and 16 define the macro.
 */
#if VECT_SIZE == 1

#define packv(arr,var,gid,idx) \
  (u32x) ((arr)[((gid) * 1) + 0].var[(idx)])

#elif VECT_SIZE == 2

#define packv(arr,var,gid,idx) \
  (u32x) ((arr)[((gid) * 2) + 0].var[(idx)], \
          (arr)[((gid) * 2) + 1].var[(idx)])

#elif VECT_SIZE == 4

#define packv(arr,var,gid,idx) \
  (u32x) ((arr)[((gid) * 4) + 0].var[(idx)], \
          (arr)[((gid) * 4) + 1].var[(idx)], \
          (arr)[((gid) * 4) + 2].var[(idx)], \
          (arr)[((gid) * 4) + 3].var[(idx)])

#elif VECT_SIZE == 8

#define packv(arr,var,gid,idx) \
  (u32x) ((arr)[((gid) * 8) + 0].var[(idx)], \
          (arr)[((gid) * 8) + 1].var[(idx)], \
          (arr)[((gid) * 8) + 2].var[(idx)], \
          (arr)[((gid) * 8) + 3].var[(idx)], \
          (arr)[((gid) * 8) + 4].var[(idx)], \
          (arr)[((gid) * 8) + 5].var[(idx)], \
          (arr)[((gid) * 8) + 6].var[(idx)], \
          (arr)[((gid) * 8) + 7].var[(idx)])

#elif VECT_SIZE == 16

#define packv(arr,var,gid,idx) \
  (u32x) ((arr)[((gid) * 16) + 0].var[(idx)], \
          (arr)[((gid) * 16) + 1].var[(idx)], \
          (arr)[((gid) * 16) + 2].var[(idx)], \
          (arr)[((gid) * 16) + 3].var[(idx)], \
          (arr)[((gid) * 16) + 4].var[(idx)], \
          (arr)[((gid) * 16) + 5].var[(idx)], \
          (arr)[((gid) * 16) + 6].var[(idx)], \
          (arr)[((gid) * 16) + 7].var[(idx)], \
          (arr)[((gid) * 16) + 8].var[(idx)], \
          (arr)[((gid) * 16) + 9].var[(idx)], \
          (arr)[((gid) * 16) + 10].var[(idx)], \
          (arr)[((gid) * 16) + 11].var[(idx)], \
          (arr)[((gid) * 16) + 12].var[(idx)], \
          (arr)[((gid) * 16) + 13].var[(idx)], \
          (arr)[((gid) * 16) + 14].var[(idx)], \
          (arr)[((gid) * 16) + 15].var[(idx)])

#endif
|
|
|
|
/**
 * unpackv (arr, var, gid, idx, val)
 *
 * Statement macro: stores lane k of `val` into
 *
 *   (arr)[((gid) * N) + k].var[(idx)]      with N == VECT_SIZE, k = 0 .. N - 1
 *
 * For VECT_SIZE == 1 `val` is stored as-is; for wider sizes its components
 * .s0 .. .sf are stored one by one.
 *
 * The expansion is a single brace block, the same form the COMPARE_*_SIMD
 * macros in this file use. That way an unbraced `if`/`for`/`while` in front
 * of the macro covers every lane store instead of only the first one, and a
 * call site may be written with or without a trailing semicolon. (A
 * do { } while (0) wrapper would make the trailing semicolon mandatory.)
 *
 * `val` is parenthesized before component access so that compound
 * expressions can be passed. All arguments are expanded once per lane, so
 * they should be free of side effects. `var` is pasted in as a member name.
 *
 * Only VECT_SIZE values 1, 2, 4, 8 and 16 define the macro.
 */
#if VECT_SIZE == 1

#define unpackv(arr,var,gid,idx,val) \
{ \
  (arr)[((gid) * 1) + 0].var[(idx)] = (val); \
}

#elif VECT_SIZE == 2

#define unpackv(arr,var,gid,idx,val) \
{ \
  (arr)[((gid) * 2) + 0].var[(idx)] = (val).s0; \
  (arr)[((gid) * 2) + 1].var[(idx)] = (val).s1; \
}

#elif VECT_SIZE == 4

#define unpackv(arr,var,gid,idx,val) \
{ \
  (arr)[((gid) * 4) + 0].var[(idx)] = (val).s0; \
  (arr)[((gid) * 4) + 1].var[(idx)] = (val).s1; \
  (arr)[((gid) * 4) + 2].var[(idx)] = (val).s2; \
  (arr)[((gid) * 4) + 3].var[(idx)] = (val).s3; \
}

#elif VECT_SIZE == 8

#define unpackv(arr,var,gid,idx,val) \
{ \
  (arr)[((gid) * 8) + 0].var[(idx)] = (val).s0; \
  (arr)[((gid) * 8) + 1].var[(idx)] = (val).s1; \
  (arr)[((gid) * 8) + 2].var[(idx)] = (val).s2; \
  (arr)[((gid) * 8) + 3].var[(idx)] = (val).s3; \
  (arr)[((gid) * 8) + 4].var[(idx)] = (val).s4; \
  (arr)[((gid) * 8) + 5].var[(idx)] = (val).s5; \
  (arr)[((gid) * 8) + 6].var[(idx)] = (val).s6; \
  (arr)[((gid) * 8) + 7].var[(idx)] = (val).s7; \
}

#elif VECT_SIZE == 16

#define unpackv(arr,var,gid,idx,val) \
{ \
  (arr)[((gid) * 16) + 0].var[(idx)] = (val).s0; \
  (arr)[((gid) * 16) + 1].var[(idx)] = (val).s1; \
  (arr)[((gid) * 16) + 2].var[(idx)] = (val).s2; \
  (arr)[((gid) * 16) + 3].var[(idx)] = (val).s3; \
  (arr)[((gid) * 16) + 4].var[(idx)] = (val).s4; \
  (arr)[((gid) * 16) + 5].var[(idx)] = (val).s5; \
  (arr)[((gid) * 16) + 6].var[(idx)] = (val).s6; \
  (arr)[((gid) * 16) + 7].var[(idx)] = (val).s7; \
  (arr)[((gid) * 16) + 8].var[(idx)] = (val).s8; \
  (arr)[((gid) * 16) + 9].var[(idx)] = (val).s9; \
  (arr)[((gid) * 16) + 10].var[(idx)] = (val).sa; \
  (arr)[((gid) * 16) + 11].var[(idx)] = (val).sb; \
  (arr)[((gid) * 16) + 12].var[(idx)] = (val).sc; \
  (arr)[((gid) * 16) + 13].var[(idx)] = (val).sd; \
  (arr)[((gid) * 16) + 14].var[(idx)] = (val).se; \
  (arr)[((gid) * 16) + 15].var[(idx)] = (val).sf; \
}

#endif
|