diff --git a/OpenCL/m34211_a0-pure.cl b/OpenCL/m34211_a0-pure.cl
new file mode 100755
index 000000000..f36d8e393
--- /dev/null
+++ b/OpenCL/m34211_a0-pure.cl
@@ -0,0 +1,197 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+//#define NEW_SIMD_CODE
+
+#ifdef KERNEL_STATIC
+#include M2S(INCLUDE_PATH/inc_vendor.h)
+#include M2S(INCLUDE_PATH/inc_types.h)
+#include M2S(INCLUDE_PATH/inc_platform.cl)
+#include M2S(INCLUDE_PATH/inc_common.cl)
+#include M2S(INCLUDE_PATH/inc_rp.h)
+#include M2S(INCLUDE_PATH/inc_rp.cl)
+#include M2S(INCLUDE_PATH/inc_scalar.cl)
+#endif
+
+/**
+ * MurmurHash64A with the seed fixed to zero, truncated to its upper 32 bits.
+ *
+ * data: message packed into u32 words; inside a word the message bytes run
+ *       from the least significant byte upwards
+ * len:  message length in bytes
+ *
+ * Returns the high 32 bits of the 64 bit hash.
+ *
+ * Bytes of the final partial block that lie beyond len are masked off, so the
+ * result does not depend on what the caller left behind the message.
+ */
+
+DECLSPEC u32 MurmurHash64A_truncated (PRIVATE_AS const u32 *data, const u32 len)
+{
+#define M 0xc6a4a7935bd1e995
+#define R 47
+
+  // Seed is zero, so the initial state is only the length mix
+  u64 hash = len * M;
+
+  // Number of u32 words that belong to complete u64 blocks
+  const u32 num_u32_blocks = (len / 8) * 2;
+
+  // Mix one u64 block (two u32 words) per iteration
+  u32 i = 0;
+
+  while (i < num_u32_blocks)
+  {
+    // Rebuild the u64 from its high and low u32 halves
+    u64 k = hl32_to_64 (data[i + 1], data[i]);
+
+    k *= M;
+    k ^= k >> R;
+    k *= M;
+
+    hash ^= k;
+    hash *= M;
+
+    i += 2;
+  }
+
+  // Up to 7 trailing bytes that do not fill a whole u64 block
+  const u32 overflow = len & 7;
+
+  if (overflow > 0)
+  {
+    u32 lo = data[i];
+    u32 hi = 0;
+
+    if (overflow > 4)
+    {
+      // Low word is complete, keep only the valid bytes of the high word
+      hi = data[i + 1] & (0xffffffff >> ((8 - overflow) * 8));
+    }
+    else if (overflow < 4)
+    {
+      // High word is never read, keep only the valid bytes of the low word
+      lo &= 0xffffffff >> ((4 - overflow) * 8);
+    }
+
+    hash ^= hl32_to_64 (hi, lo);
+    hash *= M;
+  }
+
+  // Final mixing rounds
+  hash ^= hash >> R;
+  hash *= M;
+  hash ^= hash >> R;
+
+#undef M
+#undef R
+
+  // Truncate to the high 4 bytes
+  return (u32) (hash >> 32);
+}
+
+/**
+ * Runs every rule of the current batch over this work item's base word and
+ * passes the truncated hash of each candidate to COMPARE_M_SCALAR.
+ */
+
+KERNEL_FQ KERNEL_FA void m34211_mxx (KERN_ATTR_RULES ())
+{
+  /**
+   * modifier
+   */
+
+  const u64 lid = get_local_id (0);
+
+  /**
+   * base
+   */
+
+  const u64 gid = get_global_id (0);
+
+  if (gid >= GID_CNT) return;
+
+  COPY_PW (pws[gid]);
+
+  /**
+   * loop
+   */
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
+  {
+    // PASTE_PW presumably yields the word saved by COPY_PW above - confirm
+    pw_t tmp = PASTE_PW;
+
+    tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
+
+    const u32 hash = MurmurHash64A_truncated (tmp.i, tmp.pw_len);
+
+    // The digest is a single u32, the other three compare slots stay zero
+    const u32 z = 0;
+
+    COMPARE_M_SCALAR (hash, z, z, z);
+  }
+}
+
+/**
+ * Same candidate generation as m34211_mxx, but the digest at
+ * DIGESTS_OFFSET_HOST is loaded into search[] first and the hashes go to
+ * COMPARE_S_SCALAR.
+ */
+
+KERNEL_FQ KERNEL_FA void m34211_sxx (KERN_ATTR_RULES ())
+{
+  /**
+   * modifier
+   */
+
+  const u64 lid = get_local_id (0);
+
+  /**
+   * base
+   */
+
+  const u64 gid = get_global_id (0);
+
+  if (gid >= GID_CNT) return;
+
+  /**
+   * digest
+   */
+
+  // Not referenced by name below; presumably read by COMPARE_S_SCALAR - confirm
+  const u32 search[4] =
+  {
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
+    0,
+    0
+  };
+
+  /**
+   * base
+   */
+
+  COPY_PW (pws[gid]);
+
+  /**
+   * loop
+   */
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
+  {
+    // PASTE_PW presumably yields the word saved by COPY_PW above - confirm
+    pw_t tmp = PASTE_PW;
+
+    tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
+
+    const u32 hash = MurmurHash64A_truncated (tmp.i, tmp.pw_len);
+
+    // The digest is a single u32, the other three compare slots stay zero
+    const u32 z = 0;
+
+    COMPARE_S_SCALAR (hash, z, z, z);
+  }
+}
diff --git a/OpenCL/m34211_a1-pure.cl b/OpenCL/m34211_a1-pure.cl
new file mode 100755
index 000000000..bd53d64ee
--- /dev/null
+++ b/OpenCL/m34211_a1-pure.cl
@@ -0,0 +1,238 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+//#define NEW_SIMD_CODE
+
+#ifdef KERNEL_STATIC
+#include M2S(INCLUDE_PATH/inc_vendor.h)
+#include M2S(INCLUDE_PATH/inc_types.h)
+#include M2S(INCLUDE_PATH/inc_platform.cl)
+#include M2S(INCLUDE_PATH/inc_common.cl)
+#include M2S(INCLUDE_PATH/inc_scalar.cl)
+#endif
+
+/**
+ * MurmurHash64A with the seed fixed to zero, truncated to its upper 32 bits.
+ *
+ * data: message packed into u32 words; inside a word the message bytes run
+ *       from the least significant byte upwards
+ * len:  message length in bytes
+ *
+ * Returns the high 32 bits of the 64 bit hash.
+ *
+ * Bytes of the final partial block that lie beyond len are masked off, so the
+ * result does not depend on what the caller left behind the message.
+ */
+
+DECLSPEC u32 MurmurHash64A_truncated (PRIVATE_AS const u32 *data, const u32 len)
+{
+#define M 0xc6a4a7935bd1e995
+#define R 47
+
+  // Seed is zero, so the initial state is only the length mix
+  u64 hash = len * M;
+
+  // Number of u32 words that belong to complete u64 blocks
+  const u32 num_u32_blocks = (len / 8) * 2;
+
+  // Mix one u64 block (two u32 words) per iteration
+  u32 i = 0;
+
+  while (i < num_u32_blocks)
+  {
+    // Rebuild the u64 from its high and low u32 halves
+    u64 k = hl32_to_64 (data[i + 1], data[i]);
+
+    k *= M;
+    k ^= k >> R;
+    k *= M;
+
+    hash ^= k;
+    hash *= M;
+
+    i += 2;
+  }
+
+  // Up to 7 trailing bytes that do not fill a whole u64 block
+  const u32 overflow = len & 7;
+
+  if (overflow > 0)
+  {
+    u32 lo = data[i];
+    u32 hi = 0;
+
+    if (overflow > 4)
+    {
+      // Low word is complete, keep only the valid bytes of the high word
+      hi = data[i + 1] & (0xffffffff >> ((8 - overflow) * 8));
+    }
+    else if (overflow < 4)
+    {
+      // High word is never read, keep only the valid bytes of the low word
+      lo &= 0xffffffff >> ((4 - overflow) * 8);
+    }
+
+    hash ^= hl32_to_64 (hi, lo);
+    hash *= M;
+  }
+
+  // Final mixing rounds
+  hash ^= hash >> R;
+  hash *= M;
+  hash ^= hash >> R;
+
+#undef M
+#undef R
+
+  // Truncate to the high 4 bytes
+  return (u32) (hash >> 32);
+}
+
+/**
+ * Appends every right-hand word of the current batch to this work item's
+ * left-hand word and passes the truncated hash of each combination to
+ * COMPARE_M_SCALAR.
+ */
+
+KERNEL_FQ KERNEL_FA void m34211_mxx (KERN_ATTR_BASIC ())
+{
+  /**
+   * modifier
+   */
+
+  const u64 lid = get_local_id (0);
+  const u64 gid = get_global_id (0);
+
+  if (gid >= GID_CNT) return;
+
+  /**
+   * base
+   */
+
+  const u32 left_len = pws[gid].pw_len;
+
+  // Declared as u32 words so the hash can read it without a misaligned or
+  // address space dropping cast; bytes are written through combined_ptr
+  u32 combined_buf[64] = { 0 };
+
+  PRIVATE_AS u8 *combined_ptr = (PRIVATE_AS u8 *) combined_buf;
+
+  // copy left buffer
+  GLOBAL_AS const u8 *left = (GLOBAL_AS const u8 *) pws[gid].i;
+
+  // probably bad for performance
+  for (u32 i = 0; i < left_len; i++)
+  {
+    combined_ptr[i] = left[i];
+  }
+
+  /**
+   * loop
+   */
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
+  {
+    const u32 right_len = combs_buf[il_pos].pw_len;
+
+    // copy right buffer
+    // NOTE(review): assumes left_len + right_len never exceeds the 256 bytes
+    // of combined_buf - confirm against the host side length limits
+    GLOBAL_AS const u8 *right = (GLOBAL_AS const u8 *) combs_buf[il_pos].i;
+
+    for (u32 i = 0; i < right_len; i++)
+    {
+      combined_ptr[left_len + i] = right[i];
+    }
+
+    // A longer right word from an earlier iteration can leave bytes behind
+    // this candidate; the hash ignores everything beyond the given length
+    const u32 hash = MurmurHash64A_truncated (combined_buf, left_len + right_len);
+
+    // The digest is a single u32, the other three compare slots stay zero
+    const u32 z = 0;
+
+    COMPARE_M_SCALAR (hash, z, z, z);
+  }
+}
+
+/**
+ * Same candidate generation as m34211_mxx, but the digest at
+ * DIGESTS_OFFSET_HOST is loaded into search[] first and the hashes go to
+ * COMPARE_S_SCALAR.
+ */
+
+KERNEL_FQ KERNEL_FA void m34211_sxx (KERN_ATTR_BASIC ())
+{
+  /**
+   * modifier
+   */
+
+  const u64 lid = get_local_id (0);
+  const u64 gid = get_global_id (0);
+
+  if (gid >= GID_CNT) return;
+
+  /**
+   * base
+   */
+
+  const u32 left_len = pws[gid].pw_len;
+
+  // Declared as u32 words so the hash can read it without a misaligned or
+  // address space dropping cast; bytes are written through combined_ptr
+  u32 combined_buf[64] = { 0 };
+
+  PRIVATE_AS u8 *combined_ptr = (PRIVATE_AS u8 *) combined_buf;
+
+  // copy left buffer
+  GLOBAL_AS const u8 *left = (GLOBAL_AS const u8 *) pws[gid].i;
+
+  // probably bad for performance
+  for (u32 i = 0; i < left_len; i++)
+  {
+    combined_ptr[i] = left[i];
+  }
+
+  /**
+   * digest
+   */
+
+  // Not referenced by name below; presumably read by COMPARE_S_SCALAR - confirm
+  const u32 search[4] =
+  {
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
+    0,
+    0
+  };
+
+  /**
+   * loop
+   */
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
+  {
+    const u32 right_len = combs_buf[il_pos].pw_len;
+
+    // copy right buffer
+    // NOTE(review): assumes left_len + right_len never exceeds the 256 bytes
+    // of combined_buf - confirm against the host side length limits
+    GLOBAL_AS const u8 *right = (GLOBAL_AS const u8 *) combs_buf[il_pos].i;
+
+    for (u32 i = 0; i < right_len; i++)
+    {
+      combined_ptr[left_len + i] = right[i];
+    }
+
+    // A longer right word from an earlier iteration can leave bytes behind
+    // this candidate; the hash ignores everything beyond the given length
+    const u32 hash = MurmurHash64A_truncated (combined_buf, left_len + right_len);
+
+    // The digest is a single u32, the other three compare slots stay zero
+    const u32 z = 0;
+
+    COMPARE_S_SCALAR (hash, z, z, z);
+  }
+}
diff --git a/OpenCL/m34211_a3-pure.cl b/OpenCL/m34211_a3-pure.cl
new file mode 100755
index 000000000..bb846f189
--- /dev/null
+++ b/OpenCL/m34211_a3-pure.cl
@@ -0,0 +1,212 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+#define NEW_SIMD_CODE
+
+#ifdef KERNEL_STATIC
+#include M2S(INCLUDE_PATH/inc_vendor.h)
+#include M2S(INCLUDE_PATH/inc_types.h)
+#include M2S(INCLUDE_PATH/inc_platform.cl)
+#include M2S(INCLUDE_PATH/inc_common.cl)
+#include M2S(INCLUDE_PATH/inc_simd.cl)
+#endif
+
+/**
+ * MurmurHash64A with the seed fixed to zero, truncated to its upper 32 bits.
+ * Every vector lane is hashed independently.
+ *
+ * data: messages packed into u32x words; inside a word the message bytes run
+ *       from the least significant byte upwards
+ * len:  message length in bytes, shared by all lanes
+ *
+ * Returns the high 32 bits of the 64 bit hash of each lane.
+ *
+ * Bytes of the final partial block that lie beyond len are masked off, so the
+ * result does not depend on what the caller left behind the message.
+ */
+
+DECLSPEC u32x MurmurHash64A_truncated (PRIVATE_AS const u32x *data, const u32 len)
+{
+#define M 0xc6a4a7935bd1e995
+#define R 47
+
+  // Seed is zero, so every lane starts from the same length mix
+  u64x hash = len * M;
+
+  // Number of u32x words that belong to complete u64 blocks
+  const u32 num_u32_blocks = (len / 8) * 2;
+
+  // Mix one u64 block (two u32x words) per iteration
+  u32 i = 0;
+
+  while (i < num_u32_blocks)
+  {
+    // Rebuild the u64 lanes from their high and low u32 halves
+    u64x k = hl32_to_64 (data[i + 1], data[i]);
+
+    k *= M;
+    k ^= k >> R;
+    k *= M;
+
+    hash ^= k;
+    hash *= M;
+
+    i += 2;
+  }
+
+  // Up to 7 trailing bytes that do not fill a whole u64 block
+  const u32 overflow = len & 7;
+
+  if (overflow > 0)
+  {
+    u32x lo = data[i];
+    u32x hi = 0;
+
+    if (overflow > 4)
+    {
+      // Low word is complete, keep only the valid bytes of the high word
+      hi = data[i + 1] & (0xffffffff >> ((8 - overflow) * 8));
+    }
+    else if (overflow < 4)
+    {
+      // High word is never read, keep only the valid bytes of the low word
+      lo &= 0xffffffff >> ((4 - overflow) * 8);
+    }
+
+    hash ^= hl32_to_64 (hi, lo);
+    hash *= M;
+  }
+
+  // Final mixing rounds
+  hash ^= hash >> R;
+  hash *= M;
+  hash ^= hash >> R;
+
+#undef M
+#undef R
+
+  // Truncate every lane to its high 4 bytes
+  return h32_from_64 (hash);
+}
+
+/**
+ * ORs each vector of words_buf_r entries into the first word of this work
+ * item's base and passes the truncated hashes to COMPARE_M_SIMD.
+ */
+
+KERNEL_FQ KERNEL_FA void m34211_mxx (KERN_ATTR_VECTOR ())
+{
+  /**
+   * modifier
+   */
+
+  const u64 lid = get_local_id (0);
+  const u64 gid = get_global_id (0);
+
+  if (gid >= GID_CNT) return;
+
+  /**
+   * base
+   */
+
+  const u32 pw_len = pws[gid].pw_len;
+
+  u32x w[64] = { 0 };
+
+  // Copy only the words that hold message bytes, the rest stays zero
+  for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
+  {
+    w[idx] = pws[gid].i[idx];
+  }
+
+  /**
+   * loop
+   */
+
+  u32x w0l = w[0];
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
+  {
+    const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
+
+    const u32x w0 = w0l | w0r;
+
+    w[0] = w0;
+
+    const u32x hash = MurmurHash64A_truncated (w, pw_len);
+
+    // The digest is a single u32, the other three compare slots stay zero
+    const u32x z = 0;
+
+    COMPARE_M_SIMD (hash, z, z, z);
+  }
+}
+
+/**
+ * Same candidate generation as m34211_mxx, but the digest at
+ * DIGESTS_OFFSET_HOST is loaded into search[] first and the hashes go to
+ * COMPARE_S_SIMD.
+ */
+
+KERNEL_FQ KERNEL_FA void m34211_sxx (KERN_ATTR_VECTOR ())
+{
+  /**
+   * modifier
+   */
+
+  const u64 lid = get_local_id (0);
+  const u64 gid = get_global_id (0);
+
+  if (gid >= GID_CNT) return;
+
+  /**
+   * digest
+   */
+
+  // Not referenced by name below; presumably read by COMPARE_S_SIMD - confirm
+  const u32 search[4] =
+  {
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
+    0,
+    0
+  };
+
+  /**
+   * base
+   */
+
+  const u32 pw_len = pws[gid].pw_len;
+
+  u32x w[64] = { 0 };
+
+  // Copy only the words that hold message bytes, the rest stays zero
+  for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
+  {
+    w[idx] = pws[gid].i[idx];
+  }
+
+  /**
+   * loop
+   */
+
+  u32x w0l = w[0];
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
+  {
+    const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
+
+    const u32x w0 = w0l | w0r;
+
+    w[0] = w0;
+
+    const u32x hash = MurmurHash64A_truncated (w, pw_len);
+
+    // The digest is a single u32, the other three compare slots stay zero
+    const u32x z = 0;
+
+    COMPARE_S_SIMD (hash, z, z, z);
+  }
+}