/**
 * Author......: See docs/credits.txt
 * License.....: MIT
 */

#include "inc_vendor.h"
#include "inc_types.h"
#include "inc_platform.h"
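
// FIXED_THREAD_COUNT and SYNC_THREADS are GPU-specific; on the native backend
// both helper macros expand to nothing.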
#ifdef IS_NATIVE
#define FIXED_THREAD_COUNT(n)
#define SYNC_THREADS()
#endif
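
// AMD OpenCL: 64-bit rotates built on the amd_bitalign intrinsic, which
// extracts a 32-bit window from the 64-bit value formed by concatenating
// two 32-bit words.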
#ifdef IS_AMD

DECLSPEC u64x rotl64 (const u64x a, const int n)
{
  return rotr64 (a, 64 - n);
}

DECLSPEC u64x rotr64 (const u64x a, const int n)
{
  #if VECT_SIZE == 1
  return rotr64_S (a, n);
  #else
  return ((a >> n) | ((a << (64 - n))));
  #endif
}

DECLSPEC u64 rotl64_S (const u64 a, const int n)
{
  return rotr64_S (a, 64 - n);
}
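
// Scalar 64-bit rotate right: split the operand into 32-bit halves and let
// amd_bitalign produce each rotated word; for shifts of 32 or more the two
// halves swap roles.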
DECLSPEC u64 rotr64_S (const u64 a, const int n)
{
  vconv64_t in;

  in.v64 = a;

  const u32 a0 = in.v32.a;
  const u32 a1 = in.v32.b;

  vconv64_t out;

  if (n < 32)
  {
    out.v32.a = amd_bitalign (a1, a0, n);
    out.v32.b = amd_bitalign (a0, a1, n);
  }
  else
  {
    out.v32.a = amd_bitalign (a0, a1, n - 32);
    out.v32.b = amd_bitalign (a1, a0, n - 32);
  }

  return out.v64;
}

#endif
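
// CUDA: reimplement the OpenCL-style helpers (atomics, work-item queries,
// rotates) on top of the CUDA built-ins.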
#if defined IS_CUDA
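
// When ATTACK_EXEC == 11 a single 32 KiB constant-memory array is declared
// and, depending on ATTACK_KERN, either the rules buffer or the candidate
// buffers are redirected into it (the remaining buffers keep their g_*
// sources).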
#if ATTACK_EXEC == 11

CONSTANT_VK u32 generic_constant[8192]; // 32k

#if   ATTACK_KERN == 0
#define bfs_buf     g_bfs_buf
#define rules_buf   ((const kernel_rule_t *) generic_constant)
#define words_buf_s g_words_buf_s
#define words_buf_r g_words_buf_r
#elif ATTACK_KERN == 1
#define bfs_buf     g_bfs_buf
#define rules_buf   g_rules_buf
#define words_buf_s g_words_buf_s
#define words_buf_r g_words_buf_r
#elif ATTACK_KERN == 3
#define rules_buf   g_rules_buf
#define bfs_buf     ((const bf_t *)      generic_constant)
#define words_buf_s ((const bs_word_t *) generic_constant)
#define words_buf_r ((const u32x *)      generic_constant)
#endif

#endif
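
// Atomic helpers: thin wrappers around the CUDA atomics; each returns the
// value *p held before the update.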
DECLSPEC u32 hc_atomic_dec (GLOBAL_AS u32 *p)
{
  volatile const u32 val = 1;

  return atomicSub (p, val);
}

DECLSPEC u32 hc_atomic_inc (GLOBAL_AS u32 *p)
{
  volatile const u32 val = 1;

  return atomicAdd (p, val);
}

DECLSPEC u32 hc_atomic_or (GLOBAL_AS u32 *p, volatile const u32 val)
{
  return atomicOr (p, val);
}
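
// OpenCL-style work-item queries; only the x dimension is used and the
// dimindx argument is ignored.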
DECLSPEC size_t get_global_id (const u32 dimindx __attribute__((unused)))
{
  return (blockIdx.x * blockDim.x) + threadIdx.x;
}

DECLSPEC size_t get_local_id (const u32 dimindx __attribute__((unused)))
{
  return threadIdx.x;
}

DECLSPEC size_t get_local_size (const u32 dimindx __attribute__((unused)))
{
  // verify
  return blockDim.x;
}
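
// Rotate helpers for 32/64-bit scalar and vector operands, implemented with
// plain shifts and ORs.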
DECLSPEC u32x rotl32 (const u32x a, const int n)
{
  return ((a << n) | ((a >> (32 - n))));
}

DECLSPEC u32x rotr32 (const u32x a, const int n)
{
  return ((a >> n) | ((a << (32 - n))));
}

DECLSPEC u32 rotl32_S (const u32 a, const int n)
{
  return ((a << n) | ((a >> (32 - n))));
}

DECLSPEC u32 rotr32_S (const u32 a, const int n)
{
  return ((a >> n) | ((a << (32 - n))));
}

DECLSPEC u64x rotl64 (const u64x a, const int n)
{
  return ((a << n) | ((a >> (64 - n))));
}

DECLSPEC u64x rotr64 (const u64x a, const int n)
{
  return ((a >> n) | ((a << (64 - n))));
}

DECLSPEC u64 rotl64_S (const u64 a, const int n)
{
  return ((a << n) | ((a >> (64 - n))));
}

DECLSPEC u64 rotr64_S (const u64 a, const int n)
{
  return ((a >> n) | ((a << (64 - n))));
}
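
// FIXED_THREAD_COUNT pins the maximum block size via __launch_bounds__;
// SYNC_THREADS is a block-wide barrier.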
#define FIXED_THREAD_COUNT(n) __launch_bounds__((n), 0)
#define SYNC_THREADS() __syncthreads ()
#endif
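
// HIP: same structure as the CUDA block above, but atomics go through the
// compiler __atomic builtins and work-item queries go through the ROCm
// device libraries.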
#if defined IS_HIP
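
// Same constant-memory aliasing as on CUDA; __attribute__((used)) keeps the
// array from being discarded when a kernel does not reference it.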
#if ATTACK_EXEC == 11

CONSTANT_VK u32 generic_constant[8192] __attribute__((used)); // 32k

#if   ATTACK_KERN == 0
#define bfs_buf     g_bfs_buf
#define rules_buf   ((const kernel_rule_t *) generic_constant)
#define words_buf_s g_words_buf_s
#define words_buf_r g_words_buf_r
#elif ATTACK_KERN == 1
#define bfs_buf     g_bfs_buf
#define rules_buf   g_rules_buf
#define words_buf_s g_words_buf_s
#define words_buf_r g_words_buf_r
#elif ATTACK_KERN == 3
#define rules_buf   g_rules_buf
#define bfs_buf     ((const bf_t *)      generic_constant)
#define words_buf_s ((const bs_word_t *) generic_constant)
#define words_buf_r ((const u32x *)      generic_constant)
#endif

#endif
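
// Atomic helpers: __atomic builtins with relaxed memory ordering; each
// returns the value *p held before the update.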
DECLSPEC u32 hc_atomic_dec (GLOBAL_AS u32 *p)
{
  volatile const u32 val = 1;

  return __atomic_fetch_sub (p, val, __ATOMIC_RELAXED);
}

DECLSPEC u32 hc_atomic_inc (GLOBAL_AS u32 *p)
{
  volatile const u32 val = 1;

  return __atomic_fetch_add (p, val, __ATOMIC_RELAXED);
}

DECLSPEC u32 hc_atomic_or (GLOBAL_AS u32 *p, volatile const u32 val)
{
  return __atomic_fetch_or (p, val, __ATOMIC_RELAXED);
}
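
// Hand-declared prototypes for the ROCm device libraries: OCML provides the
// math routines (__ocml_*), OCKL the work-item queries (__ockl_*).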
extern "C" __device__ __attribute__((pure)) double __ocml_log2_f64 (double);

DECLSPEC double log2 (double x)
{
  return __ocml_log2_f64 (x);
}

extern "C" __device__ __attribute__((const)) size_t __ockl_get_local_id   (uint);
extern "C" __device__ __attribute__((const)) size_t __ockl_get_group_id   (uint);
extern "C" __device__ __attribute__((const)) size_t __ockl_get_local_size (uint);
extern "C" __device__ __attribute__((const)) size_t __ockl_get_num_groups (uint);
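
// OpenCL-style work-item queries. Unlike the CUDA path, these honor the
// requested dimension by passing dimindx through to the OCKL calls.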
DECLSPEC size_t get_global_id (const u32 dimindx)
{
  return (__ockl_get_group_id (dimindx) * __ockl_get_local_size (dimindx)) + __ockl_get_local_id (dimindx);
}

DECLSPEC size_t get_local_id (const u32 dimindx)
{
  return __ockl_get_local_id (dimindx);
}

DECLSPEC size_t get_local_size (const u32 dimindx)
{
  return __ockl_get_local_size (dimindx);
}

DECLSPEC u32x rotl32 (const u32x a, const int n)
{
  return ((a << n) | ((a >> (32 - n))));
}

DECLSPEC u32x rotr32 (const u32x a, const int n)
{
  return ((a >> n) | ((a << (32 - n))));
}

DECLSPEC u32 rotl32_S (const u32 a, const int n)
{
  return ((a << n) | ((a >> (32 - n))));
}

DECLSPEC u32 rotr32_S (const u32 a, const int n)
{
  return ((a >> n) | ((a << (32 - n))));
}

DECLSPEC u64x rotl64 (const u64x a, const int n)
{
  return rotr64 (a, 64 - n);
}
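
// Scalar bitalign helper: emits the V_ALIGNBIT_B32 instruction via inline
// assembly; the shift amount is passed as an immediate operand ("I"
// constraint), so n must be a compile-time constant.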
DECLSPEC u32 amd_bitalign_S (const u32 a, const u32 b, const int n)
{
  u32 r = 0;

  __asm__ ("V_ALIGNBIT_B32 %0, %1, %2, %3;" : "=v"(r): "v"(a), "v"(b), "I"(n));

  return r;
}

DECLSPEC u64x rotr64 (const u64x a, const int n)
{
  #if VECT_SIZE == 1
  return rotr64_S (a, n);
  #else
  return ((a >> n) | ((a << (64 - n))));
  #endif
}

DECLSPEC u64 rotl64_S (const u64 a, const int n)
{
  return rotr64_S (a, 64 - n);
}

DECLSPEC u64 rotr64_S (const u64 a, const int n)
{
  vconv64_t in;

  in.v64 = a;

  const u32 a0 = in.v32.a;
  const u32 a1 = in.v32.b;

  vconv64_t out;

  if (n < 32)
  {
    out.v32.a = amd_bitalign_S (a1, a0, n);
    out.v32.b = amd_bitalign_S (a0, a1, n);
  }
  else
  {
    out.v32.a = amd_bitalign_S (a0, a1, n - 32);
    out.v32.b = amd_bitalign_S (a1, a0, n - 32);
  }

  return out.v64;
}
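
// Device-side declaration of printf; an OpenCL-style prototype is left
// commented out below.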
extern "C" __device__ int printf(const char *fmt, ...);

//int printf(__constant const char* st, ...) __attribute__((format(printf, 1, 2)));
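
// FIXED_THREAD_COUNT bounds the flat work-group size; SYNC_THREADS issues a
// work-group barrier via the AMDGCN s_barrier builtin.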
#define FIXED_THREAD_COUNT(n) __attribute__((amdgpu_flat_work_group_size (1, (n))))
#define SYNC_THREADS() __builtin_amdgcn_s_barrier ()
#endif
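
// OpenCL provides get_global_id and friends natively, so only the atomic
// wrappers and the two helper macros are defined here.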
#ifdef IS_OPENCL

DECLSPEC u32 hc_atomic_dec (volatile GLOBAL_AS u32 *p)
{
  volatile const u32 val = 1;

  return atomic_sub (p, val);
}

DECLSPEC u32 hc_atomic_inc (volatile GLOBAL_AS u32 *p)
{
  volatile const u32 val = 1;

  return atomic_add (p, val);
}

DECLSPEC u32 hc_atomic_or (volatile GLOBAL_AS u32 *p, volatile const u32 val)
{
  return atomic_or (p, val);
}

#define FIXED_THREAD_COUNT(n) __attribute__((reqd_work_group_size((n), 1, 1)))
#define SYNC_THREADS() barrier (CLK_LOCAL_MEM_FENCE)
#endif