Some more ROCm performance tuning

pull/2070/head
Jens Steube 5 years ago
parent 345d953120
commit 316095c151

@ -353,6 +353,8 @@ DECLSPEC u64x hc_rotl64 (const u64x a, const int n)
return rotl64 (a, n);
#elif defined IS_CUDA
return rotl64 (a, n);
#elif defined IS_AMD
return rotl64 (a, n);
#else
return rotate (a, make_u64x (n));
#endif
@ -364,6 +366,8 @@ DECLSPEC u64x hc_rotr64 (const u64x a, const int n)
return rotr64 (a, n);
#elif defined IS_CUDA
return rotr64 (a, n);
#elif defined IS_AMD
return rotr64 (a, n);
#else
return rotate (a, make_u64x (64 - n));
#endif
@ -375,6 +379,8 @@ DECLSPEC u64 hc_rotl64_S (const u64 a, const int n)
return rotl64 (a, n);
#elif defined IS_CUDA
return rotl64_S (a, n);
#elif defined IS_AMD
return rotl64_S (a, n);
#else
return rotate (a, (u64) (n));
#endif
@ -386,6 +392,8 @@ DECLSPEC u64 hc_rotr64_S (const u64 a, const int n)
return rotr64 (a, n);
#elif defined IS_CUDA
return rotr64_S (a, n);
#elif defined IS_AMD
return rotr64_S (a, n);
#else
return rotate (a, (u64) (64 - n));
#endif

@ -12,6 +12,54 @@
#define SYNC_THREADS()
#endif
#ifdef IS_AMD
DECLSPEC u64x rotl64 (const u64x a, const int n)
{
return rotr64 (a, 64 - n);
}
DECLSPEC u64x rotr64 (const u64x a, const int n)
{
#if VECT_SIZE == 1
return rotr64_S (a, n);
#else
return ((a >> n) | ((a << (64 - n))));
#endif
}
DECLSPEC u64 rotl64_S (const u64 a, const int n)
{
return rotr64_S (a, 64 - n);
}
DECLSPEC u64 rotr64_S (const u64 a, const int n)
{
vconv64_t in;
in.v64 = a;
const u32 a0 = in.v32.a;
const u32 a1 = in.v32.b;
vconv64_t out;
if (n < 32)
{
out.v32.a = amd_bitalign (a1, a0, n);
out.v32.b = amd_bitalign (a0, a1, n);
}
else
{
out.v32.a = amd_bitalign (a0, a1, n - 32);
out.v32.b = amd_bitalign (a1, a0, n - 32);
}
return out.v64;
}
#endif
#ifdef IS_CUDA
#if ATTACK_EXEC == 11

@ -6,6 +6,13 @@
#ifndef _INC_PLATFORM_H
#define _INC_PLATFORM_H
#ifdef IS_AMD
DECLSPEC u64x rotl64 (const u64x a, const int n);
DECLSPEC u64x rotr64 (const u64x a, const int n);
DECLSPEC u64 rotl64_S (const u64 a, const int n);
DECLSPEC u64 rotr64_S (const u64 a, const int n);
#endif
#ifdef IS_CUDA
DECLSPEC u32 atomic_dec (u32 *p);
DECLSPEC u32 atomic_inc (u32 *p);

Loading…
Cancel
Save