Optimize BLAKE2B_ROUND() 64 bit rotates

pull/2884/head
Jens Steube 3 years ago
parent 3becb253d3
commit bd92589af1

@ -9,6 +9,207 @@
#include "inc_common.h"
#include "inc_hash_blake2b.h"
/**
 * Scalar 16-bit rotate for the BLAKE2b round.
 *
 * The generic path is hc_rotr64_S (a, 16). On NVIDIA, and on AMD/HIP when
 * HAS_VPERM is set, the two 32-bit halves of the result are instead built
 * with hc_byte_perm_S() from the halves of the input; those paths are
 * intended to yield the same value as the generic one.
 *
 * a: 64-bit input word
 * returns: the rotated 64-bit word
 */
DECLSPEC u64 blake2b_rot16_S (const u64 a)
{
  #if defined IS_NV

  vconv64_t src;
  vconv64_t dst;

  src.v64 = a;

  // NVIDIA selector encoding: one hex digit per output byte
  dst.v32.a = hc_byte_perm_S (src.v32.b, src.v32.a, 0x1076);
  dst.v32.b = hc_byte_perm_S (src.v32.b, src.v32.a, 0x5432);

  return dst.v64;

  #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM

  vconv64_t src;
  vconv64_t dst;

  src.v64 = a;

  // AMD/HIP selector encoding: one full byte per output byte
  dst.v32.a = hc_byte_perm_S (src.v32.b, src.v32.a, 0x01000706);
  dst.v32.b = hc_byte_perm_S (src.v32.b, src.v32.a, 0x05040302);

  return dst.v64;

  #else

  // no byte-permute path available: plain rotate
  return hc_rotr64_S (a, 16);

  #endif
}
/**
 * Vector counterpart of blake2b_rot16_S().
 *
 * Applies blake2b_rot16_S() to every component of a. Which components exist
 * is decided at compile time by VECT_SIZE (1, 2, 4, 8 or 16 are handled).
 *
 * a: input vector (a plain scalar when VECT_SIZE == 1)
 * returns: vector whose components are the rotated components of a
 */
DECLSPEC u64x blake2b_rot16 (const u64x a)
{
  u64x rotated;

  // scalar build: u64x is used directly as the scalar argument
  #if VECT_SIZE == 1
  rotated = blake2b_rot16_S (a);
  #endif

  // components 0-1
  #if VECT_SIZE >= 2
  rotated.s0 = blake2b_rot16_S (a.s0);
  rotated.s1 = blake2b_rot16_S (a.s1);
  #endif

  // components 2-3
  #if VECT_SIZE >= 4
  rotated.s2 = blake2b_rot16_S (a.s2);
  rotated.s3 = blake2b_rot16_S (a.s3);
  #endif

  // components 4-7
  #if VECT_SIZE >= 8
  rotated.s4 = blake2b_rot16_S (a.s4);
  rotated.s5 = blake2b_rot16_S (a.s5);
  rotated.s6 = blake2b_rot16_S (a.s6);
  rotated.s7 = blake2b_rot16_S (a.s7);
  #endif

  // components 8-f
  #if VECT_SIZE >= 16
  rotated.s8 = blake2b_rot16_S (a.s8);
  rotated.s9 = blake2b_rot16_S (a.s9);
  rotated.sa = blake2b_rot16_S (a.sa);
  rotated.sb = blake2b_rot16_S (a.sb);
  rotated.sc = blake2b_rot16_S (a.sc);
  rotated.sd = blake2b_rot16_S (a.sd);
  rotated.se = blake2b_rot16_S (a.se);
  rotated.sf = blake2b_rot16_S (a.sf);
  #endif

  return rotated;
}
/**
 * Scalar 24-bit rotate for the BLAKE2b round.
 *
 * The generic path is hc_rotr64_S (a, 24). On NVIDIA, and on AMD/HIP when
 * HAS_VPERM is set, the two 32-bit halves of the result are instead built
 * with hc_byte_perm_S() from the halves of the input; those paths are
 * intended to yield the same value as the generic one.
 *
 * a: 64-bit input word
 * returns: the rotated 64-bit word
 */
DECLSPEC u64 blake2b_rot24_S (const u64 a)
{
  #if defined IS_NV

  vconv64_t in;

  in.v64 = a;

  vconv64_t out;

  // NVIDIA selector encoding: one hex digit per output byte
  out.v32.a = hc_byte_perm_S (in.v32.b, in.v32.a, 0x2107);
  out.v32.b = hc_byte_perm_S (in.v32.b, in.v32.a, 0x6543);

  return out.v64;

  #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM

  vconv64_t in;

  in.v64 = a;

  vconv64_t out;

  // AMD/HIP selector encoding: one full byte per output byte
  out.v32.a = hc_byte_perm_S (in.v32.b, in.v32.a, 0x02010007);
  out.v32.b = hc_byte_perm_S (in.v32.b, in.v32.a, 0x06050403);

  return out.v64;

  #else

  // The rotation amount must be 24 here, matching the byte-permute paths
  // above and the hc_rotr64_S (b ^ c, 24) call this helper replaces in
  // BLAKE2B_G. Rotating by 16 would corrupt the hash on every target that
  // takes this fallback.
  return hc_rotr64_S (a, 24);

  #endif
}
/**
 * Vector counterpart of blake2b_rot24_S().
 *
 * Applies blake2b_rot24_S() to every component of a. Which components exist
 * is decided at compile time by VECT_SIZE (1, 2, 4, 8 or 16 are handled).
 *
 * a: input vector (a plain scalar when VECT_SIZE == 1)
 * returns: vector whose components are the rotated components of a
 */
DECLSPEC u64x blake2b_rot24 (const u64x a)
{
  u64x rotated;

  // scalar build: u64x is used directly as the scalar argument
  #if VECT_SIZE == 1
  rotated = blake2b_rot24_S (a);
  #endif

  // components 0-1
  #if VECT_SIZE >= 2
  rotated.s0 = blake2b_rot24_S (a.s0);
  rotated.s1 = blake2b_rot24_S (a.s1);
  #endif

  // components 2-3
  #if VECT_SIZE >= 4
  rotated.s2 = blake2b_rot24_S (a.s2);
  rotated.s3 = blake2b_rot24_S (a.s3);
  #endif

  // components 4-7
  #if VECT_SIZE >= 8
  rotated.s4 = blake2b_rot24_S (a.s4);
  rotated.s5 = blake2b_rot24_S (a.s5);
  rotated.s6 = blake2b_rot24_S (a.s6);
  rotated.s7 = blake2b_rot24_S (a.s7);
  #endif

  // components 8-f
  #if VECT_SIZE >= 16
  rotated.s8 = blake2b_rot24_S (a.s8);
  rotated.s9 = blake2b_rot24_S (a.s9);
  rotated.sa = blake2b_rot24_S (a.sa);
  rotated.sb = blake2b_rot24_S (a.sb);
  rotated.sc = blake2b_rot24_S (a.sc);
  rotated.sd = blake2b_rot24_S (a.sd);
  rotated.se = blake2b_rot24_S (a.se);
  rotated.sf = blake2b_rot24_S (a.sf);
  #endif

  return rotated;
}
/**
 * Scalar 32-bit rotate for the BLAKE2b round.
 *
 * A rotation by half the word width is the same in either direction, so it
 * is done by exchanging the two 32-bit halves through vconv64_t; no shift
 * or permute is involved and the same code is used on every target.
 *
 * a: 64-bit input word
 * returns: a with its 32-bit halves swapped
 */
DECLSPEC u64 blake2b_rot32_S (const u64 a)
{
  vconv64_t src;
  vconv64_t swapped;

  src.v64 = a;

  swapped.v32.b = src.v32.a;
  swapped.v32.a = src.v32.b;

  return swapped.v64;
}
/**
 * Vector counterpart of blake2b_rot32_S().
 *
 * Applies blake2b_rot32_S() to every component of a. Which components exist
 * is decided at compile time by VECT_SIZE (1, 2, 4, 8 or 16 are handled).
 *
 * a: input vector (a plain scalar when VECT_SIZE == 1)
 * returns: vector whose components are the half-swapped components of a
 */
DECLSPEC u64x blake2b_rot32 (const u64x a)
{
  u64x rotated;

  // scalar build: u64x is used directly as the scalar argument
  #if VECT_SIZE == 1
  rotated = blake2b_rot32_S (a);
  #endif

  // components 0-1
  #if VECT_SIZE >= 2
  rotated.s0 = blake2b_rot32_S (a.s0);
  rotated.s1 = blake2b_rot32_S (a.s1);
  #endif

  // components 2-3
  #if VECT_SIZE >= 4
  rotated.s2 = blake2b_rot32_S (a.s2);
  rotated.s3 = blake2b_rot32_S (a.s3);
  #endif

  // components 4-7
  #if VECT_SIZE >= 8
  rotated.s4 = blake2b_rot32_S (a.s4);
  rotated.s5 = blake2b_rot32_S (a.s5);
  rotated.s6 = blake2b_rot32_S (a.s6);
  rotated.s7 = blake2b_rot32_S (a.s7);
  #endif

  // components 8-f
  #if VECT_SIZE >= 16
  rotated.s8 = blake2b_rot32_S (a.s8);
  rotated.s9 = blake2b_rot32_S (a.s9);
  rotated.sa = blake2b_rot32_S (a.sa);
  rotated.sb = blake2b_rot32_S (a.sb);
  rotated.sc = blake2b_rot32_S (a.sc);
  rotated.sd = blake2b_rot32_S (a.sd);
  rotated.se = blake2b_rot32_S (a.se);
  rotated.sf = blake2b_rot32_S (a.sf);
  #endif

  return rotated;
}
DECLSPEC void blake2b_transform (u64 *h, const u64 *m, const int len, const u64 f0)
{
const u64 t0 = hl32_to_64_S (0, len);

@ -9,14 +9,23 @@
// NOTE(review): these look like mode selectors for the BLAKE2b update/final
// paths; their consumers are not visible in this chunk - confirm before
// relying on that reading.
#define BLAKE2B_UPDATE 0
#define BLAKE2B_FINAL -1
// Rotate helpers called by BLAKE2B_G and BLAKE2B_G_VECTOR for the 16-, 24- and
// 32-bit rotations. The _S variants take and return a scalar u64; the
// unsuffixed variants take and return a u64x.
DECLSPEC u64 blake2b_rot16_S (const u64 a);
DECLSPEC u64x blake2b_rot16 (const u64x a);
DECLSPEC u64 blake2b_rot24_S (const u64 a);
DECLSPEC u64x blake2b_rot24 (const u64x a);
DECLSPEC u64 blake2b_rot32_S (const u64 a);
DECLSPEC u64x blake2b_rot32 (const u64x a);
/*
 * BLAKE2b G mixing step on scalar state words.
 *
 * k0, k1      indices into an array named m that must be in scope at the
 *             expansion site
 * a, b, c, d  state words; they are read and assigned several times, so each
 *             argument must be a side-effect-free lvalue
 *
 * Each XOR-and-rotate is applied exactly once per step: the 32-, 24- and
 * 16-bit rotations go through the blake2b_rotNN_S helpers, and the 63-bit
 * rotation stays on hc_rotr64_S. Keeping the superseded hc_rotr64_S call next
 * to its replacement would mix the word twice and change the result.
 */
#define BLAKE2B_G(k0,k1,a,b,c,d) \
{                                \
  a = a + b + m[k0];             \
  d = blake2b_rot32_S (d ^ a);   \
  c = c + d;                     \
  b = blake2b_rot24_S (b ^ c);   \
  a = a + b + m[k1];             \
  d = blake2b_rot16_S (d ^ a);   \
  c = c + d;                     \
  b = hc_rotr64_S (b ^ c, 63);   \
}
@ -36,11 +45,11 @@
/*
 * BLAKE2b G mixing step on vector (u64x) state words.
 *
 * k0, k1      indices into an array named m that must be in scope at the
 *             expansion site
 * a, b, c, d  state words; they are read and assigned several times, so each
 *             argument must be a side-effect-free lvalue
 *
 * Each XOR-and-rotate is applied exactly once per step: the 32-, 24- and
 * 16-bit rotations go through the blake2b_rotNN helpers, and the 63-bit
 * rotation stays on hc_rotr64. Keeping the superseded hc_rotr64 call next to
 * its replacement would mix the word twice and change the result.
 */
#define BLAKE2B_G_VECTOR(k0,k1,a,b,c,d) \
{                                       \
  a = a + b + m[k0];                    \
  d = blake2b_rot32 (d ^ a);            \
  c = c + d;                            \
  b = blake2b_rot24 (b ^ c);            \
  a = a + b + m[k1];                    \
  d = blake2b_rot16 (d ^ a);            \
  c = c + d;                            \
  b = hc_rotr64 (b ^ c, 63);            \
}

Loading…
Cancel
Save