|
|
|
@ -9,6 +9,207 @@
|
|
|
|
|
#include "inc_common.h"
|
|
|
|
|
#include "inc_hash_blake2b.h"
|
|
|
|
|
|
|
|
|
|
/**
 * Scalar 16-bit rotation of a 64-bit word for the BLAKE2b code in this file.
 *
 * The generic path is a plain hc_rotr64_S (a, 16). On NVIDIA, and on AMD/HIP
 * when HAS_VPERM is set, the result is instead assembled from two
 * hc_byte_perm_S calls, one per 32-bit half of the vconv64_t view.
 *
 * NOTE(review): the selector constants are presumably chosen so that the
 * permute paths match the generic rotate exactly; their encoding differs
 * between the two vendors - confirm against hc_byte_perm_S.
 */
DECLSPEC u64 blake2b_rot16_S (const u64 a)
{
  #if defined IS_NV

  vconv64_t src;
  vconv64_t dst;

  src.v64 = a;

  // both halves are built from the same (b, a) input pair, only the selector differs
  dst.v32.a = hc_byte_perm_S (src.v32.b, src.v32.a, 0x1076);
  dst.v32.b = hc_byte_perm_S (src.v32.b, src.v32.a, 0x5432);

  return dst.v64;

  #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM

  vconv64_t src;
  vconv64_t dst;

  src.v64 = a;

  // same construction as the NV path, with the selector values used for AMD/HIP
  dst.v32.a = hc_byte_perm_S (src.v32.b, src.v32.a, 0x01000706);
  dst.v32.b = hc_byte_perm_S (src.v32.b, src.v32.a, 0x05040302);

  return dst.v64;

  #else

  // no byte-permute path selected: use the generic 64-bit rotate
  return hc_rotr64_S (a, 16);

  #endif
}
|
|
|
|
|
|
|
|
|
|
/**
 * Vector counterpart of blake2b_rot16_S: applies the scalar helper to every
 * lane of a u64x value and returns the per-lane results.
 *
 * With VECT_SIZE == 1 the argument is handed to the scalar helper directly;
 * otherwise the lanes s0..sf are processed in groups gated by VECT_SIZE.
 */
DECLSPEC u64x blake2b_rot16 (const u64x a)
{
  #if VECT_SIZE == 1

  return blake2b_rot16_S (a);

  #else

  u64x lanes;

  #if VECT_SIZE >= 2
  lanes.s0 = blake2b_rot16_S (a.s0);
  lanes.s1 = blake2b_rot16_S (a.s1);
  #endif

  #if VECT_SIZE >= 4
  lanes.s2 = blake2b_rot16_S (a.s2);
  lanes.s3 = blake2b_rot16_S (a.s3);
  #endif

  #if VECT_SIZE >= 8
  lanes.s4 = blake2b_rot16_S (a.s4);
  lanes.s5 = blake2b_rot16_S (a.s5);
  lanes.s6 = blake2b_rot16_S (a.s6);
  lanes.s7 = blake2b_rot16_S (a.s7);
  #endif

  #if VECT_SIZE >= 16
  lanes.s8 = blake2b_rot16_S (a.s8);
  lanes.s9 = blake2b_rot16_S (a.s9);
  lanes.sa = blake2b_rot16_S (a.sa);
  lanes.sb = blake2b_rot16_S (a.sb);
  lanes.sc = blake2b_rot16_S (a.sc);
  lanes.sd = blake2b_rot16_S (a.sd);
  lanes.se = blake2b_rot16_S (a.se);
  lanes.sf = blake2b_rot16_S (a.sf);
  #endif

  return lanes;

  #endif
}
|
|
|
|
|
|
|
|
|
|
/**
 * Scalar 24-bit right rotation of a 64-bit word for the BLAKE2b code in this
 * file.
 *
 * On NVIDIA, and on AMD/HIP when HAS_VPERM is set, the result is assembled
 * from two hc_byte_perm_S calls, one per 32-bit half of the vconv64_t view.
 * Every other configuration uses the generic hc_rotr64_S helper.
 *
 * All three paths must produce the same value; the generic path therefore
 * rotates by 24 (not 16), matching the function name and the byte-permute
 * selectors, which differ from those of the 16-bit variant.
 *
 * NOTE(review): the selector encoding differs between the two vendors -
 * confirm against hc_byte_perm_S when touching the constants.
 */
DECLSPEC u64 blake2b_rot24_S (const u64 a)
{
  #if defined IS_NV

  vconv64_t in;

  in.v64 = a;

  vconv64_t out;

  // both halves are built from the same (b, a) input pair, only the selector differs
  out.v32.a = hc_byte_perm_S (in.v32.b, in.v32.a, 0x2107);
  out.v32.b = hc_byte_perm_S (in.v32.b, in.v32.a, 0x6543);

  return out.v64;

  #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM

  vconv64_t in;

  in.v64 = a;

  vconv64_t out;

  // same construction as the NV path, with the selector values used for AMD/HIP
  out.v32.a = hc_byte_perm_S (in.v32.b, in.v32.a, 0x02010007);
  out.v32.b = hc_byte_perm_S (in.v32.b, in.v32.a, 0x06050403);

  return out.v64;

  #else

  // generic fallback: the rotation amount has to be 24 to agree with the permute paths
  return hc_rotr64_S (a, 24);

  #endif
}
|
|
|
|
|
|
|
|
|
|
/**
 * Vector counterpart of blake2b_rot24_S: applies the scalar helper to every
 * lane of a u64x value and returns the per-lane results.
 *
 * With VECT_SIZE == 1 the argument is handed to the scalar helper directly;
 * otherwise the lanes s0..sf are processed in groups gated by VECT_SIZE.
 */
DECLSPEC u64x blake2b_rot24 (const u64x a)
{
  #if VECT_SIZE == 1

  return blake2b_rot24_S (a);

  #else

  u64x lanes;

  #if VECT_SIZE >= 2
  lanes.s0 = blake2b_rot24_S (a.s0);
  lanes.s1 = blake2b_rot24_S (a.s1);
  #endif

  #if VECT_SIZE >= 4
  lanes.s2 = blake2b_rot24_S (a.s2);
  lanes.s3 = blake2b_rot24_S (a.s3);
  #endif

  #if VECT_SIZE >= 8
  lanes.s4 = blake2b_rot24_S (a.s4);
  lanes.s5 = blake2b_rot24_S (a.s5);
  lanes.s6 = blake2b_rot24_S (a.s6);
  lanes.s7 = blake2b_rot24_S (a.s7);
  #endif

  #if VECT_SIZE >= 16
  lanes.s8 = blake2b_rot24_S (a.s8);
  lanes.s9 = blake2b_rot24_S (a.s9);
  lanes.sa = blake2b_rot24_S (a.sa);
  lanes.sb = blake2b_rot24_S (a.sb);
  lanes.sc = blake2b_rot24_S (a.sc);
  lanes.sd = blake2b_rot24_S (a.sd);
  lanes.se = blake2b_rot24_S (a.se);
  lanes.sf = blake2b_rot24_S (a.sf);
  #endif

  return lanes;

  #endif
}
|
|
|
|
|
|
|
|
|
|
/**
 * Scalar 32-bit rotation of a 64-bit word for the BLAKE2b code in this file.
 *
 * Implemented on every platform by exchanging the two 32-bit members
 * (v32.a and v32.b) of the vconv64_t view of the input.
 *
 * NOTE(review): this relies on vconv64_t overlaying v64 with the v32.a/v32.b
 * pair - confirm against its declaration.
 */
DECLSPEC u64 blake2b_rot32_S (const u64 a)
{
  vconv64_t src;
  vconv64_t swapped;

  src.v64 = a;

  // cross-assign the halves: a <- b, b <- a
  swapped.v32.b = src.v32.a;
  swapped.v32.a = src.v32.b;

  return swapped.v64;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Vector counterpart of blake2b_rot32_S: applies the scalar helper to every
 * lane of a u64x value and returns the per-lane results.
 *
 * With VECT_SIZE == 1 the argument is handed to the scalar helper directly;
 * otherwise the lanes s0..sf are processed in groups gated by VECT_SIZE.
 */
DECLSPEC u64x blake2b_rot32 (const u64x a)
{
  #if VECT_SIZE == 1

  return blake2b_rot32_S (a);

  #else

  u64x lanes;

  #if VECT_SIZE >= 2
  lanes.s0 = blake2b_rot32_S (a.s0);
  lanes.s1 = blake2b_rot32_S (a.s1);
  #endif

  #if VECT_SIZE >= 4
  lanes.s2 = blake2b_rot32_S (a.s2);
  lanes.s3 = blake2b_rot32_S (a.s3);
  #endif

  #if VECT_SIZE >= 8
  lanes.s4 = blake2b_rot32_S (a.s4);
  lanes.s5 = blake2b_rot32_S (a.s5);
  lanes.s6 = blake2b_rot32_S (a.s6);
  lanes.s7 = blake2b_rot32_S (a.s7);
  #endif

  #if VECT_SIZE >= 16
  lanes.s8 = blake2b_rot32_S (a.s8);
  lanes.s9 = blake2b_rot32_S (a.s9);
  lanes.sa = blake2b_rot32_S (a.sa);
  lanes.sb = blake2b_rot32_S (a.sb);
  lanes.sc = blake2b_rot32_S (a.sc);
  lanes.sd = blake2b_rot32_S (a.sd);
  lanes.se = blake2b_rot32_S (a.se);
  lanes.sf = blake2b_rot32_S (a.sf);
  #endif

  return lanes;

  #endif
}
|
|
|
|
|
|
|
|
|
|
DECLSPEC void blake2b_transform (u64 *h, const u64 *m, const int len, const u64 f0)
|
|
|
|
|
{
|
|
|
|
|
const u64 t0 = hl32_to_64_S (0, len);
|
|
|
|
|