diff --git a/OpenCL/inc_hash_blake2s.cl b/OpenCL/inc_hash_blake2s.cl index 913ff7ef5..18b197c7b 100644 --- a/OpenCL/inc_hash_blake2s.cl +++ b/OpenCL/inc_hash_blake2s.cl @@ -22,18 +22,18 @@ DECLSPEC u32 blake2s_rot16_S (const u32 a) out.v16.a = hc_byte_perm_S (in.v16.b, in.v16.a, 0x1076); out.v16.b = hc_byte_perm_S (in.v16.b, in.v16.a, 0x5432); - return out.v64; + return out.v32; #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1 - vconv64_t in; + vconv32_t in; - in.v64 = a; + in.v32 = a; - vconv64_t out; + vconv32_t out; - out.v32.a = hc_byte_perm_S (in.v32.b, in.v32.a, 0x01000706); - out.v32.b = hc_byte_perm_S (in.v32.b, in.v32.a, 0x05040302); + out.v16.a = hc_byte_perm_S (in.v16.b, in.v16.a, 0x01000706); + out.v16.b = hc_byte_perm_S (in.v16.b, in.v16.a, 0x05040302); return out.v64; @@ -233,18 +233,16 @@ DECLSPEC void blake2b_transform (PRIVATE_AS u64 *h, PRIVATE_AS const u64 *m, con v[14] = BLAKE2B_IV_06 ^ f0; v[15] = BLAKE2B_IV_07; // ^ f1; - BLAKE2B_ROUND ( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - BLAKE2B_ROUND (14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3); - BLAKE2B_ROUND (11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4); - BLAKE2B_ROUND ( 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8); - BLAKE2B_ROUND ( 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13); - BLAKE2B_ROUND ( 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9); - BLAKE2B_ROUND (12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11); - BLAKE2B_ROUND (13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10); - BLAKE2B_ROUND ( 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5); - BLAKE2B_ROUND (10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0); - BLAKE2B_ROUND ( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - BLAKE2B_ROUND (14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3); + BLAKE2S_ROUND ( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + BLAKE2S_ROUND (14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3); + BLAKE2S_ROUND (11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4); + BLAKE2S_ROUND ( 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8); + BLAKE2S_ROUND ( 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13); + BLAKE2S_ROUND ( 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9); + BLAKE2S_ROUND (12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11); + BLAKE2S_ROUND (13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10); + BLAKE2S_ROUND ( 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5); + BLAKE2S_ROUND (10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0); h[0] = h[0] ^ v[0] ^ v[ 8]; h[1] = h[1] ^ v[1] ^ v[ 9]; @@ -606,18 +604,16 @@ DECLSPEC void blake2b_transform_vector (PRIVATE_AS u64x *h, PRIVATE_AS const u64 v[14] = make_u64x (BLAKE2B_IV_06) ^ f0; v[15] = BLAKE2B_IV_07; // ^ f1; - BLAKE2B_ROUND_VECTOR ( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - BLAKE2B_ROUND_VECTOR (14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3); - BLAKE2B_ROUND_VECTOR (11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4); - BLAKE2B_ROUND_VECTOR ( 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8); - BLAKE2B_ROUND_VECTOR ( 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13); - BLAKE2B_ROUND_VECTOR ( 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9); - BLAKE2B_ROUND_VECTOR (12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11); - BLAKE2B_ROUND_VECTOR (13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10); - BLAKE2B_ROUND_VECTOR ( 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5); - BLAKE2B_ROUND_VECTOR (10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0); - BLAKE2B_ROUND_VECTOR ( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - BLAKE2B_ROUND_VECTOR (14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3); + BLAKE2S_ROUND_VECTOR ( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + BLAKE2S_ROUND_VECTOR (14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3); + BLAKE2S_ROUND_VECTOR (11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4); + BLAKE2S_ROUND_VECTOR ( 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8); + BLAKE2S_ROUND_VECTOR ( 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13); + BLAKE2S_ROUND_VECTOR ( 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9); + BLAKE2S_ROUND_VECTOR (12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11); + BLAKE2S_ROUND_VECTOR (13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10); + BLAKE2S_ROUND_VECTOR ( 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5); + BLAKE2S_ROUND_VECTOR (10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0); h[0] = h[0] ^ v[0] ^ v[ 8]; h[1] = h[1] ^ v[1] ^ v[ 9]; diff --git a/OpenCL/inc_hash_blake2s.h b/OpenCL/inc_hash_blake2s.h index 56ac40774..a6ef95d27 100644 --- a/OpenCL/inc_hash_blake2s.h +++ b/OpenCL/inc_hash_blake2s.h @@ -28,9 +28,9 @@ DECLSPEC u32x blake2s_rot07 (const u32x a); c = c + d; \ b = blake2s_rot12_S (b ^ c); \ a = a + b + m[k1]; \ - d = blake2s_rot8_S (d ^ a); \ + d = blake2s_rot08_S (d ^ a); \ c = c + d; \ - b = blake2s_rot7_S (b ^ c); \ + b = blake2s_rot07_S (b ^ c); \ } #define BLAKE2S_ROUND(c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,ca,cb,cc,cd,ce,cf) \ @@ -45,16 +45,16 @@ DECLSPEC u32x blake2s_rot07 (const u32x a); BLAKE2B_G (ce, cf, v[3], v[4], v[ 9], v[14]); \ } -#define BLAKE2B_G_VECTOR(k0,k1,a,b,c,d) \ +#define BLAKE2S_G_VECTOR(k0,k1,a,b,c,d) \ { \ a = a + b + m[k0]; \ - d = blake2b_rot32 (d ^ a); \ + d = blake2s_rot16 (d ^ a); \ c = c + d; \ - b = blake2b_rot24 (b ^ c); \ + b = blake2s_rot12 (b ^ c); \ a = a + b + m[k1]; \ - d = blake2b_rot16 (d ^ a); \ + d = blake2s_rot08 (d ^ a); \ c = c + d; \ - b = hc_rotr64 (b ^ c, 63); \ + b = blake2s_rot07 (b ^ c); \ } #define BLAKE2B_ROUND_VECTOR(c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,ca,cb,cc,cd,ce,cf) \