diff --git a/OpenCL/m00600_a0.cl b/OpenCL/m00600_a0.cl index d9d6e538d..1dd54d604 100644 --- a/OpenCL/m00600_a0.cl +++ b/OpenCL/m00600_a0.cl @@ -17,22 +17,6 @@ #define BLAKE2B_FINAL 1 #define BLAKE2B_UPDATE 0 -__constant u8a blake2b_sigma[12][16] = -{ - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , - { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } , - { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } , - { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } , - { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } , - { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } , - { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } , - { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } , - { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } , - { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } , - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , - { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } -}; - #define BLAKE2B_G(r,i,a,b,c,d) \ do { \ a = a + b + m[blake2b_sigma[r][2*i+0]]; \ @@ -57,7 +41,7 @@ __constant u8a blake2b_sigma[12][16] = BLAKE2B_G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \ } while(0) -void blake2b_compress(u64x h[8], u64x t[2], u64x f[2], u64x m[16], u64x v[16], const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], const u32x out_len, const u8 isFinal) +void blake2b_transform(u64x h[8], u64x t[2], u64x f[2], u64x m[16], u64x v[16], const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], const u32x out_len, const u8 isFinal) { if (isFinal) f[0] = -1; @@ -98,6 +82,22 @@ void blake2b_compress(u64x h[8], u64x t[2], u64x f[2], u64x m[16], u64x v[16], c v[14] = BLAKE2B_IV_06 ^ f[0]; v[15] = BLAKE2B_IV_07 ^ f[1]; + const u8a blake2b_sigma[12][16] = + { + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } , + { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } , + { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } , + { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } , + { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } , + { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } , + { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } , + { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } , + { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } , + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } + }; + BLAKE2B_ROUND( 0); BLAKE2B_ROUND( 1); BLAKE2B_ROUND( 2); @@ -180,7 +180,7 @@ __kernel void m00600_m04 (__global pw_t *pws, __global const kernel_rule_t *rule f[0] = esalt_bufs->f[0]; f[1] = esalt_bufs->f[1]; - blake2b_compress(h, t, f, m, v, w0, w1, w2, w3, out_len, BLAKE2B_FINAL); + blake2b_transform(h, t, f, m, v, w0, w1, w2, w3, out_len, BLAKE2B_FINAL); digest[0] = h[0]; digest[1] = h[1]; @@ -281,7 +281,7 @@ __kernel void m00600_s04 (__global pw_t *pws, __global const kernel_rule_t *rule f[0] = esalt_bufs->f[0]; f[1] = esalt_bufs->f[1]; - blake2b_compress(h, t, f, m, v, w0, w1, w2, w3, out_len, BLAKE2B_FINAL); + blake2b_transform(h, t, f, m, v, w0, w1, w2, w3, out_len, BLAKE2B_FINAL); digest[0] = h[0]; digest[1] = h[1]; diff --git a/OpenCL/m00600_a1.cl b/OpenCL/m00600_a1.cl index 101e30f3c..de588a163 100644 --- a/OpenCL/m00600_a1.cl +++ b/OpenCL/m00600_a1.cl @@ -17,22 +17,6 @@ #define BLAKE2B_FINAL 1 #define BLAKE2B_UPDATE 0 -__constant u8 blake2b_sigma[12][16] = -{ - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , - { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } , - { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } , - { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } , - { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } , - { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } , - { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } , - { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } , - { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } , - { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } , - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , - { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } -}; - #define BLAKE2B_G(r,i,a,b,c,d) \ do { \ a = a + b + m[blake2b_sigma[r][2*i+0]]; \ @@ -57,7 +41,7 @@ __constant u8 blake2b_sigma[12][16] = BLAKE2B_G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \ } while(0) -void blake2b_compress(u64x h[8], u64x t[2], u64x f[2], u64x m[16], u64x v[16], const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], const u32x out_len, const u8 isFinal) +void blake2b_transform(u64x h[8], u64x t[2], u64x f[2], u64x m[16], u64x v[16], const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], const u32x out_len, const u8 isFinal) { if (isFinal) f[0] = -1; @@ -98,6 +82,22 @@ void blake2b_compress(u64x h[8], u64x t[2], u64x f[2], u64x m[16], u64x v[16], c v[14] = BLAKE2B_IV_06 ^ f[0]; v[15] = BLAKE2B_IV_07 ^ f[1]; + const u8 blake2b_sigma[12][16] = + { + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } , + { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } , + { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } , + { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } , + { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } , + { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } , + { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } , + { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } , + { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } , + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } + }; + BLAKE2B_ROUND( 0); BLAKE2B_ROUND( 1); BLAKE2B_ROUND( 2); @@ -239,7 +239,7 @@ __kernel void m00600_m04 (__global pw_t *pws, __global const kernel_rule_t *rule f[0] = esalt_bufs->f[0]; f[1] = esalt_bufs->f[1]; - blake2b_compress(h, t, f, m, v, w0, w1, w2, w3, out_len, BLAKE2B_FINAL); + blake2b_transform(h, t, f, m, v, w0, w1, w2, w3, out_len, BLAKE2B_FINAL); digest[0] = h[0]; digest[1] = h[1]; @@ -400,7 +400,7 @@ __kernel void m00600_s04 (__global pw_t *pws, __global const kernel_rule_t *rule f[0] = esalt_bufs->f[0]; f[1] = esalt_bufs->f[1]; - blake2b_compress(h, t, f, m, v, w0, w1, w2, w3, out_len, BLAKE2B_FINAL); + blake2b_transform(h, t, f, m, v, w0, w1, w2, w3, out_len, BLAKE2B_FINAL); digest[0] = h[0]; digest[1] = h[1]; diff --git a/OpenCL/m00600_a3.cl b/OpenCL/m00600_a3.cl index 140b3cc66..a71d932b7 100644 --- a/OpenCL/m00600_a3.cl +++ b/OpenCL/m00600_a3.cl @@ -15,22 +15,6 @@ #define BLAKE2B_FINAL 1 #define BLAKE2B_UPDATE 0 -__constant u8 blake2b_sigma[12][16] = -{ - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , - { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } , - { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } , - { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } , - { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } , - { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } , - { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } , - { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } , - { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } , - { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } , - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , - { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } -}; - #define BLAKE2B_G(r,i,a,b,c,d) \ do { \ a = a + b + m[blake2b_sigma[r][2*i+0]]; \ @@ -55,7 +39,7 @@ __constant u8 blake2b_sigma[12][16] = BLAKE2B_G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \ } while(0) -void blake2b_compress(u64x h[8], u64x t[2], u64x f[2], u64x m[16], u64x v[16], const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], const u32x out_len, const u8 isFinal) +void blake2b_transform(u64x h[8], u64x t[2], u64x f[2], u64x m[16], u64x v[16], const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], const u32x out_len, const u8 isFinal) { if (isFinal) f[0] = -1; @@ -96,6 +80,22 @@ void blake2b_compress(u64x h[8], u64x t[2], u64x f[2], u64x m[16], u64x v[16], c v[14] = BLAKE2B_IV_06 ^ f[0]; v[15] = BLAKE2B_IV_07 ^ f[1]; + const u8 blake2b_sigma[12][16] = + { + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } , + { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } , + { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } , + { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } , + { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } , + { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } , + { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } , + { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } , + { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } , + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } + }; + BLAKE2B_ROUND( 0); BLAKE2B_ROUND( 1); BLAKE2B_ROUND( 2); @@ -185,7 +185,7 @@ __kernel void m00600_m04 (__global pw_t *pws, __global const kernel_rule_t *rule f[0] = esalt_bufs->f[0]; f[1] = esalt_bufs->f[1]; - blake2b_compress(h, t, f, m, v, w0, w1, w2, w3, out_len, BLAKE2B_FINAL); + blake2b_transform(h, t, f, m, v, w0, w1, w2, w3, out_len, BLAKE2B_FINAL); digest[0] = h[0]; digest[1] = h[1]; @@ -291,7 +291,7 @@ __kernel void m00600_s04 (__global pw_t *pws, __global const kernel_rule_t *rule f[0] = esalt_bufs->f[0]; f[1] = esalt_bufs->f[1]; - blake2b_compress(h, t, f, m, v, w0, w1, w2, w3, out_len, BLAKE2B_FINAL); + blake2b_transform(h, t, f, m, v, w0, w1, w2, w3, out_len, BLAKE2B_FINAL); digest[0] = h[0]; digest[1] = h[1];