diff --git a/OpenCL/m01470_a0-pure.cl b/OpenCL/m01470_a0-pure.cl index e9926a7b8..46f2c892f 100644 --- a/OpenCL/m01470_a0-pure.cl +++ b/OpenCL/m01470_a0-pure.cl @@ -37,6 +37,11 @@ KERNEL_FQ void m01470_mxx (KERN_ATTR_RULES ()) * loop */ + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) { pw_t tmp = PASTE_PW; @@ -55,14 +60,25 @@ KERNEL_FQ void m01470_mxx (KERN_ATTR_RULES ()) sha256_init (&ctx); - u32 _w0[4]; - _w0[0] = ctx0.h[0]; _w0[1] = ctx0.h[1]; _w0[2] = ctx0.h[2]; _w0[3] = ctx0.h[3]; - u32 _w1[4]; - _w1[0] = ctx0.h[4]; _w1[1] = ctx0.h[5]; _w1[2] = ctx0.h[6]; _w1[3] = ctx0.h[7]; - u32 _w2[4] = { 0, 0, 0, 0 }; - u32 _w3[4] = { 0, 0, 0, 0 }; + w0[0] = ctx0.h[0]; + w0[1] = ctx0.h[1]; + w0[2] = ctx0.h[2]; + w0[3] = ctx0.h[3]; + w1[0] = ctx0.h[4]; + w1[1] = ctx0.h[5]; + w1[2] = ctx0.h[6]; + w1[3] = ctx0.h[7]; - sha256_update_64 (&ctx, _w0, _w1, _w2, _w3, 32); + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha256_update_64 (&ctx, w0, w1, w2, w3, 32); sha256_final (&ctx); @@ -108,6 +124,11 @@ KERNEL_FQ void m01470_sxx (KERN_ATTR_RULES ()) * loop */ + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) { pw_t tmp = PASTE_PW; @@ -126,14 +147,25 @@ KERNEL_FQ void m01470_sxx (KERN_ATTR_RULES ()) sha256_init (&ctx); - u32 _w0[4]; - _w0[0] = ctx0.h[0]; _w0[1] = ctx0.h[1]; _w0[2] = ctx0.h[2]; _w0[3] = ctx0.h[3]; - u32 _w1[4]; - _w1[0] = ctx0.h[4]; _w1[1] = ctx0.h[5]; _w1[2] = ctx0.h[6]; _w1[3] = ctx0.h[7]; - u32 _w2[4] = { 0, 0, 0, 0 }; - u32 _w3[4] = { 0, 0, 0, 0 }; + w0[0] = ctx0.h[0]; + w0[1] = ctx0.h[1]; + w0[2] = ctx0.h[2]; + w0[3] = ctx0.h[3]; + w1[0] = ctx0.h[4]; + w1[1] = ctx0.h[5]; + w1[2] = ctx0.h[6]; + w1[3] = ctx0.h[7]; - sha256_update_64 (&ctx, _w0, _w1, _w2, _w3, 32); + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha256_update_64 (&ctx, w0, w1, w2, w3, 32); sha256_final (&ctx); diff --git a/OpenCL/m01470_a1-pure.cl b/OpenCL/m01470_a1-pure.cl index eae6a32e8..2ba848069 100644 --- a/OpenCL/m01470_a1-pure.cl +++ b/OpenCL/m01470_a1-pure.cl @@ -39,6 +39,11 @@ KERNEL_FQ void m01470_mxx (KERN_ATTR_BASIC ()) * loop */ + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) { sha256_ctx_t ctx0 = ctx1; @@ -51,14 +56,25 @@ KERNEL_FQ void m01470_mxx (KERN_ATTR_BASIC ()) sha256_init (&ctx); - u32 _w0[4]; - _w0[0] = ctx0.h[0]; _w0[1] = ctx0.h[1]; _w0[2] = ctx0.h[2]; _w0[3] = ctx0.h[3]; - u32 _w1[4]; - _w1[0] = ctx0.h[4]; _w1[1] = ctx0.h[5]; _w1[2] = ctx0.h[6]; _w1[3] = ctx0.h[7]; - u32 _w2[4] = { 0, 0, 0, 0 }; - u32 _w3[4] = { 0, 0, 0, 0 }; + w0[0] = ctx0.h[0]; + w0[1] = ctx0.h[1]; + w0[2] = ctx0.h[2]; + w0[3] = ctx0.h[3]; + w1[0] = ctx0.h[4]; + w1[1] = ctx0.h[5]; + w1[2] = ctx0.h[6]; + w1[3] = ctx0.h[7]; - sha256_update_64 (&ctx, _w0, _w1, _w2, _w3, 32); + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha256_update_64 (&ctx, w0, w1, w2, w3, 32); sha256_final (&ctx); @@ -108,6 +124,11 @@ KERNEL_FQ void m01470_sxx (KERN_ATTR_BASIC ()) * loop */ + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) { sha256_ctx_t ctx0 = ctx1; @@ -120,14 +141,25 @@ KERNEL_FQ void m01470_sxx (KERN_ATTR_BASIC ()) sha256_init (&ctx); - u32 _w0[4]; - _w0[0] = ctx0.h[0]; _w0[1] = ctx0.h[1]; _w0[2] = ctx0.h[2]; _w0[3] = ctx0.h[3]; - u32 _w1[4]; - _w1[0] = ctx0.h[4]; _w1[1] = ctx0.h[5]; _w1[2] = ctx0.h[6]; _w1[3] = ctx0.h[7]; - u32 _w2[4] = { 0, 0, 0, 0 }; - u32 _w3[4] = { 0, 0, 0, 0 }; + w0[0] = ctx0.h[0]; + w0[1] = ctx0.h[1]; + w0[2] = ctx0.h[2]; + w0[3] = ctx0.h[3]; + w1[0] = ctx0.h[4]; + w1[1] = ctx0.h[5]; + w1[2] = ctx0.h[6]; + w1[3] = ctx0.h[7]; - sha256_update_64 (&ctx, _w0, _w1, _w2, _w3, 32); + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha256_update_64 (&ctx, w0, w1, w2, w3, 32); sha256_final (&ctx); diff --git a/OpenCL/m01470_a3-pure.cl b/OpenCL/m01470_a3-pure.cl index 18071aae9..da1f34954 100644 --- a/OpenCL/m01470_a3-pure.cl +++ b/OpenCL/m01470_a3-pure.cl @@ -42,6 +42,11 @@ KERNEL_FQ void m01470_mxx (KERN_ATTR_VECTOR ()) * loop */ + u32x _w0[4]; + u32x _w1[4]; + u32x _w2[4]; + u32x _w3[4]; + u32x w0l = w[0]; for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) @@ -64,14 +69,23 @@ KERNEL_FQ void m01470_mxx (KERN_ATTR_VECTOR ()) sha256_init_vector (&ctx); - u32x _w0[4]; - _w0[0] = ctx0.h[0]; _w0[1] = ctx0.h[1]; _w0[2] = ctx0.h[2]; _w0[3] = ctx0.h[3]; - u32x _w1[4]; - _w1[0] = ctx0.h[4]; _w1[1] = ctx0.h[5]; _w1[2] = ctx0.h[6]; _w1[3] = ctx0.h[7]; - u32x _w2[4]; - _w2[0] = 0; _w2[1] = 0; _w2[2] = 0; _w2[3] = 0; - u32x _w3[4]; - _w3[0] = 0; _w3[1] = 0; _w3[2] = 0; _w3[3] = 0; + _w0[0] = ctx0.h[0]; + _w0[1] = ctx0.h[1]; + _w0[2] = ctx0.h[2]; + _w0[3] = ctx0.h[3]; + _w1[0] = ctx0.h[4]; + _w1[1] = ctx0.h[5]; + _w1[2] = ctx0.h[6]; + _w1[3] = ctx0.h[7]; + + _w2[0] = 0; + _w2[1] = 0; + _w2[2] = 0; + _w2[3] = 0; + _w3[0] = 0; + _w3[1] = 0; + _w3[2] = 0; + _w3[3] = 0; sha256_update_vector_64 (&ctx, _w0, _w1, _w2, _w3, 32); @@ -126,6 +140,11 @@ KERNEL_FQ void m01470_sxx (KERN_ATTR_VECTOR ()) * loop */ + u32x _w0[4]; + u32x _w1[4]; + u32x _w2[4]; + u32x _w3[4]; + u32x w0l = w[0]; for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) @@ -148,14 +167,23 @@ KERNEL_FQ void m01470_sxx (KERN_ATTR_VECTOR ()) sha256_init_vector (&ctx); - u32x _w0[4]; - _w0[0] = ctx0.h[0]; _w0[1] = ctx0.h[1]; _w0[2] = ctx0.h[2]; _w0[3] = ctx0.h[3]; - u32x _w1[4]; - _w1[0] = ctx0.h[4]; _w1[1] = ctx0.h[5]; _w1[2] = ctx0.h[6]; _w1[3] = ctx0.h[7]; - u32x _w2[4]; - _w2[0] = 0; _w2[1] = 0; _w2[2] = 0; _w2[3] = 0; - u32x _w3[4]; - _w3[0] = 0; _w3[1] = 0; _w3[2] = 0; _w3[3] = 0; + _w0[0] = ctx0.h[0]; + _w0[1] = ctx0.h[1]; + _w0[2] = ctx0.h[2]; + _w0[3] = ctx0.h[3]; + _w1[0] = ctx0.h[4]; + _w1[1] = ctx0.h[5]; + _w1[2] = ctx0.h[6]; + _w1[3] = ctx0.h[7]; + + _w2[0] = 0; + _w2[1] = 0; + _w2[2] = 0; + _w2[3] = 0; + _w3[0] = 0; + _w3[1] = 0; + _w3[2] = 0; + _w3[3] = 0; sha256_update_vector_64 (&ctx, _w0, _w1, _w2, _w3, 32);