From 9125062ffc2feeca3d228fa8a8bcf30c68612cff Mon Sep 17 00:00:00 2001
From: jsteube
Date: Fri, 8 Sep 2017 13:32:19 +0200
Subject: [PATCH] Move volatiles for AMD closer to the problem

---
 OpenCL/inc_common.cl         | 180 ++++++++++++++++++++++++++++-------
 OpenCL/inc_hash_md4.cl       |   8 --
 OpenCL/inc_hash_md5.cl       |   8 --
 OpenCL/inc_hash_ripemd160.cl |   8 --
 OpenCL/inc_hash_sha1.cl      |   8 --
 OpenCL/inc_hash_sha224.cl    |   8 --
 OpenCL/inc_hash_sha256.cl    |   8 --
 OpenCL/inc_hash_sha384.cl    |   8 --
 OpenCL/inc_hash_sha512.cl    |   8 --
 OpenCL/inc_hash_whirlpool.cl |   4 -
 OpenCL/inc_rp_optimized.cl   |  50 ++++++++--
 11 files changed, 187 insertions(+), 111 deletions(-)

diff --git a/OpenCL/inc_common.cl b/OpenCL/inc_common.cl
index 41b784ada..c1cec45f1 100644
--- a/OpenCL/inc_common.cl
+++ b/OpenCL/inc_common.cl
@@ -1266,6 +1266,12 @@ static void switch_buffer_by_offset_le (u32x w0[4], u32x w1[4], u32x w2[4], u32x
 
   const int offset_minus_4 = 4 - offset_mod_4;
 
+  #ifdef IS_AMD
+  volatile const int offset_switch = offset / 4;
+  #else
+  const int offset_switch = offset / 4;
+  #endif
+
   #if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC
   w0[0] = swap32 (w0[0]);
   w0[1] = swap32 (w0[1]);
@@ -1284,7 +1290,7 @@ static void switch_buffer_by_offset_le (u32x w0[4], u32x w1[4], u32x w2[4], u32x
   w3[2] = swap32 (w3[2]);
   w3[3] = swap32 (w3[3]);
 
-  switch (offset / 4)
+  switch (offset_switch)
   {
     case 0:
       w3[3] = amd_bytealign (w3[2], w3[3], offset);
@@ -1635,7 +1641,7 @@ static void switch_buffer_by_offset_le (u32x w0[4], u32x w1[4], u32x w2[4], u32x
   const int selector = 0x0706050403020100 >> (offset_minus_4 * 8);
   #endif
 
-  switch (offset / 4)
+  switch (offset_switch)
   {
     case 0:
       w3[3] = __byte_perm (w3[2], w3[3], selector);
@@ -1967,6 +1973,12 @@ static void switch_buffer_by_offset_carry_le (u32x w0[4], u32x w1[4], u32x w2[4]
 
   const int offset_minus_4 = 4 - offset_mod_4;
 
+  #ifdef IS_AMD
+  volatile const int offset_switch = offset / 4;
+  #else
+  const int offset_switch = offset / 4;
+  #endif
+
   #if defined IS_AMD || defined IS_GENERIC
   w0[0] = swap32 (w0[0]);
   w0[1] = swap32 (w0[1]);
@@ -1985,7 +1997,7 @@ static void switch_buffer_by_offset_carry_le (u32x w0[4], u32x w1[4], u32x w2[4]
   w3[2] = swap32 (w3[2]);
   w3[3] = swap32 (w3[3]);
 
-  switch (offset / 4)
+  switch (offset_switch)
   {
     case 0:
       c0[0] = amd_bytealign (w3[3], 0, offset);
@@ -2480,7 +2492,7 @@ static void switch_buffer_by_offset_carry_le (u32x w0[4], u32x w1[4], u32x w2[4]
 
   #ifdef IS_NV
   // todo
-  switch (offset / 4)
+  switch (offset_switch)
   {
     case 0:
       c0[0] = amd_bytealign ( 0, w3[3], offset_minus_4);
@@ -3279,9 +3291,15 @@ static void switch_buffer_by_offset_carry_le (u32x w0[4], u32x w1[4], u32x w2[4]
 
 static void switch_buffer_by_offset_be (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 offset)
 {
+  #ifdef IS_AMD
+  volatile const int offset_switch = offset / 4;
+  #else
+  const int offset_switch = offset / 4;
+  #endif
+
   #if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC
 
-  switch (offset / 4)
+  switch (offset_switch)
   {
     case 0:
       w3[3] = amd_bytealign (w3[2], w3[3], offset);
@@ -3616,7 +3634,7 @@ static void switch_buffer_by_offset_be (u32x w0[4], u32x w1[4], u32x w2[4], u32x
   const int selector = 0x0706050403020100 >> ((offset & 3) * 8);
   #endif
 
-  switch (offset / 4)
+  switch (offset_switch)
   {
     case 0:
       w3[3] = __byte_perm (w3[3], w3[2], selector);
@@ -3944,8 +3962,14 @@ static void switch_buffer_by_offset_be (u32x w0[4], u32x w1[4], u32x w2[4], u32x
 
 static void switch_buffer_by_offset_carry_be (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x c0[4], u32x c1[4], u32x c2[4], u32x c3[4], const u32 offset)
 {
+  #ifdef IS_AMD
+  volatile const int offset_switch = offset / 4;
+  #else
+  const int offset_switch = offset / 4;
+  #endif
+
   #if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC
-  switch (offset / 4)
+  switch (offset_switch)
   {
     case 0:
       c0[0] = amd_bytealign (w3[3], 0, offset);
@@ -4415,7 +4439,7 @@ static void switch_buffer_by_offset_carry_be (u32x w0[4], u32x w1[4], u32x w2[4]
   const int selector = 0x0706050403020100 >> ((offset & 3) * 8);
   #endif
 
-  switch (offset / 4)
+  switch (offset_switch)
   {
     case 0:
       c0[0] = __byte_perm ( 0, w3[3], selector);
@@ -4882,6 +4906,12 @@ static void switch_buffer_by_offset_8x4_le (u32x w0[4], u32x w1[4], u32x w2[4],
 
   const int offset_minus_4 = 4 - offset_mod_4;
 
+  #ifdef IS_AMD
+  volatile const int offset_switch = offset / 4;
+  #else
+  const int offset_switch = offset / 4;
+  #endif
+
   #if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC
   w0[0] = swap32 (w0[0]);
   w0[1] = swap32 (w0[1]);
@@ -4916,7 +4946,7 @@ static void switch_buffer_by_offset_8x4_le (u32x w0[4], u32x w1[4], u32x w2[4],
   w7[2] = swap32 (w7[2]);
   w7[3] = swap32 (w7[3]);
 
-  switch (offset / 4)
+  switch (offset_switch)
   {
     case 0:
       w7[3] = amd_bytealign (w7[2], w7[3], offset);
@@ -6115,7 +6145,7 @@ static void switch_buffer_by_offset_8x4_le (u32x w0[4], u32x w1[4], u32x w2[4],
   const int selector = 0x0706050403020100 >> (offset_minus_4 * 8);
   #endif
 
-  switch (offset / 4)
+  switch (offset_switch)
   {
     case 0:
       w7[3] = __byte_perm (w7[2], w7[3], selector);
@@ -6682,8 +6712,14 @@ static void switch_buffer_by_offset_8x4_le (u32x w0[4], u32x w1[4], u32x w2[4],
 
 static void switch_buffer_by_offset_8x4_be (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x w4[4], u32x w5[4], u32x w6[4], u32x w7[4], const u32 offset)
 {
+  #ifdef IS_AMD
+  volatile const int offset_switch = offset / 4;
+  #else
+  const int offset_switch = offset / 4;
+  #endif
+
   #if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC
-  switch (offset / 4)
+  switch (offset_switch)
   {
     case 0:
       w7[3] = amd_bytealign (w7[2], w7[3], offset);
@@ -7849,7 +7885,7 @@ static void switch_buffer_by_offset_8x4_be (u32x w0[4], u32x w1[4], u32x w2[4],
   const int selector = 0x0706050403020100 >> ((offset & 3) * 8);
   #endif
 
-  switch (offset / 4)
+  switch (offset_switch)
   {
     case 0:
       w7[3] = __byte_perm (w7[3], w7[2], selector);
@@ -9008,8 +9044,14 @@ static void switch_buffer_by_offset_8x4_be (u32x w0[4], u32x w1[4], u32x w2[4],
 
 static void switch_buffer_by_offset_8x4_carry_be (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x w4[4], u32x w5[4], u32x w6[4], u32x w7[4], u32x c0[4], u32x c1[4], u32x c2[4], u32x c3[4], u32x c4[4], u32x c5[4], u32x c6[4], u32x c7[4], const u32 offset)
 {
+  #ifdef IS_AMD
+  volatile const int offset_switch = offset / 4;
+  #else
+  const int offset_switch = offset / 4;
+  #endif
+
   #if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC
-  switch (offset / 4)
+  switch (offset_switch)
   {
     case 0:
       c0[0] = amd_bytealign (w7[3], 0, offset);
@@ -10703,7 +10745,7 @@ static void switch_buffer_by_offset_8x4_carry_be (u32x w0[4], u32x w1[4], u32x w
   const int selector = 0x0706050403020100 >> ((offset & 3) * 8);
   #endif
 
-  switch (offset / 4)
+  switch (offset_switch)
   {
     case 0:
       c0[0] = __byte_perm ( 0, w7[3], selector);
@@ -12394,12 +12436,18 @@ static void switch_buffer_by_offset_1x64_le (u32x w[64], const u32 offset)
 
   const int offset_minus_4 = 4 - offset_mod_4;
 
+  #ifdef IS_AMD
+  volatile const int offset_switch = offset / 4;
+  #else
+  const int offset_switch = offset / 4;
+  #endif
+
   #if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC
 
   #pragma unroll
   for (int i = 0; i < 64; i++) w[i] = swap32 (w[i]);
 
-  switch (offset / 4)
+  switch (offset_switch)
   {
     case 0:
       w[63] = amd_bytealign (w[62], w[63], offset);
@@ -16769,7 +16817,7 @@ static void switch_buffer_by_offset_1x64_le (u32x w[64], const u32 offset)
   const int selector = 0x0706050403020100 >> (offset_minus_4 * 8);
   #endif
 
-  switch (offset / 4)
+  switch (offset_switch)
   {
     case 0:
       w[63] = __byte_perm (w[62], w[63], selector);
@@ -21128,8 +21176,14 @@ static void switch_buffer_by_offset_1x64_le (u32x w[64], const u32 offset)
 
 static void switch_buffer_by_offset_1x64_be (u32x w[64], const u32 offset)
 {
+  #ifdef IS_AMD
+  volatile const int offset_switch = offset / 4;
+  #else
+  const int offset_switch = offset / 4;
+  #endif
+
   #if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC
-  switch (offset / 4)
+  switch (offset_switch)
   {
     case 0:
       w[63] = amd_bytealign (w[62], w[63], offset);
@@ -25495,7 +25549,7 @@ static void switch_buffer_by_offset_1x64_be (u32x w[64], const u32 offset)
   const int selector = 0x0706050403020100 >> ((offset & 3) * 8);
   #endif
 
-  switch (offset / 4)
+  switch (offset_switch)
   {
     case 0:
       w[63] = __byte_perm (w[63], w[62], selector);
@@ -32416,6 +32470,12 @@ static void switch_buffer_by_offset_le_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w
 
   const int offset_minus_4 = 4 - offset_mod_4;
 
+  #ifdef IS_AMD
+  volatile const int offset_switch = offset / 4;
+  #else
+  const int offset_switch = offset / 4;
+  #endif
+
   #if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC
   w0[0] = swap32_S (w0[0]);
   w0[1] = swap32_S (w0[1]);
@@ -32434,7 +32494,7 @@ static void switch_buffer_by_offset_le_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w
   w3[2] = swap32_S (w3[2]);
   w3[3] = swap32_S (w3[3]);
 
-  switch (offset / 4)
+  switch (offset_switch)
   {
     case 0:
       w3[3] = amd_bytealign_S (w3[2], w3[3], offset);
@@ -32785,7 +32845,7 @@ static void switch_buffer_by_offset_le_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w
   const int selector = 0x0706050403020100 >> (offset_minus_4 * 8);
   #endif
 
-  switch (offset / 4)
+  switch (offset_switch)
   {
     case 0:
       w3[3] = __byte_perm_S (w3[2], w3[3], selector);
@@ -33116,6 +33176,12 @@ static void switch_buffer_by_offset_carry_le_S (u32 w0[4], u32 w1[4], u32 w2[4],
 
   const int offset_minus_4 = 4 - offset_mod_4;
 
+  #ifdef IS_AMD
+  volatile const int offset_switch = offset / 4;
+  #else
+  const int offset_switch = offset / 4;
+  #endif
+
   #if defined IS_AMD || defined IS_GENERIC
   w0[0] = swap32_S (w0[0]);
   w0[1] = swap32_S (w0[1]);
@@ -33134,7 +33200,7 @@ static void switch_buffer_by_offset_carry_le_S (u32 w0[4], u32 w1[4], u32 w2[4],
   w3[2] = swap32_S (w3[2]);
   w3[3] = swap32_S (w3[3]);
 
-  switch (offset / 4)
+  switch (offset_switch)
   {
     case 0:
       c0[0] = amd_bytealign_S (w3[3], 0, offset);
@@ -33629,7 +33695,7 @@ static void switch_buffer_by_offset_carry_le_S (u32 w0[4], u32 w1[4], u32 w2[4],
 
   #ifdef IS_NV
   // todo
-  switch (offset / 4)
+  switch (offset_switch)
   {
     case 0:
       c0[0] = amd_bytealign_S ( 0, w3[3], offset_minus_4);
@@ -34428,8 +34494,14 @@ static void switch_buffer_by_offset_carry_le_S (u32 w0[4], u32 w1[4], u32 w2[4],
 
 static void switch_buffer_by_offset_be_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset)
 {
+  #ifdef IS_AMD
+  volatile const int offset_switch = offset / 4;
+  #else
+  const int offset_switch = offset / 4;
+  #endif
+
   #if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC
-  switch (offset / 4)
+  switch (offset_switch)
   {
     case 0:
       w3[3] = amd_bytealign_S (w3[2], w3[3], offset);
@@ -34763,7 +34835,7 @@ static void switch_buffer_by_offset_be_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w
   const int selector = 0x0706050403020100 >> ((offset & 3) * 8);
   #endif
 
-  switch (offset / 4)
+  switch (offset_switch)
   {
     case 0:
       w3[3] = __byte_perm_S (w3[3], w3[2], selector);
@@ -35090,8 +35162,14 @@ static void switch_buffer_by_offset_be_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w
 
 static void switch_buffer_by_offset_carry_be_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 c0[4], u32 c1[4], u32 c2[4], u32 c3[4], const u32 offset)
 {
+  #ifdef IS_AMD
+  volatile const int offset_switch = offset / 4;
+  #else
+  const int offset_switch = offset / 4;
+  #endif
+
   #if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC
-  switch (offset / 4)
+  switch (offset_switch)
   {
     case 0:
       c0[0] = amd_bytealign_S (w3[3], 0, offset);
@@ -35561,7 +35639,7 @@ static void switch_buffer_by_offset_carry_be_S (u32 w0[4], u32 w1[4], u32 w2[4],
   const int selector = 0x0706050403020100 >> ((offset & 3) * 8);
   #endif
 
-  switch (offset / 4)
+  switch (offset_switch)
   {
     case 0:
       c0[0] = __byte_perm_S ( 0, w3[3], selector);
@@ -36028,6 +36106,12 @@ static void switch_buffer_by_offset_8x4_le_S (u32 w0[4], u32 w1[4], u32 w2[4], u
 
   const int offset_minus_4 = 4 - offset_mod_4;
 
+  #ifdef IS_AMD
+  volatile const int offset_switch = offset / 4;
+  #else
+  const int offset_switch = offset / 4;
+  #endif
+
   #if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC
   w0[0] = swap32_S (w0[0]);
   w0[1] = swap32_S (w0[1]);
@@ -36062,7 +36146,7 @@ static void switch_buffer_by_offset_8x4_le_S (u32 w0[4], u32 w1[4], u32 w2[4], u
   w7[2] = swap32_S (w7[2]);
   w7[3] = swap32_S (w7[3]);
 
-  switch (offset / 4)
+  switch (offset_switch)
   {
     case 0:
       w7[3] = amd_bytealign_S (w7[2], w7[3], offset);
@@ -37261,7 +37345,7 @@ static void switch_buffer_by_offset_8x4_le_S (u32 w0[4], u32 w1[4], u32 w2[4], u
   const int selector = 0x0706050403020100 >> (offset_minus_4 * 8);
   #endif
 
-  switch (offset / 4)
+  switch (offset_switch)
   {
     case 0:
       w7[3] = __byte_perm_S (w7[2], w7[3], selector);
@@ -37828,8 +37912,14 @@ static void switch_buffer_by_offset_8x4_le_S (u32 w0[4], u32 w1[4], u32 w2[4], u
 
 static void switch_buffer_by_offset_8x4_be_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 w4[4], u32 w5[4], u32 w6[4], u32 w7[4], const u32 offset)
 {
+  #ifdef IS_AMD
+  volatile const int offset_switch = offset / 4;
+  #else
+  const int offset_switch = offset / 4;
+  #endif
+
   #if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC
-  switch (offset / 4)
+  switch (offset_switch)
   {
     case 0:
       w7[3] = amd_bytealign_S (w7[2], w7[3], offset);
@@ -38995,7 +39085,7 @@ static void switch_buffer_by_offset_8x4_be_S (u32 w0[4], u32 w1[4], u32 w2[4], u
   const int selector = 0x0706050403020100 >> ((offset & 3) * 8);
   #endif
 
-  switch (offset / 4)
+  switch (offset_switch)
   {
     case 0:
       w7[3] = __byte_perm_S (w7[3], w7[2], selector);
@@ -40154,8 +40244,14 @@ static void switch_buffer_by_offset_8x4_be_S (u32 w0[4], u32 w1[4], u32 w2[4], u
 
 static void switch_buffer_by_offset_8x4_carry_be_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 w4[4], u32 w5[4], u32 w6[4], u32 w7[4], u32 c0[4], u32 c1[4], u32 c2[4], u32 c3[4], u32 c4[4], u32 c5[4], u32 c6[4], u32 c7[4], const u32 offset)
 {
+  #ifdef IS_AMD
+  volatile const int offset_switch = offset / 4;
+  #else
+  const int offset_switch = offset / 4;
+  #endif
+
   #if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC
-  switch (offset / 4)
+  switch (offset_switch)
   {
     case 0:
       c0[0] = amd_bytealign_S (w7[3], 0, offset);
@@ -41849,7 +41945,7 @@ static void switch_buffer_by_offset_8x4_carry_be_S (u32 w0[4], u32 w1[4], u32 w2
   const int selector = 0x0706050403020100 >> ((offset & 3) * 8);
   #endif
 
-  switch (offset / 4)
+  switch (offset_switch)
   {
     case 0:
       c0[0] = __byte_perm_S ( 0, w7[3], selector);
@@ -43540,12 +43636,18 @@ static void switch_buffer_by_offset_1x64_le_S (u32 w[64], const u32 offset)
 
   const int offset_minus_4 = 4 - offset_mod_4;
 
+  #ifdef IS_AMD
+  volatile const int offset_switch = offset / 4;
+  #else
+  const int offset_switch = offset / 4;
+  #endif
+
   #if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC
 
   #pragma unroll
   for (int i = 0; i < 64; i++) w[i] = swap32_S (w[i]);
 
-  switch (offset / 4)
+  switch (offset_switch)
   {
     case 0:
       w[63] = amd_bytealign_S (w[62], w[63], offset);
@@ -47915,7 +48017,7 @@ static void switch_buffer_by_offset_1x64_le_S (u32 w[64], const u32 offset)
   const int selector = 0x0706050403020100 >> (offset_minus_4 * 8);
   #endif
 
-  switch (offset / 4)
+  switch (offset_switch)
   {
     case 0:
       w[63] = __byte_perm_S (w[62], w[63], selector);
@@ -52274,8 +52376,14 @@ static void switch_buffer_by_offset_1x64_le_S (u32 w[64], const u32 offset)
 
 static void switch_buffer_by_offset_1x64_be_S (u32 w[64], const u32 offset)
 {
+  #ifdef IS_AMD
+  volatile const int offset_switch = offset / 4;
+  #else
+  const int offset_switch = offset / 4;
+  #endif
+
   #if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC
-  switch (offset / 4)
+  switch (offset_switch)
   {
     case 0:
       w[63] = amd_bytealign_S (w[62], w[63], offset);
@@ -56641,7 +56749,7 @@ static void switch_buffer_by_offset_1x64_be_S (u32 w[64], const u32 offset)
   const int selector = 0x0706050403020100 >> ((offset & 3) * 8);
   #endif
 
-  switch (offset / 4)
+  switch (offset_switch)
   {
     case 0:
       w[63] = __byte_perm_S (w[63], w[62], selector);
diff --git a/OpenCL/inc_hash_md4.cl b/OpenCL/inc_hash_md4.cl
index 53358a438..4a72ce8a7 100644
--- a/OpenCL/inc_hash_md4.cl
+++ b/OpenCL/inc_hash_md4.cl
@@ -110,11 +110,7 @@ static void md4_init (md4_ctx_t *ctx)
 
 static void md4_update_64 (md4_ctx_t *ctx, u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const int len)
 {
-  #ifdef IS_AMD
-  volatile const int pos = ctx->len & 63;
-  #else
   const int pos = ctx->len & 63;
-  #endif
 
   ctx->len += len;
 
@@ -1234,11 +1230,7 @@ static void md4_init_vector_from_scalar (md4_ctx_vector_t *ctx, md4_ctx_t *ctx0)
 
 static void md4_update_vector_64 (md4_ctx_vector_t *ctx, u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const int len)
 {
-  #ifdef IS_AMD
-  volatile const int pos = ctx->len & 63;
-  #else
   const int pos = ctx->len & 63;
-  #endif
 
   ctx->len += len;
 
diff --git a/OpenCL/inc_hash_md5.cl b/OpenCL/inc_hash_md5.cl
index 0e2346a9e..0318db7c8 100644
--- a/OpenCL/inc_hash_md5.cl
+++ b/OpenCL/inc_hash_md5.cl
@@ -146,11 +146,7 @@ static void md5_init (md5_ctx_t *ctx)
 
 static void md5_update_64 (md5_ctx_t *ctx, u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const int len)
 {
-  #ifdef IS_AMD
-  volatile const int pos = ctx->len & 63;
-  #else
   const int pos = ctx->len & 63;
-  #endif
 
   ctx->len += len;
 
@@ -1306,11 +1302,7 @@ static void md5_init_vector_from_scalar (md5_ctx_vector_t *ctx, md5_ctx_t *ctx0)
 
 static void md5_update_vector_64 (md5_ctx_vector_t *ctx, u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const int len)
 {
-  #ifdef IS_AMD
-  volatile const int pos = ctx->len & 63;
-  #else
   const int pos = ctx->len & 63;
-  #endif
 
   ctx->len += len;
 
diff --git a/OpenCL/inc_hash_ripemd160.cl b/OpenCL/inc_hash_ripemd160.cl
index 70abae4c7..e46b490f9 100644
--- a/OpenCL/inc_hash_ripemd160.cl
+++ b/OpenCL/inc_hash_ripemd160.cl
@@ -244,11 +244,7 @@ static void ripemd160_init (ripemd160_ctx_t *ctx)
 
 static void ripemd160_update_64 (ripemd160_ctx_t *ctx, u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const int len)
 {
-  #ifdef IS_AMD
-  volatile const int pos = ctx->len & 63;
-  #else
   const int pos = ctx->len & 63;
-  #endif
 
   ctx->len += len;
 
@@ -1503,11 +1499,7 @@ static void ripemd160_init_vector_from_scalar (ripemd160_ctx_vector_t *ctx, ripe
 
 static void ripemd160_update_vector_64 (ripemd160_ctx_vector_t *ctx, u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const int len)
 {
-  #ifdef IS_AMD
-  volatile const int pos = ctx->len & 63;
-  #else
   const int pos = ctx->len & 63;
-  #endif
 
   ctx->len += len;
 
diff --git a/OpenCL/inc_hash_sha1.cl b/OpenCL/inc_hash_sha1.cl
index 6daf8d404..38d988469 100644
--- a/OpenCL/inc_hash_sha1.cl
+++ b/OpenCL/inc_hash_sha1.cl
@@ -176,11 +176,7 @@ static void sha1_init (sha1_ctx_t *ctx)
 
 static void sha1_update_64 (sha1_ctx_t *ctx, u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const int len)
 {
-  #ifdef IS_AMD
-  volatile const int pos = ctx->len & 63;
-  #else
   const int pos = ctx->len & 63;
-  #endif
 
   ctx->len += len;
 
@@ -1603,11 +1599,7 @@ static void sha1_init_vector_from_scalar (sha1_ctx_vector_t *ctx, sha1_ctx_t *ct
 
 static void sha1_update_vector_64 (sha1_ctx_vector_t *ctx, u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const int len)
 {
-  #ifdef IS_AMD
-  volatile const int pos = ctx->len & 63;
-  #else
   const int pos = ctx->len & 63;
-  #endif
 
   ctx->len += len;
 
diff --git a/OpenCL/inc_hash_sha224.cl b/OpenCL/inc_hash_sha224.cl
index c29e8016d..a18620d4d 100644
--- a/OpenCL/inc_hash_sha224.cl
+++ b/OpenCL/inc_hash_sha224.cl
@@ -161,11 +161,7 @@ static void sha224_init (sha224_ctx_t *ctx)
 
 static void sha224_update_64 (sha224_ctx_t *ctx, u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const int len)
 {
-  #ifdef IS_AMD
-  volatile const int pos = ctx->len & 63;
-  #else
   const int pos = ctx->len & 63;
-  #endif
 
   ctx->len += len;
 
@@ -1320,11 +1316,7 @@ static void sha224_init_vector_from_scalar (sha224_ctx_vector_t *ctx, sha224_ctx
 
 static void sha224_update_vector_64 (sha224_ctx_vector_t *ctx, u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const int len)
 {
-  #ifdef IS_AMD
-  volatile const int pos = ctx->len & 63;
-  #else
   const int pos = ctx->len & 63;
-  #endif
 
   ctx->len += len;
 
diff --git a/OpenCL/inc_hash_sha256.cl b/OpenCL/inc_hash_sha256.cl
index 1607e1ae4..702635d2f 100644
--- a/OpenCL/inc_hash_sha256.cl
+++ b/OpenCL/inc_hash_sha256.cl
@@ -161,11 +161,7 @@ static void sha256_init (sha256_ctx_t *ctx)
 
 static void sha256_update_64 (sha256_ctx_t *ctx, u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const int len)
 {
-  #ifdef IS_AMD
-  volatile const int pos = ctx->len & 63;
-  #else
   const int pos = ctx->len & 63;
-  #endif
 
   ctx->len += len;
 
@@ -1320,11 +1316,7 @@ static void sha256_init_vector_from_scalar (sha256_ctx_vector_t *ctx, sha256_ctx
 
 static void sha256_update_vector_64 (sha256_ctx_vector_t *ctx, u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const int len)
 {
-  #ifdef IS_AMD
-  volatile const int pos = ctx->len & 63;
-  #else
   const int pos = ctx->len & 63;
-  #endif
 
   ctx->len += len;
 
diff --git a/OpenCL/inc_hash_sha384.cl b/OpenCL/inc_hash_sha384.cl
index 8dfddad3a..a10330d6b 100644
--- a/OpenCL/inc_hash_sha384.cl
+++ b/OpenCL/inc_hash_sha384.cl
@@ -185,11 +185,7 @@ static void sha384_init (sha384_ctx_t *ctx)
 
 static void sha384_update_128 (sha384_ctx_t *ctx, u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 w4[4], u32 w5[4], u32 w6[4], u32 w7[4], const int len)
 {
-  #ifdef IS_AMD
-  volatile const int pos = ctx->len & 127;
-  #else
   const int pos = ctx->len & 127;
-  #endif
 
   ctx->len += len;
 
@@ -2016,11 +2012,7 @@ static void sha384_init_vector_from_scalar (sha384_ctx_vector_t *ctx, sha384_ctx
 
 static void sha384_update_vector_128 (sha384_ctx_vector_t *ctx, u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x w4[4], u32x w5[4], u32x w6[4], u32x w7[4], const int len)
 {
-  #ifdef IS_AMD
-  volatile const int pos = ctx->len & 127;
-  #else
   const int pos = ctx->len & 127;
-  #endif
 
   ctx->len += len;
 
diff --git a/OpenCL/inc_hash_sha512.cl b/OpenCL/inc_hash_sha512.cl
index 392f23f65..1902d9dde 100644
--- a/OpenCL/inc_hash_sha512.cl
+++ b/OpenCL/inc_hash_sha512.cl
@@ -185,11 +185,7 @@ static void sha512_init (sha512_ctx_t *ctx)
 
 static void sha512_update_128 (sha512_ctx_t *ctx, u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 w4[4], u32 w5[4], u32 w6[4], u32 w7[4], const int len)
 {
-  #ifdef IS_AMD
-  volatile const int pos = ctx->len & 127;
-  #else
   const int pos = ctx->len & 127;
-  #endif
 
   ctx->len += len;
 
@@ -2016,11 +2012,7 @@ static void sha512_init_vector_from_scalar (sha512_ctx_vector_t *ctx, sha512_ctx
 
 static void sha512_update_vector_128 (sha512_ctx_vector_t *ctx, u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x w4[4], u32x w5[4], u32x w6[4], u32x w7[4], const int len)
 {
-  #ifdef IS_AMD
-  volatile const int pos = ctx->len & 127;
-  #else
   const int pos = ctx->len & 127;
-  #endif
 
   ctx->len += len;
 
diff --git a/OpenCL/inc_hash_whirlpool.cl b/OpenCL/inc_hash_whirlpool.cl
index 4b3b1c03a..a8ace7d8e 100644
--- a/OpenCL/inc_hash_whirlpool.cl
+++ b/OpenCL/inc_hash_whirlpool.cl
@@ -2607,11 +2607,7 @@ static void whirlpool_init_vector_from_scalar (whirlpool_ctx_vector_t *ctx, whir
 
 static void whirlpool_update_vector_64 (whirlpool_ctx_vector_t *ctx, u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const int len)
 {
-  #ifdef IS_AMD
-  volatile const int pos = ctx->len & 63;
-  #else
   const int pos = ctx->len & 63;
-  #endif
 
   ctx->len += len;
 
diff --git a/OpenCL/inc_rp_optimized.cl b/OpenCL/inc_rp_optimized.cl
index 0cb0831e7..940fd2894 100644
--- a/OpenCL/inc_rp_optimized.cl
+++ b/OpenCL/inc_rp_optimized.cl
@@ -20,7 +20,13 @@ static void truncate_right (u32 buf0[4], u32 buf1[4], const u32 offset)
 {
   const u32 tmp = (1u << ((offset & 3u) * 8u)) - 1u;
 
-  switch (offset / 4)
+  #ifdef IS_AMD
+  volatile const int offset_switch = offset / 4;
+  #else
+  const int offset_switch = offset / 4;
+  #endif
+
+  switch (offset_switch)
   {
     case  0:  buf0[0] &= tmp;
               buf0[1]  = 0;
@@ -73,7 +79,13 @@ static void truncate_left (u32 buf0[4], u32 buf1[4], const u32 offset)
 {
   const u32 tmp = ~((1u << ((offset & 3u) * 8u)) - 1u);
 
-  switch (offset / 4)
+  #ifdef IS_AMD
+  volatile const int offset_switch = offset / 4;
+  #else
+  const int offset_switch = offset / 4;
+  #endif
+
+  switch (offset_switch)
   {
     case  0:  buf0[0] &= tmp;
              break;
@@ -767,6 +779,12 @@ static void append_block8 (const u32 offset, u32 buf0[4], u32 buf1[4], const u32
   u32 s6 = 0;
   u32 s7 = 0;
 
+  #ifdef IS_AMD
+  volatile const int offset_switch = offset / 4;
+  #else
+  const int offset_switch = offset / 4;
+  #endif
+
   #if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC
   const u32 src_r00 = swap32_S (src_r0[0]);
   const u32 src_r01 = swap32_S (src_r0[1]);
@@ -777,7 +795,7 @@ static void append_block8 (const u32 offset, u32 buf0[4], u32 buf1[4], const u32
   const u32 src_r12 = swap32_S (src_r1[2]);
   const u32 src_r13 = swap32_S (src_r1[3]);
 
-  switch (offset / 4)
+  switch (offset_switch)
   {
     case 0:
       s7 = amd_bytealign_S (src_r12, src_r13, offset);
@@ -902,7 +920,7 @@ static void append_block8 (const u32 offset, u32 buf0[4], u32 buf1[4], const u32
   const u32 src_r12 = src_r1[2];
   const u32 src_r13 = src_r1[3];
 
-  switch (offset / 4)
+  switch (offset_switch)
   {
     case 0:
       s7 = __byte_perm_S (src_r12, src_r13, selector);
@@ -1340,7 +1358,13 @@ static u32 rule_op_mangle_delete_at (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED con
   const u32 ml = (1 << ((p0 & 3) * 8)) - 1;
   const u32 mr = ~ml;
 
-  switch (p0 / 4)
+  #ifdef IS_AMD
+  volatile const int p0_switch = p0 / 4;
+  #else
+  const int p0_switch = p0 / 4;
+  #endif
+
+  switch (p0_switch)
   {
     case 0:
       buf0[0] = (buf0[0] & ml) | (tib40[0] & mr);
@@ -1441,7 +1465,13 @@ static u32 rule_op_mangle_omit (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u3
   const u32 ml = (1 << ((p0 & 3) * 8)) - 1;
   const u32 mr = ~ml;
 
-  switch (p0 / 4)
+  #ifdef IS_AMD
+  volatile const int p0_switch = p0 / 4;
+  #else
+  const int p0_switch = p0 / 4;
+  #endif
+
+  switch (p0_switch)
   {
     case 0:
       buf0[0] = (buf0[0] & ml) | (tib40[0] & mr);
@@ -1521,7 +1551,13 @@ static u32 rule_op_mangle_insert (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const
 
   const u32 mr = 0xffffff00 << ((p0 & 3) * 8);
 
-  switch (p0 / 4)
+  #ifdef IS_AMD
+  volatile const int p0_switch = p0 / 4;
+  #else
+  const int p0_switch = p0 / 4;
+  #endif
+
+  switch (p0_switch)
   {
     case  0:  buf0[0] = (buf0[0] & ml) | p1n | (tib40[0] & mr);
              buf0[1] = tib40[1];
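
Note on the pattern, for context (illustrative, not part of the patch itself):
hashcat's IS_AMD builds use volatile on these selector variables as a
workaround for code-generation problems in AMD's OpenCL compiler around the
large byte-realign switch blocks above. Before this change the volatile sat
on the pos variable of every *_update_* function, which placed the
optimization barrier far away from the switches it protects; the patch drops
those and instead marks only the derived switch selectors (offset_switch,
p0_switch) volatile, and only when IS_AMD is defined. A minimal sketch of the
resulting pattern, with a hypothetical function name and placeholder case
bodies (u32 and IS_AMD come from the hashcat headers):

  static void example_switch_by_offset (u32 w[4], const u32 offset)
  {
    // AMD only: a volatile read of the selector acts as an optimization
    // barrier right at the switch, while the rest of the function is still
    // optimized normally. Other platforms keep the plain const.
    #ifdef IS_AMD
    volatile const int offset_switch = offset / 4;
    #else
    const int offset_switch = offset / 4;
    #endif

    switch (offset_switch)
    {
      case  0: w[0] = 0; break; // placeholder for the per-word realign work
      case  1: w[1] = 0; break;
      case  2: w[2] = 0; break;
      default: w[3] = 0; break;
    }
  }

The design choice is the usual one for compiler workarounds: scope the
barrier to the smallest expression that reproduces the problem. On AMD the
cost should be roughly one extra register load per switch rather than a
de-optimized update path, and non-AMD platforms compile exactly as before.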