From 68bff949807f98187c505fe7363cc21d9433f11b Mon Sep 17 00:00:00 2001 From: jsteube Date: Sun, 12 Aug 2018 18:04:33 +0200 Subject: [PATCH] Workaround rocm OpenCL runtime bug when copy data from constant to local memory --- OpenCL/m01500_a0-pure.cl | 4 ++-- OpenCL/m01500_a1-pure.cl | 4 ++-- OpenCL/m02500-pure.cl | 2 +- OpenCL/m02501-pure.cl | 2 +- OpenCL/m02610_a0-optimized.cl | 4 ++-- OpenCL/m02610_a0-pure.cl | 4 ++-- OpenCL/m02610_a1-optimized.cl | 4 ++-- OpenCL/m02610_a1-pure.cl | 4 ++-- OpenCL/m02610_a3-optimized.cl | 12 ++++++------ OpenCL/m02610_a3-pure.cl | 4 ++-- OpenCL/m02710_a0-optimized.cl | 4 ++-- OpenCL/m02710_a1-optimized.cl | 4 ++-- OpenCL/m02710_a3-optimized.cl | 12 ++++++------ OpenCL/m02810_a0-optimized.cl | 4 ++-- OpenCL/m02810_a0-pure.cl | 4 ++-- OpenCL/m02810_a1-optimized.cl | 4 ++-- OpenCL/m02810_a1-pure.cl | 4 ++-- OpenCL/m02810_a3-optimized.cl | 12 ++++++------ OpenCL/m02810_a3-pure.cl | 4 ++-- OpenCL/m03000_a0-pure.cl | 4 ++-- OpenCL/m03000_a1-pure.cl | 4 ++-- OpenCL/m03100_a0-optimized.cl | 4 ++-- OpenCL/m03100_a1-optimized.cl | 4 ++-- OpenCL/m03100_a3-optimized.cl | 8 ++++---- OpenCL/m03710_a0-optimized.cl | 4 ++-- OpenCL/m03710_a0-pure.cl | 4 ++-- OpenCL/m03710_a1-optimized.cl | 4 ++-- OpenCL/m03710_a1-pure.cl | 4 ++-- OpenCL/m03710_a3-optimized.cl | 12 ++++++------ OpenCL/m03710_a3-pure.cl | 4 ++-- OpenCL/m03910_a0-optimized.cl | 4 ++-- OpenCL/m03910_a0-pure.cl | 4 ++-- OpenCL/m03910_a1-optimized.cl | 4 ++-- OpenCL/m03910_a1-pure.cl | 4 ++-- OpenCL/m03910_a3-optimized.cl | 12 ++++++------ OpenCL/m03910_a3-pure.cl | 4 ++-- OpenCL/m04010_a0-optimized.cl | 4 ++-- OpenCL/m04010_a0-pure.cl | 4 ++-- OpenCL/m04010_a1-optimized.cl | 4 ++-- OpenCL/m04010_a1-pure.cl | 4 ++-- OpenCL/m04010_a3-optimized.cl | 12 ++++++------ OpenCL/m04010_a3-pure.cl | 4 ++-- OpenCL/m04110_a0-optimized.cl | 4 ++-- OpenCL/m04110_a0-pure.cl | 4 ++-- OpenCL/m04110_a1-optimized.cl | 4 ++-- OpenCL/m04110_a1-pure.cl | 4 ++-- OpenCL/m04110_a3-optimized.cl | 12 ++++++------ OpenCL/m04110_a3-pure.cl | 4 ++-- OpenCL/m04310_a0-optimized.cl | 4 ++-- OpenCL/m04310_a0-pure.cl | 4 ++-- OpenCL/m04310_a1-optimized.cl | 4 ++-- OpenCL/m04310_a1-pure.cl | 4 ++-- OpenCL/m04310_a3-optimized.cl | 12 ++++++------ OpenCL/m04310_a3-pure.cl | 4 ++-- OpenCL/m04400_a0-optimized.cl | 4 ++-- OpenCL/m04400_a0-pure.cl | 4 ++-- OpenCL/m04400_a1-optimized.cl | 4 ++-- OpenCL/m04400_a1-pure.cl | 4 ++-- OpenCL/m04400_a3-optimized.cl | 12 ++++++------ OpenCL/m04400_a3-pure.cl | 4 ++-- OpenCL/m04500_a0-optimized.cl | 4 ++-- OpenCL/m04500_a0-pure.cl | 4 ++-- OpenCL/m04500_a1-optimized.cl | 4 ++-- OpenCL/m04500_a1-pure.cl | 4 ++-- OpenCL/m04500_a3-optimized.cl | 12 ++++++------ OpenCL/m04500_a3-pure.cl | 4 ++-- OpenCL/m04520_a0-optimized.cl | 4 ++-- OpenCL/m04520_a0-pure.cl | 4 ++-- OpenCL/m04520_a1-optimized.cl | 4 ++-- OpenCL/m04520_a1-pure.cl | 4 ++-- OpenCL/m04520_a3-optimized.cl | 12 ++++++------ OpenCL/m04520_a3-pure.cl | 4 ++-- OpenCL/m04700_a0-optimized.cl | 4 ++-- OpenCL/m04700_a0-pure.cl | 4 ++-- OpenCL/m04700_a1-optimized.cl | 4 ++-- OpenCL/m04700_a1-pure.cl | 4 ++-- OpenCL/m04700_a3-optimized.cl | 12 ++++++------ OpenCL/m04700_a3-pure.cl | 4 ++-- OpenCL/m05300_a0-optimized.cl | 8 ++++---- OpenCL/m05300_a1-optimized.cl | 8 ++++---- OpenCL/m05300_a3-optimized.cl | 24 ++++++++++++------------ OpenCL/m05400_a0-optimized.cl | 8 ++++---- OpenCL/m05400_a1-optimized.cl | 8 ++++---- OpenCL/m05400_a3-optimized.cl | 24 ++++++++++++------------ OpenCL/m05500_a0-optimized.cl | 4 ++-- OpenCL/m05500_a0-pure.cl | 4 ++-- OpenCL/m05500_a1-optimized.cl | 4 ++-- OpenCL/m05500_a1-pure.cl | 4 ++-- OpenCL/m05500_a3-optimized.cl | 8 ++++---- OpenCL/m05500_a3-pure.cl | 4 ++-- OpenCL/m05600_a0-optimized.cl | 8 ++++---- OpenCL/m05600_a1-optimized.cl | 8 ++++---- OpenCL/m05600_a3-optimized.cl | 24 ++++++++++++------------ OpenCL/m05800-optimized.cl | 2 +- OpenCL/m05800-pure.cl | 2 +- OpenCL/m06100_a0-optimized.cl | 4 ++-- OpenCL/m06100_a0-pure.cl | 4 ++-- OpenCL/m06100_a1-optimized.cl | 4 ++-- OpenCL/m06100_a1-pure.cl | 4 ++-- OpenCL/m06100_a3-optimized.cl | 12 ++++++------ OpenCL/m06100_a3-pure.cl | 4 ++-- OpenCL/m06211-pure.cl | 2 +- OpenCL/m06212-pure.cl | 2 +- OpenCL/m06213-pure.cl | 2 +- OpenCL/m06221-pure.cl | 2 +- OpenCL/m06222-pure.cl | 2 +- OpenCL/m06223-pure.cl | 2 +- OpenCL/m06231-pure.cl | 6 +++--- OpenCL/m06232-pure.cl | 6 +++--- OpenCL/m06233-pure.cl | 6 +++--- OpenCL/m06600-pure.cl | 2 +- OpenCL/m06800-pure.cl | 2 +- OpenCL/m06900_a0-optimized.cl | 4 ++-- OpenCL/m06900_a1-optimized.cl | 4 ++-- OpenCL/m06900_a3-optimized.cl | 8 ++++---- OpenCL/m08000_a0-optimized.cl | 4 ++-- OpenCL/m08000_a1-optimized.cl | 4 ++-- OpenCL/m08000_a3-optimized.cl | 4 ++-- OpenCL/m08400_a0-optimized.cl | 4 ++-- OpenCL/m08400_a0-pure.cl | 4 ++-- OpenCL/m08400_a1-optimized.cl | 4 ++-- OpenCL/m08400_a1-pure.cl | 4 ++-- OpenCL/m08400_a3-optimized.cl | 12 ++++++------ OpenCL/m08400_a3-pure.cl | 4 ++-- OpenCL/m08500_a0-pure.cl | 4 ++-- OpenCL/m08500_a1-pure.cl | 4 ++-- OpenCL/m08500_a3-pure.cl | 4 ++-- OpenCL/m08600_a0-pure.cl | 4 ++-- OpenCL/m08600_a1-pure.cl | 4 ++-- OpenCL/m08600_a3-pure.cl | 4 ++-- OpenCL/m08700_a0-optimized.cl | 8 ++++---- OpenCL/m08700_a1-optimized.cl | 8 ++++---- OpenCL/m08700_a3-optimized.cl | 24 ++++++++++++------------ OpenCL/m08800-pure.cl | 2 +- OpenCL/m09100-pure.cl | 4 ++-- OpenCL/m09400-pure.cl | 2 +- OpenCL/m09500-pure.cl | 2 +- OpenCL/m09600-pure.cl | 2 +- OpenCL/m10700-optimized.cl | 2 +- OpenCL/m10700-pure.cl | 2 +- OpenCL/m11100_a0-optimized.cl | 4 ++-- OpenCL/m11100_a0-pure.cl | 4 ++-- OpenCL/m11100_a1-optimized.cl | 4 ++-- OpenCL/m11100_a1-pure.cl | 4 ++-- OpenCL/m11100_a3-optimized.cl | 12 ++++++------ OpenCL/m11100_a3-pure.cl | 4 ++-- OpenCL/m11300-pure.cl | 2 +- OpenCL/m11400_a0-pure.cl | 4 ++-- OpenCL/m11400_a1-pure.cl | 4 ++-- OpenCL/m11400_a3-pure.cl | 4 ++-- OpenCL/m11700_a0-optimized.cl | 4 ++-- OpenCL/m11700_a1-optimized.cl | 4 ++-- OpenCL/m11700_a3-optimized.cl | 12 ++++++------ OpenCL/m11800_a0-optimized.cl | 4 ++-- OpenCL/m11800_a1-optimized.cl | 4 ++-- OpenCL/m11800_a3-optimized.cl | 12 ++++++------ OpenCL/m12400-pure.cl | 4 ++-- OpenCL/m12500-pure.cl | 2 +- OpenCL/m12600_a0-optimized.cl | 4 ++-- OpenCL/m12600_a0-pure.cl | 4 ++-- OpenCL/m12600_a1-optimized.cl | 4 ++-- OpenCL/m12600_a1-pure.cl | 4 ++-- OpenCL/m12600_a3-optimized.cl | 12 ++++++------ OpenCL/m12600_a3-pure.cl | 4 ++-- OpenCL/m12700-pure.cl | 2 +- OpenCL/m12800-pure.cl | 2 +- OpenCL/m13200-pure.cl | 2 +- OpenCL/m13400-pure.cl | 4 ++-- OpenCL/m13751-pure.cl | 2 +- OpenCL/m13752-pure.cl | 2 +- OpenCL/m13753-pure.cl | 2 +- OpenCL/m13800_a0-optimized.cl | 4 ++-- OpenCL/m13800_a1-optimized.cl | 4 ++-- OpenCL/m13800_a3-optimized.cl | 12 ++++++------ OpenCL/m13900_a0-optimized.cl | 4 ++-- OpenCL/m13900_a0-pure.cl | 4 ++-- OpenCL/m13900_a1-optimized.cl | 4 ++-- OpenCL/m13900_a1-pure.cl | 4 ++-- OpenCL/m13900_a3-optimized.cl | 12 ++++++------ OpenCL/m13900_a3-pure.cl | 4 ++-- OpenCL/m14000_a0-pure.cl | 4 ++-- OpenCL/m14000_a1-pure.cl | 4 ++-- OpenCL/m14100_a0-pure.cl | 4 ++-- OpenCL/m14100_a1-pure.cl | 4 ++-- OpenCL/m14100_a3-pure.cl | 4 ++-- OpenCL/m14400_a0-optimized.cl | 4 ++-- OpenCL/m14400_a0-pure.cl | 4 ++-- OpenCL/m14400_a1-optimized.cl | 4 ++-- OpenCL/m14400_a1-pure.cl | 4 ++-- OpenCL/m14400_a3-optimized.cl | 12 ++++++------ OpenCL/m14400_a3-pure.cl | 4 ++-- OpenCL/m14611-pure.cl | 2 +- OpenCL/m14621-pure.cl | 2 +- OpenCL/m14631-pure.cl | 2 +- OpenCL/m14641-pure.cl | 2 +- OpenCL/m14700-pure.cl | 2 +- OpenCL/m14800-pure.cl | 2 +- OpenCL/m14900_a0-optimized.cl | 4 ++-- OpenCL/m14900_a1-optimized.cl | 4 ++-- OpenCL/m14900_a3-optimized.cl | 4 ++-- OpenCL/m15300-pure.cl | 2 +- OpenCL/m15900-pure.cl | 2 +- OpenCL/m16000_a0-pure.cl | 8 ++++---- OpenCL/m16000_a1-pure.cl | 8 ++++---- OpenCL/m16000_a3-pure.cl | 8 ++++---- OpenCL/m16200-pure.cl | 2 +- OpenCL/m16300-pure.cl | 2 +- OpenCL/m16600_a0-optimized.cl | 4 ++-- OpenCL/m16600_a0-pure.cl | 4 ++-- OpenCL/m16600_a1-optimized.cl | 4 ++-- OpenCL/m16600_a1-pure.cl | 4 ++-- OpenCL/m16600_a3-optimized.cl | 12 ++++++------ OpenCL/m16600_a3-pure.cl | 4 ++-- 213 files changed, 549 insertions(+), 549 deletions(-) diff --git a/OpenCL/m01500_a0-pure.cl b/OpenCL/m01500_a0-pure.cl index 758ad7d7d..20dc91d81 100644 --- a/OpenCL/m01500_a0-pure.cl +++ b/OpenCL/m01500_a0-pure.cl @@ -498,7 +498,7 @@ __kernel void m01500_mxx (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 s_SPtrans[8][64]; __local u32 s_skb[8][64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_SPtrans[0][i] = c_SPtrans[0][i]; s_SPtrans[1][i] = c_SPtrans[1][i]; @@ -582,7 +582,7 @@ __kernel void m01500_sxx (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 s_SPtrans[8][64]; __local u32 s_skb[8][64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_SPtrans[0][i] = c_SPtrans[0][i]; s_SPtrans[1][i] = c_SPtrans[1][i]; diff --git a/OpenCL/m01500_a1-pure.cl b/OpenCL/m01500_a1-pure.cl index a57b4130e..b0580e1f5 100644 --- a/OpenCL/m01500_a1-pure.cl +++ b/OpenCL/m01500_a1-pure.cl @@ -496,7 +496,7 @@ __kernel void m01500_mxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_SPtrans[8][64]; __local u32 s_skb[8][64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_SPtrans[0][i] = c_SPtrans[0][i]; s_SPtrans[1][i] = c_SPtrans[1][i]; @@ -659,7 +659,7 @@ __kernel void m01500_sxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_SPtrans[8][64]; __local u32 s_skb[8][64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_SPtrans[0][i] = c_SPtrans[0][i]; s_SPtrans[1][i] = c_SPtrans[1][i]; diff --git a/OpenCL/m02500-pure.cl b/OpenCL/m02500-pure.cl index 219945cb5..6f64de7b8 100644 --- a/OpenCL/m02500-pure.cl +++ b/OpenCL/m02500-pure.cl @@ -1037,7 +1037,7 @@ __kernel void m02500_aux3 (__global pw_t *pws, __global const kernel_rule_t *rul __local u32 s_te3[256]; __local u32 s_te4[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_td0[i] = td0[i]; s_td1[i] = td1[i]; diff --git a/OpenCL/m02501-pure.cl b/OpenCL/m02501-pure.cl index cc98dc5d4..e3361673e 100644 --- a/OpenCL/m02501-pure.cl +++ b/OpenCL/m02501-pure.cl @@ -768,7 +768,7 @@ __kernel void m02501_aux3 (__global pw_t *pws, __global const kernel_rule_t *rul __local u32 s_te3[256]; __local u32 s_te4[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_td0[i] = td0[i]; s_td1[i] = td1[i]; diff --git a/OpenCL/m02610_a0-optimized.cl b/OpenCL/m02610_a0-optimized.cl index 162f7f910..3a286e51d 100644 --- a/OpenCL/m02610_a0-optimized.cl +++ b/OpenCL/m02610_a0-optimized.cl @@ -42,7 +42,7 @@ __kernel void m02610_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -326,7 +326,7 @@ __kernel void m02610_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m02610_a0-pure.cl b/OpenCL/m02610_a0-pure.cl index 812dcb13d..db2dc0fd4 100644 --- a/OpenCL/m02610_a0-pure.cl +++ b/OpenCL/m02610_a0-pure.cl @@ -43,7 +43,7 @@ __kernel void m02610_mxx (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -146,7 +146,7 @@ __kernel void m02610_sxx (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m02610_a1-optimized.cl b/OpenCL/m02610_a1-optimized.cl index 6afc30333..7ec79c7c3 100644 --- a/OpenCL/m02610_a1-optimized.cl +++ b/OpenCL/m02610_a1-optimized.cl @@ -40,7 +40,7 @@ __kernel void m02610_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -383,7 +383,7 @@ __kernel void m02610_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m02610_a1-pure.cl b/OpenCL/m02610_a1-pure.cl index 0a07090e9..80815255e 100644 --- a/OpenCL/m02610_a1-pure.cl +++ b/OpenCL/m02610_a1-pure.cl @@ -41,7 +41,7 @@ __kernel void m02610_mxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -142,7 +142,7 @@ __kernel void m02610_sxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m02610_a3-optimized.cl b/OpenCL/m02610_a3-optimized.cl index 79e5f8c33..9f2efb28d 100644 --- a/OpenCL/m02610_a3-optimized.cl +++ b/OpenCL/m02610_a3-optimized.cl @@ -606,7 +606,7 @@ __kernel void m02610_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -676,7 +676,7 @@ __kernel void m02610_m08 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -746,7 +746,7 @@ __kernel void m02610_m16 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -816,7 +816,7 @@ __kernel void m02610_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -886,7 +886,7 @@ __kernel void m02610_s08 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -956,7 +956,7 @@ __kernel void m02610_s16 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m02610_a3-pure.cl b/OpenCL/m02610_a3-pure.cl index e5f04c2b8..cc030cb4d 100644 --- a/OpenCL/m02610_a3-pure.cl +++ b/OpenCL/m02610_a3-pure.cl @@ -41,7 +41,7 @@ __kernel void m02610_mxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -155,7 +155,7 @@ __kernel void m02610_sxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m02710_a0-optimized.cl b/OpenCL/m02710_a0-optimized.cl index 7e7746354..efe749301 100644 --- a/OpenCL/m02710_a0-optimized.cl +++ b/OpenCL/m02710_a0-optimized.cl @@ -42,7 +42,7 @@ __kernel void m02710_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -411,7 +411,7 @@ __kernel void m02710_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m02710_a1-optimized.cl b/OpenCL/m02710_a1-optimized.cl index 6edbfb0c8..4ba2973d4 100644 --- a/OpenCL/m02710_a1-optimized.cl +++ b/OpenCL/m02710_a1-optimized.cl @@ -40,7 +40,7 @@ __kernel void m02710_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -468,7 +468,7 @@ __kernel void m02710_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m02710_a3-optimized.cl b/OpenCL/m02710_a3-optimized.cl index 45fa3a0b7..8ae549d10 100644 --- a/OpenCL/m02710_a3-optimized.cl +++ b/OpenCL/m02710_a3-optimized.cl @@ -775,7 +775,7 @@ __kernel void m02710_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -845,7 +845,7 @@ __kernel void m02710_m08 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -915,7 +915,7 @@ __kernel void m02710_m16 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -985,7 +985,7 @@ __kernel void m02710_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -1055,7 +1055,7 @@ __kernel void m02710_s08 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -1125,7 +1125,7 @@ __kernel void m02710_s16 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m02810_a0-optimized.cl b/OpenCL/m02810_a0-optimized.cl index 60780f5be..b1d0fa378 100644 --- a/OpenCL/m02810_a0-optimized.cl +++ b/OpenCL/m02810_a0-optimized.cl @@ -42,7 +42,7 @@ __kernel void m02810_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -412,7 +412,7 @@ __kernel void m02810_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m02810_a0-pure.cl b/OpenCL/m02810_a0-pure.cl index 4d488a79e..253a8ebe4 100644 --- a/OpenCL/m02810_a0-pure.cl +++ b/OpenCL/m02810_a0-pure.cl @@ -43,7 +43,7 @@ __kernel void m02810_mxx (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -169,7 +169,7 @@ __kernel void m02810_sxx (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m02810_a1-optimized.cl b/OpenCL/m02810_a1-optimized.cl index 79e0279b8..7e9705f8d 100644 --- a/OpenCL/m02810_a1-optimized.cl +++ b/OpenCL/m02810_a1-optimized.cl @@ -40,7 +40,7 @@ __kernel void m02810_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -469,7 +469,7 @@ __kernel void m02810_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m02810_a1-pure.cl b/OpenCL/m02810_a1-pure.cl index f7d1a5bab..9d127a28e 100644 --- a/OpenCL/m02810_a1-pure.cl +++ b/OpenCL/m02810_a1-pure.cl @@ -41,7 +41,7 @@ __kernel void m02810_mxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -165,7 +165,7 @@ __kernel void m02810_sxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m02810_a3-optimized.cl b/OpenCL/m02810_a3-optimized.cl index 27194f241..3f9034d74 100644 --- a/OpenCL/m02810_a3-optimized.cl +++ b/OpenCL/m02810_a3-optimized.cl @@ -777,7 +777,7 @@ __kernel void m02810_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -847,7 +847,7 @@ __kernel void m02810_m08 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -917,7 +917,7 @@ __kernel void m02810_m16 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -987,7 +987,7 @@ __kernel void m02810_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -1057,7 +1057,7 @@ __kernel void m02810_s08 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -1127,7 +1127,7 @@ __kernel void m02810_s16 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m02810_a3-pure.cl b/OpenCL/m02810_a3-pure.cl index b00dfcb18..d6ec1b304 100644 --- a/OpenCL/m02810_a3-pure.cl +++ b/OpenCL/m02810_a3-pure.cl @@ -41,7 +41,7 @@ __kernel void m02810_mxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -178,7 +178,7 @@ __kernel void m02810_sxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m03000_a0-pure.cl b/OpenCL/m03000_a0-pure.cl index 1e59ebbb2..174a6bb5b 100644 --- a/OpenCL/m03000_a0-pure.cl +++ b/OpenCL/m03000_a0-pure.cl @@ -508,7 +508,7 @@ __kernel void m03000_mxx (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 s_SPtrans[8][64]; __local u32 s_skb[8][64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_SPtrans[0][i] = c_SPtrans[0][i]; s_SPtrans[1][i] = c_SPtrans[1][i]; @@ -593,7 +593,7 @@ __kernel void m03000_sxx (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 s_SPtrans[8][64]; __local u32 s_skb[8][64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_SPtrans[0][i] = c_SPtrans[0][i]; s_SPtrans[1][i] = c_SPtrans[1][i]; diff --git a/OpenCL/m03000_a1-pure.cl b/OpenCL/m03000_a1-pure.cl index fd32589f2..dcd636c58 100644 --- a/OpenCL/m03000_a1-pure.cl +++ b/OpenCL/m03000_a1-pure.cl @@ -506,7 +506,7 @@ __kernel void m03000_mxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_SPtrans[8][64]; __local u32 s_skb[8][64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_SPtrans[0][i] = c_SPtrans[0][i]; s_SPtrans[1][i] = c_SPtrans[1][i]; @@ -670,7 +670,7 @@ __kernel void m03000_sxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_SPtrans[8][64]; __local u32 s_skb[8][64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_SPtrans[0][i] = c_SPtrans[0][i]; s_SPtrans[1][i] = c_SPtrans[1][i]; diff --git a/OpenCL/m03100_a0-optimized.cl b/OpenCL/m03100_a0-optimized.cl index 74550c75e..055b29ab6 100644 --- a/OpenCL/m03100_a0-optimized.cl +++ b/OpenCL/m03100_a0-optimized.cl @@ -504,7 +504,7 @@ __kernel void m03100_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 s_SPtrans[8][64]; __local u32 s_skb[8][64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_SPtrans[0][i] = c_SPtrans[0][i]; s_SPtrans[1][i] = c_SPtrans[1][i]; @@ -727,7 +727,7 @@ __kernel void m03100_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 s_SPtrans[8][64]; __local u32 s_skb[8][64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_SPtrans[0][i] = c_SPtrans[0][i]; s_SPtrans[1][i] = c_SPtrans[1][i]; diff --git a/OpenCL/m03100_a1-optimized.cl b/OpenCL/m03100_a1-optimized.cl index 559e235e9..16fc2d777 100644 --- a/OpenCL/m03100_a1-optimized.cl +++ b/OpenCL/m03100_a1-optimized.cl @@ -502,7 +502,7 @@ __kernel void m03100_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_SPtrans[8][64]; __local u32 s_skb[8][64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_SPtrans[0][i] = c_SPtrans[0][i]; s_SPtrans[1][i] = c_SPtrans[1][i]; @@ -785,7 +785,7 @@ __kernel void m03100_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_SPtrans[8][64]; __local u32 s_skb[8][64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_SPtrans[0][i] = c_SPtrans[0][i]; s_SPtrans[1][i] = c_SPtrans[1][i]; diff --git a/OpenCL/m03100_a3-optimized.cl b/OpenCL/m03100_a3-optimized.cl index 6c2c8f2ed..0c5871c99 100644 --- a/OpenCL/m03100_a3-optimized.cl +++ b/OpenCL/m03100_a3-optimized.cl @@ -900,7 +900,7 @@ __kernel void m03100_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_SPtrans[8][64]; __local u32 s_skb[8][64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_SPtrans[0][i] = c_SPtrans[0][i]; s_SPtrans[1][i] = c_SPtrans[1][i]; @@ -974,7 +974,7 @@ __kernel void m03100_m08 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_SPtrans[8][64]; __local u32 s_skb[8][64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_SPtrans[0][i] = c_SPtrans[0][i]; s_SPtrans[1][i] = c_SPtrans[1][i]; @@ -1052,7 +1052,7 @@ __kernel void m03100_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_SPtrans[8][64]; __local u32 s_skb[8][64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_SPtrans[0][i] = c_SPtrans[0][i]; s_SPtrans[1][i] = c_SPtrans[1][i]; @@ -1126,7 +1126,7 @@ __kernel void m03100_s08 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_SPtrans[8][64]; __local u32 s_skb[8][64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_SPtrans[0][i] = c_SPtrans[0][i]; s_SPtrans[1][i] = c_SPtrans[1][i]; diff --git a/OpenCL/m03710_a0-optimized.cl b/OpenCL/m03710_a0-optimized.cl index b09990695..cfa846d95 100644 --- a/OpenCL/m03710_a0-optimized.cl +++ b/OpenCL/m03710_a0-optimized.cl @@ -42,7 +42,7 @@ __kernel void m03710_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -357,7 +357,7 @@ __kernel void m03710_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m03710_a0-pure.cl b/OpenCL/m03710_a0-pure.cl index 9525c1587..a1e39d98d 100644 --- a/OpenCL/m03710_a0-pure.cl +++ b/OpenCL/m03710_a0-pure.cl @@ -43,7 +43,7 @@ __kernel void m03710_mxx (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -159,7 +159,7 @@ __kernel void m03710_sxx (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m03710_a1-optimized.cl b/OpenCL/m03710_a1-optimized.cl index d22fd776a..640743f6d 100644 --- a/OpenCL/m03710_a1-optimized.cl +++ b/OpenCL/m03710_a1-optimized.cl @@ -40,7 +40,7 @@ __kernel void m03710_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -414,7 +414,7 @@ __kernel void m03710_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m03710_a1-pure.cl b/OpenCL/m03710_a1-pure.cl index 4f534eb50..5ff5de092 100644 --- a/OpenCL/m03710_a1-pure.cl +++ b/OpenCL/m03710_a1-pure.cl @@ -41,7 +41,7 @@ __kernel void m03710_mxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -155,7 +155,7 @@ __kernel void m03710_sxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m03710_a3-optimized.cl b/OpenCL/m03710_a3-optimized.cl index 8eb78b0ae..ff38eda66 100644 --- a/OpenCL/m03710_a3-optimized.cl +++ b/OpenCL/m03710_a3-optimized.cl @@ -633,7 +633,7 @@ __kernel void m03710_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -737,7 +737,7 @@ __kernel void m03710_m08 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -807,7 +807,7 @@ __kernel void m03710_m16 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -877,7 +877,7 @@ __kernel void m03710_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -947,7 +947,7 @@ __kernel void m03710_s08 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -983,7 +983,7 @@ __kernel void m03710_s16 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m03710_a3-pure.cl b/OpenCL/m03710_a3-pure.cl index 313c92dce..70d95442e 100644 --- a/OpenCL/m03710_a3-pure.cl +++ b/OpenCL/m03710_a3-pure.cl @@ -41,7 +41,7 @@ __kernel void m03710_mxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -168,7 +168,7 @@ __kernel void m03710_sxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m03910_a0-optimized.cl b/OpenCL/m03910_a0-optimized.cl index 203f77d1c..8dd04c5f4 100644 --- a/OpenCL/m03910_a0-optimized.cl +++ b/OpenCL/m03910_a0-optimized.cl @@ -42,7 +42,7 @@ __kernel void m03910_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -412,7 +412,7 @@ __kernel void m03910_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m03910_a0-pure.cl b/OpenCL/m03910_a0-pure.cl index 86267ed95..0ce6afa0e 100644 --- a/OpenCL/m03910_a0-pure.cl +++ b/OpenCL/m03910_a0-pure.cl @@ -43,7 +43,7 @@ __kernel void m03910_mxx (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -169,7 +169,7 @@ __kernel void m03910_sxx (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m03910_a1-optimized.cl b/OpenCL/m03910_a1-optimized.cl index 834164b8c..f2f0cc9ee 100644 --- a/OpenCL/m03910_a1-optimized.cl +++ b/OpenCL/m03910_a1-optimized.cl @@ -40,7 +40,7 @@ __kernel void m03910_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -469,7 +469,7 @@ __kernel void m03910_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m03910_a1-pure.cl b/OpenCL/m03910_a1-pure.cl index 0f33d3957..19fd821f1 100644 --- a/OpenCL/m03910_a1-pure.cl +++ b/OpenCL/m03910_a1-pure.cl @@ -41,7 +41,7 @@ __kernel void m03910_mxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -165,7 +165,7 @@ __kernel void m03910_sxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m03910_a3-optimized.cl b/OpenCL/m03910_a3-optimized.cl index f3800e5eb..668ea8e40 100644 --- a/OpenCL/m03910_a3-optimized.cl +++ b/OpenCL/m03910_a3-optimized.cl @@ -777,7 +777,7 @@ __kernel void m03910_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -847,7 +847,7 @@ __kernel void m03910_m08 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -917,7 +917,7 @@ __kernel void m03910_m16 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -987,7 +987,7 @@ __kernel void m03910_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -1057,7 +1057,7 @@ __kernel void m03910_s08 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -1127,7 +1127,7 @@ __kernel void m03910_s16 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m03910_a3-pure.cl b/OpenCL/m03910_a3-pure.cl index ded976c98..5ae78eb03 100644 --- a/OpenCL/m03910_a3-pure.cl +++ b/OpenCL/m03910_a3-pure.cl @@ -41,7 +41,7 @@ __kernel void m03910_mxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -178,7 +178,7 @@ __kernel void m03910_sxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m04010_a0-optimized.cl b/OpenCL/m04010_a0-optimized.cl index b910b1b55..82575df8a 100644 --- a/OpenCL/m04010_a0-optimized.cl +++ b/OpenCL/m04010_a0-optimized.cl @@ -42,7 +42,7 @@ __kernel void m04010_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -383,7 +383,7 @@ __kernel void m04010_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m04010_a0-pure.cl b/OpenCL/m04010_a0-pure.cl index 239becda9..52b2d43c3 100644 --- a/OpenCL/m04010_a0-pure.cl +++ b/OpenCL/m04010_a0-pure.cl @@ -43,7 +43,7 @@ __kernel void m04010_mxx (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -150,7 +150,7 @@ __kernel void m04010_sxx (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m04010_a1-optimized.cl b/OpenCL/m04010_a1-optimized.cl index c086274a0..641c56f8f 100644 --- a/OpenCL/m04010_a1-optimized.cl +++ b/OpenCL/m04010_a1-optimized.cl @@ -40,7 +40,7 @@ __kernel void m04010_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -439,7 +439,7 @@ __kernel void m04010_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m04010_a1-pure.cl b/OpenCL/m04010_a1-pure.cl index 6d916613d..efdce92cc 100644 --- a/OpenCL/m04010_a1-pure.cl +++ b/OpenCL/m04010_a1-pure.cl @@ -41,7 +41,7 @@ __kernel void m04010_mxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -146,7 +146,7 @@ __kernel void m04010_sxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m04010_a3-optimized.cl b/OpenCL/m04010_a3-optimized.cl index 6372643b0..4d06d1939 100644 --- a/OpenCL/m04010_a3-optimized.cl +++ b/OpenCL/m04010_a3-optimized.cl @@ -673,7 +673,7 @@ __kernel void m04010_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -777,7 +777,7 @@ __kernel void m04010_m08 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -847,7 +847,7 @@ __kernel void m04010_m16 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -917,7 +917,7 @@ __kernel void m04010_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -987,7 +987,7 @@ __kernel void m04010_s08 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -1023,7 +1023,7 @@ __kernel void m04010_s16 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m04010_a3-pure.cl b/OpenCL/m04010_a3-pure.cl index 1312202ba..dc9daa325 100644 --- a/OpenCL/m04010_a3-pure.cl +++ b/OpenCL/m04010_a3-pure.cl @@ -41,7 +41,7 @@ __kernel void m04010_mxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -163,7 +163,7 @@ __kernel void m04010_sxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m04110_a0-optimized.cl b/OpenCL/m04110_a0-optimized.cl index b7ade07ca..5ab7190b2 100644 --- a/OpenCL/m04110_a0-optimized.cl +++ b/OpenCL/m04110_a0-optimized.cl @@ -42,7 +42,7 @@ __kernel void m04110_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -428,7 +428,7 @@ __kernel void m04110_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m04110_a0-pure.cl b/OpenCL/m04110_a0-pure.cl index 4ce0d0f48..f1ec0b0d4 100644 --- a/OpenCL/m04110_a0-pure.cl +++ b/OpenCL/m04110_a0-pure.cl @@ -43,7 +43,7 @@ __kernel void m04110_mxx (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -163,7 +163,7 @@ __kernel void m04110_sxx (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m04110_a1-optimized.cl b/OpenCL/m04110_a1-optimized.cl index 613ba43f5..f8202c5e4 100644 --- a/OpenCL/m04110_a1-optimized.cl +++ b/OpenCL/m04110_a1-optimized.cl @@ -40,7 +40,7 @@ __kernel void m04110_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -486,7 +486,7 @@ __kernel void m04110_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m04110_a1-pure.cl b/OpenCL/m04110_a1-pure.cl index 2748e0a16..1f48c5b45 100644 --- a/OpenCL/m04110_a1-pure.cl +++ b/OpenCL/m04110_a1-pure.cl @@ -41,7 +41,7 @@ __kernel void m04110_mxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -159,7 +159,7 @@ __kernel void m04110_sxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m04110_a3-optimized.cl b/OpenCL/m04110_a3-optimized.cl index c9c2783da..e4655a1c5 100644 --- a/OpenCL/m04110_a3-optimized.cl +++ b/OpenCL/m04110_a3-optimized.cl @@ -729,7 +729,7 @@ __kernel void m04110_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -833,7 +833,7 @@ __kernel void m04110_m08 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -903,7 +903,7 @@ __kernel void m04110_m16 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -973,7 +973,7 @@ __kernel void m04110_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -1043,7 +1043,7 @@ __kernel void m04110_s08 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -1079,7 +1079,7 @@ __kernel void m04110_s16 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m04110_a3-pure.cl b/OpenCL/m04110_a3-pure.cl index fab7151f0..c0d3d94a7 100644 --- a/OpenCL/m04110_a3-pure.cl +++ b/OpenCL/m04110_a3-pure.cl @@ -41,7 +41,7 @@ __kernel void m04110_mxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -174,7 +174,7 @@ __kernel void m04110_sxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m04310_a0-optimized.cl b/OpenCL/m04310_a0-optimized.cl index 509687344..8808ef081 100644 --- a/OpenCL/m04310_a0-optimized.cl +++ b/OpenCL/m04310_a0-optimized.cl @@ -42,7 +42,7 @@ __kernel void m04310_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -326,7 +326,7 @@ __kernel void m04310_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m04310_a0-pure.cl b/OpenCL/m04310_a0-pure.cl index 5d94733bc..dfa1233ae 100644 --- a/OpenCL/m04310_a0-pure.cl +++ b/OpenCL/m04310_a0-pure.cl @@ -43,7 +43,7 @@ __kernel void m04310_mxx (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -146,7 +146,7 @@ __kernel void m04310_sxx (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m04310_a1-optimized.cl b/OpenCL/m04310_a1-optimized.cl index 666d1b920..502f58fba 100644 --- a/OpenCL/m04310_a1-optimized.cl +++ b/OpenCL/m04310_a1-optimized.cl @@ -40,7 +40,7 @@ __kernel void m04310_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -383,7 +383,7 @@ __kernel void m04310_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m04310_a1-pure.cl b/OpenCL/m04310_a1-pure.cl index c1423c690..380abc274 100644 --- a/OpenCL/m04310_a1-pure.cl +++ b/OpenCL/m04310_a1-pure.cl @@ -41,7 +41,7 @@ __kernel void m04310_mxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -142,7 +142,7 @@ __kernel void m04310_sxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m04310_a3-optimized.cl b/OpenCL/m04310_a3-optimized.cl index f030ef299..20348a762 100644 --- a/OpenCL/m04310_a3-optimized.cl +++ b/OpenCL/m04310_a3-optimized.cl @@ -606,7 +606,7 @@ __kernel void m04310_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -676,7 +676,7 @@ __kernel void m04310_m08 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -746,7 +746,7 @@ __kernel void m04310_m16 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -816,7 +816,7 @@ __kernel void m04310_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -886,7 +886,7 @@ __kernel void m04310_s08 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -956,7 +956,7 @@ __kernel void m04310_s16 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m04310_a3-pure.cl b/OpenCL/m04310_a3-pure.cl index 0c142ae69..975dbc536 100644 --- a/OpenCL/m04310_a3-pure.cl +++ b/OpenCL/m04310_a3-pure.cl @@ -41,7 +41,7 @@ __kernel void m04310_mxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -155,7 +155,7 @@ __kernel void m04310_sxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m04400_a0-optimized.cl b/OpenCL/m04400_a0-optimized.cl index 8af708e24..0ed7e1458 100644 --- a/OpenCL/m04400_a0-optimized.cl +++ b/OpenCL/m04400_a0-optimized.cl @@ -42,7 +42,7 @@ __kernel void m04400_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -352,7 +352,7 @@ __kernel void m04400_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m04400_a0-pure.cl b/OpenCL/m04400_a0-pure.cl index b62bb48a4..f3cb5da81 100644 --- a/OpenCL/m04400_a0-pure.cl +++ b/OpenCL/m04400_a0-pure.cl @@ -44,7 +44,7 @@ __kernel void m04400_mxx (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -141,7 +141,7 @@ __kernel void m04400_sxx (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m04400_a1-optimized.cl b/OpenCL/m04400_a1-optimized.cl index bae51e157..adb498d71 100644 --- a/OpenCL/m04400_a1-optimized.cl +++ b/OpenCL/m04400_a1-optimized.cl @@ -40,7 +40,7 @@ __kernel void m04400_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -408,7 +408,7 @@ __kernel void m04400_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m04400_a1-pure.cl b/OpenCL/m04400_a1-pure.cl index 859fdda83..6173fa881 100644 --- a/OpenCL/m04400_a1-pure.cl +++ b/OpenCL/m04400_a1-pure.cl @@ -42,7 +42,7 @@ __kernel void m04400_mxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -137,7 +137,7 @@ __kernel void m04400_sxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m04400_a3-optimized.cl b/OpenCL/m04400_a3-optimized.cl index 4412794c6..f28272f9f 100644 --- a/OpenCL/m04400_a3-optimized.cl +++ b/OpenCL/m04400_a3-optimized.cl @@ -577,7 +577,7 @@ __kernel void m04400_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -647,7 +647,7 @@ __kernel void m04400_m08 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -717,7 +717,7 @@ __kernel void m04400_m16 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -787,7 +787,7 @@ __kernel void m04400_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -857,7 +857,7 @@ __kernel void m04400_s08 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -927,7 +927,7 @@ __kernel void m04400_s16 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m04400_a3-pure.cl b/OpenCL/m04400_a3-pure.cl index ca9bfc147..e09f3955f 100644 --- a/OpenCL/m04400_a3-pure.cl +++ b/OpenCL/m04400_a3-pure.cl @@ -42,7 +42,7 @@ __kernel void m04400_mxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -150,7 +150,7 @@ __kernel void m04400_sxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m04500_a0-optimized.cl b/OpenCL/m04500_a0-optimized.cl index c158e22b5..cd2fa707b 100644 --- a/OpenCL/m04500_a0-optimized.cl +++ b/OpenCL/m04500_a0-optimized.cl @@ -42,7 +42,7 @@ __kernel void m04500_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -379,7 +379,7 @@ __kernel void m04500_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m04500_a0-pure.cl b/OpenCL/m04500_a0-pure.cl index 0d8fc274c..08223874d 100644 --- a/OpenCL/m04500_a0-pure.cl +++ b/OpenCL/m04500_a0-pure.cl @@ -43,7 +43,7 @@ __kernel void m04500_mxx (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -140,7 +140,7 @@ __kernel void m04500_sxx (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m04500_a1-optimized.cl b/OpenCL/m04500_a1-optimized.cl index 1ce91bbb3..2be6d324a 100644 --- a/OpenCL/m04500_a1-optimized.cl +++ b/OpenCL/m04500_a1-optimized.cl @@ -40,7 +40,7 @@ __kernel void m04500_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -435,7 +435,7 @@ __kernel void m04500_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m04500_a1-pure.cl b/OpenCL/m04500_a1-pure.cl index 67a113ea1..98f0dce3a 100644 --- a/OpenCL/m04500_a1-pure.cl +++ b/OpenCL/m04500_a1-pure.cl @@ -41,7 +41,7 @@ __kernel void m04500_mxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -136,7 +136,7 @@ __kernel void m04500_sxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m04500_a3-optimized.cl b/OpenCL/m04500_a3-optimized.cl index bc304ad01..29e347f8c 100644 --- a/OpenCL/m04500_a3-optimized.cl +++ b/OpenCL/m04500_a3-optimized.cl @@ -637,7 +637,7 @@ __kernel void m04500_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -707,7 +707,7 @@ __kernel void m04500_m08 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -777,7 +777,7 @@ __kernel void m04500_m16 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -847,7 +847,7 @@ __kernel void m04500_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -917,7 +917,7 @@ __kernel void m04500_s08 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -987,7 +987,7 @@ __kernel void m04500_s16 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m04500_a3-pure.cl b/OpenCL/m04500_a3-pure.cl index f07c447d3..79d902a08 100644 --- a/OpenCL/m04500_a3-pure.cl +++ b/OpenCL/m04500_a3-pure.cl @@ -41,7 +41,7 @@ __kernel void m04500_mxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -149,7 +149,7 @@ __kernel void m04500_sxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m04520_a0-optimized.cl b/OpenCL/m04520_a0-optimized.cl index fe8833d58..8949536d6 100644 --- a/OpenCL/m04520_a0-optimized.cl +++ b/OpenCL/m04520_a0-optimized.cl @@ -42,7 +42,7 @@ __kernel void m04520_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -610,7 +610,7 @@ __kernel void m04520_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m04520_a0-pure.cl b/OpenCL/m04520_a0-pure.cl index 0375fbd94..71a1f0d71 100644 --- a/OpenCL/m04520_a0-pure.cl +++ b/OpenCL/m04520_a0-pure.cl @@ -43,7 +43,7 @@ __kernel void m04520_mxx (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -155,7 +155,7 @@ __kernel void m04520_sxx (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m04520_a1-optimized.cl b/OpenCL/m04520_a1-optimized.cl index b7db13257..7309a019d 100644 --- a/OpenCL/m04520_a1-optimized.cl +++ b/OpenCL/m04520_a1-optimized.cl @@ -40,7 +40,7 @@ __kernel void m04520_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -666,7 +666,7 @@ __kernel void m04520_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m04520_a1-pure.cl b/OpenCL/m04520_a1-pure.cl index 2d22455d2..4a5438276 100644 --- a/OpenCL/m04520_a1-pure.cl +++ b/OpenCL/m04520_a1-pure.cl @@ -41,7 +41,7 @@ __kernel void m04520_mxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -151,7 +151,7 @@ __kernel void m04520_sxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m04520_a3-optimized.cl b/OpenCL/m04520_a3-optimized.cl index edbdaee78..9e5d79bfe 100644 --- a/OpenCL/m04520_a3-optimized.cl +++ b/OpenCL/m04520_a3-optimized.cl @@ -1090,7 +1090,7 @@ __kernel void m04520_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -1160,7 +1160,7 @@ __kernel void m04520_m08 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -1230,7 +1230,7 @@ __kernel void m04520_m16 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -1300,7 +1300,7 @@ __kernel void m04520_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -1370,7 +1370,7 @@ __kernel void m04520_s08 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -1440,7 +1440,7 @@ __kernel void m04520_s16 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m04520_a3-pure.cl b/OpenCL/m04520_a3-pure.cl index 8e324b4c5..f141a441b 100644 --- a/OpenCL/m04520_a3-pure.cl +++ b/OpenCL/m04520_a3-pure.cl @@ -41,7 +41,7 @@ __kernel void m04520_mxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -166,7 +166,7 @@ __kernel void m04520_sxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m04700_a0-optimized.cl b/OpenCL/m04700_a0-optimized.cl index 38d87fe58..537ecc6f1 100644 --- a/OpenCL/m04700_a0-optimized.cl +++ b/OpenCL/m04700_a0-optimized.cl @@ -42,7 +42,7 @@ __kernel void m04700_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -335,7 +335,7 @@ __kernel void m04700_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m04700_a0-pure.cl b/OpenCL/m04700_a0-pure.cl index 9c785a5fc..140f936c6 100644 --- a/OpenCL/m04700_a0-pure.cl +++ b/OpenCL/m04700_a0-pure.cl @@ -44,7 +44,7 @@ __kernel void m04700_mxx (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -136,7 +136,7 @@ __kernel void m04700_sxx (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m04700_a1-optimized.cl b/OpenCL/m04700_a1-optimized.cl index d64fa0ac5..537e9f932 100644 --- a/OpenCL/m04700_a1-optimized.cl +++ b/OpenCL/m04700_a1-optimized.cl @@ -40,7 +40,7 @@ __kernel void m04700_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -388,7 +388,7 @@ __kernel void m04700_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m04700_a1-pure.cl b/OpenCL/m04700_a1-pure.cl index 43be33eab..42a19f101 100644 --- a/OpenCL/m04700_a1-pure.cl +++ b/OpenCL/m04700_a1-pure.cl @@ -42,7 +42,7 @@ __kernel void m04700_mxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -132,7 +132,7 @@ __kernel void m04700_sxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m04700_a3-optimized.cl b/OpenCL/m04700_a3-optimized.cl index 8cb962f40..030cf1697 100644 --- a/OpenCL/m04700_a3-optimized.cl +++ b/OpenCL/m04700_a3-optimized.cl @@ -577,7 +577,7 @@ __kernel void m04700_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -647,7 +647,7 @@ __kernel void m04700_m08 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -717,7 +717,7 @@ __kernel void m04700_m16 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -787,7 +787,7 @@ __kernel void m04700_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -857,7 +857,7 @@ __kernel void m04700_s08 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -927,7 +927,7 @@ __kernel void m04700_s16 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m04700_a3-pure.cl b/OpenCL/m04700_a3-pure.cl index b4c09f550..dc335fd91 100644 --- a/OpenCL/m04700_a3-pure.cl +++ b/OpenCL/m04700_a3-pure.cl @@ -42,7 +42,7 @@ __kernel void m04700_mxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -145,7 +145,7 @@ __kernel void m04700_sxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m05300_a0-optimized.cl b/OpenCL/m05300_a0-optimized.cl index 868b6667c..843c03863 100644 --- a/OpenCL/m05300_a0-optimized.cl +++ b/OpenCL/m05300_a0-optimized.cl @@ -116,14 +116,14 @@ __kernel void m05300_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 s_nr_buf[16]; - for (u32 i = lid; i < 16; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 16; i += lsz) { s_nr_buf[i] = ikepsk_bufs[digests_offset].nr_buf[i]; } __local u32 s_msg_buf[128]; - for (u32 i = lid; i < 128; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 128; i += lsz) { s_msg_buf[i] = ikepsk_bufs[digests_offset].msg_buf[i]; } @@ -291,14 +291,14 @@ __kernel void m05300_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 s_nr_buf[16]; - for (u32 i = lid; i < 16; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 16; i += lsz) { s_nr_buf[i] = ikepsk_bufs[digests_offset].nr_buf[i]; } __local u32 s_msg_buf[128]; - for (u32 i = lid; i < 128; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 128; i += lsz) { s_msg_buf[i] = ikepsk_bufs[digests_offset].msg_buf[i]; } diff --git a/OpenCL/m05300_a1-optimized.cl b/OpenCL/m05300_a1-optimized.cl index c8e4f0664..919dc3ef3 100644 --- a/OpenCL/m05300_a1-optimized.cl +++ b/OpenCL/m05300_a1-optimized.cl @@ -114,14 +114,14 @@ __kernel void m05300_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_nr_buf[16]; - for (u32 i = lid; i < 16; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 16; i += lsz) { s_nr_buf[i] = ikepsk_bufs[digests_offset].nr_buf[i]; } __local u32 s_msg_buf[128]; - for (u32 i = lid; i < 128; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 128; i += lsz) { s_msg_buf[i] = ikepsk_bufs[digests_offset].msg_buf[i]; } @@ -349,14 +349,14 @@ __kernel void m05300_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_nr_buf[16]; - for (u32 i = lid; i < 16; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 16; i += lsz) { s_nr_buf[i] = ikepsk_bufs[digests_offset].nr_buf[i]; } __local u32 s_msg_buf[128]; - for (u32 i = lid; i < 128; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 128; i += lsz) { s_msg_buf[i] = ikepsk_bufs[digests_offset].msg_buf[i]; } diff --git a/OpenCL/m05300_a3-optimized.cl b/OpenCL/m05300_a3-optimized.cl index 3a92fe659..12e42572b 100644 --- a/OpenCL/m05300_a3-optimized.cl +++ b/OpenCL/m05300_a3-optimized.cl @@ -420,14 +420,14 @@ __kernel void m05300_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_nr_buf[16]; - for (u32 i = lid; i < 16; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 16; i += lsz) { s_nr_buf[i] = ikepsk_bufs[digests_offset].nr_buf[i]; } __local u32 s_msg_buf[128]; - for (u32 i = lid; i < 128; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 128; i += lsz) { s_msg_buf[i] = ikepsk_bufs[digests_offset].msg_buf[i]; } @@ -493,14 +493,14 @@ __kernel void m05300_m08 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_nr_buf[16]; - for (u32 i = lid; i < 16; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 16; i += lsz) { s_nr_buf[i] = ikepsk_bufs[digests_offset].nr_buf[i]; } __local u32 s_msg_buf[128]; - for (u32 i = lid; i < 128; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 128; i += lsz) { s_msg_buf[i] = ikepsk_bufs[digests_offset].msg_buf[i]; } @@ -566,14 +566,14 @@ __kernel void m05300_m16 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_nr_buf[16]; - for (u32 i = lid; i < 16; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 16; i += lsz) { s_nr_buf[i] = ikepsk_bufs[digests_offset].nr_buf[i]; } __local u32 s_msg_buf[128]; - for (u32 i = lid; i < 128; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 128; i += lsz) { s_msg_buf[i] = ikepsk_bufs[digests_offset].msg_buf[i]; } @@ -639,14 +639,14 @@ __kernel void m05300_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_nr_buf[16]; - for (u32 i = lid; i < 16; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 16; i += lsz) { s_nr_buf[i] = ikepsk_bufs[digests_offset].nr_buf[i]; } __local u32 s_msg_buf[128]; - for (u32 i = lid; i < 128; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 128; i += lsz) { s_msg_buf[i] = ikepsk_bufs[digests_offset].msg_buf[i]; } @@ -712,14 +712,14 @@ __kernel void m05300_s08 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_nr_buf[16]; - for (u32 i = lid; i < 16; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 16; i += lsz) { s_nr_buf[i] = ikepsk_bufs[digests_offset].nr_buf[i]; } __local u32 s_msg_buf[128]; - for (u32 i = lid; i < 128; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 128; i += lsz) { s_msg_buf[i] = ikepsk_bufs[digests_offset].msg_buf[i]; } @@ -785,14 +785,14 @@ __kernel void m05300_s16 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_nr_buf[16]; - for (u32 i = lid; i < 16; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 16; i += lsz) { s_nr_buf[i] = ikepsk_bufs[digests_offset].nr_buf[i]; } __local u32 s_msg_buf[128]; - for (u32 i = lid; i < 128; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 128; i += lsz) { s_msg_buf[i] = ikepsk_bufs[digests_offset].msg_buf[i]; } diff --git a/OpenCL/m05400_a0-optimized.cl b/OpenCL/m05400_a0-optimized.cl index 515daaf65..7ed188708 100644 --- a/OpenCL/m05400_a0-optimized.cl +++ b/OpenCL/m05400_a0-optimized.cl @@ -120,14 +120,14 @@ __kernel void m05400_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 s_nr_buf[16]; - for (u32 i = lid; i < 16; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 16; i += lsz) { s_nr_buf[i] = swap32_S (ikepsk_bufs[digests_offset].nr_buf[i]); } __local u32 s_msg_buf[128]; - for (u32 i = lid; i < 128; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 128; i += lsz) { s_msg_buf[i] = swap32_S (ikepsk_bufs[digests_offset].msg_buf[i]); } @@ -304,14 +304,14 @@ __kernel void m05400_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 s_nr_buf[16]; - for (u32 i = lid; i < 16; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 16; i += lsz) { s_nr_buf[i] = swap32_S (ikepsk_bufs[digests_offset].nr_buf[i]); } __local u32 s_msg_buf[128]; - for (u32 i = lid; i < 128; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 128; i += lsz) { s_msg_buf[i] = swap32_S (ikepsk_bufs[digests_offset].msg_buf[i]); } diff --git a/OpenCL/m05400_a1-optimized.cl b/OpenCL/m05400_a1-optimized.cl index 005098bed..1e279409f 100644 --- a/OpenCL/m05400_a1-optimized.cl +++ b/OpenCL/m05400_a1-optimized.cl @@ -118,14 +118,14 @@ __kernel void m05400_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_nr_buf[16]; - for (u32 i = lid; i < 16; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 16; i += lsz) { s_nr_buf[i] = swap32_S (ikepsk_bufs[digests_offset].nr_buf[i]); } __local u32 s_msg_buf[128]; - for (u32 i = lid; i < 128; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 128; i += lsz) { s_msg_buf[i] = swap32_S (ikepsk_bufs[digests_offset].msg_buf[i]); } @@ -370,14 +370,14 @@ __kernel void m05400_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_nr_buf[16]; - for (u32 i = lid; i < 16; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 16; i += lsz) { s_nr_buf[i] = swap32_S (ikepsk_bufs[digests_offset].nr_buf[i]); } __local u32 s_msg_buf[128]; - for (u32 i = lid; i < 128; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 128; i += lsz) { s_msg_buf[i] = swap32_S (ikepsk_bufs[digests_offset].msg_buf[i]); } diff --git a/OpenCL/m05400_a3-optimized.cl b/OpenCL/m05400_a3-optimized.cl index d9fc86a5e..a246e6013 100644 --- a/OpenCL/m05400_a3-optimized.cl +++ b/OpenCL/m05400_a3-optimized.cl @@ -424,14 +424,14 @@ __kernel void m05400_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_nr_buf[16]; - for (u32 i = lid; i < 16; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 16; i += lsz) { s_nr_buf[i] = swap32_S (ikepsk_bufs[digests_offset].nr_buf[i]); } __local u32 s_msg_buf[128]; - for (u32 i = lid; i < 128; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 128; i += lsz) { s_msg_buf[i] = swap32_S (ikepsk_bufs[digests_offset].msg_buf[i]); } @@ -497,14 +497,14 @@ __kernel void m05400_m08 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_nr_buf[16]; - for (u32 i = lid; i < 16; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 16; i += lsz) { s_nr_buf[i] = swap32_S (ikepsk_bufs[digests_offset].nr_buf[i]); } __local u32 s_msg_buf[128]; - for (u32 i = lid; i < 128; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 128; i += lsz) { s_msg_buf[i] = swap32_S (ikepsk_bufs[digests_offset].msg_buf[i]); } @@ -570,14 +570,14 @@ __kernel void m05400_m16 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_nr_buf[16]; - for (u32 i = lid; i < 16; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 16; i += lsz) { s_nr_buf[i] = swap32_S (ikepsk_bufs[digests_offset].nr_buf[i]); } __local u32 s_msg_buf[128]; - for (u32 i = lid; i < 128; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 128; i += lsz) { s_msg_buf[i] = swap32_S (ikepsk_bufs[digests_offset].msg_buf[i]); } @@ -643,14 +643,14 @@ __kernel void m05400_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_nr_buf[16]; - for (u32 i = lid; i < 16; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 16; i += lsz) { s_nr_buf[i] = swap32_S (ikepsk_bufs[digests_offset].nr_buf[i]); } __local u32 s_msg_buf[128]; - for (u32 i = lid; i < 128; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 128; i += lsz) { s_msg_buf[i] = swap32_S (ikepsk_bufs[digests_offset].msg_buf[i]); } @@ -716,14 +716,14 @@ __kernel void m05400_s08 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_nr_buf[16]; - for (u32 i = lid; i < 16; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 16; i += lsz) { s_nr_buf[i] = swap32_S (ikepsk_bufs[digests_offset].nr_buf[i]); } __local u32 s_msg_buf[128]; - for (u32 i = lid; i < 128; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 128; i += lsz) { s_msg_buf[i] = swap32_S (ikepsk_bufs[digests_offset].msg_buf[i]); } @@ -789,14 +789,14 @@ __kernel void m05400_s16 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_nr_buf[16]; - for (u32 i = lid; i < 16; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 16; i += lsz) { s_nr_buf[i] = swap32_S (ikepsk_bufs[digests_offset].nr_buf[i]); } __local u32 s_msg_buf[128]; - for (u32 i = lid; i < 128; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 128; i += lsz) { s_msg_buf[i] = swap32_S (ikepsk_bufs[digests_offset].msg_buf[i]); } diff --git a/OpenCL/m05500_a0-optimized.cl b/OpenCL/m05500_a0-optimized.cl index 2d27f8a0e..290f77922 100644 --- a/OpenCL/m05500_a0-optimized.cl +++ b/OpenCL/m05500_a0-optimized.cl @@ -505,7 +505,7 @@ __kernel void m05500_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 s_SPtrans[8][64]; __local u32 s_skb[8][64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_SPtrans[0][i] = c_SPtrans[0][i]; s_SPtrans[1][i] = c_SPtrans[1][i]; @@ -718,7 +718,7 @@ __kernel void m05500_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 s_SPtrans[8][64]; __local u32 s_skb[8][64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_SPtrans[0][i] = c_SPtrans[0][i]; s_SPtrans[1][i] = c_SPtrans[1][i]; diff --git a/OpenCL/m05500_a0-pure.cl b/OpenCL/m05500_a0-pure.cl index 543640697..6281a9daf 100644 --- a/OpenCL/m05500_a0-pure.cl +++ b/OpenCL/m05500_a0-pure.cl @@ -506,7 +506,7 @@ __kernel void m05500_mxx (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 s_SPtrans[8][64]; __local u32 s_skb[8][64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_SPtrans[0][i] = c_SPtrans[0][i]; s_SPtrans[1][i] = c_SPtrans[1][i]; @@ -630,7 +630,7 @@ __kernel void m05500_sxx (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 s_SPtrans[8][64]; __local u32 s_skb[8][64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_SPtrans[0][i] = c_SPtrans[0][i]; s_SPtrans[1][i] = c_SPtrans[1][i]; diff --git a/OpenCL/m05500_a1-optimized.cl b/OpenCL/m05500_a1-optimized.cl index 78371fa2d..b3b86f730 100644 --- a/OpenCL/m05500_a1-optimized.cl +++ b/OpenCL/m05500_a1-optimized.cl @@ -503,7 +503,7 @@ __kernel void m05500_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_SPtrans[8][64]; __local u32 s_skb[8][64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_SPtrans[0][i] = c_SPtrans[0][i]; s_SPtrans[1][i] = c_SPtrans[1][i]; @@ -769,7 +769,7 @@ __kernel void m05500_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_SPtrans[8][64]; __local u32 s_skb[8][64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_SPtrans[0][i] = c_SPtrans[0][i]; s_SPtrans[1][i] = c_SPtrans[1][i]; diff --git a/OpenCL/m05500_a1-pure.cl b/OpenCL/m05500_a1-pure.cl index 2d15b00b0..3f33eee96 100644 --- a/OpenCL/m05500_a1-pure.cl +++ b/OpenCL/m05500_a1-pure.cl @@ -504,7 +504,7 @@ __kernel void m05500_mxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_SPtrans[8][64]; __local u32 s_skb[8][64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_SPtrans[0][i] = c_SPtrans[0][i]; s_SPtrans[1][i] = c_SPtrans[1][i]; @@ -626,7 +626,7 @@ __kernel void m05500_sxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_SPtrans[8][64]; __local u32 s_skb[8][64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_SPtrans[0][i] = c_SPtrans[0][i]; s_SPtrans[1][i] = c_SPtrans[1][i]; diff --git a/OpenCL/m05500_a3-optimized.cl b/OpenCL/m05500_a3-optimized.cl index ea35775d5..9b3f70b36 100644 --- a/OpenCL/m05500_a3-optimized.cl +++ b/OpenCL/m05500_a3-optimized.cl @@ -836,7 +836,7 @@ __kernel void m05500_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_SPtrans[8][64]; __local u32 s_skb[8][64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_SPtrans[0][i] = c_SPtrans[0][i]; s_SPtrans[1][i] = c_SPtrans[1][i]; @@ -910,7 +910,7 @@ __kernel void m05500_m08 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_SPtrans[8][64]; __local u32 s_skb[8][64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_SPtrans[0][i] = c_SPtrans[0][i]; s_SPtrans[1][i] = c_SPtrans[1][i]; @@ -988,7 +988,7 @@ __kernel void m05500_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_SPtrans[8][64]; __local u32 s_skb[8][64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_SPtrans[0][i] = c_SPtrans[0][i]; s_SPtrans[1][i] = c_SPtrans[1][i]; @@ -1062,7 +1062,7 @@ __kernel void m05500_s08 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_SPtrans[8][64]; __local u32 s_skb[8][64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_SPtrans[0][i] = c_SPtrans[0][i]; s_SPtrans[1][i] = c_SPtrans[1][i]; diff --git a/OpenCL/m05500_a3-pure.cl b/OpenCL/m05500_a3-pure.cl index cae78dafc..e4152b998 100644 --- a/OpenCL/m05500_a3-pure.cl +++ b/OpenCL/m05500_a3-pure.cl @@ -504,7 +504,7 @@ __kernel void m05500_mxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_SPtrans[8][64]; __local u32 s_skb[8][64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_SPtrans[0][i] = c_SPtrans[0][i]; s_SPtrans[1][i] = c_SPtrans[1][i]; @@ -639,7 +639,7 @@ __kernel void m05500_sxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_SPtrans[8][64]; __local u32 s_skb[8][64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_SPtrans[0][i] = c_SPtrans[0][i]; s_SPtrans[1][i] = c_SPtrans[1][i]; diff --git a/OpenCL/m05600_a0-optimized.cl b/OpenCL/m05600_a0-optimized.cl index 929292fc3..00b3ed2b9 100644 --- a/OpenCL/m05600_a0-optimized.cl +++ b/OpenCL/m05600_a0-optimized.cl @@ -117,14 +117,14 @@ __kernel void m05600_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 s_userdomain_buf[64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_userdomain_buf[i] = netntlm_bufs[digests_offset].userdomain_buf[i]; } __local u32 s_chall_buf[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_chall_buf[i] = netntlm_bufs[digests_offset].chall_buf[i]; } @@ -356,14 +356,14 @@ __kernel void m05600_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 s_userdomain_buf[64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_userdomain_buf[i] = netntlm_bufs[digests_offset].userdomain_buf[i]; } __local u32 s_chall_buf[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_chall_buf[i] = netntlm_bufs[digests_offset].chall_buf[i]; } diff --git a/OpenCL/m05600_a1-optimized.cl b/OpenCL/m05600_a1-optimized.cl index 701cefb4e..66ffaf75e 100644 --- a/OpenCL/m05600_a1-optimized.cl +++ b/OpenCL/m05600_a1-optimized.cl @@ -115,14 +115,14 @@ __kernel void m05600_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_userdomain_buf[64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_userdomain_buf[i] = netntlm_bufs[digests_offset].userdomain_buf[i]; } __local u32 s_chall_buf[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_chall_buf[i] = netntlm_bufs[digests_offset].chall_buf[i]; } @@ -412,14 +412,14 @@ __kernel void m05600_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_userdomain_buf[64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_userdomain_buf[i] = netntlm_bufs[digests_offset].userdomain_buf[i]; } __local u32 s_chall_buf[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_chall_buf[i] = netntlm_bufs[digests_offset].chall_buf[i]; } diff --git a/OpenCL/m05600_a3-optimized.cl b/OpenCL/m05600_a3-optimized.cl index 385ff551f..cd5b9a830 100644 --- a/OpenCL/m05600_a3-optimized.cl +++ b/OpenCL/m05600_a3-optimized.cl @@ -539,14 +539,14 @@ __kernel void m05600_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_userdomain_buf[64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_userdomain_buf[i] = netntlm_bufs[digests_offset].userdomain_buf[i]; } __local u32 s_chall_buf[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_chall_buf[i] = netntlm_bufs[digests_offset].chall_buf[i]; } @@ -612,14 +612,14 @@ __kernel void m05600_m08 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_userdomain_buf[64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_userdomain_buf[i] = netntlm_bufs[digests_offset].userdomain_buf[i]; } __local u32 s_chall_buf[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_chall_buf[i] = netntlm_bufs[digests_offset].chall_buf[i]; } @@ -685,14 +685,14 @@ __kernel void m05600_m16 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_userdomain_buf[64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_userdomain_buf[i] = netntlm_bufs[digests_offset].userdomain_buf[i]; } __local u32 s_chall_buf[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_chall_buf[i] = netntlm_bufs[digests_offset].chall_buf[i]; } @@ -758,14 +758,14 @@ __kernel void m05600_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_userdomain_buf[64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_userdomain_buf[i] = netntlm_bufs[digests_offset].userdomain_buf[i]; } __local u32 s_chall_buf[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_chall_buf[i] = netntlm_bufs[digests_offset].chall_buf[i]; } @@ -831,14 +831,14 @@ __kernel void m05600_s08 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_userdomain_buf[64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_userdomain_buf[i] = netntlm_bufs[digests_offset].userdomain_buf[i]; } __local u32 s_chall_buf[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_chall_buf[i] = netntlm_bufs[digests_offset].chall_buf[i]; } @@ -904,14 +904,14 @@ __kernel void m05600_s16 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_userdomain_buf[64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_userdomain_buf[i] = netntlm_bufs[digests_offset].userdomain_buf[i]; } __local u32 s_chall_buf[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_chall_buf[i] = netntlm_bufs[digests_offset].chall_buf[i]; } diff --git a/OpenCL/m05800-optimized.cl b/OpenCL/m05800-optimized.cl index 67dd05180..494e0b62b 100644 --- a/OpenCL/m05800-optimized.cl +++ b/OpenCL/m05800-optimized.cl @@ -2303,7 +2303,7 @@ __kernel void m05800_loop (__global pw_t *pws, __global const kernel_rule_t *rul __local u32 s_pc_dec[1024]; __local u32 s_pc_len[1024]; - for (u32 i = lid; i < 1024; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 1024; i += lsz) { s_pc_dec[i] = c_pc_dec[i]; s_pc_len[i] = c_pc_len[i]; diff --git a/OpenCL/m05800-pure.cl b/OpenCL/m05800-pure.cl index 24610fc49..2d5f5c107 100644 --- a/OpenCL/m05800-pure.cl +++ b/OpenCL/m05800-pure.cl @@ -2244,7 +2244,7 @@ __kernel void m05800_loop (__global pw_t *pws, __global const kernel_rule_t *rul __local u32 s_pc_dec[1024]; __local u32 s_pc_len[1024]; - for (u32 i = lid; i < 1024; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 1024; i += lsz) { s_pc_dec[i] = c_pc_dec[i]; s_pc_len[i] = c_pc_len[i]; diff --git a/OpenCL/m06100_a0-optimized.cl b/OpenCL/m06100_a0-optimized.cl index 2b2dc955a..be3fe2ee0 100644 --- a/OpenCL/m06100_a0-optimized.cl +++ b/OpenCL/m06100_a0-optimized.cl @@ -39,7 +39,7 @@ __kernel void m06100_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_Ch[0][i] = Ch[0][i]; s_Ch[1][i] = Ch[1][i]; @@ -179,7 +179,7 @@ __kernel void m06100_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_Ch[0][i] = Ch[0][i]; s_Ch[1][i] = Ch[1][i]; diff --git a/OpenCL/m06100_a0-pure.cl b/OpenCL/m06100_a0-pure.cl index e8d0178c9..3bab742bc 100644 --- a/OpenCL/m06100_a0-pure.cl +++ b/OpenCL/m06100_a0-pure.cl @@ -34,7 +34,7 @@ __kernel void m06100_mxx (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_Ch[0][i] = Ch[0][i]; s_Ch[1][i] = Ch[1][i]; @@ -118,7 +118,7 @@ __kernel void m06100_sxx (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_Ch[0][i] = Ch[0][i]; s_Ch[1][i] = Ch[1][i]; diff --git a/OpenCL/m06100_a1-optimized.cl b/OpenCL/m06100_a1-optimized.cl index a001cf469..1325ac1b3 100644 --- a/OpenCL/m06100_a1-optimized.cl +++ b/OpenCL/m06100_a1-optimized.cl @@ -37,7 +37,7 @@ __kernel void m06100_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_Ch[0][i] = Ch[0][i]; s_Ch[1][i] = Ch[1][i]; @@ -235,7 +235,7 @@ __kernel void m06100_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_Ch[0][i] = Ch[0][i]; s_Ch[1][i] = Ch[1][i]; diff --git a/OpenCL/m06100_a1-pure.cl b/OpenCL/m06100_a1-pure.cl index e7697a48b..b170ca66e 100644 --- a/OpenCL/m06100_a1-pure.cl +++ b/OpenCL/m06100_a1-pure.cl @@ -32,7 +32,7 @@ __kernel void m06100_mxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_Ch[0][i] = Ch[0][i]; s_Ch[1][i] = Ch[1][i]; @@ -114,7 +114,7 @@ __kernel void m06100_sxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_Ch[0][i] = Ch[0][i]; s_Ch[1][i] = Ch[1][i]; diff --git a/OpenCL/m06100_a3-optimized.cl b/OpenCL/m06100_a3-optimized.cl index c1a6ba079..11fa4c7fe 100644 --- a/OpenCL/m06100_a3-optimized.cl +++ b/OpenCL/m06100_a3-optimized.cl @@ -187,7 +187,7 @@ __kernel void m06100_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_Ch[0][i] = Ch[0][i]; s_Ch[1][i] = Ch[1][i]; @@ -279,7 +279,7 @@ __kernel void m06100_m08 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_Ch[0][i] = Ch[0][i]; s_Ch[1][i] = Ch[1][i]; @@ -371,7 +371,7 @@ __kernel void m06100_m16 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_Ch[0][i] = Ch[0][i]; s_Ch[1][i] = Ch[1][i]; @@ -463,7 +463,7 @@ __kernel void m06100_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_Ch[0][i] = Ch[0][i]; s_Ch[1][i] = Ch[1][i]; @@ -555,7 +555,7 @@ __kernel void m06100_s08 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_Ch[0][i] = Ch[0][i]; s_Ch[1][i] = Ch[1][i]; @@ -647,7 +647,7 @@ __kernel void m06100_s16 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_Ch[0][i] = Ch[0][i]; s_Ch[1][i] = Ch[1][i]; diff --git a/OpenCL/m06100_a3-pure.cl b/OpenCL/m06100_a3-pure.cl index 6842cf061..564d39d5e 100644 --- a/OpenCL/m06100_a3-pure.cl +++ b/OpenCL/m06100_a3-pure.cl @@ -32,7 +32,7 @@ __kernel void m06100_mxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_Ch[0][i] = Ch[0][i]; s_Ch[1][i] = Ch[1][i]; @@ -127,7 +127,7 @@ __kernel void m06100_sxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_Ch[0][i] = Ch[0][i]; s_Ch[1][i] = Ch[1][i]; diff --git a/OpenCL/m06211-pure.cl b/OpenCL/m06211-pure.cl index 7b957c432..dd6c16566 100644 --- a/OpenCL/m06211-pure.cl +++ b/OpenCL/m06211-pure.cl @@ -296,7 +296,7 @@ __kernel void m06211_comp (__global pw_t *pws, __global const kernel_rule_t *rul __local u32 s_te3[256]; __local u32 s_te4[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_td0[i] = td0[i]; s_td1[i] = td1[i]; diff --git a/OpenCL/m06212-pure.cl b/OpenCL/m06212-pure.cl index cfd2c1a6d..a6827f4d8 100644 --- a/OpenCL/m06212-pure.cl +++ b/OpenCL/m06212-pure.cl @@ -296,7 +296,7 @@ __kernel void m06212_comp (__global pw_t *pws, __global const kernel_rule_t *rul __local u32 s_te3[256]; __local u32 s_te4[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_td0[i] = td0[i]; s_td1[i] = td1[i]; diff --git a/OpenCL/m06213-pure.cl b/OpenCL/m06213-pure.cl index 55d8ba384..68a8f672c 100644 --- a/OpenCL/m06213-pure.cl +++ b/OpenCL/m06213-pure.cl @@ -296,7 +296,7 @@ __kernel void m06213_comp (__global pw_t *pws, __global const kernel_rule_t *rul __local u32 s_te3[256]; __local u32 s_te4[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_td0[i] = td0[i]; s_td1[i] = td1[i]; diff --git a/OpenCL/m06221-pure.cl b/OpenCL/m06221-pure.cl index a1add67c3..790b35217 100644 --- a/OpenCL/m06221-pure.cl +++ b/OpenCL/m06221-pure.cl @@ -440,7 +440,7 @@ __kernel void m06221_comp (__global pw_t *pws, __global const kernel_rule_t *rul __local u32 s_te3[256]; __local u32 s_te4[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_td0[i] = td0[i]; s_td1[i] = td1[i]; diff --git a/OpenCL/m06222-pure.cl b/OpenCL/m06222-pure.cl index 1f68befdd..9219b6f5c 100644 --- a/OpenCL/m06222-pure.cl +++ b/OpenCL/m06222-pure.cl @@ -440,7 +440,7 @@ __kernel void m06222_comp (__global pw_t *pws, __global const kernel_rule_t *rul __local u32 s_te3[256]; __local u32 s_te4[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_td0[i] = td0[i]; s_td1[i] = td1[i]; diff --git a/OpenCL/m06223-pure.cl b/OpenCL/m06223-pure.cl index 1e324490d..30e3e70c9 100644 --- a/OpenCL/m06223-pure.cl +++ b/OpenCL/m06223-pure.cl @@ -440,7 +440,7 @@ __kernel void m06223_comp (__global pw_t *pws, __global const kernel_rule_t *rul __local u32 s_te3[256]; __local u32 s_te4[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_td0[i] = td0[i]; s_td1[i] = td1[i]; diff --git a/OpenCL/m06231-pure.cl b/OpenCL/m06231-pure.cl index 5ad25eba5..06a82204f 100644 --- a/OpenCL/m06231-pure.cl +++ b/OpenCL/m06231-pure.cl @@ -160,7 +160,7 @@ __kernel void m06231_init (__global pw_t *pws, __global const kernel_rule_t *rul __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_Ch[0][i] = Ch[0][i]; s_Ch[1][i] = Ch[1][i]; @@ -368,7 +368,7 @@ __kernel void m06231_loop (__global pw_t *pws, __global const kernel_rule_t *rul __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_Ch[0][i] = Ch[0][i]; s_Ch[1][i] = Ch[1][i]; @@ -580,7 +580,7 @@ __kernel void m06231_comp (__global pw_t *pws, __global const kernel_rule_t *rul SHM_TYPE u32 s_te3[256]; SHM_TYPE u32 s_te4[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_td0[i] = td0[i]; s_td1[i] = td1[i]; diff --git a/OpenCL/m06232-pure.cl b/OpenCL/m06232-pure.cl index dc5acf87a..8af85c314 100644 --- a/OpenCL/m06232-pure.cl +++ b/OpenCL/m06232-pure.cl @@ -160,7 +160,7 @@ __kernel void m06232_init (__global pw_t *pws, __global const kernel_rule_t *rul __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_Ch[0][i] = Ch[0][i]; s_Ch[1][i] = Ch[1][i]; @@ -368,7 +368,7 @@ __kernel void m06232_loop (__global pw_t *pws, __global const kernel_rule_t *rul __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_Ch[0][i] = Ch[0][i]; s_Ch[1][i] = Ch[1][i]; @@ -580,7 +580,7 @@ __kernel void m06232_comp (__global pw_t *pws, __global const kernel_rule_t *rul SHM_TYPE u32 s_te3[256]; SHM_TYPE u32 s_te4[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_td0[i] = td0[i]; s_td1[i] = td1[i]; diff --git a/OpenCL/m06233-pure.cl b/OpenCL/m06233-pure.cl index 4aafda87d..37412dca2 100644 --- a/OpenCL/m06233-pure.cl +++ b/OpenCL/m06233-pure.cl @@ -160,7 +160,7 @@ __kernel void m06233_init (__global pw_t *pws, __global const kernel_rule_t *rul __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_Ch[0][i] = Ch[0][i]; s_Ch[1][i] = Ch[1][i]; @@ -368,7 +368,7 @@ __kernel void m06233_loop (__global pw_t *pws, __global const kernel_rule_t *rul __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_Ch[0][i] = Ch[0][i]; s_Ch[1][i] = Ch[1][i]; @@ -580,7 +580,7 @@ __kernel void m06233_comp (__global pw_t *pws, __global const kernel_rule_t *rul SHM_TYPE u32 s_te3[256]; SHM_TYPE u32 s_te4[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_td0[i] = td0[i]; s_td1[i] = td1[i]; diff --git a/OpenCL/m06600-pure.cl b/OpenCL/m06600-pure.cl index 698775260..28eb0e356 100644 --- a/OpenCL/m06600-pure.cl +++ b/OpenCL/m06600-pure.cl @@ -242,7 +242,7 @@ __kernel void m06600_comp (__global pw_t *pws, __global const kernel_rule_t *rul __local u32 s_te3[256]; __local u32 s_te4[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_td0[i] = td0[i]; s_td1[i] = td1[i]; diff --git a/OpenCL/m06800-pure.cl b/OpenCL/m06800-pure.cl index fa6220e8b..b5f7a5b58 100644 --- a/OpenCL/m06800-pure.cl +++ b/OpenCL/m06800-pure.cl @@ -273,7 +273,7 @@ __kernel void m06800_comp (__global pw_t *pws, __global const kernel_rule_t *rul __local u32 s_te3[256]; __local u32 s_te4[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_td0[i] = td0[i]; s_td1[i] = td1[i]; diff --git a/OpenCL/m06900_a0-optimized.cl b/OpenCL/m06900_a0-optimized.cl index 87f8d41c7..50636129e 100644 --- a/OpenCL/m06900_a0-optimized.cl +++ b/OpenCL/m06900_a0-optimized.cl @@ -713,7 +713,7 @@ __kernel void m06900_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 s_tables[4][256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_tables[0][i] = c_tables[0][i]; s_tables[1][i] = c_tables[1][i]; @@ -929,7 +929,7 @@ __kernel void m06900_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 s_tables[4][256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_tables[0][i] = c_tables[0][i]; s_tables[1][i] = c_tables[1][i]; diff --git a/OpenCL/m06900_a1-optimized.cl b/OpenCL/m06900_a1-optimized.cl index df9f944c6..3f0662ea8 100644 --- a/OpenCL/m06900_a1-optimized.cl +++ b/OpenCL/m06900_a1-optimized.cl @@ -711,7 +711,7 @@ __kernel void m06900_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_tables[4][256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_tables[0][i] = c_tables[0][i]; s_tables[1][i] = c_tables[1][i]; @@ -981,7 +981,7 @@ __kernel void m06900_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_tables[4][256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_tables[0][i] = c_tables[0][i]; s_tables[1][i] = c_tables[1][i]; diff --git a/OpenCL/m06900_a3-optimized.cl b/OpenCL/m06900_a3-optimized.cl index d3ad23886..c91d71e0c 100644 --- a/OpenCL/m06900_a3-optimized.cl +++ b/OpenCL/m06900_a3-optimized.cl @@ -1071,7 +1071,7 @@ __kernel void m06900_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_tables[4][256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_tables[0][i] = c_tables[0][i]; s_tables[1][i] = c_tables[1][i]; @@ -1140,7 +1140,7 @@ __kernel void m06900_m08 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_tables[4][256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_tables[0][i] = c_tables[0][i]; s_tables[1][i] = c_tables[1][i]; @@ -1213,7 +1213,7 @@ __kernel void m06900_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_tables[4][256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_tables[0][i] = c_tables[0][i]; s_tables[1][i] = c_tables[1][i]; @@ -1282,7 +1282,7 @@ __kernel void m06900_s08 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_tables[4][256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_tables[0][i] = c_tables[0][i]; s_tables[1][i] = c_tables[1][i]; diff --git a/OpenCL/m08000_a0-optimized.cl b/OpenCL/m08000_a0-optimized.cl index 9cc328933..7ddc97971 100644 --- a/OpenCL/m08000_a0-optimized.cl +++ b/OpenCL/m08000_a0-optimized.cl @@ -254,7 +254,7 @@ __kernel void m08000_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 w_s1[64]; __local u32 w_s2[64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { w_s1[i] = 0; w_s2[i] = 0; @@ -425,7 +425,7 @@ __kernel void m08000_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 w_s1[64]; __local u32 w_s2[64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { w_s1[i] = 0; w_s2[i] = 0; diff --git a/OpenCL/m08000_a1-optimized.cl b/OpenCL/m08000_a1-optimized.cl index 85386f93a..4b5701373 100644 --- a/OpenCL/m08000_a1-optimized.cl +++ b/OpenCL/m08000_a1-optimized.cl @@ -252,7 +252,7 @@ __kernel void m08000_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 w_s1[64]; __local u32 w_s2[64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { w_s1[i] = 0; w_s2[i] = 0; @@ -477,7 +477,7 @@ __kernel void m08000_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 w_s1[64]; __local u32 w_s2[64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { w_s1[i] = 0; w_s2[i] = 0; diff --git a/OpenCL/m08000_a3-optimized.cl b/OpenCL/m08000_a3-optimized.cl index d39406956..d8022a0d3 100644 --- a/OpenCL/m08000_a3-optimized.cl +++ b/OpenCL/m08000_a3-optimized.cl @@ -249,7 +249,7 @@ DECLSPEC void m08000m (__local u32 *w_s1, __local u32 *w_s2, u32 *w, const u32 p * precompute final msg blocks */ - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { w_s1[i] = 0; w_s2[i] = 0; @@ -371,7 +371,7 @@ DECLSPEC void m08000s (__local u32 *w_s1, __local u32 *w_s2, u32 *w, const u32 p * precompute final msg blocks */ - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { w_s1[i] = 0; w_s2[i] = 0; diff --git a/OpenCL/m08400_a0-optimized.cl b/OpenCL/m08400_a0-optimized.cl index 3e397259e..261936182 100644 --- a/OpenCL/m08400_a0-optimized.cl +++ b/OpenCL/m08400_a0-optimized.cl @@ -43,7 +43,7 @@ __kernel void m08400_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -297,7 +297,7 @@ __kernel void m08400_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m08400_a0-pure.cl b/OpenCL/m08400_a0-pure.cl index fe4c4ed27..6f60ce6de 100644 --- a/OpenCL/m08400_a0-pure.cl +++ b/OpenCL/m08400_a0-pure.cl @@ -43,7 +43,7 @@ __kernel void m08400_mxx (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -194,7 +194,7 @@ __kernel void m08400_sxx (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m08400_a1-optimized.cl b/OpenCL/m08400_a1-optimized.cl index 0e5678b79..a4a7e07e0 100644 --- a/OpenCL/m08400_a1-optimized.cl +++ b/OpenCL/m08400_a1-optimized.cl @@ -41,7 +41,7 @@ __kernel void m08400_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -353,7 +353,7 @@ __kernel void m08400_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m08400_a1-pure.cl b/OpenCL/m08400_a1-pure.cl index e2f073712..ee010566f 100644 --- a/OpenCL/m08400_a1-pure.cl +++ b/OpenCL/m08400_a1-pure.cl @@ -41,7 +41,7 @@ __kernel void m08400_mxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -190,7 +190,7 @@ __kernel void m08400_sxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m08400_a3-optimized.cl b/OpenCL/m08400_a3-optimized.cl index 36bce3970..5451b8158 100644 --- a/OpenCL/m08400_a3-optimized.cl +++ b/OpenCL/m08400_a3-optimized.cl @@ -473,7 +473,7 @@ __kernel void m08400_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -543,7 +543,7 @@ __kernel void m08400_m08 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -613,7 +613,7 @@ __kernel void m08400_m16 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -683,7 +683,7 @@ __kernel void m08400_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -753,7 +753,7 @@ __kernel void m08400_s08 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -823,7 +823,7 @@ __kernel void m08400_s16 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m08400_a3-pure.cl b/OpenCL/m08400_a3-pure.cl index 84cdb0183..a40a4d473 100644 --- a/OpenCL/m08400_a3-pure.cl +++ b/OpenCL/m08400_a3-pure.cl @@ -41,7 +41,7 @@ __kernel void m08400_mxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -207,7 +207,7 @@ __kernel void m08400_sxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m08500_a0-pure.cl b/OpenCL/m08500_a0-pure.cl index e08c11a61..ce0416b10 100644 --- a/OpenCL/m08500_a0-pure.cl +++ b/OpenCL/m08500_a0-pure.cl @@ -538,7 +538,7 @@ __kernel void m08500_mxx (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 s_SPtrans[8][64]; __local u32 s_skb[8][64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_SPtrans[0][i] = c_SPtrans[0][i]; s_SPtrans[1][i] = c_SPtrans[1][i]; @@ -636,7 +636,7 @@ __kernel void m08500_sxx (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 s_SPtrans[8][64]; __local u32 s_skb[8][64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_SPtrans[0][i] = c_SPtrans[0][i]; s_SPtrans[1][i] = c_SPtrans[1][i]; diff --git a/OpenCL/m08500_a1-pure.cl b/OpenCL/m08500_a1-pure.cl index a915c95b8..8ab90ae00 100644 --- a/OpenCL/m08500_a1-pure.cl +++ b/OpenCL/m08500_a1-pure.cl @@ -536,7 +536,7 @@ __kernel void m08500_mxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_SPtrans[8][64]; __local u32 s_skb[8][64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_SPtrans[0][i] = c_SPtrans[0][i]; s_SPtrans[1][i] = c_SPtrans[1][i]; @@ -692,7 +692,7 @@ __kernel void m08500_sxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_SPtrans[8][64]; __local u32 s_skb[8][64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_SPtrans[0][i] = c_SPtrans[0][i]; s_SPtrans[1][i] = c_SPtrans[1][i]; diff --git a/OpenCL/m08500_a3-pure.cl b/OpenCL/m08500_a3-pure.cl index fe56cbd16..8cba52f72 100644 --- a/OpenCL/m08500_a3-pure.cl +++ b/OpenCL/m08500_a3-pure.cl @@ -674,7 +674,7 @@ __kernel void m08500_mxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_SPtrans[8][64]; __local u32 s_skb[8][64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_SPtrans[0][i] = c_SPtrans[0][i]; s_SPtrans[1][i] = c_SPtrans[1][i]; @@ -748,7 +748,7 @@ __kernel void m08500_sxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_SPtrans[8][64]; __local u32 s_skb[8][64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_SPtrans[0][i] = c_SPtrans[0][i]; s_SPtrans[1][i] = c_SPtrans[1][i]; diff --git a/OpenCL/m08600_a0-pure.cl b/OpenCL/m08600_a0-pure.cl index a7223fa15..bfd0f96a1 100644 --- a/OpenCL/m08600_a0-pure.cl +++ b/OpenCL/m08600_a0-pure.cl @@ -245,7 +245,7 @@ __kernel void m08600_mxx (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 s_lotus_magic_table[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_lotus_magic_table[i] = lotus_magic_table[i]; } @@ -309,7 +309,7 @@ __kernel void m08600_sxx (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 s_lotus_magic_table[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_lotus_magic_table[i] = lotus_magic_table[i]; } diff --git a/OpenCL/m08600_a1-pure.cl b/OpenCL/m08600_a1-pure.cl index 38fafda34..e6139bdcf 100644 --- a/OpenCL/m08600_a1-pure.cl +++ b/OpenCL/m08600_a1-pure.cl @@ -243,7 +243,7 @@ __kernel void m08600_mxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_lotus_magic_table[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_lotus_magic_table[i] = lotus_magic_table[i]; } @@ -367,7 +367,7 @@ __kernel void m08600_sxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_lotus_magic_table[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_lotus_magic_table[i] = lotus_magic_table[i]; } diff --git a/OpenCL/m08600_a3-pure.cl b/OpenCL/m08600_a3-pure.cl index 9bc89b17f..3d6c69f71 100644 --- a/OpenCL/m08600_a3-pure.cl +++ b/OpenCL/m08600_a3-pure.cl @@ -348,7 +348,7 @@ __kernel void m08600_mxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_lotus_magic_table[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_lotus_magic_table[i] = lotus_magic_table[i]; } @@ -405,7 +405,7 @@ __kernel void m08600_sxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_lotus_magic_table[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_lotus_magic_table[i] = lotus_magic_table[i]; } diff --git a/OpenCL/m08700_a0-optimized.cl b/OpenCL/m08700_a0-optimized.cl index bbc9501b6..61263c686 100644 --- a/OpenCL/m08700_a0-optimized.cl +++ b/OpenCL/m08700_a0-optimized.cl @@ -282,14 +282,14 @@ __kernel void m08700_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 s_lotus_magic_table[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_lotus_magic_table[i] = lotus_magic_table[i]; } __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -469,14 +469,14 @@ __kernel void m08700_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 s_lotus_magic_table[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_lotus_magic_table[i] = lotus_magic_table[i]; } __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m08700_a1-optimized.cl b/OpenCL/m08700_a1-optimized.cl index 562437bf6..37f9abf06 100644 --- a/OpenCL/m08700_a1-optimized.cl +++ b/OpenCL/m08700_a1-optimized.cl @@ -280,14 +280,14 @@ __kernel void m08700_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_lotus_magic_table[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_lotus_magic_table[i] = lotus_magic_table[i]; } __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -527,14 +527,14 @@ __kernel void m08700_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_lotus_magic_table[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_lotus_magic_table[i] = lotus_magic_table[i]; } __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m08700_a3-optimized.cl b/OpenCL/m08700_a3-optimized.cl index a966d6bfb..d4a89ff3b 100644 --- a/OpenCL/m08700_a3-optimized.cl +++ b/OpenCL/m08700_a3-optimized.cl @@ -559,14 +559,14 @@ __kernel void m08700_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_lotus_magic_table[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_lotus_magic_table[i] = lotus_magic_table[i]; } __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -627,14 +627,14 @@ __kernel void m08700_m08 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_lotus_magic_table[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_lotus_magic_table[i] = lotus_magic_table[i]; } __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -695,14 +695,14 @@ __kernel void m08700_m16 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_lotus_magic_table[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_lotus_magic_table[i] = lotus_magic_table[i]; } __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -763,14 +763,14 @@ __kernel void m08700_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_lotus_magic_table[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_lotus_magic_table[i] = lotus_magic_table[i]; } __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -831,14 +831,14 @@ __kernel void m08700_s08 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_lotus_magic_table[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_lotus_magic_table[i] = lotus_magic_table[i]; } __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -899,14 +899,14 @@ __kernel void m08700_s16 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_lotus_magic_table[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_lotus_magic_table[i] = lotus_magic_table[i]; } __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m08800-pure.cl b/OpenCL/m08800-pure.cl index dcfad6fac..3e905db99 100644 --- a/OpenCL/m08800-pure.cl +++ b/OpenCL/m08800-pure.cl @@ -232,7 +232,7 @@ __kernel void m08800_comp (__global pw_t *pws, __global const kernel_rule_t *rul __local u32 s_te3[256]; __local u32 s_te4[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_td0[i] = td0[i]; s_td1[i] = td1[i]; diff --git a/OpenCL/m09100-pure.cl b/OpenCL/m09100-pure.cl index 2ac39a1ca..248cc1bc1 100644 --- a/OpenCL/m09100-pure.cl +++ b/OpenCL/m09100-pure.cl @@ -402,14 +402,14 @@ __kernel void m09100_init (__global pw_t *pws, __global const kernel_rule_t *rul __local u8 s_lotus_magic_table[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_lotus_magic_table[i] = lotus_magic_table[i]; } __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m09400-pure.cl b/OpenCL/m09400-pure.cl index 4fbc151b1..773e437f9 100644 --- a/OpenCL/m09400-pure.cl +++ b/OpenCL/m09400-pure.cl @@ -135,7 +135,7 @@ __kernel void m09400_comp (__global pw_t *pws, __global const kernel_rule_t *rul __local u32 s_te3[256]; __local u32 s_te4[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_td0[i] = td0[i]; s_td1[i] = td1[i]; diff --git a/OpenCL/m09500-pure.cl b/OpenCL/m09500-pure.cl index df4498a4d..89f32e671 100644 --- a/OpenCL/m09500-pure.cl +++ b/OpenCL/m09500-pure.cl @@ -135,7 +135,7 @@ __kernel void m09500_comp (__global pw_t *pws, __global const kernel_rule_t *rul __local u32 s_te3[256]; __local u32 s_te4[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_td0[i] = td0[i]; s_td1[i] = td1[i]; diff --git a/OpenCL/m09600-pure.cl b/OpenCL/m09600-pure.cl index 9d2999d7e..be3a1bfa9 100644 --- a/OpenCL/m09600-pure.cl +++ b/OpenCL/m09600-pure.cl @@ -181,7 +181,7 @@ __kernel void m09600_comp (__global pw_t *pws, __global const kernel_rule_t *rul __local u32 s_te3[256]; __local u32 s_te4[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_td0[i] = td0[i]; s_td1[i] = td1[i]; diff --git a/OpenCL/m10700-optimized.cl b/OpenCL/m10700-optimized.cl index 6a716ef74..dc6fb34a8 100644 --- a/OpenCL/m10700-optimized.cl +++ b/OpenCL/m10700-optimized.cl @@ -565,7 +565,7 @@ __kernel void m10700_loop (__global pw_t *pws, __global const kernel_rule_t *rul __local u32 s_te3[256]; __local u32 s_te4[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_te0[i] = te0[i]; s_te1[i] = te1[i]; diff --git a/OpenCL/m10700-pure.cl b/OpenCL/m10700-pure.cl index 23c5b7480..0a812c432 100644 --- a/OpenCL/m10700-pure.cl +++ b/OpenCL/m10700-pure.cl @@ -1166,7 +1166,7 @@ __kernel void m10700_loop (__global pw_t *pws, __global const kernel_rule_t *rul __local u32 s_te3[256]; __local u32 s_te4[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_te0[i] = te0[i]; s_te1[i] = te1[i]; diff --git a/OpenCL/m11100_a0-optimized.cl b/OpenCL/m11100_a0-optimized.cl index 1fc16d404..d7505e5fb 100644 --- a/OpenCL/m11100_a0-optimized.cl +++ b/OpenCL/m11100_a0-optimized.cl @@ -42,7 +42,7 @@ __kernel void m11100_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -376,7 +376,7 @@ __kernel void m11100_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m11100_a0-pure.cl b/OpenCL/m11100_a0-pure.cl index a9d7e0f3a..461b0077a 100644 --- a/OpenCL/m11100_a0-pure.cl +++ b/OpenCL/m11100_a0-pure.cl @@ -43,7 +43,7 @@ __kernel void m11100_mxx (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -189,7 +189,7 @@ __kernel void m11100_sxx (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m11100_a1-optimized.cl b/OpenCL/m11100_a1-optimized.cl index 49a175920..2335805b2 100644 --- a/OpenCL/m11100_a1-optimized.cl +++ b/OpenCL/m11100_a1-optimized.cl @@ -40,7 +40,7 @@ __kernel void m11100_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -434,7 +434,7 @@ __kernel void m11100_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m11100_a1-pure.cl b/OpenCL/m11100_a1-pure.cl index 4c8752c68..4d59879bc 100644 --- a/OpenCL/m11100_a1-pure.cl +++ b/OpenCL/m11100_a1-pure.cl @@ -41,7 +41,7 @@ __kernel void m11100_mxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -185,7 +185,7 @@ __kernel void m11100_sxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m11100_a3-optimized.cl b/OpenCL/m11100_a3-optimized.cl index 7289eee56..5149a98bc 100644 --- a/OpenCL/m11100_a3-optimized.cl +++ b/OpenCL/m11100_a3-optimized.cl @@ -665,7 +665,7 @@ __kernel void m11100_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -735,7 +735,7 @@ __kernel void m11100_m08 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -805,7 +805,7 @@ __kernel void m11100_m16 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -875,7 +875,7 @@ __kernel void m11100_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -945,7 +945,7 @@ __kernel void m11100_s08 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -1015,7 +1015,7 @@ __kernel void m11100_s16 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m11100_a3-pure.cl b/OpenCL/m11100_a3-pure.cl index f56ab6580..a4ea65eec 100644 --- a/OpenCL/m11100_a3-pure.cl +++ b/OpenCL/m11100_a3-pure.cl @@ -41,7 +41,7 @@ __kernel void m11100_mxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -228,7 +228,7 @@ __kernel void m11100_sxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m11300-pure.cl b/OpenCL/m11300-pure.cl index 54f28a740..94a635176 100644 --- a/OpenCL/m11300-pure.cl +++ b/OpenCL/m11300-pure.cl @@ -235,7 +235,7 @@ __kernel void m11300_comp (__global pw_t *pws, __global const kernel_rule_t *rul __local u32 s_te3[256]; __local u32 s_te4[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_td0[i] = td0[i]; s_td1[i] = td1[i]; diff --git a/OpenCL/m11400_a0-pure.cl b/OpenCL/m11400_a0-pure.cl index 36408c7d3..5d1b954d4 100644 --- a/OpenCL/m11400_a0-pure.cl +++ b/OpenCL/m11400_a0-pure.cl @@ -43,7 +43,7 @@ __kernel void m11400_mxx (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -141,7 +141,7 @@ __kernel void m11400_sxx (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m11400_a1-pure.cl b/OpenCL/m11400_a1-pure.cl index d79d8363e..1371efbef 100644 --- a/OpenCL/m11400_a1-pure.cl +++ b/OpenCL/m11400_a1-pure.cl @@ -41,7 +41,7 @@ __kernel void m11400_mxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -135,7 +135,7 @@ __kernel void m11400_sxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m11400_a3-pure.cl b/OpenCL/m11400_a3-pure.cl index fb8c6d30b..224db5c14 100644 --- a/OpenCL/m11400_a3-pure.cl +++ b/OpenCL/m11400_a3-pure.cl @@ -41,7 +41,7 @@ __kernel void m11400_mxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -161,7 +161,7 @@ __kernel void m11400_sxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m11700_a0-optimized.cl b/OpenCL/m11700_a0-optimized.cl index 27e459f8a..62524f4c4 100644 --- a/OpenCL/m11700_a0-optimized.cl +++ b/OpenCL/m11700_a0-optimized.cl @@ -2317,7 +2317,7 @@ __kernel void m11700_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u64 s_sbob_sl64[8][256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_sbob_sl64[0][i] = sbob_sl64[0][i]; s_sbob_sl64[1][i] = sbob_sl64[1][i]; @@ -2475,7 +2475,7 @@ __kernel void m11700_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u64 s_sbob_sl64[8][256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_sbob_sl64[0][i] = sbob_sl64[0][i]; s_sbob_sl64[1][i] = sbob_sl64[1][i]; diff --git a/OpenCL/m11700_a1-optimized.cl b/OpenCL/m11700_a1-optimized.cl index 888c9e05c..220e5baec 100644 --- a/OpenCL/m11700_a1-optimized.cl +++ b/OpenCL/m11700_a1-optimized.cl @@ -2315,7 +2315,7 @@ __kernel void m11700_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u64 s_sbob_sl64[8][256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_sbob_sl64[0][i] = sbob_sl64[0][i]; s_sbob_sl64[1][i] = sbob_sl64[1][i]; @@ -2531,7 +2531,7 @@ __kernel void m11700_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u64 s_sbob_sl64[8][256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_sbob_sl64[0][i] = sbob_sl64[0][i]; s_sbob_sl64[1][i] = sbob_sl64[1][i]; diff --git a/OpenCL/m11700_a3-optimized.cl b/OpenCL/m11700_a3-optimized.cl index e0ebd3535..5efd953d4 100644 --- a/OpenCL/m11700_a3-optimized.cl +++ b/OpenCL/m11700_a3-optimized.cl @@ -2484,7 +2484,7 @@ __kernel void m11700_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u64 s_sbob_sl64[8][256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_sbob_sl64[0][i] = sbob_sl64[0][i]; s_sbob_sl64[1][i] = sbob_sl64[1][i]; @@ -2548,7 +2548,7 @@ __kernel void m11700_m08 (__global pw_t *pws, __global const kernel_rule_t *rule __local u64 s_sbob_sl64[8][256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_sbob_sl64[0][i] = sbob_sl64[0][i]; s_sbob_sl64[1][i] = sbob_sl64[1][i]; @@ -2612,7 +2612,7 @@ __kernel void m11700_m16 (__global pw_t *pws, __global const kernel_rule_t *rule __local u64 s_sbob_sl64[8][256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_sbob_sl64[0][i] = sbob_sl64[0][i]; s_sbob_sl64[1][i] = sbob_sl64[1][i]; @@ -2676,7 +2676,7 @@ __kernel void m11700_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u64 s_sbob_sl64[8][256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_sbob_sl64[0][i] = sbob_sl64[0][i]; s_sbob_sl64[1][i] = sbob_sl64[1][i]; @@ -2740,7 +2740,7 @@ __kernel void m11700_s08 (__global pw_t *pws, __global const kernel_rule_t *rule __local u64 s_sbob_sl64[8][256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_sbob_sl64[0][i] = sbob_sl64[0][i]; s_sbob_sl64[1][i] = sbob_sl64[1][i]; @@ -2804,7 +2804,7 @@ __kernel void m11700_s16 (__global pw_t *pws, __global const kernel_rule_t *rule __local u64 s_sbob_sl64[8][256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_sbob_sl64[0][i] = sbob_sl64[0][i]; s_sbob_sl64[1][i] = sbob_sl64[1][i]; diff --git a/OpenCL/m11800_a0-optimized.cl b/OpenCL/m11800_a0-optimized.cl index de8b141e4..6397b63bb 100644 --- a/OpenCL/m11800_a0-optimized.cl +++ b/OpenCL/m11800_a0-optimized.cl @@ -2317,7 +2317,7 @@ __kernel void m11800_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u64 s_sbob_sl64[8][256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_sbob_sl64[0][i] = sbob_sl64[0][i]; s_sbob_sl64[1][i] = sbob_sl64[1][i]; @@ -2475,7 +2475,7 @@ __kernel void m11800_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u64 s_sbob_sl64[8][256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_sbob_sl64[0][i] = sbob_sl64[0][i]; s_sbob_sl64[1][i] = sbob_sl64[1][i]; diff --git a/OpenCL/m11800_a1-optimized.cl b/OpenCL/m11800_a1-optimized.cl index d853f3d93..b7d1405a0 100644 --- a/OpenCL/m11800_a1-optimized.cl +++ b/OpenCL/m11800_a1-optimized.cl @@ -2315,7 +2315,7 @@ __kernel void m11800_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u64 s_sbob_sl64[8][256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_sbob_sl64[0][i] = sbob_sl64[0][i]; s_sbob_sl64[1][i] = sbob_sl64[1][i]; @@ -2531,7 +2531,7 @@ __kernel void m11800_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u64 s_sbob_sl64[8][256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_sbob_sl64[0][i] = sbob_sl64[0][i]; s_sbob_sl64[1][i] = sbob_sl64[1][i]; diff --git a/OpenCL/m11800_a3-optimized.cl b/OpenCL/m11800_a3-optimized.cl index 7f53950f9..74f51d8ad 100644 --- a/OpenCL/m11800_a3-optimized.cl +++ b/OpenCL/m11800_a3-optimized.cl @@ -2484,7 +2484,7 @@ __kernel void m11800_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u64 s_sbob_sl64[8][256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_sbob_sl64[0][i] = sbob_sl64[0][i]; s_sbob_sl64[1][i] = sbob_sl64[1][i]; @@ -2548,7 +2548,7 @@ __kernel void m11800_m08 (__global pw_t *pws, __global const kernel_rule_t *rule __local u64 s_sbob_sl64[8][256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_sbob_sl64[0][i] = sbob_sl64[0][i]; s_sbob_sl64[1][i] = sbob_sl64[1][i]; @@ -2612,7 +2612,7 @@ __kernel void m11800_m16 (__global pw_t *pws, __global const kernel_rule_t *rule __local u64 s_sbob_sl64[8][256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_sbob_sl64[0][i] = sbob_sl64[0][i]; s_sbob_sl64[1][i] = sbob_sl64[1][i]; @@ -2676,7 +2676,7 @@ __kernel void m11800_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u64 s_sbob_sl64[8][256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_sbob_sl64[0][i] = sbob_sl64[0][i]; s_sbob_sl64[1][i] = sbob_sl64[1][i]; @@ -2740,7 +2740,7 @@ __kernel void m11800_s08 (__global pw_t *pws, __global const kernel_rule_t *rule __local u64 s_sbob_sl64[8][256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_sbob_sl64[0][i] = sbob_sl64[0][i]; s_sbob_sl64[1][i] = sbob_sl64[1][i]; @@ -2804,7 +2804,7 @@ __kernel void m11800_s16 (__global pw_t *pws, __global const kernel_rule_t *rule __local u64 s_sbob_sl64[8][256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_sbob_sl64[0][i] = sbob_sl64[0][i]; s_sbob_sl64[1][i] = sbob_sl64[1][i]; diff --git a/OpenCL/m12400-pure.cl b/OpenCL/m12400-pure.cl index addee1b02..b921ee81e 100644 --- a/OpenCL/m12400-pure.cl +++ b/OpenCL/m12400-pure.cl @@ -506,7 +506,7 @@ __kernel void m12400_init (__global pw_t *pws, __global const kernel_rule_t *rul __local u32 s_SPtrans[8][64]; __local u32 s_skb[8][64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_SPtrans[0][i] = c_SPtrans[0][i]; s_SPtrans[1][i] = c_SPtrans[1][i]; @@ -642,7 +642,7 @@ __kernel void m12400_loop (__global pw_t *pws, __global const kernel_rule_t *rul __local u32 s_SPtrans[8][64]; __local u32 s_skb[8][64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_SPtrans[0][i] = c_SPtrans[0][i]; s_SPtrans[1][i] = c_SPtrans[1][i]; diff --git a/OpenCL/m12500-pure.cl b/OpenCL/m12500-pure.cl index 8ec2a9a1d..8c8076eea 100644 --- a/OpenCL/m12500-pure.cl +++ b/OpenCL/m12500-pure.cl @@ -290,7 +290,7 @@ __kernel void m12500_comp (__global pw_t *pws, __global const kernel_rule_t *rul __local u32 s_te3[256]; __local u32 s_te4[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_td0[i] = td0[i]; s_td1[i] = td1[i]; diff --git a/OpenCL/m12600_a0-optimized.cl b/OpenCL/m12600_a0-optimized.cl index 621cef26e..d17dcb200 100644 --- a/OpenCL/m12600_a0-optimized.cl +++ b/OpenCL/m12600_a0-optimized.cl @@ -42,7 +42,7 @@ __kernel void m12600_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -382,7 +382,7 @@ __kernel void m12600_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m12600_a0-pure.cl b/OpenCL/m12600_a0-pure.cl index a40bced95..7f18299b2 100644 --- a/OpenCL/m12600_a0-pure.cl +++ b/OpenCL/m12600_a0-pure.cl @@ -44,7 +44,7 @@ __kernel void m12600_mxx (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -180,7 +180,7 @@ __kernel void m12600_sxx (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m12600_a1-optimized.cl b/OpenCL/m12600_a1-optimized.cl index 7eef023ba..eb4ee4845 100644 --- a/OpenCL/m12600_a1-optimized.cl +++ b/OpenCL/m12600_a1-optimized.cl @@ -40,7 +40,7 @@ __kernel void m12600_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -438,7 +438,7 @@ __kernel void m12600_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m12600_a1-pure.cl b/OpenCL/m12600_a1-pure.cl index 643fcf51a..878aa5b9e 100644 --- a/OpenCL/m12600_a1-pure.cl +++ b/OpenCL/m12600_a1-pure.cl @@ -42,7 +42,7 @@ __kernel void m12600_mxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -176,7 +176,7 @@ __kernel void m12600_sxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m12600_a3-optimized.cl b/OpenCL/m12600_a3-optimized.cl index 7871a4759..d36015ee3 100644 --- a/OpenCL/m12600_a3-optimized.cl +++ b/OpenCL/m12600_a3-optimized.cl @@ -637,7 +637,7 @@ __kernel void m12600_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -707,7 +707,7 @@ __kernel void m12600_m08 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -777,7 +777,7 @@ __kernel void m12600_m16 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -847,7 +847,7 @@ __kernel void m12600_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -917,7 +917,7 @@ __kernel void m12600_s08 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -987,7 +987,7 @@ __kernel void m12600_s16 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m12600_a3-pure.cl b/OpenCL/m12600_a3-pure.cl index 739772660..e8c229674 100644 --- a/OpenCL/m12600_a3-pure.cl +++ b/OpenCL/m12600_a3-pure.cl @@ -42,7 +42,7 @@ __kernel void m12600_mxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -189,7 +189,7 @@ __kernel void m12600_sxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m12700-pure.cl b/OpenCL/m12700-pure.cl index cceb407fd..95f4ccb0f 100644 --- a/OpenCL/m12700-pure.cl +++ b/OpenCL/m12700-pure.cl @@ -251,7 +251,7 @@ __kernel void m12700_comp (__global pw_t *pws, __global const kernel_rule_t *rul __local u32 s_te3[256]; __local u32 s_te4[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_td0[i] = td0[i]; s_td1[i] = td1[i]; diff --git a/OpenCL/m12800-pure.cl b/OpenCL/m12800-pure.cl index 3b564aefa..bbe63c315 100644 --- a/OpenCL/m12800-pure.cl +++ b/OpenCL/m12800-pure.cl @@ -75,7 +75,7 @@ __kernel void m12800_init (__global pw_t *pws, __global const kernel_rule_t *rul __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m13200-pure.cl b/OpenCL/m13200-pure.cl index c38d75a50..d92bf119c 100644 --- a/OpenCL/m13200-pure.cl +++ b/OpenCL/m13200-pure.cl @@ -89,7 +89,7 @@ __kernel void m13200_loop (__global pw_t *pws, __global const kernel_rule_t *rul __local u32 s_te3[256]; __local u32 s_te4[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_td0[i] = td0[i]; s_td1[i] = td1[i]; diff --git a/OpenCL/m13400-pure.cl b/OpenCL/m13400-pure.cl index 77320ab5d..b7c3b49e3 100644 --- a/OpenCL/m13400-pure.cl +++ b/OpenCL/m13400-pure.cl @@ -152,7 +152,7 @@ __kernel void m13400_loop (__global pw_t *pws, __global const kernel_rule_t *rul __local u32 s_te3[256]; __local u32 s_te4[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_te0[i] = te0[i]; s_te1[i] = te1[i]; @@ -246,7 +246,7 @@ __kernel void m13400_comp (__global pw_t *pws, __global const kernel_rule_t *rul __local u32 s_te3[256]; __local u32 s_te4[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_td0[i] = td0[i]; s_td1[i] = td1[i]; diff --git a/OpenCL/m13751-pure.cl b/OpenCL/m13751-pure.cl index 53c2495c0..3749eaf2d 100644 --- a/OpenCL/m13751-pure.cl +++ b/OpenCL/m13751-pure.cl @@ -352,7 +352,7 @@ __kernel void m13751_comp (__global pw_t *pws, __global const kernel_rule_t *rul __local u32 s_te3[256]; __local u32 s_te4[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_td0[i] = td0[i]; s_td1[i] = td1[i]; diff --git a/OpenCL/m13752-pure.cl b/OpenCL/m13752-pure.cl index a55a16e08..8edd845c0 100644 --- a/OpenCL/m13752-pure.cl +++ b/OpenCL/m13752-pure.cl @@ -352,7 +352,7 @@ __kernel void m13752_comp (__global pw_t *pws, __global const kernel_rule_t *rul __local u32 s_te3[256]; __local u32 s_te4[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_td0[i] = td0[i]; s_td1[i] = td1[i]; diff --git a/OpenCL/m13753-pure.cl b/OpenCL/m13753-pure.cl index 7ede7513c..e0d3fd3c7 100644 --- a/OpenCL/m13753-pure.cl +++ b/OpenCL/m13753-pure.cl @@ -352,7 +352,7 @@ __kernel void m13753_comp (__global pw_t *pws, __global const kernel_rule_t *rul __local u32 s_te3[256]; __local u32 s_te4[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_td0[i] = td0[i]; s_td1[i] = td1[i]; diff --git a/OpenCL/m13800_a0-optimized.cl b/OpenCL/m13800_a0-optimized.cl index 6b5898bcf..2f1f82548 100644 --- a/OpenCL/m13800_a0-optimized.cl +++ b/OpenCL/m13800_a0-optimized.cl @@ -430,7 +430,7 @@ __kernel void m13800_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 s_esalt[32]; - for (u32 i = lid; i < 32; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 32; i += lsz) { s_esalt[i] = esalt_bufs[digests_offset].salt_buf[i]; } @@ -626,7 +626,7 @@ __kernel void m13800_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 s_esalt[32]; - for (u32 i = lid; i < 32; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 32; i += lsz) { s_esalt[i] = esalt_bufs[digests_offset].salt_buf[i]; } diff --git a/OpenCL/m13800_a1-optimized.cl b/OpenCL/m13800_a1-optimized.cl index 09acc73eb..867253fdf 100644 --- a/OpenCL/m13800_a1-optimized.cl +++ b/OpenCL/m13800_a1-optimized.cl @@ -428,7 +428,7 @@ __kernel void m13800_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_esalt[32]; - for (u32 i = lid; i < 32; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 32; i += lsz) { s_esalt[i] = esalt_bufs[digests_offset].salt_buf[i]; } @@ -680,7 +680,7 @@ __kernel void m13800_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_esalt[32]; - for (u32 i = lid; i < 32; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 32; i += lsz) { s_esalt[i] = esalt_bufs[digests_offset].salt_buf[i]; } diff --git a/OpenCL/m13800_a3-optimized.cl b/OpenCL/m13800_a3-optimized.cl index 4d73a7706..5c1ed9f9a 100644 --- a/OpenCL/m13800_a3-optimized.cl +++ b/OpenCL/m13800_a3-optimized.cl @@ -742,7 +742,7 @@ __kernel void m13800_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_esalt[32]; - for (u32 i = lid; i < 32; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 32; i += lsz) { s_esalt[i] = esalt_bufs[digests_offset].salt_buf[i]; } @@ -799,7 +799,7 @@ __kernel void m13800_m08 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_esalt[32]; - for (u32 i = lid; i < 32; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 32; i += lsz) { s_esalt[i] = esalt_bufs[digests_offset].salt_buf[i]; } @@ -856,7 +856,7 @@ __kernel void m13800_m16 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_esalt[32]; - for (u32 i = lid; i < 32; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 32; i += lsz) { s_esalt[i] = esalt_bufs[digests_offset].salt_buf[i]; } @@ -913,7 +913,7 @@ __kernel void m13800_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_esalt[32]; - for (u32 i = lid; i < 32; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 32; i += lsz) { s_esalt[i] = esalt_bufs[digests_offset].salt_buf[i]; } @@ -970,7 +970,7 @@ __kernel void m13800_s08 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_esalt[32]; - for (u32 i = lid; i < 32; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 32; i += lsz) { s_esalt[i] = esalt_bufs[digests_offset].salt_buf[i]; } @@ -1027,7 +1027,7 @@ __kernel void m13800_s16 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_esalt[32]; - for (u32 i = lid; i < 32; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 32; i += lsz) { s_esalt[i] = esalt_bufs[digests_offset].salt_buf[i]; } diff --git a/OpenCL/m13900_a0-optimized.cl b/OpenCL/m13900_a0-optimized.cl index dc36f39d5..3d42bfd21 100644 --- a/OpenCL/m13900_a0-optimized.cl +++ b/OpenCL/m13900_a0-optimized.cl @@ -43,7 +43,7 @@ __kernel void m13900_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -270,7 +270,7 @@ __kernel void m13900_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m13900_a0-pure.cl b/OpenCL/m13900_a0-pure.cl index c69c5be8b..29327d711 100644 --- a/OpenCL/m13900_a0-pure.cl +++ b/OpenCL/m13900_a0-pure.cl @@ -43,7 +43,7 @@ __kernel void m13900_mxx (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -194,7 +194,7 @@ __kernel void m13900_sxx (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m13900_a1-optimized.cl b/OpenCL/m13900_a1-optimized.cl index 68220e945..341b6aab4 100644 --- a/OpenCL/m13900_a1-optimized.cl +++ b/OpenCL/m13900_a1-optimized.cl @@ -41,7 +41,7 @@ __kernel void m13900_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -326,7 +326,7 @@ __kernel void m13900_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m13900_a1-pure.cl b/OpenCL/m13900_a1-pure.cl index dca130960..ee0699ae5 100644 --- a/OpenCL/m13900_a1-pure.cl +++ b/OpenCL/m13900_a1-pure.cl @@ -41,7 +41,7 @@ __kernel void m13900_mxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -190,7 +190,7 @@ __kernel void m13900_sxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m13900_a3-optimized.cl b/OpenCL/m13900_a3-optimized.cl index bf97e6886..35e65dfbe 100644 --- a/OpenCL/m13900_a3-optimized.cl +++ b/OpenCL/m13900_a3-optimized.cl @@ -430,7 +430,7 @@ __kernel void m13900_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -500,7 +500,7 @@ __kernel void m13900_m08 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -570,7 +570,7 @@ __kernel void m13900_m16 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -640,7 +640,7 @@ __kernel void m13900_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -710,7 +710,7 @@ __kernel void m13900_s08 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -780,7 +780,7 @@ __kernel void m13900_s16 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m13900_a3-pure.cl b/OpenCL/m13900_a3-pure.cl index 95a4fac07..a01784d96 100644 --- a/OpenCL/m13900_a3-pure.cl +++ b/OpenCL/m13900_a3-pure.cl @@ -41,7 +41,7 @@ __kernel void m13900_mxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -207,7 +207,7 @@ __kernel void m13900_sxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m14000_a0-pure.cl b/OpenCL/m14000_a0-pure.cl index ab83c985c..2c985988a 100644 --- a/OpenCL/m14000_a0-pure.cl +++ b/OpenCL/m14000_a0-pure.cl @@ -513,7 +513,7 @@ __kernel void m14000_mxx (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 s_SPtrans[8][64]; __local u32 s_skb[8][64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_SPtrans[0][i] = c_SPtrans[0][i]; s_SPtrans[1][i] = c_SPtrans[1][i]; @@ -618,7 +618,7 @@ __kernel void m14000_sxx (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 s_SPtrans[8][64]; __local u32 s_skb[8][64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_SPtrans[0][i] = c_SPtrans[0][i]; s_SPtrans[1][i] = c_SPtrans[1][i]; diff --git a/OpenCL/m14000_a1-pure.cl b/OpenCL/m14000_a1-pure.cl index 333e2bbb3..3dd6b32d6 100644 --- a/OpenCL/m14000_a1-pure.cl +++ b/OpenCL/m14000_a1-pure.cl @@ -503,7 +503,7 @@ __kernel void m14000_mxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_SPtrans[8][64]; __local u32 s_skb[8][64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_SPtrans[0][i] = c_SPtrans[0][i]; s_SPtrans[1][i] = c_SPtrans[1][i]; @@ -651,7 +651,7 @@ __kernel void m14000_sxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_SPtrans[8][64]; __local u32 s_skb[8][64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_SPtrans[0][i] = c_SPtrans[0][i]; s_SPtrans[1][i] = c_SPtrans[1][i]; diff --git a/OpenCL/m14100_a0-pure.cl b/OpenCL/m14100_a0-pure.cl index 7633a4760..0abdf5ad7 100644 --- a/OpenCL/m14100_a0-pure.cl +++ b/OpenCL/m14100_a0-pure.cl @@ -557,7 +557,7 @@ __kernel void m14100_mxx (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 s_SPtrans[8][64]; __local u32 s_skb[8][64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_SPtrans[0][i] = c_SPtrans[0][i]; s_SPtrans[1][i] = c_SPtrans[1][i]; @@ -692,7 +692,7 @@ __kernel void m14100_sxx (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 s_SPtrans[8][64]; __local u32 s_skb[8][64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_SPtrans[0][i] = c_SPtrans[0][i]; s_SPtrans[1][i] = c_SPtrans[1][i]; diff --git a/OpenCL/m14100_a1-pure.cl b/OpenCL/m14100_a1-pure.cl index e68aafb4a..75a393d76 100644 --- a/OpenCL/m14100_a1-pure.cl +++ b/OpenCL/m14100_a1-pure.cl @@ -547,7 +547,7 @@ __kernel void m14100_mxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_SPtrans[8][64]; __local u32 s_skb[8][64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_SPtrans[0][i] = c_SPtrans[0][i]; s_SPtrans[1][i] = c_SPtrans[1][i]; @@ -730,7 +730,7 @@ __kernel void m14100_sxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_SPtrans[8][64]; __local u32 s_skb[8][64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_SPtrans[0][i] = c_SPtrans[0][i]; s_SPtrans[1][i] = c_SPtrans[1][i]; diff --git a/OpenCL/m14100_a3-pure.cl b/OpenCL/m14100_a3-pure.cl index b2760f32b..b56178933 100644 --- a/OpenCL/m14100_a3-pure.cl +++ b/OpenCL/m14100_a3-pure.cl @@ -729,7 +729,7 @@ __kernel void m14100_mxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_SPtrans[8][64]; __local u32 s_skb[8][64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_SPtrans[0][i] = c_SPtrans[0][i]; s_SPtrans[1][i] = c_SPtrans[1][i]; @@ -803,7 +803,7 @@ __kernel void m14100_sxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_SPtrans[8][64]; __local u32 s_skb[8][64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_SPtrans[0][i] = c_SPtrans[0][i]; s_SPtrans[1][i] = c_SPtrans[1][i]; diff --git a/OpenCL/m14400_a0-optimized.cl b/OpenCL/m14400_a0-optimized.cl index 67c2fd70b..99cf619cc 100644 --- a/OpenCL/m14400_a0-optimized.cl +++ b/OpenCL/m14400_a0-optimized.cl @@ -133,7 +133,7 @@ __kernel void m14400_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -404,7 +404,7 @@ __kernel void m14400_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m14400_a0-pure.cl b/OpenCL/m14400_a0-pure.cl index cda06eb8b..63723d9aa 100644 --- a/OpenCL/m14400_a0-pure.cl +++ b/OpenCL/m14400_a0-pure.cl @@ -43,7 +43,7 @@ __kernel void m14400_mxx (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -290,7 +290,7 @@ __kernel void m14400_sxx (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m14400_a1-optimized.cl b/OpenCL/m14400_a1-optimized.cl index ac6c90c59..326956fcc 100644 --- a/OpenCL/m14400_a1-optimized.cl +++ b/OpenCL/m14400_a1-optimized.cl @@ -133,7 +133,7 @@ __kernel void m14400_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -468,7 +468,7 @@ __kernel void m14400_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m14400_a1-pure.cl b/OpenCL/m14400_a1-pure.cl index 4f3fa1cb1..5d2df9c1c 100644 --- a/OpenCL/m14400_a1-pure.cl +++ b/OpenCL/m14400_a1-pure.cl @@ -41,7 +41,7 @@ __kernel void m14400_mxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -286,7 +286,7 @@ __kernel void m14400_sxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m14400_a3-optimized.cl b/OpenCL/m14400_a3-optimized.cl index 77db2a336..37cd80c23 100644 --- a/OpenCL/m14400_a3-optimized.cl +++ b/OpenCL/m14400_a3-optimized.cl @@ -643,7 +643,7 @@ __kernel void m14400_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -713,7 +713,7 @@ __kernel void m14400_m08 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -783,7 +783,7 @@ __kernel void m14400_m16 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -853,7 +853,7 @@ __kernel void m14400_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -923,7 +923,7 @@ __kernel void m14400_s08 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -993,7 +993,7 @@ __kernel void m14400_s16 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m14400_a3-pure.cl b/OpenCL/m14400_a3-pure.cl index 75b32087e..553a5778c 100644 --- a/OpenCL/m14400_a3-pure.cl +++ b/OpenCL/m14400_a3-pure.cl @@ -41,7 +41,7 @@ __kernel void m14400_mxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; @@ -311,7 +311,7 @@ __kernel void m14400_sxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 l_bin2asc[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { const u32 i0 = (i >> 0) & 15; const u32 i1 = (i >> 4) & 15; diff --git a/OpenCL/m14611-pure.cl b/OpenCL/m14611-pure.cl index b69db79ba..6a2f80da8 100644 --- a/OpenCL/m14611-pure.cl +++ b/OpenCL/m14611-pure.cl @@ -246,7 +246,7 @@ __kernel void m14611_comp (__global pw_t *pws, __global const kernel_rule_t *rul __local u32 s_te3[256]; __local u32 s_te4[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_td0[i] = td0[i]; s_td1[i] = td1[i]; diff --git a/OpenCL/m14621-pure.cl b/OpenCL/m14621-pure.cl index 9eb240dba..602122ce3 100644 --- a/OpenCL/m14621-pure.cl +++ b/OpenCL/m14621-pure.cl @@ -285,7 +285,7 @@ __kernel void m14621_comp (__global pw_t *pws, __global const kernel_rule_t *rul __local u32 s_te3[256]; __local u32 s_te4[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_td0[i] = td0[i]; s_td1[i] = td1[i]; diff --git a/OpenCL/m14631-pure.cl b/OpenCL/m14631-pure.cl index 252c2dff5..903001c38 100644 --- a/OpenCL/m14631-pure.cl +++ b/OpenCL/m14631-pure.cl @@ -341,7 +341,7 @@ __kernel void m14631_comp (__global pw_t *pws, __global const kernel_rule_t *rul __local u32 s_te3[256]; __local u32 s_te4[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_td0[i] = td0[i]; s_td1[i] = td1[i]; diff --git a/OpenCL/m14641-pure.cl b/OpenCL/m14641-pure.cl index 464cc5dda..8dbb27168 100644 --- a/OpenCL/m14641-pure.cl +++ b/OpenCL/m14641-pure.cl @@ -246,7 +246,7 @@ __kernel void m14641_comp (__global pw_t *pws, __global const kernel_rule_t *rul __local u32 s_te3[256]; __local u32 s_te4[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_td0[i] = td0[i]; s_td1[i] = td1[i]; diff --git a/OpenCL/m14700-pure.cl b/OpenCL/m14700-pure.cl index 8b7a42bac..b3c5c4f54 100644 --- a/OpenCL/m14700-pure.cl +++ b/OpenCL/m14700-pure.cl @@ -234,7 +234,7 @@ __kernel void m14700_comp (__global pw_t *pws, __global const kernel_rule_t *rul __local u32 s_te3[256]; __local u32 s_te4[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_td0[i] = td0[i]; s_td1[i] = td1[i]; diff --git a/OpenCL/m14800-pure.cl b/OpenCL/m14800-pure.cl index b31b6696f..80d0564cc 100644 --- a/OpenCL/m14800-pure.cl +++ b/OpenCL/m14800-pure.cl @@ -498,7 +498,7 @@ __kernel void m14800_comp (__global pw_t *pws, __global const kernel_rule_t *rul __local u32 s_te3[256]; __local u32 s_te4[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_td0[i] = td0[i]; s_td1[i] = td1[i]; diff --git a/OpenCL/m14900_a0-optimized.cl b/OpenCL/m14900_a0-optimized.cl index 4a6280783..23bab0f7c 100644 --- a/OpenCL/m14900_a0-optimized.cl +++ b/OpenCL/m14900_a0-optimized.cl @@ -120,7 +120,7 @@ __kernel void m14900_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u8 s_ftable[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_ftable[i] = c_ftable[i]; } @@ -211,7 +211,7 @@ __kernel void m14900_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u8 s_ftable[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_ftable[i] = c_ftable[i]; } diff --git a/OpenCL/m14900_a1-optimized.cl b/OpenCL/m14900_a1-optimized.cl index ab2873e87..5ca20e7b9 100644 --- a/OpenCL/m14900_a1-optimized.cl +++ b/OpenCL/m14900_a1-optimized.cl @@ -118,7 +118,7 @@ __kernel void m14900_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u8 s_ftable[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_ftable[i] = c_ftable[i]; } @@ -273,7 +273,7 @@ __kernel void m14900_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u8 s_ftable[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_ftable[i] = c_ftable[i]; } diff --git a/OpenCL/m14900_a3-optimized.cl b/OpenCL/m14900_a3-optimized.cl index 5a76e8b9f..17819d166 100644 --- a/OpenCL/m14900_a3-optimized.cl +++ b/OpenCL/m14900_a3-optimized.cl @@ -226,7 +226,7 @@ __kernel void m14900_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u8 s_ftable[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_ftable[i] = c_ftable[i]; } @@ -300,7 +300,7 @@ __kernel void m14900_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u8 s_ftable[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_ftable[i] = c_ftable[i]; } diff --git a/OpenCL/m15300-pure.cl b/OpenCL/m15300-pure.cl index 9691c40cc..37c031e37 100644 --- a/OpenCL/m15300-pure.cl +++ b/OpenCL/m15300-pure.cl @@ -877,7 +877,7 @@ __kernel void m15300_comp (__global pw_t *pws, __global const kernel_rule_t *rul __local u32 s_SPtrans[8][64]; __local u32 s_skb[8][64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_SPtrans[0][i] = c_SPtrans[0][i]; s_SPtrans[1][i] = c_SPtrans[1][i]; diff --git a/OpenCL/m15900-pure.cl b/OpenCL/m15900-pure.cl index e3761071f..24ee74a57 100644 --- a/OpenCL/m15900-pure.cl +++ b/OpenCL/m15900-pure.cl @@ -493,7 +493,7 @@ __kernel void m15900_comp (__global pw_t *pws, __global const kernel_rule_t *rul __local u32 s_te3[256]; __local u32 s_te4[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_td0[i] = td0[i]; s_td1[i] = td1[i]; diff --git a/OpenCL/m16000_a0-pure.cl b/OpenCL/m16000_a0-pure.cl index 251f4161e..938a160f1 100644 --- a/OpenCL/m16000_a0-pure.cl +++ b/OpenCL/m16000_a0-pure.cl @@ -510,7 +510,7 @@ __kernel void m16000_mxx (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 s_SPtrans[8][64]; __local u32 s_skb[8][64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_SPtrans[0][i] = c_SPtrans[0][i]; s_SPtrans[1][i] = c_SPtrans[1][i]; @@ -533,7 +533,7 @@ __kernel void m16000_mxx (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 s_tripcode_salt[128]; - for (u32 i = lid; i < 128; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 128; i += lsz) { s_tripcode_salt[i] = c_tripcode_salt[i]; } @@ -603,7 +603,7 @@ __kernel void m16000_sxx (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 s_SPtrans[8][64]; __local u32 s_skb[8][64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_SPtrans[0][i] = c_SPtrans[0][i]; s_SPtrans[1][i] = c_SPtrans[1][i]; @@ -626,7 +626,7 @@ __kernel void m16000_sxx (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 s_tripcode_salt[128]; - for (u32 i = lid; i < 128; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 128; i += lsz) { s_tripcode_salt[i] = c_tripcode_salt[i]; } diff --git a/OpenCL/m16000_a1-pure.cl b/OpenCL/m16000_a1-pure.cl index d8ef49928..d8fadacc4 100644 --- a/OpenCL/m16000_a1-pure.cl +++ b/OpenCL/m16000_a1-pure.cl @@ -508,7 +508,7 @@ __kernel void m16000_mxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_SPtrans[8][64]; __local u32 s_skb[8][64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_SPtrans[0][i] = c_SPtrans[0][i]; s_SPtrans[1][i] = c_SPtrans[1][i]; @@ -531,7 +531,7 @@ __kernel void m16000_mxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_tripcode_salt[128]; - for (u32 i = lid; i < 128; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 128; i += lsz) { s_tripcode_salt[i] = c_tripcode_salt[i]; } @@ -680,7 +680,7 @@ __kernel void m16000_sxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_SPtrans[8][64]; __local u32 s_skb[8][64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_SPtrans[0][i] = c_SPtrans[0][i]; s_SPtrans[1][i] = c_SPtrans[1][i]; @@ -703,7 +703,7 @@ __kernel void m16000_sxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_tripcode_salt[128]; - for (u32 i = lid; i < 128; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 128; i += lsz) { s_tripcode_salt[i] = c_tripcode_salt[i]; } diff --git a/OpenCL/m16000_a3-pure.cl b/OpenCL/m16000_a3-pure.cl index b7c5c13a1..7baca0344 100644 --- a/OpenCL/m16000_a3-pure.cl +++ b/OpenCL/m16000_a3-pure.cl @@ -508,7 +508,7 @@ __kernel void m16000_mxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_SPtrans[8][64]; __local u32 s_skb[8][64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_SPtrans[0][i] = c_SPtrans[0][i]; s_SPtrans[1][i] = c_SPtrans[1][i]; @@ -531,7 +531,7 @@ __kernel void m16000_mxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_tripcode_salt[128]; - for (u32 i = lid; i < 128; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 128; i += lsz) { s_tripcode_salt[i] = c_tripcode_salt[i]; } @@ -629,7 +629,7 @@ __kernel void m16000_sxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_SPtrans[8][64]; __local u32 s_skb[8][64]; - for (u32 i = lid; i < 64; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz) { s_SPtrans[0][i] = c_SPtrans[0][i]; s_SPtrans[1][i] = c_SPtrans[1][i]; @@ -652,7 +652,7 @@ __kernel void m16000_sxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_tripcode_salt[128]; - for (u32 i = lid; i < 128; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 128; i += lsz) { s_tripcode_salt[i] = c_tripcode_salt[i]; } diff --git a/OpenCL/m16200-pure.cl b/OpenCL/m16200-pure.cl index 36aadca2b..c0af2167b 100644 --- a/OpenCL/m16200-pure.cl +++ b/OpenCL/m16200-pure.cl @@ -273,7 +273,7 @@ __kernel void m16200_comp (__global pw_t *pws, __global const kernel_rule_t *rul __local u32 s_te3[256]; __local u32 s_te4[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_td0[i] = td0[i]; s_td1[i] = td1[i]; diff --git a/OpenCL/m16300-pure.cl b/OpenCL/m16300-pure.cl index c676cf950..f18f41c45 100644 --- a/OpenCL/m16300-pure.cl +++ b/OpenCL/m16300-pure.cl @@ -408,7 +408,7 @@ __kernel void m16300_comp (__global pw_t *pws, __global const kernel_rule_t *rul __local u32 s_te3[256]; __local u32 s_te4[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_td0[i] = td0[i]; s_td1[i] = td1[i]; diff --git a/OpenCL/m16600_a0-optimized.cl b/OpenCL/m16600_a0-optimized.cl index ebce8c3b4..798cac3ce 100644 --- a/OpenCL/m16600_a0-optimized.cl +++ b/OpenCL/m16600_a0-optimized.cl @@ -43,7 +43,7 @@ __kernel void m16600_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 s_te3[256]; __local u32 s_te4[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_td0[i] = td0[i]; s_td1[i] = td1[i]; @@ -412,7 +412,7 @@ __kernel void m16600_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 s_te3[256]; __local u32 s_te4[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_td0[i] = td0[i]; s_td1[i] = td1[i]; diff --git a/OpenCL/m16600_a0-pure.cl b/OpenCL/m16600_a0-pure.cl index 97828f437..e08d0b600 100644 --- a/OpenCL/m16600_a0-pure.cl +++ b/OpenCL/m16600_a0-pure.cl @@ -44,7 +44,7 @@ __kernel void m16600_mxx (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 s_te3[256]; __local u32 s_te4[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_td0[i] = td0[i]; s_td1[i] = td1[i]; @@ -219,7 +219,7 @@ __kernel void m16600_sxx (__global pw_t *pws, __constant const kernel_rule_t *ru __local u32 s_te3[256]; __local u32 s_te4[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_td0[i] = td0[i]; s_td1[i] = td1[i]; diff --git a/OpenCL/m16600_a1-optimized.cl b/OpenCL/m16600_a1-optimized.cl index 5e657a9e3..bf110a97f 100644 --- a/OpenCL/m16600_a1-optimized.cl +++ b/OpenCL/m16600_a1-optimized.cl @@ -41,7 +41,7 @@ __kernel void m16600_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_te3[256]; __local u32 s_te4[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_td0[i] = td0[i]; s_td1[i] = td1[i]; @@ -468,7 +468,7 @@ __kernel void m16600_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_te3[256]; __local u32 s_te4[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_td0[i] = td0[i]; s_td1[i] = td1[i]; diff --git a/OpenCL/m16600_a1-pure.cl b/OpenCL/m16600_a1-pure.cl index 3c0f69cd6..466999ed8 100644 --- a/OpenCL/m16600_a1-pure.cl +++ b/OpenCL/m16600_a1-pure.cl @@ -42,7 +42,7 @@ __kernel void m16600_mxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_te3[256]; __local u32 s_te4[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_td0[i] = td0[i]; s_td1[i] = td1[i]; @@ -215,7 +215,7 @@ __kernel void m16600_sxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_te3[256]; __local u32 s_te4[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_td0[i] = td0[i]; s_td1[i] = td1[i]; diff --git a/OpenCL/m16600_a3-optimized.cl b/OpenCL/m16600_a3-optimized.cl index cfe4b374d..6aae22e31 100644 --- a/OpenCL/m16600_a3-optimized.cl +++ b/OpenCL/m16600_a3-optimized.cl @@ -322,7 +322,7 @@ __kernel void m16600_m04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_te3[256]; __local u32 s_te4[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_td0[i] = td0[i]; s_td1[i] = td1[i]; @@ -417,7 +417,7 @@ __kernel void m16600_m08 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_te3[256]; __local u32 s_te4[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_td0[i] = td0[i]; s_td1[i] = td1[i]; @@ -512,7 +512,7 @@ __kernel void m16600_m16 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_te3[256]; __local u32 s_te4[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_td0[i] = td0[i]; s_td1[i] = td1[i]; @@ -607,7 +607,7 @@ __kernel void m16600_s04 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_te3[256]; __local u32 s_te4[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_td0[i] = td0[i]; s_td1[i] = td1[i]; @@ -702,7 +702,7 @@ __kernel void m16600_s08 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_te3[256]; __local u32 s_te4[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_td0[i] = td0[i]; s_td1[i] = td1[i]; @@ -797,7 +797,7 @@ __kernel void m16600_s16 (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_te3[256]; __local u32 s_te4[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_td0[i] = td0[i]; s_td1[i] = td1[i]; diff --git a/OpenCL/m16600_a3-pure.cl b/OpenCL/m16600_a3-pure.cl index abe32dfec..2af0be04f 100644 --- a/OpenCL/m16600_a3-pure.cl +++ b/OpenCL/m16600_a3-pure.cl @@ -42,7 +42,7 @@ __kernel void m16600_mxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_te3[256]; __local u32 s_te4[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_td0[i] = td0[i]; s_td1[i] = td1[i]; @@ -228,7 +228,7 @@ __kernel void m16600_sxx (__global pw_t *pws, __global const kernel_rule_t *rule __local u32 s_te3[256]; __local u32 s_te4[256]; - for (u32 i = lid; i < 256; i += lsz) + for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { s_td0[i] = td0[i]; s_td1[i] = td1[i];