Work around ROCm OpenCL runtime bug when copying data from constant to local memory

pull/1656/head
jsteube 6 years ago
parent 188a9568ce
commit 68bff94980

@ -498,7 +498,7 @@ __kernel void m01500_mxx (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 s_SPtrans[8][64];
__local u32 s_skb[8][64];
for (u32 i = lid; i < 64; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz)
{
s_SPtrans[0][i] = c_SPtrans[0][i];
s_SPtrans[1][i] = c_SPtrans[1][i];
@ -582,7 +582,7 @@ __kernel void m01500_sxx (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 s_SPtrans[8][64];
__local u32 s_skb[8][64];
for (u32 i = lid; i < 64; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz)
{
s_SPtrans[0][i] = c_SPtrans[0][i];
s_SPtrans[1][i] = c_SPtrans[1][i];

@ -496,7 +496,7 @@ __kernel void m01500_mxx (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 s_SPtrans[8][64];
__local u32 s_skb[8][64];
for (u32 i = lid; i < 64; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz)
{
s_SPtrans[0][i] = c_SPtrans[0][i];
s_SPtrans[1][i] = c_SPtrans[1][i];
@ -659,7 +659,7 @@ __kernel void m01500_sxx (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 s_SPtrans[8][64];
__local u32 s_skb[8][64];
for (u32 i = lid; i < 64; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz)
{
s_SPtrans[0][i] = c_SPtrans[0][i];
s_SPtrans[1][i] = c_SPtrans[1][i];

@ -1037,7 +1037,7 @@ __kernel void m02500_aux3 (__global pw_t *pws, __global const kernel_rule_t *rul
__local u32 s_te3[256];
__local u32 s_te4[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
s_td0[i] = td0[i];
s_td1[i] = td1[i];

@ -768,7 +768,7 @@ __kernel void m02501_aux3 (__global pw_t *pws, __global const kernel_rule_t *rul
__local u32 s_te3[256];
__local u32 s_te4[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
s_td0[i] = td0[i];
s_td1[i] = td1[i];

@ -42,7 +42,7 @@ __kernel void m02610_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -326,7 +326,7 @@ __kernel void m02610_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -43,7 +43,7 @@ __kernel void m02610_mxx (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -146,7 +146,7 @@ __kernel void m02610_sxx (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -40,7 +40,7 @@ __kernel void m02610_m04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -383,7 +383,7 @@ __kernel void m02610_s04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -41,7 +41,7 @@ __kernel void m02610_mxx (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -142,7 +142,7 @@ __kernel void m02610_sxx (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -606,7 +606,7 @@ __kernel void m02610_m04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -676,7 +676,7 @@ __kernel void m02610_m08 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -746,7 +746,7 @@ __kernel void m02610_m16 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -816,7 +816,7 @@ __kernel void m02610_s04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -886,7 +886,7 @@ __kernel void m02610_s08 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -956,7 +956,7 @@ __kernel void m02610_s16 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -41,7 +41,7 @@ __kernel void m02610_mxx (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -155,7 +155,7 @@ __kernel void m02610_sxx (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -42,7 +42,7 @@ __kernel void m02710_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -411,7 +411,7 @@ __kernel void m02710_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -40,7 +40,7 @@ __kernel void m02710_m04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -468,7 +468,7 @@ __kernel void m02710_s04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -775,7 +775,7 @@ __kernel void m02710_m04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -845,7 +845,7 @@ __kernel void m02710_m08 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -915,7 +915,7 @@ __kernel void m02710_m16 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -985,7 +985,7 @@ __kernel void m02710_s04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -1055,7 +1055,7 @@ __kernel void m02710_s08 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -1125,7 +1125,7 @@ __kernel void m02710_s16 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -42,7 +42,7 @@ __kernel void m02810_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -412,7 +412,7 @@ __kernel void m02810_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -43,7 +43,7 @@ __kernel void m02810_mxx (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -169,7 +169,7 @@ __kernel void m02810_sxx (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -40,7 +40,7 @@ __kernel void m02810_m04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -469,7 +469,7 @@ __kernel void m02810_s04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -41,7 +41,7 @@ __kernel void m02810_mxx (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -165,7 +165,7 @@ __kernel void m02810_sxx (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -777,7 +777,7 @@ __kernel void m02810_m04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -847,7 +847,7 @@ __kernel void m02810_m08 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -917,7 +917,7 @@ __kernel void m02810_m16 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -987,7 +987,7 @@ __kernel void m02810_s04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -1057,7 +1057,7 @@ __kernel void m02810_s08 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -1127,7 +1127,7 @@ __kernel void m02810_s16 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -41,7 +41,7 @@ __kernel void m02810_mxx (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -178,7 +178,7 @@ __kernel void m02810_sxx (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -508,7 +508,7 @@ __kernel void m03000_mxx (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 s_SPtrans[8][64];
__local u32 s_skb[8][64];
for (u32 i = lid; i < 64; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz)
{
s_SPtrans[0][i] = c_SPtrans[0][i];
s_SPtrans[1][i] = c_SPtrans[1][i];
@ -593,7 +593,7 @@ __kernel void m03000_sxx (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 s_SPtrans[8][64];
__local u32 s_skb[8][64];
for (u32 i = lid; i < 64; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz)
{
s_SPtrans[0][i] = c_SPtrans[0][i];
s_SPtrans[1][i] = c_SPtrans[1][i];

@ -506,7 +506,7 @@ __kernel void m03000_mxx (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 s_SPtrans[8][64];
__local u32 s_skb[8][64];
for (u32 i = lid; i < 64; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz)
{
s_SPtrans[0][i] = c_SPtrans[0][i];
s_SPtrans[1][i] = c_SPtrans[1][i];
@ -670,7 +670,7 @@ __kernel void m03000_sxx (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 s_SPtrans[8][64];
__local u32 s_skb[8][64];
for (u32 i = lid; i < 64; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz)
{
s_SPtrans[0][i] = c_SPtrans[0][i];
s_SPtrans[1][i] = c_SPtrans[1][i];

@ -504,7 +504,7 @@ __kernel void m03100_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 s_SPtrans[8][64];
__local u32 s_skb[8][64];
for (u32 i = lid; i < 64; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz)
{
s_SPtrans[0][i] = c_SPtrans[0][i];
s_SPtrans[1][i] = c_SPtrans[1][i];
@ -727,7 +727,7 @@ __kernel void m03100_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 s_SPtrans[8][64];
__local u32 s_skb[8][64];
for (u32 i = lid; i < 64; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz)
{
s_SPtrans[0][i] = c_SPtrans[0][i];
s_SPtrans[1][i] = c_SPtrans[1][i];

@ -502,7 +502,7 @@ __kernel void m03100_m04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 s_SPtrans[8][64];
__local u32 s_skb[8][64];
for (u32 i = lid; i < 64; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz)
{
s_SPtrans[0][i] = c_SPtrans[0][i];
s_SPtrans[1][i] = c_SPtrans[1][i];
@ -785,7 +785,7 @@ __kernel void m03100_s04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 s_SPtrans[8][64];
__local u32 s_skb[8][64];
for (u32 i = lid; i < 64; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz)
{
s_SPtrans[0][i] = c_SPtrans[0][i];
s_SPtrans[1][i] = c_SPtrans[1][i];

@ -900,7 +900,7 @@ __kernel void m03100_m04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 s_SPtrans[8][64];
__local u32 s_skb[8][64];
for (u32 i = lid; i < 64; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz)
{
s_SPtrans[0][i] = c_SPtrans[0][i];
s_SPtrans[1][i] = c_SPtrans[1][i];
@ -974,7 +974,7 @@ __kernel void m03100_m08 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 s_SPtrans[8][64];
__local u32 s_skb[8][64];
for (u32 i = lid; i < 64; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz)
{
s_SPtrans[0][i] = c_SPtrans[0][i];
s_SPtrans[1][i] = c_SPtrans[1][i];
@ -1052,7 +1052,7 @@ __kernel void m03100_s04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 s_SPtrans[8][64];
__local u32 s_skb[8][64];
for (u32 i = lid; i < 64; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz)
{
s_SPtrans[0][i] = c_SPtrans[0][i];
s_SPtrans[1][i] = c_SPtrans[1][i];
@ -1126,7 +1126,7 @@ __kernel void m03100_s08 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 s_SPtrans[8][64];
__local u32 s_skb[8][64];
for (u32 i = lid; i < 64; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz)
{
s_SPtrans[0][i] = c_SPtrans[0][i];
s_SPtrans[1][i] = c_SPtrans[1][i];

@ -42,7 +42,7 @@ __kernel void m03710_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -357,7 +357,7 @@ __kernel void m03710_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -43,7 +43,7 @@ __kernel void m03710_mxx (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -159,7 +159,7 @@ __kernel void m03710_sxx (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -40,7 +40,7 @@ __kernel void m03710_m04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -414,7 +414,7 @@ __kernel void m03710_s04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -41,7 +41,7 @@ __kernel void m03710_mxx (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -155,7 +155,7 @@ __kernel void m03710_sxx (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -633,7 +633,7 @@ __kernel void m03710_m04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -737,7 +737,7 @@ __kernel void m03710_m08 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -807,7 +807,7 @@ __kernel void m03710_m16 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -877,7 +877,7 @@ __kernel void m03710_s04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -947,7 +947,7 @@ __kernel void m03710_s08 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -983,7 +983,7 @@ __kernel void m03710_s16 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -41,7 +41,7 @@ __kernel void m03710_mxx (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -168,7 +168,7 @@ __kernel void m03710_sxx (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -42,7 +42,7 @@ __kernel void m03910_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -412,7 +412,7 @@ __kernel void m03910_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -43,7 +43,7 @@ __kernel void m03910_mxx (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -169,7 +169,7 @@ __kernel void m03910_sxx (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -40,7 +40,7 @@ __kernel void m03910_m04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -469,7 +469,7 @@ __kernel void m03910_s04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -41,7 +41,7 @@ __kernel void m03910_mxx (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -165,7 +165,7 @@ __kernel void m03910_sxx (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -777,7 +777,7 @@ __kernel void m03910_m04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -847,7 +847,7 @@ __kernel void m03910_m08 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -917,7 +917,7 @@ __kernel void m03910_m16 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -987,7 +987,7 @@ __kernel void m03910_s04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -1057,7 +1057,7 @@ __kernel void m03910_s08 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -1127,7 +1127,7 @@ __kernel void m03910_s16 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -41,7 +41,7 @@ __kernel void m03910_mxx (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -178,7 +178,7 @@ __kernel void m03910_sxx (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -42,7 +42,7 @@ __kernel void m04010_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -383,7 +383,7 @@ __kernel void m04010_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -43,7 +43,7 @@ __kernel void m04010_mxx (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -150,7 +150,7 @@ __kernel void m04010_sxx (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -40,7 +40,7 @@ __kernel void m04010_m04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -439,7 +439,7 @@ __kernel void m04010_s04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -41,7 +41,7 @@ __kernel void m04010_mxx (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -146,7 +146,7 @@ __kernel void m04010_sxx (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -673,7 +673,7 @@ __kernel void m04010_m04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -777,7 +777,7 @@ __kernel void m04010_m08 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -847,7 +847,7 @@ __kernel void m04010_m16 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -917,7 +917,7 @@ __kernel void m04010_s04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -987,7 +987,7 @@ __kernel void m04010_s08 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -1023,7 +1023,7 @@ __kernel void m04010_s16 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -41,7 +41,7 @@ __kernel void m04010_mxx (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -163,7 +163,7 @@ __kernel void m04010_sxx (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -42,7 +42,7 @@ __kernel void m04110_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -428,7 +428,7 @@ __kernel void m04110_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -43,7 +43,7 @@ __kernel void m04110_mxx (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -163,7 +163,7 @@ __kernel void m04110_sxx (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -40,7 +40,7 @@ __kernel void m04110_m04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -486,7 +486,7 @@ __kernel void m04110_s04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -41,7 +41,7 @@ __kernel void m04110_mxx (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -159,7 +159,7 @@ __kernel void m04110_sxx (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -729,7 +729,7 @@ __kernel void m04110_m04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -833,7 +833,7 @@ __kernel void m04110_m08 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -903,7 +903,7 @@ __kernel void m04110_m16 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -973,7 +973,7 @@ __kernel void m04110_s04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -1043,7 +1043,7 @@ __kernel void m04110_s08 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -1079,7 +1079,7 @@ __kernel void m04110_s16 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -41,7 +41,7 @@ __kernel void m04110_mxx (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -174,7 +174,7 @@ __kernel void m04110_sxx (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -42,7 +42,7 @@ __kernel void m04310_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -326,7 +326,7 @@ __kernel void m04310_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -43,7 +43,7 @@ __kernel void m04310_mxx (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -146,7 +146,7 @@ __kernel void m04310_sxx (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -40,7 +40,7 @@ __kernel void m04310_m04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -383,7 +383,7 @@ __kernel void m04310_s04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -41,7 +41,7 @@ __kernel void m04310_mxx (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -142,7 +142,7 @@ __kernel void m04310_sxx (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -606,7 +606,7 @@ __kernel void m04310_m04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -676,7 +676,7 @@ __kernel void m04310_m08 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -746,7 +746,7 @@ __kernel void m04310_m16 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -816,7 +816,7 @@ __kernel void m04310_s04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -886,7 +886,7 @@ __kernel void m04310_s08 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -956,7 +956,7 @@ __kernel void m04310_s16 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -41,7 +41,7 @@ __kernel void m04310_mxx (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -155,7 +155,7 @@ __kernel void m04310_sxx (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -42,7 +42,7 @@ __kernel void m04400_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -352,7 +352,7 @@ __kernel void m04400_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -44,7 +44,7 @@ __kernel void m04400_mxx (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -141,7 +141,7 @@ __kernel void m04400_sxx (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -40,7 +40,7 @@ __kernel void m04400_m04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -408,7 +408,7 @@ __kernel void m04400_s04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -42,7 +42,7 @@ __kernel void m04400_mxx (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -137,7 +137,7 @@ __kernel void m04400_sxx (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -577,7 +577,7 @@ __kernel void m04400_m04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -647,7 +647,7 @@ __kernel void m04400_m08 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -717,7 +717,7 @@ __kernel void m04400_m16 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -787,7 +787,7 @@ __kernel void m04400_s04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -857,7 +857,7 @@ __kernel void m04400_s08 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -927,7 +927,7 @@ __kernel void m04400_s16 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -42,7 +42,7 @@ __kernel void m04400_mxx (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -150,7 +150,7 @@ __kernel void m04400_sxx (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -42,7 +42,7 @@ __kernel void m04500_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -379,7 +379,7 @@ __kernel void m04500_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -43,7 +43,7 @@ __kernel void m04500_mxx (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -140,7 +140,7 @@ __kernel void m04500_sxx (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -40,7 +40,7 @@ __kernel void m04500_m04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -435,7 +435,7 @@ __kernel void m04500_s04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -41,7 +41,7 @@ __kernel void m04500_mxx (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -136,7 +136,7 @@ __kernel void m04500_sxx (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -637,7 +637,7 @@ __kernel void m04500_m04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -707,7 +707,7 @@ __kernel void m04500_m08 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -777,7 +777,7 @@ __kernel void m04500_m16 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -847,7 +847,7 @@ __kernel void m04500_s04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -917,7 +917,7 @@ __kernel void m04500_s08 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -987,7 +987,7 @@ __kernel void m04500_s16 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -41,7 +41,7 @@ __kernel void m04500_mxx (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -149,7 +149,7 @@ __kernel void m04500_sxx (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -42,7 +42,7 @@ __kernel void m04520_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -610,7 +610,7 @@ __kernel void m04520_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -43,7 +43,7 @@ __kernel void m04520_mxx (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -155,7 +155,7 @@ __kernel void m04520_sxx (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -40,7 +40,7 @@ __kernel void m04520_m04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -666,7 +666,7 @@ __kernel void m04520_s04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -41,7 +41,7 @@ __kernel void m04520_mxx (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -151,7 +151,7 @@ __kernel void m04520_sxx (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -1090,7 +1090,7 @@ __kernel void m04520_m04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -1160,7 +1160,7 @@ __kernel void m04520_m08 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -1230,7 +1230,7 @@ __kernel void m04520_m16 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -1300,7 +1300,7 @@ __kernel void m04520_s04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -1370,7 +1370,7 @@ __kernel void m04520_s08 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -1440,7 +1440,7 @@ __kernel void m04520_s16 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -41,7 +41,7 @@ __kernel void m04520_mxx (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -166,7 +166,7 @@ __kernel void m04520_sxx (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -42,7 +42,7 @@ __kernel void m04700_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -335,7 +335,7 @@ __kernel void m04700_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -44,7 +44,7 @@ __kernel void m04700_mxx (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -136,7 +136,7 @@ __kernel void m04700_sxx (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -40,7 +40,7 @@ __kernel void m04700_m04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -388,7 +388,7 @@ __kernel void m04700_s04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -42,7 +42,7 @@ __kernel void m04700_mxx (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -132,7 +132,7 @@ __kernel void m04700_sxx (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -577,7 +577,7 @@ __kernel void m04700_m04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -647,7 +647,7 @@ __kernel void m04700_m08 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -717,7 +717,7 @@ __kernel void m04700_m16 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -787,7 +787,7 @@ __kernel void m04700_s04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -857,7 +857,7 @@ __kernel void m04700_s08 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -927,7 +927,7 @@ __kernel void m04700_s16 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -42,7 +42,7 @@ __kernel void m04700_mxx (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
@ -145,7 +145,7 @@ __kernel void m04700_sxx (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;

@ -116,14 +116,14 @@ __kernel void m05300_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 s_nr_buf[16];
for (u32 i = lid; i < 16; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 16; i += lsz)
{
s_nr_buf[i] = ikepsk_bufs[digests_offset].nr_buf[i];
}
__local u32 s_msg_buf[128];
for (u32 i = lid; i < 128; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 128; i += lsz)
{
s_msg_buf[i] = ikepsk_bufs[digests_offset].msg_buf[i];
}
@ -291,14 +291,14 @@ __kernel void m05300_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 s_nr_buf[16];
for (u32 i = lid; i < 16; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 16; i += lsz)
{
s_nr_buf[i] = ikepsk_bufs[digests_offset].nr_buf[i];
}
__local u32 s_msg_buf[128];
for (u32 i = lid; i < 128; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 128; i += lsz)
{
s_msg_buf[i] = ikepsk_bufs[digests_offset].msg_buf[i];
}

@ -114,14 +114,14 @@ __kernel void m05300_m04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 s_nr_buf[16];
for (u32 i = lid; i < 16; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 16; i += lsz)
{
s_nr_buf[i] = ikepsk_bufs[digests_offset].nr_buf[i];
}
__local u32 s_msg_buf[128];
for (u32 i = lid; i < 128; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 128; i += lsz)
{
s_msg_buf[i] = ikepsk_bufs[digests_offset].msg_buf[i];
}
@ -349,14 +349,14 @@ __kernel void m05300_s04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 s_nr_buf[16];
for (u32 i = lid; i < 16; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 16; i += lsz)
{
s_nr_buf[i] = ikepsk_bufs[digests_offset].nr_buf[i];
}
__local u32 s_msg_buf[128];
for (u32 i = lid; i < 128; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 128; i += lsz)
{
s_msg_buf[i] = ikepsk_bufs[digests_offset].msg_buf[i];
}

@ -420,14 +420,14 @@ __kernel void m05300_m04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 s_nr_buf[16];
for (u32 i = lid; i < 16; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 16; i += lsz)
{
s_nr_buf[i] = ikepsk_bufs[digests_offset].nr_buf[i];
}
__local u32 s_msg_buf[128];
for (u32 i = lid; i < 128; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 128; i += lsz)
{
s_msg_buf[i] = ikepsk_bufs[digests_offset].msg_buf[i];
}
@ -493,14 +493,14 @@ __kernel void m05300_m08 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 s_nr_buf[16];
for (u32 i = lid; i < 16; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 16; i += lsz)
{
s_nr_buf[i] = ikepsk_bufs[digests_offset].nr_buf[i];
}
__local u32 s_msg_buf[128];
for (u32 i = lid; i < 128; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 128; i += lsz)
{
s_msg_buf[i] = ikepsk_bufs[digests_offset].msg_buf[i];
}
@ -566,14 +566,14 @@ __kernel void m05300_m16 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 s_nr_buf[16];
for (u32 i = lid; i < 16; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 16; i += lsz)
{
s_nr_buf[i] = ikepsk_bufs[digests_offset].nr_buf[i];
}
__local u32 s_msg_buf[128];
for (u32 i = lid; i < 128; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 128; i += lsz)
{
s_msg_buf[i] = ikepsk_bufs[digests_offset].msg_buf[i];
}
@ -639,14 +639,14 @@ __kernel void m05300_s04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 s_nr_buf[16];
for (u32 i = lid; i < 16; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 16; i += lsz)
{
s_nr_buf[i] = ikepsk_bufs[digests_offset].nr_buf[i];
}
__local u32 s_msg_buf[128];
for (u32 i = lid; i < 128; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 128; i += lsz)
{
s_msg_buf[i] = ikepsk_bufs[digests_offset].msg_buf[i];
}
@ -712,14 +712,14 @@ __kernel void m05300_s08 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 s_nr_buf[16];
for (u32 i = lid; i < 16; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 16; i += lsz)
{
s_nr_buf[i] = ikepsk_bufs[digests_offset].nr_buf[i];
}
__local u32 s_msg_buf[128];
for (u32 i = lid; i < 128; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 128; i += lsz)
{
s_msg_buf[i] = ikepsk_bufs[digests_offset].msg_buf[i];
}
@ -785,14 +785,14 @@ __kernel void m05300_s16 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 s_nr_buf[16];
for (u32 i = lid; i < 16; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 16; i += lsz)
{
s_nr_buf[i] = ikepsk_bufs[digests_offset].nr_buf[i];
}
__local u32 s_msg_buf[128];
for (u32 i = lid; i < 128; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 128; i += lsz)
{
s_msg_buf[i] = ikepsk_bufs[digests_offset].msg_buf[i];
}

@ -120,14 +120,14 @@ __kernel void m05400_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 s_nr_buf[16];
for (u32 i = lid; i < 16; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 16; i += lsz)
{
s_nr_buf[i] = swap32_S (ikepsk_bufs[digests_offset].nr_buf[i]);
}
__local u32 s_msg_buf[128];
for (u32 i = lid; i < 128; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 128; i += lsz)
{
s_msg_buf[i] = swap32_S (ikepsk_bufs[digests_offset].msg_buf[i]);
}
@ -304,14 +304,14 @@ __kernel void m05400_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 s_nr_buf[16];
for (u32 i = lid; i < 16; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 16; i += lsz)
{
s_nr_buf[i] = swap32_S (ikepsk_bufs[digests_offset].nr_buf[i]);
}
__local u32 s_msg_buf[128];
for (u32 i = lid; i < 128; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 128; i += lsz)
{
s_msg_buf[i] = swap32_S (ikepsk_bufs[digests_offset].msg_buf[i]);
}

@ -118,14 +118,14 @@ __kernel void m05400_m04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 s_nr_buf[16];
for (u32 i = lid; i < 16; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 16; i += lsz)
{
s_nr_buf[i] = swap32_S (ikepsk_bufs[digests_offset].nr_buf[i]);
}
__local u32 s_msg_buf[128];
for (u32 i = lid; i < 128; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 128; i += lsz)
{
s_msg_buf[i] = swap32_S (ikepsk_bufs[digests_offset].msg_buf[i]);
}
@ -370,14 +370,14 @@ __kernel void m05400_s04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 s_nr_buf[16];
for (u32 i = lid; i < 16; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 16; i += lsz)
{
s_nr_buf[i] = swap32_S (ikepsk_bufs[digests_offset].nr_buf[i]);
}
__local u32 s_msg_buf[128];
for (u32 i = lid; i < 128; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 128; i += lsz)
{
s_msg_buf[i] = swap32_S (ikepsk_bufs[digests_offset].msg_buf[i]);
}

@ -424,14 +424,14 @@ __kernel void m05400_m04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 s_nr_buf[16];
for (u32 i = lid; i < 16; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 16; i += lsz)
{
s_nr_buf[i] = swap32_S (ikepsk_bufs[digests_offset].nr_buf[i]);
}
__local u32 s_msg_buf[128];
for (u32 i = lid; i < 128; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 128; i += lsz)
{
s_msg_buf[i] = swap32_S (ikepsk_bufs[digests_offset].msg_buf[i]);
}
@ -497,14 +497,14 @@ __kernel void m05400_m08 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 s_nr_buf[16];
for (u32 i = lid; i < 16; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 16; i += lsz)
{
s_nr_buf[i] = swap32_S (ikepsk_bufs[digests_offset].nr_buf[i]);
}
__local u32 s_msg_buf[128];
for (u32 i = lid; i < 128; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 128; i += lsz)
{
s_msg_buf[i] = swap32_S (ikepsk_bufs[digests_offset].msg_buf[i]);
}
@ -570,14 +570,14 @@ __kernel void m05400_m16 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 s_nr_buf[16];
for (u32 i = lid; i < 16; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 16; i += lsz)
{
s_nr_buf[i] = swap32_S (ikepsk_bufs[digests_offset].nr_buf[i]);
}
__local u32 s_msg_buf[128];
for (u32 i = lid; i < 128; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 128; i += lsz)
{
s_msg_buf[i] = swap32_S (ikepsk_bufs[digests_offset].msg_buf[i]);
}
@ -643,14 +643,14 @@ __kernel void m05400_s04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 s_nr_buf[16];
for (u32 i = lid; i < 16; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 16; i += lsz)
{
s_nr_buf[i] = swap32_S (ikepsk_bufs[digests_offset].nr_buf[i]);
}
__local u32 s_msg_buf[128];
for (u32 i = lid; i < 128; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 128; i += lsz)
{
s_msg_buf[i] = swap32_S (ikepsk_bufs[digests_offset].msg_buf[i]);
}
@ -716,14 +716,14 @@ __kernel void m05400_s08 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 s_nr_buf[16];
for (u32 i = lid; i < 16; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 16; i += lsz)
{
s_nr_buf[i] = swap32_S (ikepsk_bufs[digests_offset].nr_buf[i]);
}
__local u32 s_msg_buf[128];
for (u32 i = lid; i < 128; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 128; i += lsz)
{
s_msg_buf[i] = swap32_S (ikepsk_bufs[digests_offset].msg_buf[i]);
}
@ -789,14 +789,14 @@ __kernel void m05400_s16 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 s_nr_buf[16];
for (u32 i = lid; i < 16; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 16; i += lsz)
{
s_nr_buf[i] = swap32_S (ikepsk_bufs[digests_offset].nr_buf[i]);
}
__local u32 s_msg_buf[128];
for (u32 i = lid; i < 128; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 128; i += lsz)
{
s_msg_buf[i] = swap32_S (ikepsk_bufs[digests_offset].msg_buf[i]);
}

@ -505,7 +505,7 @@ __kernel void m05500_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 s_SPtrans[8][64];
__local u32 s_skb[8][64];
for (u32 i = lid; i < 64; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz)
{
s_SPtrans[0][i] = c_SPtrans[0][i];
s_SPtrans[1][i] = c_SPtrans[1][i];
@ -718,7 +718,7 @@ __kernel void m05500_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 s_SPtrans[8][64];
__local u32 s_skb[8][64];
for (u32 i = lid; i < 64; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz)
{
s_SPtrans[0][i] = c_SPtrans[0][i];
s_SPtrans[1][i] = c_SPtrans[1][i];

@ -506,7 +506,7 @@ __kernel void m05500_mxx (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 s_SPtrans[8][64];
__local u32 s_skb[8][64];
for (u32 i = lid; i < 64; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz)
{
s_SPtrans[0][i] = c_SPtrans[0][i];
s_SPtrans[1][i] = c_SPtrans[1][i];
@ -630,7 +630,7 @@ __kernel void m05500_sxx (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 s_SPtrans[8][64];
__local u32 s_skb[8][64];
for (u32 i = lid; i < 64; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz)
{
s_SPtrans[0][i] = c_SPtrans[0][i];
s_SPtrans[1][i] = c_SPtrans[1][i];

@ -503,7 +503,7 @@ __kernel void m05500_m04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 s_SPtrans[8][64];
__local u32 s_skb[8][64];
for (u32 i = lid; i < 64; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz)
{
s_SPtrans[0][i] = c_SPtrans[0][i];
s_SPtrans[1][i] = c_SPtrans[1][i];
@ -769,7 +769,7 @@ __kernel void m05500_s04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 s_SPtrans[8][64];
__local u32 s_skb[8][64];
for (u32 i = lid; i < 64; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz)
{
s_SPtrans[0][i] = c_SPtrans[0][i];
s_SPtrans[1][i] = c_SPtrans[1][i];

@ -504,7 +504,7 @@ __kernel void m05500_mxx (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 s_SPtrans[8][64];
__local u32 s_skb[8][64];
for (u32 i = lid; i < 64; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz)
{
s_SPtrans[0][i] = c_SPtrans[0][i];
s_SPtrans[1][i] = c_SPtrans[1][i];
@ -626,7 +626,7 @@ __kernel void m05500_sxx (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 s_SPtrans[8][64];
__local u32 s_skb[8][64];
for (u32 i = lid; i < 64; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz)
{
s_SPtrans[0][i] = c_SPtrans[0][i];
s_SPtrans[1][i] = c_SPtrans[1][i];

@ -836,7 +836,7 @@ __kernel void m05500_m04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 s_SPtrans[8][64];
__local u32 s_skb[8][64];
for (u32 i = lid; i < 64; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz)
{
s_SPtrans[0][i] = c_SPtrans[0][i];
s_SPtrans[1][i] = c_SPtrans[1][i];
@ -910,7 +910,7 @@ __kernel void m05500_m08 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 s_SPtrans[8][64];
__local u32 s_skb[8][64];
for (u32 i = lid; i < 64; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz)
{
s_SPtrans[0][i] = c_SPtrans[0][i];
s_SPtrans[1][i] = c_SPtrans[1][i];
@ -988,7 +988,7 @@ __kernel void m05500_s04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 s_SPtrans[8][64];
__local u32 s_skb[8][64];
for (u32 i = lid; i < 64; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz)
{
s_SPtrans[0][i] = c_SPtrans[0][i];
s_SPtrans[1][i] = c_SPtrans[1][i];
@ -1062,7 +1062,7 @@ __kernel void m05500_s08 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 s_SPtrans[8][64];
__local u32 s_skb[8][64];
for (u32 i = lid; i < 64; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz)
{
s_SPtrans[0][i] = c_SPtrans[0][i];
s_SPtrans[1][i] = c_SPtrans[1][i];

@ -504,7 +504,7 @@ __kernel void m05500_mxx (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 s_SPtrans[8][64];
__local u32 s_skb[8][64];
for (u32 i = lid; i < 64; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz)
{
s_SPtrans[0][i] = c_SPtrans[0][i];
s_SPtrans[1][i] = c_SPtrans[1][i];
@ -639,7 +639,7 @@ __kernel void m05500_sxx (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 s_SPtrans[8][64];
__local u32 s_skb[8][64];
for (u32 i = lid; i < 64; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz)
{
s_SPtrans[0][i] = c_SPtrans[0][i];
s_SPtrans[1][i] = c_SPtrans[1][i];

@ -117,14 +117,14 @@ __kernel void m05600_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 s_userdomain_buf[64];
for (u32 i = lid; i < 64; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz)
{
s_userdomain_buf[i] = netntlm_bufs[digests_offset].userdomain_buf[i];
}
__local u32 s_chall_buf[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
s_chall_buf[i] = netntlm_bufs[digests_offset].chall_buf[i];
}
@ -356,14 +356,14 @@ __kernel void m05600_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 s_userdomain_buf[64];
for (u32 i = lid; i < 64; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz)
{
s_userdomain_buf[i] = netntlm_bufs[digests_offset].userdomain_buf[i];
}
__local u32 s_chall_buf[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
s_chall_buf[i] = netntlm_bufs[digests_offset].chall_buf[i];
}

@ -115,14 +115,14 @@ __kernel void m05600_m04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 s_userdomain_buf[64];
for (u32 i = lid; i < 64; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz)
{
s_userdomain_buf[i] = netntlm_bufs[digests_offset].userdomain_buf[i];
}
__local u32 s_chall_buf[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
s_chall_buf[i] = netntlm_bufs[digests_offset].chall_buf[i];
}
@ -412,14 +412,14 @@ __kernel void m05600_s04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 s_userdomain_buf[64];
for (u32 i = lid; i < 64; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz)
{
s_userdomain_buf[i] = netntlm_bufs[digests_offset].userdomain_buf[i];
}
__local u32 s_chall_buf[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
s_chall_buf[i] = netntlm_bufs[digests_offset].chall_buf[i];
}

@ -539,14 +539,14 @@ __kernel void m05600_m04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 s_userdomain_buf[64];
for (u32 i = lid; i < 64; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz)
{
s_userdomain_buf[i] = netntlm_bufs[digests_offset].userdomain_buf[i];
}
__local u32 s_chall_buf[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
s_chall_buf[i] = netntlm_bufs[digests_offset].chall_buf[i];
}
@ -612,14 +612,14 @@ __kernel void m05600_m08 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 s_userdomain_buf[64];
for (u32 i = lid; i < 64; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz)
{
s_userdomain_buf[i] = netntlm_bufs[digests_offset].userdomain_buf[i];
}
__local u32 s_chall_buf[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
s_chall_buf[i] = netntlm_bufs[digests_offset].chall_buf[i];
}
@ -685,14 +685,14 @@ __kernel void m05600_m16 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 s_userdomain_buf[64];
for (u32 i = lid; i < 64; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz)
{
s_userdomain_buf[i] = netntlm_bufs[digests_offset].userdomain_buf[i];
}
__local u32 s_chall_buf[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
s_chall_buf[i] = netntlm_bufs[digests_offset].chall_buf[i];
}
@ -758,14 +758,14 @@ __kernel void m05600_s04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 s_userdomain_buf[64];
for (u32 i = lid; i < 64; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz)
{
s_userdomain_buf[i] = netntlm_bufs[digests_offset].userdomain_buf[i];
}
__local u32 s_chall_buf[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
s_chall_buf[i] = netntlm_bufs[digests_offset].chall_buf[i];
}
@ -831,14 +831,14 @@ __kernel void m05600_s08 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 s_userdomain_buf[64];
for (u32 i = lid; i < 64; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz)
{
s_userdomain_buf[i] = netntlm_bufs[digests_offset].userdomain_buf[i];
}
__local u32 s_chall_buf[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
s_chall_buf[i] = netntlm_bufs[digests_offset].chall_buf[i];
}
@ -904,14 +904,14 @@ __kernel void m05600_s16 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 s_userdomain_buf[64];
for (u32 i = lid; i < 64; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 64; i += lsz)
{
s_userdomain_buf[i] = netntlm_bufs[digests_offset].userdomain_buf[i];
}
__local u32 s_chall_buf[256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
s_chall_buf[i] = netntlm_bufs[digests_offset].chall_buf[i];
}

@ -2303,7 +2303,7 @@ __kernel void m05800_loop (__global pw_t *pws, __global const kernel_rule_t *rul
__local u32 s_pc_dec[1024];
__local u32 s_pc_len[1024];
for (u32 i = lid; i < 1024; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 1024; i += lsz)
{
s_pc_dec[i] = c_pc_dec[i];
s_pc_len[i] = c_pc_len[i];

@ -2244,7 +2244,7 @@ __kernel void m05800_loop (__global pw_t *pws, __global const kernel_rule_t *rul
__local u32 s_pc_dec[1024];
__local u32 s_pc_len[1024];
for (u32 i = lid; i < 1024; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 1024; i += lsz)
{
s_pc_dec[i] = c_pc_dec[i];
s_pc_len[i] = c_pc_len[i];

@ -39,7 +39,7 @@ __kernel void m06100_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 s_Ch[8][256];
__local u32 s_Cl[8][256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
s_Ch[0][i] = Ch[0][i];
s_Ch[1][i] = Ch[1][i];
@ -179,7 +179,7 @@ __kernel void m06100_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 s_Ch[8][256];
__local u32 s_Cl[8][256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
s_Ch[0][i] = Ch[0][i];
s_Ch[1][i] = Ch[1][i];

@ -34,7 +34,7 @@ __kernel void m06100_mxx (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 s_Ch[8][256];
__local u32 s_Cl[8][256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
s_Ch[0][i] = Ch[0][i];
s_Ch[1][i] = Ch[1][i];
@ -118,7 +118,7 @@ __kernel void m06100_sxx (__global pw_t *pws, __constant const kernel_rule_t *ru
__local u32 s_Ch[8][256];
__local u32 s_Cl[8][256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
s_Ch[0][i] = Ch[0][i];
s_Ch[1][i] = Ch[1][i];

@ -37,7 +37,7 @@ __kernel void m06100_m04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 s_Ch[8][256];
__local u32 s_Cl[8][256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
s_Ch[0][i] = Ch[0][i];
s_Ch[1][i] = Ch[1][i];
@ -235,7 +235,7 @@ __kernel void m06100_s04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 s_Ch[8][256];
__local u32 s_Cl[8][256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
s_Ch[0][i] = Ch[0][i];
s_Ch[1][i] = Ch[1][i];

@ -32,7 +32,7 @@ __kernel void m06100_mxx (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 s_Ch[8][256];
__local u32 s_Cl[8][256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
s_Ch[0][i] = Ch[0][i];
s_Ch[1][i] = Ch[1][i];
@ -114,7 +114,7 @@ __kernel void m06100_sxx (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 s_Ch[8][256];
__local u32 s_Cl[8][256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
s_Ch[0][i] = Ch[0][i];
s_Ch[1][i] = Ch[1][i];

@ -187,7 +187,7 @@ __kernel void m06100_m04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 s_Ch[8][256];
__local u32 s_Cl[8][256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
s_Ch[0][i] = Ch[0][i];
s_Ch[1][i] = Ch[1][i];
@ -279,7 +279,7 @@ __kernel void m06100_m08 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 s_Ch[8][256];
__local u32 s_Cl[8][256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
s_Ch[0][i] = Ch[0][i];
s_Ch[1][i] = Ch[1][i];
@ -371,7 +371,7 @@ __kernel void m06100_m16 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 s_Ch[8][256];
__local u32 s_Cl[8][256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
s_Ch[0][i] = Ch[0][i];
s_Ch[1][i] = Ch[1][i];
@ -463,7 +463,7 @@ __kernel void m06100_s04 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 s_Ch[8][256];
__local u32 s_Cl[8][256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
s_Ch[0][i] = Ch[0][i];
s_Ch[1][i] = Ch[1][i];
@ -555,7 +555,7 @@ __kernel void m06100_s08 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 s_Ch[8][256];
__local u32 s_Cl[8][256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
s_Ch[0][i] = Ch[0][i];
s_Ch[1][i] = Ch[1][i];
@ -647,7 +647,7 @@ __kernel void m06100_s16 (__global pw_t *pws, __global const kernel_rule_t *rule
__local u32 s_Ch[8][256];
__local u32 s_Cl[8][256];
for (u32 i = lid; i < 256; i += lsz)
for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz)
{
s_Ch[0][i] = Ch[0][i];
s_Ch[1][i] = Ch[1][i];

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save