From b1d5f92c2d98a22f5cd156335a5d4ad7b4069db0 Mon Sep 17 00:00:00 2001 From: Jens Steube Date: Fri, 6 Mar 2020 15:48:01 +0100 Subject: [PATCH] Do not use __local keyword in -m 5500 for devices without real shared memory --- OpenCL/m05500_a0-optimized.cl | 22 +++++++++++-- OpenCL/m05500_a0-pure.cl | 22 +++++++++++-- OpenCL/m05500_a1-optimized.cl | 22 +++++++++++-- OpenCL/m05500_a1-pure.cl | 22 +++++++++++-- OpenCL/m05500_a3-optimized.cl | 62 ++++++++++++++++++++++++++++++++--- OpenCL/m05500_a3-pure.cl | 22 +++++++++++-- 6 files changed, 158 insertions(+), 14 deletions(-) diff --git a/OpenCL/m05500_a0-optimized.cl b/OpenCL/m05500_a0-optimized.cl index 6a5c8451c..03dfe4f7a 100644 --- a/OpenCL/m05500_a0-optimized.cl +++ b/OpenCL/m05500_a0-optimized.cl @@ -356,7 +356,7 @@ CONSTANT_VK u32a c_skb[8][64] = #define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf]) #endif -DECLSPEC void _des_crypt_encrypt (u32x *iv, u32x *data, u32x *Kc, u32x *Kd, LOCAL_AS u32 (*s_SPtrans)[64]) +DECLSPEC void _des_crypt_encrypt (u32x *iv, u32x *data, u32x *Kc, u32x *Kd, SHM_TYPE u32 (*s_SPtrans)[64]) { u32x r = data[0]; u32x l = data[1]; @@ -398,7 +398,7 @@ DECLSPEC void _des_crypt_encrypt (u32x *iv, u32x *data, u32x *Kc, u32x *Kd, LOCA iv[1] = r; } -DECLSPEC void _des_crypt_keysetup (u32x c, u32x d, u32x *Kc, u32x *Kd, LOCAL_AS u32 (*s_skb)[64]) +DECLSPEC void _des_crypt_keysetup (u32x c, u32x d, u32x *Kc, u32x *Kd, SHM_TYPE u32 (*s_skb)[64]) { u32x tt; @@ -516,6 +516,8 @@ KERNEL_FQ void m05500_m04 (KERN_ATTR_RULES ()) * sbox, kbox */ + #ifdef REAL_SHM + LOCAL_VK u32 s_SPtrans[8][64]; LOCAL_VK u32 s_skb[8][64]; @@ -542,6 +544,13 @@ KERNEL_FQ void m05500_m04 (KERN_ATTR_RULES ()) SYNC_THREADS (); + #else + + CONSTANT_AS u32a (*s_SPtrans)[64] = c_SPtrans; + CONSTANT_AS u32a (*s_skb)[64] = c_skb; + + #endif + if (gid >= gid_max) return; /** @@ -729,6 +738,8 @@ KERNEL_FQ void m05500_s04 (KERN_ATTR_RULES ()) * sbox, kbox */ + #ifdef REAL_SHM + LOCAL_VK u32 s_SPtrans[8][64]; LOCAL_VK u32 s_skb[8][64]; @@ -755,6 +766,13 @@ KERNEL_FQ void m05500_s04 (KERN_ATTR_RULES ()) SYNC_THREADS (); + #else + + CONSTANT_AS u32a (*s_SPtrans)[64] = c_SPtrans; + CONSTANT_AS u32a (*s_skb)[64] = c_skb; + + #endif + if (gid >= gid_max) return; /** diff --git a/OpenCL/m05500_a0-pure.cl b/OpenCL/m05500_a0-pure.cl index 7681eea15..ac9c30f35 100644 --- a/OpenCL/m05500_a0-pure.cl +++ b/OpenCL/m05500_a0-pure.cl @@ -356,7 +356,7 @@ CONSTANT_VK u32a c_skb[8][64] = #define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf]) #endif -DECLSPEC void _des_crypt_encrypt (u32 *iv, u32 *data, u32 *Kc, u32 *Kd, LOCAL_AS u32 (*s_SPtrans)[64]) +DECLSPEC void _des_crypt_encrypt (u32 *iv, u32 *data, u32 *Kc, u32 *Kd, SHM_TYPE u32 (*s_SPtrans)[64]) { u32 r = data[0]; u32 l = data[1]; @@ -398,7 +398,7 @@ DECLSPEC void _des_crypt_encrypt (u32 *iv, u32 *data, u32 *Kc, u32 *Kd, LOCAL_AS iv[1] = r; } -DECLSPEC void _des_crypt_keysetup (u32 c, u32 d, u32 *Kc, u32 *Kd, LOCAL_AS u32 (*s_skb)[64]) +DECLSPEC void _des_crypt_keysetup (u32 c, u32 d, u32 *Kc, u32 *Kd, SHM_TYPE u32 (*s_skb)[64]) { u32 tt; @@ -516,6 +516,8 @@ KERNEL_FQ void m05500_mxx (KERN_ATTR_RULES ()) * sbox, kbox */ + #ifdef REAL_SHM + LOCAL_VK u32 s_SPtrans[8][64]; LOCAL_VK u32 s_skb[8][64]; @@ -542,6 +544,13 @@ KERNEL_FQ void m05500_mxx (KERN_ATTR_RULES ()) SYNC_THREADS (); + #else + + CONSTANT_AS u32a (*s_SPtrans)[64] = c_SPtrans; + CONSTANT_AS u32a (*s_skb)[64] = c_skb; + + #endif + if (gid >= gid_max) return; /** @@ -640,6 +649,8 @@ KERNEL_FQ void m05500_sxx (KERN_ATTR_RULES ()) * sbox, kbox */ + #ifdef REAL_SHM + LOCAL_VK u32 s_SPtrans[8][64]; LOCAL_VK u32 s_skb[8][64]; @@ -666,6 +677,13 @@ KERNEL_FQ void m05500_sxx (KERN_ATTR_RULES ()) SYNC_THREADS (); + #else + + CONSTANT_AS u32a (*s_SPtrans)[64] = c_SPtrans; + CONSTANT_AS u32a (*s_skb)[64] = c_skb; + + #endif + if (gid >= gid_max) return; /** diff --git a/OpenCL/m05500_a1-optimized.cl b/OpenCL/m05500_a1-optimized.cl index d4ae2526a..39a7ed212 100644 --- a/OpenCL/m05500_a1-optimized.cl +++ b/OpenCL/m05500_a1-optimized.cl @@ -354,7 +354,7 @@ CONSTANT_VK u32a c_skb[8][64] = #define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf]) #endif -DECLSPEC void _des_crypt_encrypt (u32x *iv, u32x *data, u32x *Kc, u32x *Kd, LOCAL_AS u32 (*s_SPtrans)[64]) +DECLSPEC void _des_crypt_encrypt (u32x *iv, u32x *data, u32x *Kc, u32x *Kd, SHM_TYPE u32 (*s_SPtrans)[64]) { u32x r = data[0]; u32x l = data[1]; @@ -396,7 +396,7 @@ DECLSPEC void _des_crypt_encrypt (u32x *iv, u32x *data, u32x *Kc, u32x *Kd, LOCA iv[1] = r; } -DECLSPEC void _des_crypt_keysetup (u32x c, u32x d, u32x *Kc, u32x *Kd, LOCAL_AS u32 (*s_skb)[64]) +DECLSPEC void _des_crypt_keysetup (u32x c, u32x d, u32x *Kc, u32x *Kd, SHM_TYPE u32 (*s_skb)[64]) { u32x tt; @@ -514,6 +514,8 @@ KERNEL_FQ void m05500_m04 (KERN_ATTR_BASIC ()) * sbox, kbox */ + #ifdef REAL_SHM + LOCAL_VK u32 s_SPtrans[8][64]; LOCAL_VK u32 s_skb[8][64]; @@ -540,6 +542,13 @@ KERNEL_FQ void m05500_m04 (KERN_ATTR_BASIC ()) SYNC_THREADS (); + #else + + CONSTANT_AS u32a (*s_SPtrans)[64] = c_SPtrans; + CONSTANT_AS u32a (*s_skb)[64] = c_skb; + + #endif + if (gid >= gid_max) return; /** @@ -780,6 +789,8 @@ KERNEL_FQ void m05500_s04 (KERN_ATTR_BASIC ()) * sbox, kbox */ + #ifdef REAL_SHM + LOCAL_VK u32 s_SPtrans[8][64]; LOCAL_VK u32 s_skb[8][64]; @@ -806,6 +817,13 @@ KERNEL_FQ void m05500_s04 (KERN_ATTR_BASIC ()) SYNC_THREADS (); + #else + + CONSTANT_AS u32a (*s_SPtrans)[64] = c_SPtrans; + CONSTANT_AS u32a (*s_skb)[64] = c_skb; + + #endif + if (gid >= gid_max) return; /** diff --git a/OpenCL/m05500_a1-pure.cl b/OpenCL/m05500_a1-pure.cl index 28b5a627c..c53e12357 100644 --- a/OpenCL/m05500_a1-pure.cl +++ b/OpenCL/m05500_a1-pure.cl @@ -354,7 +354,7 @@ CONSTANT_VK u32a c_skb[8][64] = #define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf]) #endif -DECLSPEC void _des_crypt_encrypt (u32 *iv, u32 *data, u32 *Kc, u32 *Kd, LOCAL_AS u32 (*s_SPtrans)[64]) +DECLSPEC void _des_crypt_encrypt (u32 *iv, u32 *data, u32 *Kc, u32 *Kd, SHM_TYPE u32 (*s_SPtrans)[64]) { u32 r = data[0]; u32 l = data[1]; @@ -396,7 +396,7 @@ DECLSPEC void _des_crypt_encrypt (u32 *iv, u32 *data, u32 *Kc, u32 *Kd, LOCAL_AS iv[1] = r; } -DECLSPEC void _des_crypt_keysetup (u32 c, u32 d, u32 *Kc, u32 *Kd, LOCAL_AS u32 (*s_skb)[64]) +DECLSPEC void _des_crypt_keysetup (u32 c, u32 d, u32 *Kc, u32 *Kd, SHM_TYPE u32 (*s_skb)[64]) { u32 tt; @@ -514,6 +514,8 @@ KERNEL_FQ void m05500_mxx (KERN_ATTR_BASIC ()) * sbox, kbox */ + #ifdef REAL_SHM + LOCAL_VK u32 s_SPtrans[8][64]; LOCAL_VK u32 s_skb[8][64]; @@ -540,6 +542,13 @@ KERNEL_FQ void m05500_mxx (KERN_ATTR_BASIC ()) SYNC_THREADS (); + #else + + CONSTANT_AS u32a (*s_SPtrans)[64] = c_SPtrans; + CONSTANT_AS u32a (*s_skb)[64] = c_skb; + + #endif + if (gid >= gid_max) return; /** @@ -636,6 +645,8 @@ KERNEL_FQ void m05500_sxx (KERN_ATTR_BASIC ()) * sbox, kbox */ + #ifdef REAL_SHM + LOCAL_VK u32 s_SPtrans[8][64]; LOCAL_VK u32 s_skb[8][64]; @@ -662,6 +673,13 @@ KERNEL_FQ void m05500_sxx (KERN_ATTR_BASIC ()) SYNC_THREADS (); + #else + + CONSTANT_AS u32a (*s_SPtrans)[64] = c_SPtrans; + CONSTANT_AS u32a (*s_skb)[64] = c_skb; + + #endif + if (gid >= gid_max) return; /** diff --git a/OpenCL/m05500_a3-optimized.cl b/OpenCL/m05500_a3-optimized.cl index 90a3c3b14..a52b1cc7c 100644 --- a/OpenCL/m05500_a3-optimized.cl +++ b/OpenCL/m05500_a3-optimized.cl @@ -354,7 +354,7 @@ CONSTANT_VK u32a c_skb[8][64] = #define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf]) #endif -DECLSPEC void _des_crypt_encrypt (u32x *iv, u32x *data, u32x *Kc, u32x *Kd, LOCAL_AS u32 (*s_SPtrans)[64]) +DECLSPEC void _des_crypt_encrypt (u32x *iv, u32x *data, u32x *Kc, u32x *Kd, SHM_TYPE u32 (*s_SPtrans)[64]) { u32x r = data[0]; u32x l = data[1]; @@ -396,7 +396,7 @@ DECLSPEC void _des_crypt_encrypt (u32x *iv, u32x *data, u32x *Kc, u32x *Kd, LOCA iv[1] = r; } -DECLSPEC void _des_crypt_keysetup (u32x c, u32x d, u32x *Kc, u32x *Kd, LOCAL_AS u32 (*s_skb)[64]) +DECLSPEC void _des_crypt_keysetup (u32x c, u32x d, u32x *Kc, u32x *Kd, SHM_TYPE u32 (*s_skb)[64]) { u32x tt; @@ -500,7 +500,7 @@ DECLSPEC void transform_netntlmv1_key (const u32x w0, const u32x w1, u32x *out) | ((k[7] & 0xff) << 24); } -DECLSPEC void m05500m (LOCAL_AS u32 (*s_SPtrans)[64], LOCAL_AS u32 (*s_skb)[64], u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) +DECLSPEC void m05500m (SHM_TYPE u32 (*s_SPtrans)[64], SHM_TYPE u32 (*s_skb)[64], u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) { /** * modifier @@ -657,7 +657,7 @@ DECLSPEC void m05500m (LOCAL_AS u32 (*s_SPtrans)[64], LOCAL_AS u32 (*s_skb)[64], } } -DECLSPEC void m05500s (LOCAL_AS u32 (*s_SPtrans)[64], LOCAL_AS u32 (*s_skb)[64], u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) +DECLSPEC void m05500s (SHM_TYPE u32 (*s_SPtrans)[64], SHM_TYPE u32 (*s_skb)[64], u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) { /** * modifier @@ -847,6 +847,8 @@ KERNEL_FQ void m05500_m04 (KERN_ATTR_VECTOR ()) * sbox, kbox */ + #ifdef REAL_SHM + LOCAL_VK u32 s_SPtrans[8][64]; LOCAL_VK u32 s_skb[8][64]; @@ -873,6 +875,13 @@ KERNEL_FQ void m05500_m04 (KERN_ATTR_VECTOR ()) SYNC_THREADS (); + #else + + CONSTANT_AS u32a (*s_SPtrans)[64] = c_SPtrans; + CONSTANT_AS u32a (*s_skb)[64] = c_skb; + + #endif + if (gid >= gid_max) return; /** @@ -921,6 +930,8 @@ KERNEL_FQ void m05500_m08 (KERN_ATTR_VECTOR ()) * sbox, kbox */ + #ifdef REAL_SHM + LOCAL_VK u32 s_SPtrans[8][64]; LOCAL_VK u32 s_skb[8][64]; @@ -947,6 +958,13 @@ KERNEL_FQ void m05500_m08 (KERN_ATTR_VECTOR ()) SYNC_THREADS (); + #else + + CONSTANT_AS u32a (*s_SPtrans)[64] = c_SPtrans; + CONSTANT_AS u32a (*s_skb)[64] = c_skb; + + #endif + if (gid >= gid_max) return; /** @@ -995,6 +1013,8 @@ KERNEL_FQ void m05500_m16 (KERN_ATTR_VECTOR ()) * sbox, kbox */ + #ifdef REAL_SHM + LOCAL_VK u32 s_SPtrans[8][64]; LOCAL_VK u32 s_skb[8][64]; @@ -1021,6 +1041,13 @@ KERNEL_FQ void m05500_m16 (KERN_ATTR_VECTOR ()) SYNC_THREADS (); + #else + + CONSTANT_AS u32a (*s_SPtrans)[64] = c_SPtrans; + CONSTANT_AS u32a (*s_skb)[64] = c_skb; + + #endif + if (gid >= gid_max) return; /** @@ -1069,6 +1096,8 @@ KERNEL_FQ void m05500_s04 (KERN_ATTR_VECTOR ()) * sbox, kbox */ + #ifdef REAL_SHM + LOCAL_VK u32 s_SPtrans[8][64]; LOCAL_VK u32 s_skb[8][64]; @@ -1095,6 +1124,13 @@ KERNEL_FQ void m05500_s04 (KERN_ATTR_VECTOR ()) SYNC_THREADS (); + #else + + CONSTANT_AS u32a (*s_SPtrans)[64] = c_SPtrans; + CONSTANT_AS u32a (*s_skb)[64] = c_skb; + + #endif + if (gid >= gid_max) return; /** @@ -1143,6 +1179,8 @@ KERNEL_FQ void m05500_s08 (KERN_ATTR_VECTOR ()) * sbox, kbox */ + #ifdef REAL_SHM + LOCAL_VK u32 s_SPtrans[8][64]; LOCAL_VK u32 s_skb[8][64]; @@ -1169,6 +1207,13 @@ KERNEL_FQ void m05500_s08 (KERN_ATTR_VECTOR ()) SYNC_THREADS (); + #else + + CONSTANT_AS u32a (*s_SPtrans)[64] = c_SPtrans; + CONSTANT_AS u32a (*s_skb)[64] = c_skb; + + #endif + if (gid >= gid_max) return; /** @@ -1217,6 +1262,8 @@ KERNEL_FQ void m05500_s16 (KERN_ATTR_VECTOR ()) * sbox, kbox */ + #ifdef REAL_SHM + LOCAL_VK u32 s_SPtrans[8][64]; LOCAL_VK u32 s_skb[8][64]; @@ -1243,6 +1290,13 @@ KERNEL_FQ void m05500_s16 (KERN_ATTR_VECTOR ()) SYNC_THREADS (); + #else + + CONSTANT_AS u32a (*s_SPtrans)[64] = c_SPtrans; + CONSTANT_AS u32a (*s_skb)[64] = c_skb; + + #endif + if (gid >= gid_max) return; /** diff --git a/OpenCL/m05500_a3-pure.cl b/OpenCL/m05500_a3-pure.cl index 25e6392fb..e691330cd 100644 --- a/OpenCL/m05500_a3-pure.cl +++ b/OpenCL/m05500_a3-pure.cl @@ -354,7 +354,7 @@ CONSTANT_VK u32a c_skb[8][64] = #define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf]) #endif -DECLSPEC void _des_crypt_encrypt (u32x *iv, u32x *data, u32x *Kc, u32x *Kd, LOCAL_AS u32 (*s_SPtrans)[64]) +DECLSPEC void _des_crypt_encrypt (u32x *iv, u32x *data, u32x *Kc, u32x *Kd, SHM_TYPE u32 (*s_SPtrans)[64]) { u32x r = data[0]; u32x l = data[1]; @@ -396,7 +396,7 @@ DECLSPEC void _des_crypt_encrypt (u32x *iv, u32x *data, u32x *Kc, u32x *Kd, LOCA iv[1] = r; } -DECLSPEC void _des_crypt_keysetup (u32x c, u32x d, u32x *Kc, u32x *Kd, LOCAL_AS u32 (*s_skb)[64]) +DECLSPEC void _des_crypt_keysetup (u32x c, u32x d, u32x *Kc, u32x *Kd, SHM_TYPE u32 (*s_skb)[64]) { u32x tt; @@ -514,6 +514,8 @@ KERNEL_FQ void m05500_mxx (KERN_ATTR_VECTOR ()) * sbox, kbox */ + #ifdef REAL_SHM + LOCAL_VK u32 s_SPtrans[8][64]; LOCAL_VK u32 s_skb[8][64]; @@ -540,6 +542,13 @@ KERNEL_FQ void m05500_mxx (KERN_ATTR_VECTOR ()) SYNC_THREADS (); + #else + + CONSTANT_AS u32a (*s_SPtrans)[64] = c_SPtrans; + CONSTANT_AS u32a (*s_skb)[64] = c_skb; + + #endif + if (gid >= gid_max) return; /** @@ -649,6 +658,8 @@ KERNEL_FQ void m05500_sxx (KERN_ATTR_VECTOR ()) * sbox, kbox */ + #ifdef REAL_SHM + LOCAL_VK u32 s_SPtrans[8][64]; LOCAL_VK u32 s_skb[8][64]; @@ -675,6 +686,13 @@ KERNEL_FQ void m05500_sxx (KERN_ATTR_VECTOR ()) SYNC_THREADS (); + #else + + CONSTANT_AS u32a (*s_SPtrans)[64] = c_SPtrans; + CONSTANT_AS u32a (*s_skb)[64] = c_skb; + + #endif + if (gid >= gid_max) return; /**