From dad05d9f6931523d06cd745feb5d0354cfa0acf0 Mon Sep 17 00:00:00 2001
From: jsteube <jens.steube@gmail.com>
Date: Thu, 9 Aug 2018 13:03:22 +0200
Subject: [PATCH] Testing: Workaround some AMD OpenCL runtime segmentation
 faults

---
 OpenCL/inc_hash_md4.cl       | 8 ++++----
 OpenCL/inc_hash_md5.cl       | 8 ++++----
 OpenCL/inc_hash_ripemd160.cl | 8 ++++----
 OpenCL/inc_hash_sha1.cl      | 8 ++++----
 OpenCL/inc_hash_sha224.cl    | 8 ++++----
 OpenCL/inc_hash_sha256.cl    | 8 ++++----
 OpenCL/inc_hash_sha384.cl    | 8 ++++----
 OpenCL/inc_hash_sha512.cl    | 8 ++++----
 OpenCL/inc_hash_whirlpool.cl | 8 ++++----
 OpenCL/inc_vendor.cl         | 6 ++++++
 OpenCL/m01600-pure.cl        | 4 ++--
 OpenCL/m06300-pure.cl        | 4 ++--
 OpenCL/m11600-pure.cl        | 6 +++---
 docs/changes.txt             | 6 ++++++
 src/opencl.c                 | 2 +-
 15 files changed, 56 insertions(+), 44 deletions(-)

diff --git a/OpenCL/inc_hash_md4.cl b/OpenCL/inc_hash_md4.cl
index cbce8c549..bce232d8b 100644
--- a/OpenCL/inc_hash_md4.cl
+++ b/OpenCL/inc_hash_md4.cl
@@ -110,7 +110,7 @@ DECLSPEC void md4_init (md4_ctx_t *ctx)
 
 DECLSPEC void md4_update_64 (md4_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int len)
 {
-  const int pos = ctx->len & 63;
+  MAYBE_VOLATILE const int pos = ctx->len & 63;
 
   ctx->len += len;
 
@@ -696,7 +696,7 @@ DECLSPEC void md4_update_global_utf16le_swap (md4_ctx_t *ctx, const __global u32
 
 DECLSPEC void md4_final (md4_ctx_t *ctx)
 {
-  const int pos = ctx->len & 63;
+  MAYBE_VOLATILE const int pos = ctx->len & 63;
 
   append_0x80_4x4_S (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos);
 
@@ -1230,7 +1230,7 @@ DECLSPEC void md4_init_vector_from_scalar (md4_ctx_vector_t *ctx, md4_ctx_t *ctx
 
 DECLSPEC void md4_update_vector_64 (md4_ctx_vector_t *ctx, u32x *w0, u32x *w1, u32x *w2, u32x *w3, const int len)
 {
-  const int pos = ctx->len & 63;
+  MAYBE_VOLATILE const int pos = ctx->len & 63;
 
   ctx->len += len;
 
@@ -1560,7 +1560,7 @@ DECLSPEC void md4_update_vector_utf16le_swap (md4_ctx_vector_t *ctx, const u32x
 
 DECLSPEC void md4_final_vector (md4_ctx_vector_t *ctx)
 {
-  const int pos = ctx->len & 63;
+  MAYBE_VOLATILE const int pos = ctx->len & 63;
 
   append_0x80_4x4 (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos);
 
diff --git a/OpenCL/inc_hash_md5.cl b/OpenCL/inc_hash_md5.cl
index 167c20951..1ba6c0140 100644
--- a/OpenCL/inc_hash_md5.cl
+++ b/OpenCL/inc_hash_md5.cl
@@ -146,7 +146,7 @@ DECLSPEC void md5_init (md5_ctx_t *ctx)
 
 DECLSPEC void md5_update_64 (md5_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int len)
 {
-  const int pos = ctx->len & 63;
+  MAYBE_VOLATILE const int pos = ctx->len & 63;
 
   ctx->len += len;
 
@@ -732,7 +732,7 @@ DECLSPEC void md5_update_global_utf16le_swap (md5_ctx_t *ctx, const __global u32
 
 DECLSPEC void md5_final (md5_ctx_t *ctx)
 {
-  const int pos = ctx->len & 63;
+  MAYBE_VOLATILE const int pos = ctx->len & 63;
 
   append_0x80_4x4_S (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos);
 
@@ -1302,7 +1302,7 @@ DECLSPEC void md5_init_vector_from_scalar (md5_ctx_vector_t *ctx, md5_ctx_t *ctx
 
 DECLSPEC void md5_update_vector_64 (md5_ctx_vector_t *ctx, u32x *w0, u32x *w1, u32x *w2, u32x *w3, const int len)
 {
-  const int pos = ctx->len & 63;
+  MAYBE_VOLATILE const int pos = ctx->len & 63;
 
   ctx->len += len;
 
@@ -1632,7 +1632,7 @@ DECLSPEC void md5_update_vector_utf16le_swap (md5_ctx_vector_t *ctx, const u32x
 
 DECLSPEC void md5_final_vector (md5_ctx_vector_t *ctx)
 {
-  const int pos = ctx->len & 63;
+  MAYBE_VOLATILE const int pos = ctx->len & 63;
 
   append_0x80_4x4 (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos);
 
diff --git a/OpenCL/inc_hash_ripemd160.cl b/OpenCL/inc_hash_ripemd160.cl
index b4c9cf907..bc5b8a2a4 100644
--- a/OpenCL/inc_hash_ripemd160.cl
+++ b/OpenCL/inc_hash_ripemd160.cl
@@ -244,7 +244,7 @@ DECLSPEC void ripemd160_init (ripemd160_ctx_t *ctx)
 
 DECLSPEC void ripemd160_update_64 (ripemd160_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int len)
 {
-  const int pos = ctx->len & 63;
+  MAYBE_VOLATILE const int pos = ctx->len & 63;
 
   ctx->len += len;
 
@@ -830,7 +830,7 @@ DECLSPEC void ripemd160_update_global_utf16le_swap (ripemd160_ctx_t *ctx, const
 
 DECLSPEC void ripemd160_final (ripemd160_ctx_t *ctx)
 {
-  const int pos = ctx->len & 63;
+  MAYBE_VOLATILE const int pos = ctx->len & 63;
 
   append_0x80_4x4_S (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos);
 
@@ -1499,7 +1499,7 @@ DECLSPEC void ripemd160_init_vector_from_scalar (ripemd160_ctx_vector_t *ctx, ri
 
 DECLSPEC void ripemd160_update_vector_64 (ripemd160_ctx_vector_t *ctx, u32x *w0, u32x *w1, u32x *w2, u32x *w3, const int len)
 {
-  const int pos = ctx->len & 63;
+  MAYBE_VOLATILE const int pos = ctx->len & 63;
 
   ctx->len += len;
 
@@ -1829,7 +1829,7 @@ DECLSPEC void ripemd160_update_vector_utf16le_swap (ripemd160_ctx_vector_t *ctx,
 
 DECLSPEC void ripemd160_final_vector (ripemd160_ctx_vector_t *ctx)
 {
-  const int pos = ctx->len & 63;
+  MAYBE_VOLATILE const int pos = ctx->len & 63;
 
   append_0x80_4x4 (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos);
 
diff --git a/OpenCL/inc_hash_sha1.cl b/OpenCL/inc_hash_sha1.cl
index c2664c80f..8c13699d6 100644
--- a/OpenCL/inc_hash_sha1.cl
+++ b/OpenCL/inc_hash_sha1.cl
@@ -176,7 +176,7 @@ DECLSPEC void sha1_init (sha1_ctx_t *ctx)
 
 DECLSPEC void sha1_update_64 (sha1_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int len)
 {
-  const int pos = ctx->len & 63;
+  MAYBE_VOLATILE MAYBE_VOLATILE const int pos = ctx->len & 63;
 
   ctx->len += len;
 
@@ -998,7 +998,7 @@ DECLSPEC void sha1_update_global_utf16be_swap (sha1_ctx_t *ctx, const __global u
 
 DECLSPEC void sha1_final (sha1_ctx_t *ctx)
 {
-  const int pos = ctx->len & 63;
+  MAYBE_VOLATILE const int pos = ctx->len & 63;
 
   append_0x80_4x4_S (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos ^ 3);
 
@@ -1599,7 +1599,7 @@ DECLSPEC void sha1_init_vector_from_scalar (sha1_ctx_vector_t *ctx, sha1_ctx_t *
 
 DECLSPEC void sha1_update_vector_64 (sha1_ctx_vector_t *ctx, u32x *w0, u32x *w1, u32x *w2, u32x *w3, const int len)
 {
-  const int pos = ctx->len & 63;
+  MAYBE_VOLATILE const int pos = ctx->len & 63;
 
   ctx->len += len;
 
@@ -2013,7 +2013,7 @@ DECLSPEC void sha1_update_vector_utf16beN (sha1_ctx_vector_t *ctx, const u32x *w
 
 DECLSPEC void sha1_final_vector (sha1_ctx_vector_t *ctx)
 {
-  const int pos = ctx->len & 63;
+  MAYBE_VOLATILE const int pos = ctx->len & 63;
 
   append_0x80_4x4 (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos ^ 3);
 
diff --git a/OpenCL/inc_hash_sha224.cl b/OpenCL/inc_hash_sha224.cl
index 8886b9de1..59f3b1512 100644
--- a/OpenCL/inc_hash_sha224.cl
+++ b/OpenCL/inc_hash_sha224.cl
@@ -161,7 +161,7 @@ DECLSPEC void sha224_init (sha224_ctx_t *ctx)
 
 DECLSPEC void sha224_update_64 (sha224_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int len)
 {
-  const int pos = ctx->len & 63;
+  MAYBE_VOLATILE const int pos = ctx->len & 63;
 
   ctx->len += len;
 
@@ -747,7 +747,7 @@ DECLSPEC void sha224_update_global_utf16le_swap (sha224_ctx_t *ctx, const __glob
 
 DECLSPEC void sha224_final (sha224_ctx_t *ctx)
 {
-  const int pos = ctx->len & 63;
+  MAYBE_VOLATILE const int pos = ctx->len & 63;
 
   append_0x80_4x4_S (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos ^ 3);
 
@@ -1316,7 +1316,7 @@ DECLSPEC void sha224_init_vector_from_scalar (sha224_ctx_vector_t *ctx, sha224_c
 
 DECLSPEC void sha224_update_vector_64 (sha224_ctx_vector_t *ctx, u32x *w0, u32x *w1, u32x *w2, u32x *w3, const int len)
 {
-  const int pos = ctx->len & 63;
+  MAYBE_VOLATILE const int pos = ctx->len & 63;
 
   ctx->len += len;
 
@@ -1688,7 +1688,7 @@ DECLSPEC void sha224_update_vector_utf16beN (sha224_ctx_vector_t *ctx, const u32
 
 DECLSPEC void sha224_final_vector (sha224_ctx_vector_t *ctx)
 {
-  const int pos = ctx->len & 63;
+  MAYBE_VOLATILE const int pos = ctx->len & 63;
 
   append_0x80_4x4 (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos ^ 3);
 
diff --git a/OpenCL/inc_hash_sha256.cl b/OpenCL/inc_hash_sha256.cl
index 756ae7c22..81154551b 100644
--- a/OpenCL/inc_hash_sha256.cl
+++ b/OpenCL/inc_hash_sha256.cl
@@ -161,7 +161,7 @@ DECLSPEC void sha256_init (sha256_ctx_t *ctx)
 
 DECLSPEC void sha256_update_64 (sha256_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int len)
 {
-  const int pos = ctx->len & 63;
+  MAYBE_VOLATILE const int pos = ctx->len & 63;
 
   ctx->len += len;
 
@@ -747,7 +747,7 @@ DECLSPEC void sha256_update_global_utf16le_swap (sha256_ctx_t *ctx, const __glob
 
 DECLSPEC void sha256_final (sha256_ctx_t *ctx)
 {
-  const int pos = ctx->len & 63;
+  MAYBE_VOLATILE const int pos = ctx->len & 63;
 
   append_0x80_4x4_S (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos ^ 3);
 
@@ -1316,7 +1316,7 @@ DECLSPEC void sha256_init_vector_from_scalar (sha256_ctx_vector_t *ctx, sha256_c
 
 DECLSPEC void sha256_update_vector_64 (sha256_ctx_vector_t *ctx, u32x *w0, u32x *w1, u32x *w2, u32x *w3, const int len)
 {
-  const int pos = ctx->len & 63;
+  MAYBE_VOLATILE const int pos = ctx->len & 63;
 
   ctx->len += len;
 
@@ -1688,7 +1688,7 @@ DECLSPEC void sha256_update_vector_utf16beN (sha256_ctx_vector_t *ctx, const u32
 
 DECLSPEC void sha256_final_vector (sha256_ctx_vector_t *ctx)
 {
-  const int pos = ctx->len & 63;
+  MAYBE_VOLATILE const int pos = ctx->len & 63;
 
   append_0x80_4x4 (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos ^ 3);
 
diff --git a/OpenCL/inc_hash_sha384.cl b/OpenCL/inc_hash_sha384.cl
index c9ef4da53..4f0371d8e 100644
--- a/OpenCL/inc_hash_sha384.cl
+++ b/OpenCL/inc_hash_sha384.cl
@@ -185,7 +185,7 @@ DECLSPEC void sha384_init (sha384_ctx_t *ctx)
 
 DECLSPEC void sha384_update_128 (sha384_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *w4, u32 *w5, u32 *w6, u32 *w7, const int len)
 {
-  const int pos = ctx->len & 127;
+  MAYBE_VOLATILE const int pos = ctx->len & 127;
 
   ctx->len += len;
 
@@ -1191,7 +1191,7 @@ DECLSPEC void sha384_update_global_utf16le_swap (sha384_ctx_t *ctx, const __glob
 
 DECLSPEC void sha384_final (sha384_ctx_t *ctx)
 {
-  const int pos = ctx->len & 127;
+  MAYBE_VOLATILE const int pos = ctx->len & 127;
 
   append_0x80_8x4_S (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->w4, ctx->w5, ctx->w6, ctx->w7, pos ^ 3);
 
@@ -2012,7 +2012,7 @@ DECLSPEC void sha384_init_vector_from_scalar (sha384_ctx_vector_t *ctx, sha384_c
 
 DECLSPEC void sha384_update_vector_128 (sha384_ctx_vector_t *ctx, u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *w4, u32x *w5, u32x *w6, u32x *w7, const int len)
 {
-  const int pos = ctx->len & 127;
+  MAYBE_VOLATILE const int pos = ctx->len & 127;
 
   ctx->len += len;
 
@@ -2644,7 +2644,7 @@ DECLSPEC void sha384_update_vector_utf16beN (sha384_ctx_vector_t *ctx, const u32
 
 DECLSPEC void sha384_final_vector (sha384_ctx_vector_t *ctx)
 {
-  const int pos = ctx->len & 127;
+  MAYBE_VOLATILE const int pos = ctx->len & 127;
 
   append_0x80_8x4 (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->w4, ctx->w5, ctx->w6, ctx->w7, pos ^ 3);
 
diff --git a/OpenCL/inc_hash_sha512.cl b/OpenCL/inc_hash_sha512.cl
index aa3586498..869a5d998 100644
--- a/OpenCL/inc_hash_sha512.cl
+++ b/OpenCL/inc_hash_sha512.cl
@@ -185,7 +185,7 @@ DECLSPEC void sha512_init (sha512_ctx_t *ctx)
 
 DECLSPEC void sha512_update_128 (sha512_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *w4, u32 *w5, u32 *w6, u32 *w7, const int len)
 {
-  const int pos = ctx->len & 127;
+  MAYBE_VOLATILE const int pos = ctx->len & 127;
 
   ctx->len += len;
 
@@ -1191,7 +1191,7 @@ DECLSPEC void sha512_update_global_utf16le_swap (sha512_ctx_t *ctx, const __glob
 
 DECLSPEC void sha512_final (sha512_ctx_t *ctx)
 {
-  const int pos = ctx->len & 127;
+  MAYBE_VOLATILE const int pos = ctx->len & 127;
 
   append_0x80_8x4_S (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->w4, ctx->w5, ctx->w6, ctx->w7, pos ^ 3);
 
@@ -2012,7 +2012,7 @@ DECLSPEC void sha512_init_vector_from_scalar (sha512_ctx_vector_t *ctx, sha512_c
 
 DECLSPEC void sha512_update_vector_128 (sha512_ctx_vector_t *ctx, u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *w4, u32x *w5, u32x *w6, u32x *w7, const int len)
 {
-  const int pos = ctx->len & 127;
+  MAYBE_VOLATILE const int pos = ctx->len & 127;
 
   ctx->len += len;
 
@@ -2644,7 +2644,7 @@ DECLSPEC void sha512_update_vector_utf16beN (sha512_ctx_vector_t *ctx, const u32
 
 DECLSPEC void sha512_final_vector (sha512_ctx_vector_t *ctx)
 {
-  const int pos = ctx->len & 127;
+  MAYBE_VOLATILE const int pos = ctx->len & 127;
 
   append_0x80_8x4 (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->w4, ctx->w5, ctx->w6, ctx->w7, pos ^ 3);
 
diff --git a/OpenCL/inc_hash_whirlpool.cl b/OpenCL/inc_hash_whirlpool.cl
index 2c926f234..bff268064 100644
--- a/OpenCL/inc_hash_whirlpool.cl
+++ b/OpenCL/inc_hash_whirlpool.cl
@@ -1344,7 +1344,7 @@ DECLSPEC void whirlpool_init (whirlpool_ctx_t *ctx, SHM_TYPE u32 (*s_Ch)[256], S
 
 DECLSPEC void whirlpool_update_64 (whirlpool_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int len)
 {
-  const int pos = ctx->len & 63;
+  MAYBE_VOLATILE const int pos = ctx->len & 63;
 
   ctx->len += len;
 
@@ -1930,7 +1930,7 @@ DECLSPEC void whirlpool_update_global_utf16le_swap (whirlpool_ctx_t *ctx, const
 
 DECLSPEC void whirlpool_final (whirlpool_ctx_t *ctx)
 {
-  const int pos = ctx->len & 63;
+  MAYBE_VOLATILE const int pos = ctx->len & 63;
 
   append_0x80_4x4_S (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos ^ 3);
 
@@ -2603,7 +2603,7 @@ DECLSPEC void whirlpool_init_vector_from_scalar (whirlpool_ctx_vector_t *ctx, wh
 
 DECLSPEC void whirlpool_update_vector_64 (whirlpool_ctx_vector_t *ctx, u32x *w0, u32x *w1, u32x *w2, u32x *w3, const int len)
 {
-  const int pos = ctx->len & 63;
+  MAYBE_VOLATILE const int pos = ctx->len & 63;
 
   ctx->len += len;
 
@@ -2933,7 +2933,7 @@ DECLSPEC void whirlpool_update_vector_utf16le_swap (whirlpool_ctx_vector_t *ctx,
 
 DECLSPEC void whirlpool_final_vector (whirlpool_ctx_vector_t *ctx)
 {
-  const int pos = ctx->len & 63;
+  MAYBE_VOLATILE const int pos = ctx->len & 63;
 
   append_0x80_4x4 (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos ^ 3);
 
diff --git a/OpenCL/inc_vendor.cl b/OpenCL/inc_vendor.cl
index d488f1e8c..da7166d93 100644
--- a/OpenCL/inc_vendor.cl
+++ b/OpenCL/inc_vendor.cl
@@ -96,6 +96,12 @@
 #define DECLSPEC
 #endif
 
+#if (defined IS_AMD && AMD_GCN < 3)
+#define MAYBE_VOLATILE volatile
+#else
+#define MAYBE_VOLATILE
+#endif
+
 /**
  * AMD specific
  */
diff --git a/OpenCL/m01600-pure.cl b/OpenCL/m01600-pure.cl
index 7dcfe1f26..f56d07f81 100644
--- a/OpenCL/m01600-pure.cl
+++ b/OpenCL/m01600-pure.cl
@@ -258,8 +258,8 @@ __kernel void m01600_loop (__global pw_t *pws, __global const kernel_rule_t *rul
 
     if (j1)
     {
-      const u32 off = wpc_len[pc] / 4;
-      const u32 mod = wpc_len[pc] % 4;
+      MAYBE_VOLATILE const u32 off = wpc_len[pc] / 4;
+      MAYBE_VOLATILE const u32 mod = wpc_len[pc] % 4;
 
       u32 *ptr = wpc[pc] + off - 4;
 
diff --git a/OpenCL/m06300-pure.cl b/OpenCL/m06300-pure.cl
index 7f9f6a56b..14e3baec8 100644
--- a/OpenCL/m06300-pure.cl
+++ b/OpenCL/m06300-pure.cl
@@ -248,8 +248,8 @@ __kernel void m06300_loop (__global pw_t *pws, __global const kernel_rule_t *rul
 
     if (j1)
     {
-      const u32 off = wpc_len[pc] / 4;
-      const u32 mod = wpc_len[pc] % 4;
+      MAYBE_VOLATILE const u32 off = wpc_len[pc] / 4;
+      MAYBE_VOLATILE const u32 mod = wpc_len[pc] % 4;
 
       u32 *ptr = wpc[pc] + off - 4;
 
diff --git a/OpenCL/m11600-pure.cl b/OpenCL/m11600-pure.cl
index 43805f615..8eb9d48d9 100644
--- a/OpenCL/m11600-pure.cl
+++ b/OpenCL/m11600-pure.cl
@@ -12,10 +12,10 @@
 
 DECLSPEC void memcat8c_be (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 len, const u32 append, u32 *digest)
 {
-  const u32 func_len = len & 63;
+  MAYBE_VOLATILE const u32 func_len = len & 63;
 
-  const u32 mod = func_len & 3;
-  const u32 div = func_len / 4;
+  MAYBE_VOLATILE const u32 mod = func_len & 3;
+  MAYBE_VOLATILE const u32 div = func_len / 4;
 
   u32 tmp0;
   u32 tmp1;
diff --git a/docs/changes.txt b/docs/changes.txt
index 01e58ea97..8e07e1218 100644
--- a/docs/changes.txt
+++ b/docs/changes.txt
@@ -1,5 +1,11 @@
 * changes v4.2.1 -> xxx
 
+##
+## Improvements
+##
+
+- Workaround some AMD OpenCL runtime segmentation faults
+
 ##
 ## Bugs
 ##
diff --git a/src/opencl.c b/src/opencl.c
index 6fab11b70..53fcc7bc7 100644
--- a/src/opencl.c
+++ b/src/opencl.c
@@ -4268,7 +4268,7 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx)
 
     device_param->device_available_mem = device_param->device_global_mem - MAX_ALLOC_CHECKS_SIZE;
 
-    if (device_param->platform_vendor_id == VENDOR_ID_NV)
+    if ((device_param->device_type & CL_DEVICE_TYPE_GPU) && ((device_param->platform_vendor_id == VENDOR_ID_NV) || ((device_param->platform_vendor_id == VENDOR_ID_AMD) && (device_param->is_rocm == false))))
     {
       // OK, so the problem here is the following:
       // There's just CL_DEVICE_GLOBAL_MEM_SIZE to ask OpenCL about the total memory on the device,