From dad05d9f6931523d06cd745feb5d0354cfa0acf0 Mon Sep 17 00:00:00 2001 From: jsteube Date: Thu, 9 Aug 2018 13:03:22 +0200 Subject: [PATCH] Testing: Workaround some AMD OpenCL runtime segmentation faults --- OpenCL/inc_hash_md4.cl | 8 ++++---- OpenCL/inc_hash_md5.cl | 8 ++++---- OpenCL/inc_hash_ripemd160.cl | 8 ++++---- OpenCL/inc_hash_sha1.cl | 8 ++++---- OpenCL/inc_hash_sha224.cl | 8 ++++---- OpenCL/inc_hash_sha256.cl | 8 ++++---- OpenCL/inc_hash_sha384.cl | 8 ++++---- OpenCL/inc_hash_sha512.cl | 8 ++++---- OpenCL/inc_hash_whirlpool.cl | 8 ++++---- OpenCL/inc_vendor.cl | 6 ++++++ OpenCL/m01600-pure.cl | 4 ++-- OpenCL/m06300-pure.cl | 4 ++-- OpenCL/m11600-pure.cl | 6 +++--- docs/changes.txt | 6 ++++++ src/opencl.c | 2 +- 15 files changed, 56 insertions(+), 44 deletions(-) diff --git a/OpenCL/inc_hash_md4.cl b/OpenCL/inc_hash_md4.cl index cbce8c549..bce232d8b 100644 --- a/OpenCL/inc_hash_md4.cl +++ b/OpenCL/inc_hash_md4.cl @@ -110,7 +110,7 @@ DECLSPEC void md4_init (md4_ctx_t *ctx) DECLSPEC void md4_update_64 (md4_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int len) { - const int pos = ctx->len & 63; + MAYBE_VOLATILE const int pos = ctx->len & 63; ctx->len += len; @@ -696,7 +696,7 @@ DECLSPEC void md4_update_global_utf16le_swap (md4_ctx_t *ctx, const __global u32 DECLSPEC void md4_final (md4_ctx_t *ctx) { - const int pos = ctx->len & 63; + MAYBE_VOLATILE const int pos = ctx->len & 63; append_0x80_4x4_S (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos); @@ -1230,7 +1230,7 @@ DECLSPEC void md4_init_vector_from_scalar (md4_ctx_vector_t *ctx, md4_ctx_t *ctx DECLSPEC void md4_update_vector_64 (md4_ctx_vector_t *ctx, u32x *w0, u32x *w1, u32x *w2, u32x *w3, const int len) { - const int pos = ctx->len & 63; + MAYBE_VOLATILE const int pos = ctx->len & 63; ctx->len += len; @@ -1560,7 +1560,7 @@ DECLSPEC void md4_update_vector_utf16le_swap (md4_ctx_vector_t *ctx, const u32x DECLSPEC void md4_final_vector (md4_ctx_vector_t *ctx) { - const int pos = ctx->len & 63; + MAYBE_VOLATILE const int pos = ctx->len & 63; append_0x80_4x4 (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos); diff --git a/OpenCL/inc_hash_md5.cl b/OpenCL/inc_hash_md5.cl index 167c20951..1ba6c0140 100644 --- a/OpenCL/inc_hash_md5.cl +++ b/OpenCL/inc_hash_md5.cl @@ -146,7 +146,7 @@ DECLSPEC void md5_init (md5_ctx_t *ctx) DECLSPEC void md5_update_64 (md5_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int len) { - const int pos = ctx->len & 63; + MAYBE_VOLATILE const int pos = ctx->len & 63; ctx->len += len; @@ -732,7 +732,7 @@ DECLSPEC void md5_update_global_utf16le_swap (md5_ctx_t *ctx, const __global u32 DECLSPEC void md5_final (md5_ctx_t *ctx) { - const int pos = ctx->len & 63; + MAYBE_VOLATILE const int pos = ctx->len & 63; append_0x80_4x4_S (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos); @@ -1302,7 +1302,7 @@ DECLSPEC void md5_init_vector_from_scalar (md5_ctx_vector_t *ctx, md5_ctx_t *ctx DECLSPEC void md5_update_vector_64 (md5_ctx_vector_t *ctx, u32x *w0, u32x *w1, u32x *w2, u32x *w3, const int len) { - const int pos = ctx->len & 63; + MAYBE_VOLATILE const int pos = ctx->len & 63; ctx->len += len; @@ -1632,7 +1632,7 @@ DECLSPEC void md5_update_vector_utf16le_swap (md5_ctx_vector_t *ctx, const u32x DECLSPEC void md5_final_vector (md5_ctx_vector_t *ctx) { - const int pos = ctx->len & 63; + MAYBE_VOLATILE const int pos = ctx->len & 63; append_0x80_4x4 (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos); diff --git a/OpenCL/inc_hash_ripemd160.cl b/OpenCL/inc_hash_ripemd160.cl index b4c9cf907..bc5b8a2a4 100644 --- a/OpenCL/inc_hash_ripemd160.cl +++ b/OpenCL/inc_hash_ripemd160.cl @@ -244,7 +244,7 @@ DECLSPEC void ripemd160_init (ripemd160_ctx_t *ctx) DECLSPEC void ripemd160_update_64 (ripemd160_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int len) { - const int pos = ctx->len & 63; + MAYBE_VOLATILE const int pos = ctx->len & 63; ctx->len += len; @@ -830,7 +830,7 @@ DECLSPEC void ripemd160_update_global_utf16le_swap (ripemd160_ctx_t *ctx, const DECLSPEC void ripemd160_final (ripemd160_ctx_t *ctx) { - const int pos = ctx->len & 63; + MAYBE_VOLATILE const int pos = ctx->len & 63; append_0x80_4x4_S (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos); @@ -1499,7 +1499,7 @@ DECLSPEC void ripemd160_init_vector_from_scalar (ripemd160_ctx_vector_t *ctx, ri DECLSPEC void ripemd160_update_vector_64 (ripemd160_ctx_vector_t *ctx, u32x *w0, u32x *w1, u32x *w2, u32x *w3, const int len) { - const int pos = ctx->len & 63; + MAYBE_VOLATILE const int pos = ctx->len & 63; ctx->len += len; @@ -1829,7 +1829,7 @@ DECLSPEC void ripemd160_update_vector_utf16le_swap (ripemd160_ctx_vector_t *ctx, DECLSPEC void ripemd160_final_vector (ripemd160_ctx_vector_t *ctx) { - const int pos = ctx->len & 63; + MAYBE_VOLATILE const int pos = ctx->len & 63; append_0x80_4x4 (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos); diff --git a/OpenCL/inc_hash_sha1.cl b/OpenCL/inc_hash_sha1.cl index c2664c80f..8c13699d6 100644 --- a/OpenCL/inc_hash_sha1.cl +++ b/OpenCL/inc_hash_sha1.cl @@ -176,7 +176,7 @@ DECLSPEC void sha1_init (sha1_ctx_t *ctx) DECLSPEC void sha1_update_64 (sha1_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int len) { - const int pos = ctx->len & 63; + MAYBE_VOLATILE MAYBE_VOLATILE const int pos = ctx->len & 63; ctx->len += len; @@ -998,7 +998,7 @@ DECLSPEC void sha1_update_global_utf16be_swap (sha1_ctx_t *ctx, const __global u DECLSPEC void sha1_final (sha1_ctx_t *ctx) { - const int pos = ctx->len & 63; + MAYBE_VOLATILE const int pos = ctx->len & 63; append_0x80_4x4_S (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos ^ 3); @@ -1599,7 +1599,7 @@ DECLSPEC void sha1_init_vector_from_scalar (sha1_ctx_vector_t *ctx, sha1_ctx_t * DECLSPEC void sha1_update_vector_64 (sha1_ctx_vector_t *ctx, u32x *w0, u32x *w1, u32x *w2, u32x *w3, const int len) { - const int pos = ctx->len & 63; + MAYBE_VOLATILE const int pos = ctx->len & 63; ctx->len += len; @@ -2013,7 +2013,7 @@ DECLSPEC void sha1_update_vector_utf16beN (sha1_ctx_vector_t *ctx, const u32x *w DECLSPEC void sha1_final_vector (sha1_ctx_vector_t *ctx) { - const int pos = ctx->len & 63; + MAYBE_VOLATILE const int pos = ctx->len & 63; append_0x80_4x4 (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos ^ 3); diff --git a/OpenCL/inc_hash_sha224.cl b/OpenCL/inc_hash_sha224.cl index 8886b9de1..59f3b1512 100644 --- a/OpenCL/inc_hash_sha224.cl +++ b/OpenCL/inc_hash_sha224.cl @@ -161,7 +161,7 @@ DECLSPEC void sha224_init (sha224_ctx_t *ctx) DECLSPEC void sha224_update_64 (sha224_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int len) { - const int pos = ctx->len & 63; + MAYBE_VOLATILE const int pos = ctx->len & 63; ctx->len += len; @@ -747,7 +747,7 @@ DECLSPEC void sha224_update_global_utf16le_swap (sha224_ctx_t *ctx, const __glob DECLSPEC void sha224_final (sha224_ctx_t *ctx) { - const int pos = ctx->len & 63; + MAYBE_VOLATILE const int pos = ctx->len & 63; append_0x80_4x4_S (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos ^ 3); @@ -1316,7 +1316,7 @@ DECLSPEC void sha224_init_vector_from_scalar (sha224_ctx_vector_t *ctx, sha224_c DECLSPEC void sha224_update_vector_64 (sha224_ctx_vector_t *ctx, u32x *w0, u32x *w1, u32x *w2, u32x *w3, const int len) { - const int pos = ctx->len & 63; + MAYBE_VOLATILE const int pos = ctx->len & 63; ctx->len += len; @@ -1688,7 +1688,7 @@ DECLSPEC void sha224_update_vector_utf16beN (sha224_ctx_vector_t *ctx, const u32 DECLSPEC void sha224_final_vector (sha224_ctx_vector_t *ctx) { - const int pos = ctx->len & 63; + MAYBE_VOLATILE const int pos = ctx->len & 63; append_0x80_4x4 (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos ^ 3); diff --git a/OpenCL/inc_hash_sha256.cl b/OpenCL/inc_hash_sha256.cl index 756ae7c22..81154551b 100644 --- a/OpenCL/inc_hash_sha256.cl +++ b/OpenCL/inc_hash_sha256.cl @@ -161,7 +161,7 @@ DECLSPEC void sha256_init (sha256_ctx_t *ctx) DECLSPEC void sha256_update_64 (sha256_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int len) { - const int pos = ctx->len & 63; + MAYBE_VOLATILE const int pos = ctx->len & 63; ctx->len += len; @@ -747,7 +747,7 @@ DECLSPEC void sha256_update_global_utf16le_swap (sha256_ctx_t *ctx, const __glob DECLSPEC void sha256_final (sha256_ctx_t *ctx) { - const int pos = ctx->len & 63; + MAYBE_VOLATILE const int pos = ctx->len & 63; append_0x80_4x4_S (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos ^ 3); @@ -1316,7 +1316,7 @@ DECLSPEC void sha256_init_vector_from_scalar (sha256_ctx_vector_t *ctx, sha256_c DECLSPEC void sha256_update_vector_64 (sha256_ctx_vector_t *ctx, u32x *w0, u32x *w1, u32x *w2, u32x *w3, const int len) { - const int pos = ctx->len & 63; + MAYBE_VOLATILE const int pos = ctx->len & 63; ctx->len += len; @@ -1688,7 +1688,7 @@ DECLSPEC void sha256_update_vector_utf16beN (sha256_ctx_vector_t *ctx, const u32 DECLSPEC void sha256_final_vector (sha256_ctx_vector_t *ctx) { - const int pos = ctx->len & 63; + MAYBE_VOLATILE const int pos = ctx->len & 63; append_0x80_4x4 (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos ^ 3); diff --git a/OpenCL/inc_hash_sha384.cl b/OpenCL/inc_hash_sha384.cl index c9ef4da53..4f0371d8e 100644 --- a/OpenCL/inc_hash_sha384.cl +++ b/OpenCL/inc_hash_sha384.cl @@ -185,7 +185,7 @@ DECLSPEC void sha384_init (sha384_ctx_t *ctx) DECLSPEC void sha384_update_128 (sha384_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *w4, u32 *w5, u32 *w6, u32 *w7, const int len) { - const int pos = ctx->len & 127; + MAYBE_VOLATILE const int pos = ctx->len & 127; ctx->len += len; @@ -1191,7 +1191,7 @@ DECLSPEC void sha384_update_global_utf16le_swap (sha384_ctx_t *ctx, const __glob DECLSPEC void sha384_final (sha384_ctx_t *ctx) { - const int pos = ctx->len & 127; + MAYBE_VOLATILE const int pos = ctx->len & 127; append_0x80_8x4_S (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->w4, ctx->w5, ctx->w6, ctx->w7, pos ^ 3); @@ -2012,7 +2012,7 @@ DECLSPEC void sha384_init_vector_from_scalar (sha384_ctx_vector_t *ctx, sha384_c DECLSPEC void sha384_update_vector_128 (sha384_ctx_vector_t *ctx, u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *w4, u32x *w5, u32x *w6, u32x *w7, const int len) { - const int pos = ctx->len & 127; + MAYBE_VOLATILE const int pos = ctx->len & 127; ctx->len += len; @@ -2644,7 +2644,7 @@ DECLSPEC void sha384_update_vector_utf16beN (sha384_ctx_vector_t *ctx, const u32 DECLSPEC void sha384_final_vector (sha384_ctx_vector_t *ctx) { - const int pos = ctx->len & 127; + MAYBE_VOLATILE const int pos = ctx->len & 127; append_0x80_8x4 (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->w4, ctx->w5, ctx->w6, ctx->w7, pos ^ 3); diff --git a/OpenCL/inc_hash_sha512.cl b/OpenCL/inc_hash_sha512.cl index aa3586498..869a5d998 100644 --- a/OpenCL/inc_hash_sha512.cl +++ b/OpenCL/inc_hash_sha512.cl @@ -185,7 +185,7 @@ DECLSPEC void sha512_init (sha512_ctx_t *ctx) DECLSPEC void sha512_update_128 (sha512_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *w4, u32 *w5, u32 *w6, u32 *w7, const int len) { - const int pos = ctx->len & 127; + MAYBE_VOLATILE const int pos = ctx->len & 127; ctx->len += len; @@ -1191,7 +1191,7 @@ DECLSPEC void sha512_update_global_utf16le_swap (sha512_ctx_t *ctx, const __glob DECLSPEC void sha512_final (sha512_ctx_t *ctx) { - const int pos = ctx->len & 127; + MAYBE_VOLATILE const int pos = ctx->len & 127; append_0x80_8x4_S (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->w4, ctx->w5, ctx->w6, ctx->w7, pos ^ 3); @@ -2012,7 +2012,7 @@ DECLSPEC void sha512_init_vector_from_scalar (sha512_ctx_vector_t *ctx, sha512_c DECLSPEC void sha512_update_vector_128 (sha512_ctx_vector_t *ctx, u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *w4, u32x *w5, u32x *w6, u32x *w7, const int len) { - const int pos = ctx->len & 127; + MAYBE_VOLATILE const int pos = ctx->len & 127; ctx->len += len; @@ -2644,7 +2644,7 @@ DECLSPEC void sha512_update_vector_utf16beN (sha512_ctx_vector_t *ctx, const u32 DECLSPEC void sha512_final_vector (sha512_ctx_vector_t *ctx) { - const int pos = ctx->len & 127; + MAYBE_VOLATILE const int pos = ctx->len & 127; append_0x80_8x4 (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->w4, ctx->w5, ctx->w6, ctx->w7, pos ^ 3); diff --git a/OpenCL/inc_hash_whirlpool.cl b/OpenCL/inc_hash_whirlpool.cl index 2c926f234..bff268064 100644 --- a/OpenCL/inc_hash_whirlpool.cl +++ b/OpenCL/inc_hash_whirlpool.cl @@ -1344,7 +1344,7 @@ DECLSPEC void whirlpool_init (whirlpool_ctx_t *ctx, SHM_TYPE u32 (*s_Ch)[256], S DECLSPEC void whirlpool_update_64 (whirlpool_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int len) { - const int pos = ctx->len & 63; + MAYBE_VOLATILE const int pos = ctx->len & 63; ctx->len += len; @@ -1930,7 +1930,7 @@ DECLSPEC void whirlpool_update_global_utf16le_swap (whirlpool_ctx_t *ctx, const DECLSPEC void whirlpool_final (whirlpool_ctx_t *ctx) { - const int pos = ctx->len & 63; + MAYBE_VOLATILE const int pos = ctx->len & 63; append_0x80_4x4_S (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos ^ 3); @@ -2603,7 +2603,7 @@ DECLSPEC void whirlpool_init_vector_from_scalar (whirlpool_ctx_vector_t *ctx, wh DECLSPEC void whirlpool_update_vector_64 (whirlpool_ctx_vector_t *ctx, u32x *w0, u32x *w1, u32x *w2, u32x *w3, const int len) { - const int pos = ctx->len & 63; + MAYBE_VOLATILE const int pos = ctx->len & 63; ctx->len += len; @@ -2933,7 +2933,7 @@ DECLSPEC void whirlpool_update_vector_utf16le_swap (whirlpool_ctx_vector_t *ctx, DECLSPEC void whirlpool_final_vector (whirlpool_ctx_vector_t *ctx) { - const int pos = ctx->len & 63; + MAYBE_VOLATILE const int pos = ctx->len & 63; append_0x80_4x4 (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos ^ 3); diff --git a/OpenCL/inc_vendor.cl b/OpenCL/inc_vendor.cl index d488f1e8c..da7166d93 100644 --- a/OpenCL/inc_vendor.cl +++ b/OpenCL/inc_vendor.cl @@ -96,6 +96,12 @@ #define DECLSPEC #endif +#if (defined IS_AMD && AMD_GCN < 3) +#define MAYBE_VOLATILE volatile +#else +#define MAYBE_VOLATILE +#endif + /** * AMD specific */ diff --git a/OpenCL/m01600-pure.cl b/OpenCL/m01600-pure.cl index 7dcfe1f26..f56d07f81 100644 --- a/OpenCL/m01600-pure.cl +++ b/OpenCL/m01600-pure.cl @@ -258,8 +258,8 @@ __kernel void m01600_loop (__global pw_t *pws, __global const kernel_rule_t *rul if (j1) { - const u32 off = wpc_len[pc] / 4; - const u32 mod = wpc_len[pc] % 4; + MAYBE_VOLATILE const u32 off = wpc_len[pc] / 4; + MAYBE_VOLATILE const u32 mod = wpc_len[pc] % 4; u32 *ptr = wpc[pc] + off - 4; diff --git a/OpenCL/m06300-pure.cl b/OpenCL/m06300-pure.cl index 7f9f6a56b..14e3baec8 100644 --- a/OpenCL/m06300-pure.cl +++ b/OpenCL/m06300-pure.cl @@ -248,8 +248,8 @@ __kernel void m06300_loop (__global pw_t *pws, __global const kernel_rule_t *rul if (j1) { - const u32 off = wpc_len[pc] / 4; - const u32 mod = wpc_len[pc] % 4; + MAYBE_VOLATILE const u32 off = wpc_len[pc] / 4; + MAYBE_VOLATILE const u32 mod = wpc_len[pc] % 4; u32 *ptr = wpc[pc] + off - 4; diff --git a/OpenCL/m11600-pure.cl b/OpenCL/m11600-pure.cl index 43805f615..8eb9d48d9 100644 --- a/OpenCL/m11600-pure.cl +++ b/OpenCL/m11600-pure.cl @@ -12,10 +12,10 @@ DECLSPEC void memcat8c_be (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 len, const u32 append, u32 *digest) { - const u32 func_len = len & 63; + MAYBE_VOLATILE const u32 func_len = len & 63; - const u32 mod = func_len & 3; - const u32 div = func_len / 4; + MAYBE_VOLATILE const u32 mod = func_len & 3; + MAYBE_VOLATILE const u32 div = func_len / 4; u32 tmp0; u32 tmp1; diff --git a/docs/changes.txt b/docs/changes.txt index 01e58ea97..8e07e1218 100644 --- a/docs/changes.txt +++ b/docs/changes.txt @@ -1,5 +1,11 @@ * changes v4.2.1 -> xxx +## +## Improvements +## + +- Workaround some AMD OpenCL runtime segmentation faults + ## ## Bugs ## diff --git a/src/opencl.c b/src/opencl.c index 6fab11b70..53fcc7bc7 100644 --- a/src/opencl.c +++ b/src/opencl.c @@ -4268,7 +4268,7 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx) device_param->device_available_mem = device_param->device_global_mem - MAX_ALLOC_CHECKS_SIZE; - if (device_param->platform_vendor_id == VENDOR_ID_NV) + if ((device_param->device_type & CL_DEVICE_TYPE_GPU) && ((device_param->platform_vendor_id == VENDOR_ID_NV) || ((device_param->platform_vendor_id == VENDOR_ID_AMD) && (device_param->is_rocm == false)))) { // OK, so the problem here is the following: // There's just CL_DEVICE_GLOBAL_MEM_SIZE to ask OpenCL about the total memory on the device,