From a7dbd73612dea22e657202c94918733da3e2b5bc Mon Sep 17 00:00:00 2001 From: Jens Steube Date: Sat, 26 Nov 2022 13:32:33 +0100 Subject: [PATCH] Add Radeon Pro W5700X to more scrypt based hash-modes and fix some metal issues --- OpenCL/inc_zip_inflate.cl | 85 ++++++++++++++++++++----------------- OpenCL/m21800-pure.cl | 8 ++-- OpenCL/m22700-pure.cl | 14 +++--- OpenCL/m27700-pure.cl | 14 +++--- OpenCL/m29800-pure.cl | 14 +++--- src/Makefile | 2 +- src/backend.c | 4 ++ src/ext_metal.m | 4 ++ src/modules/module_09300.c | 16 +------ src/modules/module_15700.c | 13 +----- src/modules/module_21800.c | 23 +++++++++- src/modules/module_29800.c | 13 +----- tunings/Module_09300.hctune | 1 + tunings/Module_15700.hctune | 1 + tunings/Module_22700.hctune | 1 + tunings/Module_27700.hctune | 1 + tunings/Module_28200.hctune | 1 + tunings/Module_29800.hctune | 1 + 18 files changed, 110 insertions(+), 106 deletions(-) diff --git a/OpenCL/inc_zip_inflate.cl b/OpenCL/inc_zip_inflate.cl index 1f078f396..ec2268935 100644 --- a/OpenCL/inc_zip_inflate.cl +++ b/OpenCL/inc_zip_inflate.cl @@ -82,9 +82,9 @@ typedef Byte Bytef; typedef uInt uIntf; typedef char charf; typedef int intf; -typedef void *voidpf; -typedef void *voidp; -typedef void *const voidpc; +typedef PRIVATE_AS void *voidpf; +typedef PRIVATE_AS void *voidp; +typedef PRIVATE_AS void *const voidpc; #define Z_NULL 0 #define Z_NO_FLUSH MZ_NO_FLUSH #define Z_PARTIAL_FLUSH MZ_PARTIAL_FLUSH @@ -178,10 +178,10 @@ typedef int mz_bool; typedef mz_uint64 tinfl_bit_buf_t; -DECLSPEC void zlib_memcpy (void *dest, const void *src, u32 n) +DECLSPEC void zlib_memcpy (PRIVATE_AS void *dest, PRIVATE_AS const void *src, int n) { - char *csrc = (char *) src; - char *cdest = (char *) dest; + PRIVATE_AS char *csrc = (PRIVATE_AS char *) src; + PRIVATE_AS char *cdest = (PRIVATE_AS char *) dest; for (int i = 0; i < n; i++) { @@ -189,9 +189,9 @@ DECLSPEC void zlib_memcpy (void *dest, const void *src, u32 n) } } -DECLSPEC void zlib_memset (u8 *s, const u8 c, u32 len) +DECLSPEC void zlib_memset (PRIVATE_AS u8 *s, const u8 c, int len) { - u8 *dst = s; + PRIVATE_AS u8 *dst = s; while (len > 0) { @@ -213,7 +213,7 @@ DECLSPEC void zlib_memset (u8 *s, const u8 c, u32 len) #define MAYBE_GLOBAL GLOBAL_AS #else #define M_DICT_SIZE TINFL_LZ_DICT_SIZE -#define MAYBE_GLOBAL +#define MAYBE_GLOBAL PRIVATE_AS #endif #define TINFL_CR_FINISH } @@ -431,14 +431,14 @@ typedef struct mz_stream_s unsigned int avail_in; /* number of bytes available at next_in */ mz_ulong total_in; /* total number of bytes consumed so far */ - unsigned char *next_out; /* pointer to next byte to write */ + PRIVATE_AS unsigned char *next_out; /* pointer to next byte to write */ unsigned int avail_out; /* number of bytes that can be written to next_out */ mz_ulong total_out; /* total number of bytes produced so far */ - char *msg; /* error msg (unused) */ - inflate_state *state; /* internal state, allocated by zalloc/zfree */ + PRIVATE_AS char *msg; /* error msg (unused) */ + PRIVATE_AS inflate_state *state; /* internal state, allocated by zalloc/zfree */ - void *opaque; /* heap alloc function user pointer */ + PRIVATE_AS void *opaque; /* heap alloc function user pointer */ int data_type; /* data_type (unused) */ mz_ulong adler; /* adler32 of the source or uncompressed data */ @@ -454,17 +454,17 @@ typedef struct mz_stream_s } mz_stream; -typedef mz_stream *mz_streamp; +typedef PRIVATE_AS mz_stream *mz_streamp; // hashcat-patched: not needed functions: // void miniz_def_free_func(void *opaque, void *address); // void *miniz_def_alloc_func(void *opaque, size_t items, size_t size); -DECLSPEC int mz_inflate(mz_streamp pStream, int flush); -DECLSPEC int mz_inflateEnd(mz_streamp pStream); +DECLSPEC int mz_inflate (mz_streamp pStream, int flush); +DECLSPEC int mz_inflateEnd (mz_streamp pStream); -DECLSPEC int mz_inflateInit2(mz_streamp pStream, int window_bits, inflate_state*); +DECLSPEC int mz_inflateInit2 (mz_streamp pStream, int window_bits, PRIVATE_AS inflate_state *); // hashcat-patched/hashcat-specific: DECLSPEC mz_uint8 pIn_xor_byte (const mz_uint8 c, mz_streamp pStream) @@ -485,15 +485,19 @@ DECLSPEC mz_uint8 pIn_xor_byte (const mz_uint8 c, mz_streamp pStream) } -DECLSPEC void zlib_memcpy_g(void *dest, MAYBE_GLOBAL const void *src, size_t n, mz_streamp pStream){ - MAYBE_GLOBAL char *csrc = (MAYBE_GLOBAL char *)src; - char *cdest = (char *)dest; - for (int i=0; im_type == 1) { - mz_uint8 *p = r->m_tables[0].m_code_size; + PRIVATE_AS mz_uint8 *p = r->m_tables[0].m_code_size; mz_uint i; r->m_table_sizes[0] = 288; r->m_table_sizes[1] = 32; @@ -629,13 +634,13 @@ DECLSPEC tinfl_status tinfl_decompress(tinfl_decompressor *r, MAYBE_GLOBAL const for (; (int)r->m_type >= 0; r->m_type--) { int tree_next, tree_cur; - tinfl_huff_table *pTable; + PRIVATE_AS tinfl_huff_table *pTable; mz_uint i, j, used_syms, total, sym_index, next_code[17], total_syms[16]; pTable = &r->m_tables[r->m_type]; - zlib_memset((u8 *) total_syms, 0, 64); - zlib_memset((u8 *) pTable->m_look_up, 0, TINFL_FAST_LOOKUP_SIZE * 2); - zlib_memset((u8 *) pTable->m_tree, 0, TINFL_MAX_HUFF_SYMBOLS_0 * 2 * 2); + zlib_memset ((PRIVATE_AS u8 *) total_syms, 0, 64); + zlib_memset ((PRIVATE_AS u8 *) pTable->m_look_up, 0, TINFL_FAST_LOOKUP_SIZE * 2); + zlib_memset ((PRIVATE_AS u8 *) pTable->m_tree, 0, TINFL_MAX_HUFF_SYMBOLS_0 * 2 * 2); for (i = 0; i < r->m_table_sizes[r->m_type]; ++i) total_syms[pTable->m_code_size[i]]++; @@ -724,7 +729,7 @@ DECLSPEC tinfl_status tinfl_decompress(tinfl_decompressor *r, MAYBE_GLOBAL const } for (;;) { - mz_uint8 *pSrc; + PRIVATE_AS mz_uint8 *pSrc; for (;;) { if (((pIn_buf_end - pIn_buf_cur) < 4) || ((pOut_buf_end - pOut_buf_cur) < 2)) @@ -839,7 +844,7 @@ DECLSPEC tinfl_status tinfl_decompress(tinfl_decompressor *r, MAYBE_GLOBAL const } else if ((counter >= 9) && (counter <= dist)) { - const mz_uint8 *pSrc_end = pSrc + (counter & ~7); + PRIVATE_AS const mz_uint8 *pSrc_end = pSrc + (counter & ~7); do { //((mz_uint32 *)pOut_buf_cur)[0] = ((const mz_uint32 *)pSrc)[0]; @@ -932,8 +937,8 @@ common_exit: *pOut_buf_size = pOut_buf_cur - pOut_buf_next; if ((decomp_flags & (TINFL_FLAG_PARSE_ZLIB_HEADER | TINFL_FLAG_COMPUTE_ADLER32)) && (status >= 0)) { - const mz_uint8 *ptr = pOut_buf_next; - size_t buf_len = *pOut_buf_size; + PRIVATE_AS const mz_uint8 *ptr = pOut_buf_next; + PRIVATE_AS size_t buf_len = *pOut_buf_size; mz_uint32 i, s1 = r->m_check_adler32 & 0xffff, s2 = r->m_check_adler32 >> 16; size_t block_len = buf_len % 5552; while (buf_len) @@ -963,7 +968,7 @@ common_exit: } -DECLSPEC int mz_inflateInit2(mz_streamp pStream, int window_bits, inflate_state *pDecomp) +DECLSPEC int mz_inflateInit2 (mz_streamp pStream, int window_bits, PRIVATE_AS inflate_state *pDecomp) { if (pStream == 0) return MZ_STREAM_ERROR; @@ -978,7 +983,7 @@ DECLSPEC int mz_inflateInit2(mz_streamp pStream, int window_bits, inflate_state pStream->reserved = 0; //pStream->state = (struct mz_internal_state *)pDecomp; - pStream->state = (inflate_state *) pDecomp; + pStream->state = (PRIVATE_AS inflate_state *) pDecomp; tinfl_init(&pDecomp->m_decomp); pDecomp->m_dict_ofs = 0; @@ -993,7 +998,7 @@ DECLSPEC int mz_inflateInit2(mz_streamp pStream, int window_bits, inflate_state DECLSPEC int mz_inflate(mz_streamp pStream, int flush) { - inflate_state *pState; + PRIVATE_AS inflate_state *pState; mz_uint n, first_call, decomp_flags = TINFL_FLAG_COMPUTE_ADLER32; size_t in_bytes, out_bytes, orig_avail_in; tinfl_status status; @@ -1005,7 +1010,7 @@ DECLSPEC int mz_inflate(mz_streamp pStream, int flush) if ((flush) && (flush != MZ_SYNC_FLUSH) && (flush != MZ_FINISH)) return MZ_STREAM_ERROR; - pState = (inflate_state *)pStream->state; + pState = (PRIVATE_AS inflate_state *)pStream->state; if (pState->m_window_bits > 0) decomp_flags |= TINFL_FLAG_PARSE_ZLIB_HEADER; orig_avail_in = pStream->avail_in; @@ -1130,7 +1135,7 @@ DECLSPEC int mz_inflate(mz_streamp pStream, int flush) // hashcat-patched: helper function for shifted u32 -DECLSPEC u32 GETSHIFTEDINT (u32 *a, const int n) +DECLSPEC u32 GETSHIFTEDINT (PRIVATE_AS u32 *a, const int n) { const int d = n / 4; const int m = n & 3; @@ -1144,9 +1149,9 @@ DECLSPEC u32 GETSHIFTEDINT (u32 *a, const int n) // hashcat-patched: faster zlib_memcpy for our large (TINFL_LZ_DICT_SIZE) move of bytes from the old output to the window/lookup table -DECLSPEC void hc_shift_inflate_dict (u8 *buf, const u32 offset, const u32 len) +DECLSPEC void hc_shift_inflate_dict (PRIVATE_AS u8 *buf, const u32 offset, const u32 len) { - u32 *ptr = (u32 *) buf; + PRIVATE_AS u32 *ptr = (PRIVATE_AS u32 *) buf; // we need to use len - 4 here to avoid buffer overflows caused by the u64 type in GETSHIFTEDINT @@ -1177,7 +1182,7 @@ DECLSPEC int hc_inflate (mz_streamp pStream) decomp_flags |= TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF; - inflate_state *pState = pStream->state; + PRIVATE_AS inflate_state *pState = pStream->state; size_t out_bytes = pStream->avail_out; diff --git a/OpenCL/m21800-pure.cl b/OpenCL/m21800-pure.cl index 1c24f7a01..9317eb19f 100644 --- a/OpenCL/m21800-pure.cl +++ b/OpenCL/m21800-pure.cl @@ -574,7 +574,7 @@ KERNEL_FQ void m21800_comp (KERN_ATTR_TMPS_ESALT (electrum_tmp_t, electrum_t)) // input: infstream.avail_in = AES_LEN; - infstream.next_in = (u8 *) buf_full; + infstream.next_in = (PRIVATE_AS u8 *) buf_full; // output: @@ -599,7 +599,7 @@ KERNEL_FQ void m21800_comp (KERN_ATTR_TMPS_ESALT (electrum_tmp_t, electrum_t)) return; } - for (int i = 1; i < infstream.total_out; i++) + for (mz_ulong i = 1; i < infstream.total_out; i++) { if (tmp[i] == '\t') continue; if (tmp[i] == '\r') continue; @@ -638,7 +638,7 @@ KERNEL_FQ void m21800_comp (KERN_ATTR_TMPS_ESALT (electrum_tmp_t, electrum_t)) int qcnt2 = 0; int ccnt2 = 0; - for (int i = 1; i < infstream.total_out; i++) + for (mz_ulong i = 1; i < infstream.total_out; i++) { if (tmp[i] == '"') qcnt2++; if (tmp[i] == ':') ccnt2++; @@ -646,7 +646,7 @@ KERNEL_FQ void m21800_comp (KERN_ATTR_TMPS_ESALT (electrum_tmp_t, electrum_t)) if ((qcnt1 >= 1) && (ccnt1 >= 1) && (qcnt2 >= 4) && (ccnt2 >= 3)) { - const float entropy = hc_get_entropy ((const u32 *) tmp, infstream.total_out / 4); + const float entropy = hc_get_entropy ((PRIVATE_AS const u32 *) tmp, infstream.total_out / 4); if ((entropy >= MIN_ENTROPY) && (entropy <= MAX_ENTROPY)) { diff --git a/OpenCL/m22700-pure.cl b/OpenCL/m22700-pure.cl index 7b4ac655b..7d65fd52e 100644 --- a/OpenCL/m22700-pure.cl +++ b/OpenCL/m22700-pure.cl @@ -346,15 +346,15 @@ KERNEL_FQ void m22700_init (KERN_ATTR_TMPS (scrypt_tmp_t)) sha256_hmac_init_swap (&sha256_hmac_ctx, w, w_len); - u32 s0[4] = { 0 }; - u32 s1[4] = { 0 }; - u32 s2[4] = { 0 }; - u32 s3[4] = { 0 }; + u32 x0[4] = { 0 }; + u32 x1[4] = { 0 }; + u32 x2[4] = { 0 }; + u32 x3[4] = { 0 }; - s0[0] = MULTIBIT_S0; - s0[1] = MULTIBIT_S1; + x0[0] = MULTIBIT_S0; + x0[1] = MULTIBIT_S1; - sha256_hmac_update_64 (&sha256_hmac_ctx, s0, s1, s2, s3, 8); + sha256_hmac_update_64 (&sha256_hmac_ctx, x0, x1, x2, x3, 8); for (u32 i = 0, j = 1, k = 0; i < SCRYPT_CNT; i += 8, j += 1, k += 2) { diff --git a/OpenCL/m27700-pure.cl b/OpenCL/m27700-pure.cl index 6997647b2..feaca11de 100644 --- a/OpenCL/m27700-pure.cl +++ b/OpenCL/m27700-pure.cl @@ -297,15 +297,15 @@ KERNEL_FQ void m27700_init (KERN_ATTR_TMPS (scrypt_tmp_t)) sha256_hmac_init_swap (&sha256_hmac_ctx, w, w_len); - u32 s0[4] = { 0 }; - u32 s1[4] = { 0 }; - u32 s2[4] = { 0 }; - u32 s3[4] = { 0 }; + u32 x0[4] = { 0 }; + u32 x1[4] = { 0 }; + u32 x2[4] = { 0 }; + u32 x3[4] = { 0 }; - s0[0] = salt_bufs[SALT_POS_HOST].salt_buf[0]; - s0[1] = salt_bufs[SALT_POS_HOST].salt_buf[1]; + x0[0] = salt_bufs[SALT_POS_HOST].salt_buf[0]; + x0[1] = salt_bufs[SALT_POS_HOST].salt_buf[1]; - sha256_hmac_update_64 (&sha256_hmac_ctx, s0, s1, s2, s3, 8); + sha256_hmac_update_64 (&sha256_hmac_ctx, x0, x1, x2, x3, 8); for (u32 i = 0, j = 1, k = 0; i < SCRYPT_CNT; i += 8, j += 1, k += 2) { diff --git a/OpenCL/m29800-pure.cl b/OpenCL/m29800-pure.cl index 6ef297c50..de4260279 100644 --- a/OpenCL/m29800-pure.cl +++ b/OpenCL/m29800-pure.cl @@ -297,15 +297,15 @@ KERNEL_FQ void m29800_init (KERN_ATTR_TMPS (scrypt_tmp_t)) sha256_hmac_init_swap (&sha256_hmac_ctx, w, w_len); - u32 s0[4] = { 0 }; - u32 s1[4] = { 0 }; - u32 s2[4] = { 0 }; - u32 s3[4] = { 0 }; + u32 x0[4] = { 0 }; + u32 x1[4] = { 0 }; + u32 x2[4] = { 0 }; + u32 x3[4] = { 0 }; - s0[0] = salt_bufs[SALT_POS_HOST].salt_buf[0]; - s0[1] = salt_bufs[SALT_POS_HOST].salt_buf[1]; + x0[0] = salt_bufs[SALT_POS_HOST].salt_buf[0]; + x0[1] = salt_bufs[SALT_POS_HOST].salt_buf[1]; - sha256_hmac_update_64 (&sha256_hmac_ctx, s0, s1, s2, s3, 8); + sha256_hmac_update_64 (&sha256_hmac_ctx, x0, x1, x2, x3, 8); for (u32 i = 0, j = 1, k = 0; i < SCRYPT_CNT; i += 8, j += 1, k += 2) { diff --git a/src/Makefile b/src/Makefile index 27fc06c95..d91a2ede0 100644 --- a/src/Makefile +++ b/src/Makefile @@ -232,7 +232,7 @@ CFLAGS_UNRAR += -Wno-unused-but-set-variable #Not supported on macOS 12.3 #CFLAGS_UNRAR += -Wno-format-overflow #Added hashcat 7.0.0 -CFLAGS_UNRAR += -Wno-class-memaccess +#CFLAGS_UNRAR += -Wno-class-memaccess CFLAGS_UNRAR += -Wno-misleading-indentation endif endif diff --git a/src/backend.c b/src/backend.c index 8ca07db07..1b05d8934 100644 --- a/src/backend.c +++ b/src/backend.c @@ -5832,17 +5832,21 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) int mtl_major = 0; int mtl_minor = 0; + /* unused and deprecated if (hc_mtlDeviceGetAttribute (hashcat_ctx, &mtl_major, MTL_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, metal_device) == -1) { device_param->skipped = true; continue; } + */ + /* unused and deprecated if (hc_mtlDeviceGetAttribute (hashcat_ctx, &mtl_minor, MTL_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, metal_device) == -1) { device_param->skipped = true; continue; } + */ device_param->mtl_major = mtl_major; device_param->mtl_minor = mtl_minor; diff --git a/src/ext_metal.m b/src/ext_metal.m index c7daa57ba..c894941c3 100644 --- a/src/ext_metal.m +++ b/src/ext_metal.m @@ -410,6 +410,7 @@ int hc_mtlDeviceGetAttribute (void *hashcat_ctx, int *pi, metalDeviceAttribute_t *pi = 32; break; + /* unused and deprecated case MTL_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR: *pi = 0; @@ -426,7 +427,9 @@ int hc_mtlDeviceGetAttribute (void *hashcat_ctx, int *pi, metalDeviceAttribute_t } break; + */ + /* unused and deprecated case MTL_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR: *pi = 0; @@ -443,6 +446,7 @@ int hc_mtlDeviceGetAttribute (void *hashcat_ctx, int *pi, metalDeviceAttribute_t } break; + */ case MTL_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK: // M1 max is 1024 diff --git a/src/modules/module_09300.c b/src/modules/module_09300.c index 4daaf30f1..f41b21453 100644 --- a/src/modules/module_09300.c +++ b/src/modules/module_09300.c @@ -52,20 +52,6 @@ static const u64 SCRYPT_N = 16384; static const u64 SCRYPT_R = 1; static const u64 SCRYPT_P = 1; -bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hc_device_param_t *device_param) -{ - // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 - if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) - { - if (device_param->is_metal == false) - { - return true; - } - } - - return false; -} - u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { const u32 kernel_loops_min = 1024; @@ -424,6 +410,6 @@ void module_init (module_ctx_t *module_ctx) module_ctx->module_st_hash = module_st_hash; module_ctx->module_st_pass = module_st_pass; module_ctx->module_tmp_size = module_tmp_size; - module_ctx->module_unstable_warning = module_unstable_warning; + module_ctx->module_unstable_warning = MODULE_DEFAULT; module_ctx->module_warmup_disable = module_warmup_disable; } diff --git a/src/modules/module_15700.c b/src/modules/module_15700.c index d30bb6716..5743dea70 100644 --- a/src/modules/module_15700.c +++ b/src/modules/module_15700.c @@ -59,17 +59,6 @@ static const u64 SCRYPT_N = 262144; static const u64 SCRYPT_R = 8; static const u64 SCRYPT_P = 1; -bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hc_device_param_t *device_param) -{ - // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 - if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) - { - return true; - } - - return false; -} - u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { const u32 kernel_loops_min = 1024; @@ -530,6 +519,6 @@ void module_init (module_ctx_t *module_ctx) module_ctx->module_st_hash = module_st_hash; module_ctx->module_st_pass = module_st_pass; module_ctx->module_tmp_size = module_tmp_size; - module_ctx->module_unstable_warning = module_unstable_warning; + module_ctx->module_unstable_warning = MODULE_DEFAULT; module_ctx->module_warmup_disable = module_warmup_disable; } diff --git a/src/modules/module_21800.c b/src/modules/module_21800.c index 9eb1168fd..13fd8523c 100644 --- a/src/modules/module_21800.c +++ b/src/modules/module_21800.c @@ -66,6 +66,27 @@ typedef struct electrum_tmp static const char *SIGNATURE_ELECTRUM = "$electrum$5*"; +bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hc_device_param_t *device_param) +{ + // problem with this kernel is the huge amount of register pressure on u8 tmp[TMPSIZ]; + // some runtimes cant handle it by swapping it to global memory + // it leads to CL_KERNEL_WORK_GROUP_SIZE to return 0 and later we will divide with 0 + // workaround would be to rewrite kernel to use global memory + + if (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK) + { + return true; + } + + // AppleM1, OpenCL, MTLCompilerService never-end + if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) + { + return true; + } + + return false; +} + u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { const u64 esalt_size = (const u64) sizeof (electrum_t); @@ -315,6 +336,6 @@ void module_init (module_ctx_t *module_ctx) module_ctx->module_st_hash = module_st_hash; module_ctx->module_st_pass = module_st_pass; module_ctx->module_tmp_size = module_tmp_size; - module_ctx->module_unstable_warning = MODULE_DEFAULT; + module_ctx->module_unstable_warning = module_unstable_warning; module_ctx->module_warmup_disable = MODULE_DEFAULT; } diff --git a/src/modules/module_29800.c b/src/modules/module_29800.c index 7f4be0f1c..7b6e8e7a0 100644 --- a/src/modules/module_29800.c +++ b/src/modules/module_29800.c @@ -52,17 +52,6 @@ static const u64 SCRYPT_N = 32768; static const u64 SCRYPT_R = 8; static const u64 SCRYPT_P = 6; -bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hc_device_param_t *device_param) -{ - // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 - if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) - { - return true; - } - - return false; -} - u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { const u32 kernel_loops_min = 1024; @@ -492,6 +481,6 @@ void module_init (module_ctx_t *module_ctx) module_ctx->module_st_hash = module_st_hash; module_ctx->module_st_pass = module_st_pass; module_ctx->module_tmp_size = module_tmp_size; - module_ctx->module_unstable_warning = module_unstable_warning; + module_ctx->module_unstable_warning = MODULE_DEFAULT; module_ctx->module_warmup_disable = module_warmup_disable; } diff --git a/tunings/Module_09300.hctune b/tunings/Module_09300.hctune index a2907079e..01f16dc8d 100644 --- a/tunings/Module_09300.hctune +++ b/tunings/Module_09300.hctune @@ -32,3 +32,4 @@ ALIAS_AMD_RX480 * 9300 1 232 ALIAS_AMD_Vega64 * 9300 1 440 A ALIAS_AMD_MI100 * 9300 1 1000 A ALIAS_AMD_RX6900XT * 9300 1 720 A +ALIAS_AMD_W5700X * 9300 1 3 A diff --git a/tunings/Module_15700.hctune b/tunings/Module_15700.hctune index 61080728a..8dc36c5f1 100644 --- a/tunings/Module_15700.hctune +++ b/tunings/Module_15700.hctune @@ -33,3 +33,4 @@ ALIAS_AMD_RX480 * 15700 1 58 ALIAS_AMD_Vega64 * 15700 1 53 A ALIAS_AMD_MI100 * 15700 1 120 A ALIAS_AMD_RX6900XT * 15700 1 56 A +ALIAS_AMD_W5700X * 15700 1 1 A diff --git a/tunings/Module_22700.hctune b/tunings/Module_22700.hctune index 916c143a6..7f194affc 100644 --- a/tunings/Module_22700.hctune +++ b/tunings/Module_22700.hctune @@ -34,3 +34,4 @@ ALIAS_AMD_RX480 * 22700 1 15 ALIAS_AMD_Vega64 * 22700 1 30 A ALIAS_AMD_MI100 * 22700 1 79 A ALIAS_AMD_RX6900XT * 22700 1 123 A +ALIAS_AMD_W5700X * 22700 1 4 A diff --git a/tunings/Module_27700.hctune b/tunings/Module_27700.hctune index 7b664ac50..bd490d4b2 100644 --- a/tunings/Module_27700.hctune +++ b/tunings/Module_27700.hctune @@ -34,3 +34,4 @@ ALIAS_AMD_RX480 * 27700 1 15 ALIAS_AMD_Vega64 * 27700 1 30 A ALIAS_AMD_MI100 * 27700 1 79 A ALIAS_AMD_RX6900XT * 27700 1 123 A +ALIAS_AMD_W5700X * 27700 1 4 A diff --git a/tunings/Module_28200.hctune b/tunings/Module_28200.hctune index c6bfda528..5527e1c04 100644 --- a/tunings/Module_28200.hctune +++ b/tunings/Module_28200.hctune @@ -34,3 +34,4 @@ ALIAS_AMD_RX480 * 28200 1 15 ALIAS_AMD_Vega64 * 28200 1 30 A ALIAS_AMD_MI100 * 28200 1 79 A ALIAS_AMD_RX6900XT * 28200 1 123 A +ALIAS_AMD_W5700X * 28200 1 4 A diff --git a/tunings/Module_29800.hctune b/tunings/Module_29800.hctune index 4563c3b34..14b5cec94 100644 --- a/tunings/Module_29800.hctune +++ b/tunings/Module_29800.hctune @@ -29,3 +29,4 @@ GeForce_RTX_2080_Ti * 29800 1 40 GeForce_RTX_3090 * 29800 1 82 A ALIAS_AMD_RX6900XT * 29800 1 125 A +ALIAS_AMD_W5700X * 29800 1 4 A