From c87a87f99299741c36fe822e5a9b6b2c071b1d81 Mon Sep 17 00:00:00 2001 From: Jens Steube Date: Mon, 9 Jun 2025 11:02:34 +0200 Subject: [PATCH] Improvements to SCRYPT autotuning strategy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit General: The logic for calculating the SCRYPT workload has been moved from module_extra_buffer_size() to module_extra_tuningdb_block(). Previously, this function just returned values from a static tuning file. Now, it actually computes tuning values on the fly based on the device's resources and SCRYPT parameters. This was always possible; it just wasn't used that way until now. After running the calculation, the calculated kernel_accel value is injected into the tuning database as if it had come from a file. The tmto value is stored internally. Users can still override kernel-threads, kernel-accel, and scrypt-tmto via the command line or via tuningdb file. module_extra_tuningdb_block(): This is now where kernel_accel and tmto are automatically calculated. The logic for accel and tmto is now separated and more flexible. Whether the user is using defaults, tuningdb entries, or manual command line overrides, the code logic will try to make smart choices based on what's actually available on the device. First, it tries to find a kernel_accel value that fits into available memory. It starts with a base value and simulates tmto=1 or 2 (which is typically good on GPUs). It also leaves room for other buffers (like pws[], tmps[], etc.). If the result is close to the actual processor count, it gets clamped. This value is then added to the tuning database, so hashcat can pick it up during startup. Once that's set, it derives tmto using available memory, thread count, and the actual SCRYPT parameters. module_extra_buffer_size(): This function now just returns the size of the SCRYPT B[] buffer, based on the tmto that was already calculated. kernel_threads: Defaults are now set to 32 threads in most cases. 
On AMD GPUs, 64 threads might give a slight performance bump, but 32 is more consistent and reliable. For very memory-heavy algorithms (like Ethereum Wallet), it scales down the thread count. Here's a rough reference for other SCRYPT-based modes: - 64 MiB: 16 threads - 256 MiB: 4 threads Tuning files: All built-in tuningdb entries have been removed, because they shouldn’t be needed anymore. But you can still add custom entries if needed. There’s even a commented-out example in the tuningdb file for mode 22700. Free memory handling: Getting the actual amount of free GPU memory is critical for this to work right. Unfortunately, none of the common GPGPU APIs give reliable numbers. We now query low-level interfaces like SYSFS (AMD) and NVML (NVIDIA). Support for those APIs is in place already, except for ADL, which still needs to be added. Because of this, hwmon support (which handles those low-level queries) can no longer be disabled. --- include/ext_nvml.h | 15 +++ include/ext_sysfs_amdgpu.h | 1 + include/hwmon.h | 1 + include/modules.h | 2 +- include/tuningdb.h | 2 +- include/types.h | 3 +- src/backend.c | 42 +++++- src/ext_nvml.c | 22 +++ src/ext_sysfs_amdgpu.c | 52 +++++++ src/hwmon.c | 73 +++++++++- src/modules/module_08900.c | 241 ++++++++++++--------------------- src/modules/module_09300.c | 241 ++++++++++++--------------------- src/modules/module_15700.c | 241 ++++++++++++--------------------- src/modules/module_22700.c | 241 ++++++++++++--------------------- src/modules/module_24000.c | 261 ++++++++++++------------------------ src/modules/module_27700.c | 241 ++++++++++++--------------------- src/modules/module_28200.c | 241 ++++++++++++--------------------- src/modules/module_29800.c | 241 ++++++++++++--------------------- src/tuningdb.c | 17 +-- src/user_options.c | 20 ++- tunings/Module_08900.hctune | 1 - tunings/Module_09300.hctune | 4 - tunings/Module_15700.hctune | 4 - tunings/Module_22700.hctune | 13 +- tunings/Module_24000.hctune | 4 - 
tunings/Module_27700.hctune | 4 - tunings/Module_28200.hctune | 4 - tunings/Module_29800.hctune | 5 - 28 files changed, 941 insertions(+), 1296 deletions(-) diff --git a/include/ext_nvml.h b/include/ext_nvml.h index 02c5d490c..0215e1a32 100644 --- a/include/ext_nvml.h +++ b/include/ext_nvml.h @@ -161,6 +161,18 @@ typedef enum nvmlGom_enum * */ #define nvmlClocksThrottleReasonNone 0x0000000000000000LL +/** + * Memory allocation information for a device (v1). + * The total amount is equal to the sum of the amounts of free and used memory. + */ +typedef struct nvmlMemory_st +{ + unsigned long long total; //!< Total physical device memory (in bytes) + unsigned long long free; //!< Unallocated device memory (in bytes) + unsigned long long used; //!< Sum of Reserved and Allocated device memory (in bytes). + //!< Note that the driver/GPU always sets aside a small amount of memory for bookkeeping +} nvmlMemory_t; + /* * End of declarations from nvml.h **/ @@ -191,6 +203,7 @@ typedef nvmlReturn_t (*NVML_API_CALL NVML_DEVICE_GET_SUPPORTEDCLOCKSTHROTTLEREAS typedef nvmlReturn_t (*NVML_API_CALL NVML_DEVICE_SET_COMPUTEMODE) (nvmlDevice_t, nvmlComputeMode_t); typedef nvmlReturn_t (*NVML_API_CALL NVML_DEVICE_SET_OPERATIONMODE) (nvmlDevice_t, nvmlGpuOperationMode_t); typedef nvmlReturn_t (*NVML_API_CALL NVML_DEVICE_GET_PCIINFO) (nvmlDevice_t, nvmlPciInfo_t *); +typedef nvmlReturn_t (*NVML_API_CALL NVML_DEVICE_GET_MEMORYINFO) (nvmlDevice_t, nvmlMemory_t *); typedef struct hm_nvml_lib { @@ -212,6 +225,7 @@ typedef struct hm_nvml_lib NVML_DEVICE_GET_CURRENTCLOCKSTHROTTLEREASONS nvmlDeviceGetCurrentClocksThrottleReasons; NVML_DEVICE_GET_SUPPORTEDCLOCKSTHROTTLEREASONS nvmlDeviceGetSupportedClocksThrottleReasons; NVML_DEVICE_GET_PCIINFO nvmlDeviceGetPciInfo; + NVML_DEVICE_GET_MEMORYINFO nvmlDeviceGetMemoryInfo; } hm_nvml_lib_t; @@ -232,5 +246,6 @@ int hm_NVML_nvmlDeviceGetClockInfo (void *hashcat_ctx, nvmlDevice_t device, nvml int hm_NVML_nvmlDeviceGetTemperatureThreshold (void 
*hashcat_ctx, nvmlDevice_t device, nvmlTemperatureThresholds_t thresholdType, unsigned int *temp); int hm_NVML_nvmlDeviceGetCurrPcieLinkWidth (void *hashcat_ctx, nvmlDevice_t device, unsigned int *currLinkWidth); int hm_NVML_nvmlDeviceGetPciInfo (void *hashcat_ctx, nvmlDevice_t device, nvmlPciInfo_t *pci); +int hm_NVML_nvmlDeviceGetMemoryInfo (void *hashcat_ctx, nvmlDevice_t device, nvmlMemory_t *mem); #endif // HC_NVML_H diff --git a/include/ext_sysfs_amdgpu.h b/include/ext_sysfs_amdgpu.h index 50c0dc569..d381d9cec 100644 --- a/include/ext_sysfs_amdgpu.h +++ b/include/ext_sysfs_amdgpu.h @@ -34,5 +34,6 @@ int hm_SYSFS_AMDGPU_get_pp_dpm_sclk (void *hashcat_ctx, const int backend_device int hm_SYSFS_AMDGPU_get_pp_dpm_mclk (void *hashcat_ctx, const int backend_device_idx, int *val); int hm_SYSFS_AMDGPU_get_pp_dpm_pcie (void *hashcat_ctx, const int backend_device_idx, int *val); int hm_SYSFS_AMDGPU_get_gpu_busy_percent (void *hashcat_ctx, const int backend_device_idx, int *val); +int hm_SYSFS_AMDGPU_get_mem_info_vram_used (void *hashcat_ctx, const int backend_device_idx, u64 *val); #endif // HC_EXT_SYSFS_AMDGPU_H diff --git a/include/hwmon.h b/include/hwmon.h index 545e22b2d..3d4bd7940 100644 --- a/include/hwmon.h +++ b/include/hwmon.h @@ -24,6 +24,7 @@ int hm_get_utilization_with_devices_idx (hashcat_ctx_t *hashcat_ctx, cons int hm_get_memoryspeed_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int backend_device_idx); int hm_get_corespeed_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int backend_device_idx); int hm_get_throttle_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int backend_device_idx); +u64 hm_get_memoryused_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int backend_device_idx); int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx); void hwmon_ctx_destroy (hashcat_ctx_t *hashcat_ctx); diff --git a/include/modules.h b/include/modules.h index aed8403ce..713b3f46f 100644 --- a/include/modules.h +++ b/include/modules.h @@ -20,7 +20,7 @@ u32 
module_dgst_pos2 (MAYBE_UNUSED const hashconfig_t *ha u32 module_dgst_pos3 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra); u32 module_dgst_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra); u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra); -const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes); +const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes, const u32 device_id, const u32 kernel_accel); u32 module_forced_outfile_format (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra); u32 module_hash_category (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra); const char *module_hash_name (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra); diff --git a/include/tuningdb.h b/include/tuningdb.h index 608a25cfd..b2cafff67 100644 --- a/include/tuningdb.h +++ b/include/tuningdb.h @@ -17,7 +17,7 @@ int sort_by_tuning_db_entry (const void *v1, const void *v2); int tuning_db_init (hashcat_ctx_t *hashcat_ctx); 
void tuning_db_destroy (hashcat_ctx_t *hashcat_ctx); -bool tuning_db_process_line (hashcat_ctx_t *hashcat_ctx, const char *line_buf, const int line_num, const int source); +bool tuning_db_process_line (hashcat_ctx_t *hashcat_ctx, const char *line_buf, const int line_num); tuning_db_entry_t *tuning_db_search (hashcat_ctx_t *hashcat_ctx, const char *device_name, const cl_device_type device_type, int attack_mode, const int hash_mode); #endif // HC_TUNINGDB_H diff --git a/include/types.h b/include/types.h index d02c1b783..8f265ab14 100644 --- a/include/types.h +++ b/include/types.h @@ -2067,6 +2067,7 @@ typedef struct hm_attrs bool threshold_slowdown_get_supported; bool throttle_get_supported; bool utilization_get_supported; + bool memoryused_get_supported; } hm_attrs_t; @@ -3013,7 +3014,7 @@ typedef struct module_ctx u32 (*module_dgst_size) (const hashconfig_t *, const user_options_t *, const user_options_extra_t *); bool (*module_dictstat_disable) (const hashconfig_t *, const user_options_t *, const user_options_extra_t *); u64 (*module_esalt_size) (const hashconfig_t *, const user_options_t *, const user_options_extra_t *); - const char *(*module_extra_tuningdb_block) (const hashconfig_t *, const user_options_t *, const user_options_extra_t *, const backend_ctx_t *, const hashes_t *); + const char *(*module_extra_tuningdb_block) (const hashconfig_t *, const user_options_t *, const user_options_extra_t *, const backend_ctx_t *, const hashes_t *, const u32, const u32); u32 (*module_forced_outfile_format) (const hashconfig_t *, const user_options_t *, const user_options_extra_t *); u32 (*module_hash_category) (const hashconfig_t *, const user_options_t *, const user_options_extra_t *); const char *(*module_hash_name) (const hashconfig_t *, const user_options_t *, const user_options_extra_t *); diff --git a/src/backend.c b/src/backend.c index c5b95a659..80d1bfd60 100644 --- a/src/backend.c +++ b/src/backend.c @@ -24,6 +24,7 @@ #include "dynloader.h" #include "backend.h" 
#include "terminal.h" +#include "hwmon.h" #if defined (__linux__) static const char *const dri_card0_path = "/dev/dri/card0"; @@ -9649,7 +9650,44 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) if (module_ctx->module_extra_tuningdb_block != MODULE_DEFAULT) { - const char *extra_tuningdb_block = module_ctx->module_extra_tuningdb_block (hashconfig, user_options, user_options_extra, backend_ctx, hashes); + // We need this because we can't trust CUDA/HIP to give us the real free device memory + // The only way to do so is through low level APIs + + for (int i = 0; i < 10; i++) + { + const u64 used_bytes = hm_get_memoryused_with_devices_idx (hashcat_ctx, device_id); + + if (used_bytes) + { + if ((used_bytes > (2ULL * 1024 * 1024 * 1024)) + || (used_bytes > (device_param->device_global_mem * 0.5))) + { + event_log_warning (hashcat_ctx, "* Device #%u: Memory usage is too high: %" PRIu64 "/%" PRIu64 ", waiting...", device_id + 1, used_bytes, device_param->device_global_mem); + + sleep (1); + + continue; + } + + device_param->device_available_mem -= used_bytes; + + break; + } + else + { + break; + } + } + + u32 _kernel_accel = 0; + + tuning_db_entry_t *tuningdb_entry = tuning_db_search (hashcat_ctx, device_param->device_name, device_param->opencl_device_type, user_options->attack_mode, hashconfig->hash_mode); + + if (tuningdb_entry != NULL) _kernel_accel = tuningdb_entry->kernel_accel; + + if (user_options->kernel_accel_chgd == true) _kernel_accel = user_options->kernel_accel; + + const char *extra_tuningdb_block = module_ctx->module_extra_tuningdb_block (hashconfig, user_options, user_options_extra, backend_ctx, hashes, device_id, _kernel_accel); char *lines_buf = hcstrdup (extra_tuningdb_block); @@ -9669,7 +9707,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) if (next[0] == '#') continue; - tuning_db_process_line (hashcat_ctx, next, line_num, 2); + tuning_db_process_line (hashcat_ctx, next, line_num); } while ((next = strtok_r ((char *) NULL, "\n", 
&saveptr)) != NULL); diff --git a/src/ext_nvml.c b/src/ext_nvml.c index 25911df14..e6d49cd08 100644 --- a/src/ext_nvml.c +++ b/src/ext_nvml.c @@ -149,6 +149,7 @@ int nvml_init (void *hashcat_ctx) HC_LOAD_FUNC(nvml, nvmlDeviceGetCurrentClocksThrottleReasons, NVML_DEVICE_GET_CURRENTCLOCKSTHROTTLEREASONS, NVML, 0); HC_LOAD_FUNC(nvml, nvmlDeviceGetSupportedClocksThrottleReasons, NVML_DEVICE_GET_SUPPORTEDCLOCKSTHROTTLEREASONS, NVML, 0); HC_LOAD_FUNC(nvml, nvmlDeviceGetPciInfo, NVML_DEVICE_GET_PCIINFO, NVML, 0); + HC_LOAD_FUNC(nvml, nvmlDeviceGetMemoryInfo, NVML_DEVICE_GET_MEMORYINFO, NVML, 0); return 0; } @@ -392,3 +393,24 @@ int hm_NVML_nvmlDeviceGetPciInfo (void *hashcat_ctx, nvmlDevice_t device, nvmlPc return 0; } + +int hm_NVML_nvmlDeviceGetMemoryInfo (void *hashcat_ctx, nvmlDevice_t device, nvmlMemory_t *mem) +{ + hwmon_ctx_t *hwmon_ctx = ((hashcat_ctx_t *) hashcat_ctx)->hwmon_ctx; + + NVML_PTR *nvml = (NVML_PTR *) hwmon_ctx->hm_nvml; + + const nvmlReturn_t nvml_rc = nvml->nvmlDeviceGetMemoryInfo (device, mem); + + if (nvml_rc != NVML_SUCCESS) + { + const char *string = hm_NVML_nvmlErrorString (nvml, nvml_rc); + + event_log_error (hashcat_ctx, "nvmlDeviceGetMemoryInfo(): %s", string); + + return -1; + } + + return 0; +} + diff --git a/src/ext_sysfs_amdgpu.c b/src/ext_sysfs_amdgpu.c index 1aa53b210..70f071649 100644 --- a/src/ext_sysfs_amdgpu.c +++ b/src/ext_sysfs_amdgpu.c @@ -441,3 +441,55 @@ int hm_SYSFS_AMDGPU_get_gpu_busy_percent (void *hashcat_ctx, const int backend_d return 0; } + +int hm_SYSFS_AMDGPU_get_mem_info_vram_used (void *hashcat_ctx, const int backend_device_idx, u64 *val) +{ + char *syspath = hm_SYSFS_AMDGPU_get_syspath_device (hashcat_ctx, backend_device_idx); + + if (syspath == NULL) return -1; + + char *path; + + hc_asprintf (&path, "%s/mem_info_vram_used", syspath); + + hcfree (syspath); + + HCFILE fp; + + if (hc_fopen (&fp, path, "r") == false) + { + event_log_error (hashcat_ctx, "%s: %s", path, strerror (errno)); + + hcfree (path); + + return 
-1; + } + + u64 mem_info_vram_used = 0; + + while (!hc_feof (&fp)) + { + char buf[HCBUFSIZ_TINY]; + + char *ptr = hc_fgets (buf, sizeof (buf), &fp); + + if (ptr == NULL) continue; + + size_t len = strlen (ptr); + + if (len < 1) continue; + + int rc = sscanf (ptr, "%" PRIu64, &mem_info_vram_used); + + if (rc == 1) break; + } + + hc_fclose (&fp); + + *val = mem_info_vram_used; + + hcfree (path); + + return 0; +} + diff --git a/src/hwmon.c b/src/hwmon.c index a0f24c644..4f5264b3d 100644 --- a/src/hwmon.c +++ b/src/hwmon.c @@ -1214,6 +1214,60 @@ int hm_get_throttle_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int back return -1; } +u64 hm_get_memoryused_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int backend_device_idx) +{ + hwmon_ctx_t *hwmon_ctx = hashcat_ctx->hwmon_ctx; + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + if (hwmon_ctx->enabled == false) return 0; + + if (hwmon_ctx->hm_device[backend_device_idx].memoryused_get_supported == false) return 0; + + if ((backend_ctx->devices_param[backend_device_idx].is_opencl == true) || (backend_ctx->devices_param[backend_device_idx].is_hip == true) || (backend_ctx->devices_param[backend_device_idx].is_cuda == true)) + { + if (backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_GPU) + { + if ((backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD) || (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)) + { + if (hwmon_ctx->hm_sysfs_amdgpu) + { + u64 used = 0; + + if (hm_SYSFS_AMDGPU_get_mem_info_vram_used (hashcat_ctx, backend_device_idx, &used) == -1) + { + hwmon_ctx->hm_device[backend_device_idx].memoryused_get_supported = false; + + return 0; + } + + return used; + } + } + + if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_NV) + { + if (hwmon_ctx->hm_nvml) + { + nvmlMemory_t mem; + + if (hm_NVML_nvmlDeviceGetMemoryInfo (hashcat_ctx, 
hwmon_ctx->hm_device[backend_device_idx].nvml, &mem) == -1) + { + hwmon_ctx->hm_device[backend_device_idx].memoryused_get_supported = false; + + return 0; + } + + return mem.used; + } + } + } + } + + hwmon_ctx->hm_device[backend_device_idx].memoryused_get_supported = false; + + return 0; +} + int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx) { bridge_ctx_t *bridge_ctx = hashcat_ctx->bridge_ctx; @@ -1227,12 +1281,12 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx) if (bridge_ctx->enabled == true) backend_devices_cnt = 1; - #if !defined (WITH_HWMON) - return 0; - #endif // WITH_HWMON + //#if !defined (WITH_HWMON) + //return 0; + //#endif // WITH_HWMON if (user_options->usage > 0) return 0; - if (user_options->backend_info > 0) return 0; + //if (user_options->backend_info > 0) return 0; if (user_options->hash_info == true) return 0; if (user_options->keyspace == true) return 0; @@ -1241,7 +1295,9 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx) if (user_options->stdout_flag == true) return 0; if (user_options->version == true) return 0; if (user_options->identify == true) return 0; - if (user_options->hwmon == false) return 0; + //we need hwmon support to get free memory per device support + //its a joke, but there's no way around + //if (user_options->hwmon == false) return 0; hwmon_ctx->hm_device = (hm_attrs_t *) hccalloc (DEVICES_MAX, sizeof (hm_attrs_t)); @@ -1387,6 +1443,7 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx) hm_adapters_nvml[device_id].threshold_shutdown_get_supported = true; hm_adapters_nvml[device_id].threshold_slowdown_get_supported = true; hm_adapters_nvml[device_id].utilization_get_supported = true; + hm_adapters_nvml[device_id].memoryused_get_supported = true; } } } @@ -1419,6 +1476,7 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx) hm_adapters_nvml[device_id].threshold_shutdown_get_supported = true; hm_adapters_nvml[device_id].threshold_slowdown_get_supported = true; hm_adapters_nvml[device_id].utilization_get_supported = true; + 
hm_adapters_nvml[device_id].memoryused_get_supported = true; } } } @@ -1640,6 +1698,7 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx) hm_adapters_sysfs_amdgpu[device_id].memoryspeed_get_supported = true; hm_adapters_sysfs_amdgpu[device_id].temperature_get_supported = true; hm_adapters_sysfs_amdgpu[device_id].utilization_get_supported = true; + hm_adapters_sysfs_amdgpu[device_id].memoryused_get_supported = true; } } } @@ -1746,6 +1805,7 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx) hwmon_ctx->hm_device[backend_devices_idx].threshold_slowdown_get_supported |= hm_adapters_nvml[device_id].threshold_slowdown_get_supported; hwmon_ctx->hm_device[backend_devices_idx].throttle_get_supported |= hm_adapters_nvml[device_id].throttle_get_supported; hwmon_ctx->hm_device[backend_devices_idx].utilization_get_supported |= hm_adapters_nvml[device_id].utilization_get_supported; + hwmon_ctx->hm_device[backend_devices_idx].memoryused_get_supported |= hm_adapters_nvml[device_id].memoryused_get_supported; } if (hwmon_ctx->hm_nvapi) @@ -1875,6 +1935,7 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx) hwmon_ctx->hm_device[backend_devices_idx].threshold_slowdown_get_supported |= hm_adapters_sysfs_amdgpu[device_id].threshold_slowdown_get_supported; hwmon_ctx->hm_device[backend_devices_idx].throttle_get_supported |= hm_adapters_sysfs_amdgpu[device_id].throttle_get_supported; hwmon_ctx->hm_device[backend_devices_idx].utilization_get_supported |= hm_adapters_sysfs_amdgpu[device_id].utilization_get_supported; + hwmon_ctx->hm_device[backend_devices_idx].memoryused_get_supported |= hm_adapters_sysfs_amdgpu[device_id].memoryused_get_supported; } } @@ -1895,6 +1956,7 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx) hwmon_ctx->hm_device[backend_devices_idx].threshold_slowdown_get_supported |= hm_adapters_nvml[device_id].threshold_slowdown_get_supported; hwmon_ctx->hm_device[backend_devices_idx].throttle_get_supported |= hm_adapters_nvml[device_id].throttle_get_supported; 
hwmon_ctx->hm_device[backend_devices_idx].utilization_get_supported |= hm_adapters_nvml[device_id].utilization_get_supported; + hwmon_ctx->hm_device[backend_devices_idx].memoryused_get_supported |= hm_adapters_nvml[device_id].memoryused_get_supported; } if (hwmon_ctx->hm_nvapi) @@ -1927,6 +1989,7 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx) hm_get_threshold_slowdown_with_devices_idx (hashcat_ctx, backend_devices_idx); hm_get_throttle_with_devices_idx (hashcat_ctx, backend_devices_idx); hm_get_utilization_with_devices_idx (hashcat_ctx, backend_devices_idx); + hm_get_memoryused_with_devices_idx (hashcat_ctx, backend_devices_idx); } FREE_ADAPTERS; diff --git a/src/modules/module_08900.c b/src/modules/module_08900.c index 0865e8575..42fd456be 100644 --- a/src/modules/module_08900.c +++ b/src/modules/module_08900.c @@ -49,6 +49,8 @@ const char *module_st_pass (MAYBE_UNUSED const hashconfig_t *hashconfig, static const char *SIGNATURE_SCRYPT = "SCRYPT"; +static const u32 SCRYPT_THREADS = 32; + static const u64 SCRYPT_N = 16384; static const u64 SCRYPT_R = 8; static const u64 SCRYPT_P = 1; @@ -67,9 +69,16 @@ u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_ return kernel_loops_max; } +u32 module_kernel_threads_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) +{ + const u32 kernel_threads_min = (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : SCRYPT_THREADS; + + return kernel_threads_min; +} + u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { - const u32 kernel_threads_max = 32; + const u32 kernel_threads_max = (user_options->kernel_threads_chgd == true) ? 
user_options->kernel_threads : SCRYPT_THREADS; return kernel_threads_max; } @@ -84,90 +93,122 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con return pw_max; } -const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes) +u32 tmto = 0; + +const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes, const u32 device_id, const u32 kernel_accel) { + // preprocess tmto in case user has overridden + // it's important to set to 0 otherwise so we can postprocess tmto in that case + + tmto = (user_options->scrypt_tmto_chgd == true) ? user_options->scrypt_tmto : 0; + // we enforce the same configuration for all hashes, so this should be fine const u64 scrypt_N = (hashes->salts_buf[0].scrypt_N) ? hashes->salts_buf[0].scrypt_N : SCRYPT_N; const u64 scrypt_r = (hashes->salts_buf[0].scrypt_r) ? 
hashes->salts_buf[0].scrypt_r : SCRYPT_R; - const u64 req1 = 128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra); + const u64 size_per_accel = (128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra)) >> tmto; int lines_sz = 4096; char *lines_buf = hcmalloc (lines_sz); int lines_pos = 0; - for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++) + hc_device_param_t *device_param = &backend_ctx->devices_param[device_id]; + + const u32 device_processors = device_param->device_processors; + + const u64 available_mem = MIN (device_param->device_available_mem, (device_param->device_maxmem_alloc * 4)); + + u32 kernel_accel_new = device_processors; + + if (kernel_accel) { - hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx]; + // from command line or tuning db has priority - if (device_param->skipped == true) continue; - - const u64 avail = MIN (device_param->device_available_mem, (device_param->device_maxmem_alloc * 4)) - (2 * req1); - - char *new_device_name = hcstrdup (device_param->device_name); - - for (size_t i = 0; i < strlen (new_device_name); i++) - { - if (new_device_name[i] == ' ') new_device_name[i] = '_'; - } - - char *out_name = new_device_name; - - if (memcmp (new_device_name, "AMD_", 4) == 0) out_name += 4; - if (memcmp (new_device_name, "NVIDIA_", 7) == 0) out_name += 7; - - // ok, try to find a nice accel programmatically - - u32 accel = device_param->device_processors; + kernel_accel_new = user_options->kernel_accel; + } + else + { + // find a nice kernel_accel programmatically if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) { - // expect to change any of this - - if (avail < (req1 * accel)) // not enough memory + if ((size_per_accel * device_processors) > available_mem) // not enough memory { - const float multi = (float) avail / req1; + const float multi = 
(float) available_mem / size_per_accel; - accel = multi; + int accel_multi; - for (int i = 1; i <= 4; i++) // this is tmto + for (accel_multi = 1; accel_multi <= 2; accel_multi++) { - if (device_param->device_processors > accel) - { - accel = ((u64) multi << i) & ~3; - } + kernel_accel_new = multi * (1 << accel_multi); + + if (kernel_accel_new >= device_processors) break; + } + + // we need some space for tmps[], ... + + kernel_accel_new -= (1 << accel_multi); + + // clamp if close to device processors -- 10% good? + + if ((kernel_accel_new > device_processors) && ((kernel_accel_new - device_processors) <= (device_processors / 10))) + { + kernel_accel_new = device_processors; } } else { for (int i = 1; i <= 8; i++) { - if ((avail * 2) > (req1 * accel)) + if ((size_per_accel * device_processors * i) < available_mem) { - accel = device_param->device_processors * i; + kernel_accel_new = device_processors * i; } } } } else { - const u64 req1 = 128 * scrypt_r * scrypt_N; - for (int i = 1; i <= 8; i++) { - if (avail > (req1 * accel)) + if ((size_per_accel * device_processors * i) < available_mem) { - accel = device_param->device_processors * i; + kernel_accel_new = device_processors * i; } } } - - lines_pos += snprintf (lines_buf + lines_pos, lines_sz - lines_pos, "%s * %u 1 %u A\n", out_name, user_options->hash_mode, accel); - - hcfree (new_device_name); } + // fix tmto if user allows + + if (tmto == 0) + { + const u32 tmto_start = 1; + const u32 tmto_stop = 5; + + for (u32 tmto_new = tmto_start; tmto_new <= tmto_stop; tmto_new++) + { + if (available_mem > (kernel_accel_new * (size_per_accel >> tmto_new))) + { + tmto = tmto_new; + + break; + } + } + } + + char *new_device_name = hcstrdup (device_param->device_name); + + for (size_t i = 0; i < strlen (new_device_name); i++) + { + if (new_device_name[i] == ' ') new_device_name[i] = '_'; + } + + lines_pos += snprintf (lines_buf + lines_pos, lines_sz - lines_pos, "%s * %u 1 %u A\n", new_device_name, user_options->hash_mode, 
kernel_accel_new); + + hcfree (new_device_name); + return lines_buf; } @@ -179,115 +220,11 @@ u64 module_extra_buffer_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE const u64 scrypt_N = (hashes->salts_buf[0].scrypt_N) ? hashes->salts_buf[0].scrypt_N : SCRYPT_N; const u64 scrypt_r = (hashes->salts_buf[0].scrypt_r) ? hashes->salts_buf[0].scrypt_r : SCRYPT_R; - const u64 kernel_power_max = ((OPTS_TYPE & OPTS_TYPE_MP_MULTI_DISABLE) ? 1 : device_param->device_processors) * device_param->kernel_threads_max * device_param->kernel_accel_max; + const u64 size_per_accel = 128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra); - u64 tmto_start = 0; - u64 tmto_stop = 4; + u64 size_scrypt = size_per_accel * device_param->kernel_accel_max; - if (user_options->scrypt_tmto_chgd == true) - { - tmto_start = user_options->scrypt_tmto; - tmto_stop = user_options->scrypt_tmto; - } - - // size_pws - - const u64 size_pws = kernel_power_max * sizeof (pw_t); - - const u64 size_pws_amp = size_pws; - - // size_pws_comp - - const u64 size_pws_comp = kernel_power_max * (sizeof (u32) * 64); - - // size_pws_idx - - const u64 size_pws_idx = (kernel_power_max + 1) * sizeof (pw_idx_t); - - // size_tmps - - const u64 size_tmps = kernel_power_max * hashconfig->tmp_size; - - // size_hooks - - const u64 size_hooks = kernel_power_max * hashconfig->hook_size; - - u64 size_pws_pre = 4; - u64 size_pws_base = 4; - - if (user_options->slow_candidates == true) - { - // size_pws_pre - - size_pws_pre = kernel_power_max * sizeof (pw_pre_t); - - // size_pws_base - - size_pws_base = kernel_power_max * sizeof (pw_pre_t); - } - - // sometimes device_available_mem and device_maxmem_alloc reported back from the opencl runtime are a bit inaccurate. - // let's add some extra space just to be sure. 
- // now depends on the kernel-accel value (where scrypt and similar benefits), but also hard minimum 64mb and maximum 1024mb limit - - u64 EXTRA_SPACE = (1024ULL * 1024ULL) * device_param->kernel_accel_max; - - EXTRA_SPACE = MAX (EXTRA_SPACE, ( 64ULL * 1024ULL * 1024ULL)); - EXTRA_SPACE = MIN (EXTRA_SPACE, (1024ULL * 1024ULL * 1024ULL)); - - const u64 scrypt_extra_space - = device_param->size_bfs - + device_param->size_combs - + device_param->size_digests - + device_param->size_esalts - + device_param->size_markov_css - + device_param->size_plains - + device_param->size_results - + device_param->size_root_css - + device_param->size_rules - + device_param->size_rules_c - + device_param->size_salts - + device_param->size_shown - + device_param->size_tm - + device_param->size_st_digests - + device_param->size_st_salts - + device_param->size_st_esalts - + size_pws - + size_pws_amp - + size_pws_comp - + size_pws_idx - + size_tmps - + size_hooks - + size_pws_pre - + size_pws_base - + EXTRA_SPACE; - - bool not_enough_memory = true; - - u64 size_scrypt = 0; - - u64 tmto; - - for (tmto = tmto_start; tmto <= tmto_stop; tmto++) - { - size_scrypt = (128ULL * scrypt_r) * scrypt_N; - - size_scrypt /= 1ull << tmto; - - size_scrypt *= kernel_power_max; - - if ((size_scrypt / 4) > device_param->device_maxmem_alloc) continue; - - if ((size_scrypt + scrypt_extra_space) > device_param->device_available_mem) continue; - - not_enough_memory = false; - - break; - } - - if (not_enough_memory == true) return -1; - - return size_scrypt; + return size_scrypt / (1 << tmto); } u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) @@ -527,7 +464,7 @@ void module_init (module_ctx_t *module_ctx) module_ctx->module_kernel_loops_max = module_kernel_loops_max; module_ctx->module_kernel_loops_min = module_kernel_loops_min; module_ctx->module_kernel_threads_max = 
module_kernel_threads_max; - module_ctx->module_kernel_threads_min = MODULE_DEFAULT; + module_ctx->module_kernel_threads_min = module_kernel_threads_min; module_ctx->module_kern_type = module_kern_type; module_ctx->module_kern_type_dynamic = MODULE_DEFAULT; module_ctx->module_opti_type = module_opti_type; diff --git a/src/modules/module_09300.c b/src/modules/module_09300.c index 8f92e7fce..4f0f5bbb5 100644 --- a/src/modules/module_09300.c +++ b/src/modules/module_09300.c @@ -49,6 +49,8 @@ const char *module_st_pass (MAYBE_UNUSED const hashconfig_t *hashconfig, static const char *SIGNATURE_CISCO9 = "$9$"; +static const u32 SCRYPT_THREADS = 32; + static const u64 SCRYPT_N = 16384; static const u64 SCRYPT_R = 1; static const u64 SCRYPT_P = 1; @@ -67,9 +69,16 @@ u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_ return kernel_loops_max; } +u32 module_kernel_threads_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) +{ + const u32 kernel_threads_min = (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : SCRYPT_THREADS; + + return kernel_threads_min; +} + u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { - const u32 kernel_threads_max = 32; + const u32 kernel_threads_max = (user_options->kernel_threads_chgd == true) ? 
user_options->kernel_threads : SCRYPT_THREADS; return kernel_threads_max; } @@ -84,90 +93,122 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con return pw_max; } -const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes) +u32 tmto = 0; + +const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes, const u32 device_id, const u32 kernel_accel) { + // preprocess tmto in case user has overridden + // it's important to set to 0 otherwise so we can postprocess tmto in that case + + tmto = (user_options->scrypt_tmto_chgd == true) ? user_options->scrypt_tmto : 0; + // we enforce the same configuration for all hashes, so this should be fine const u64 scrypt_N = (hashes->salts_buf[0].scrypt_N) ? hashes->salts_buf[0].scrypt_N : SCRYPT_N; const u64 scrypt_r = (hashes->salts_buf[0].scrypt_r) ? 
hashes->salts_buf[0].scrypt_r : SCRYPT_R; - const u64 req1 = 128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra); + const u64 size_per_accel = (128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra)) >> tmto; int lines_sz = 4096; char *lines_buf = hcmalloc (lines_sz); int lines_pos = 0; - for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++) + hc_device_param_t *device_param = &backend_ctx->devices_param[device_id]; + + const u32 device_processors = device_param->device_processors; + + const u64 available_mem = MIN (device_param->device_available_mem, (device_param->device_maxmem_alloc * 4)); + + u32 kernel_accel_new = device_processors; + + if (kernel_accel) { - hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx]; + // from command line or tuning db has priority - if (device_param->skipped == true) continue; - - const u64 avail = MIN (device_param->device_available_mem, (device_param->device_maxmem_alloc * 4)) - (2 * req1); - - char *new_device_name = hcstrdup (device_param->device_name); - - for (size_t i = 0; i < strlen (new_device_name); i++) - { - if (new_device_name[i] == ' ') new_device_name[i] = '_'; - } - - char *out_name = new_device_name; - - if (memcmp (new_device_name, "AMD_", 4) == 0) out_name += 4; - if (memcmp (new_device_name, "NVIDIA_", 7) == 0) out_name += 7; - - // ok, try to find a nice accel programmatically - - u32 accel = device_param->device_processors; + kernel_accel_new = user_options->kernel_accel; + } + else + { + // find a nice kernel_accel programmatically if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) { - // expect to change any of this - - if (avail < (req1 * accel)) // not enough memory + if ((size_per_accel * device_processors) > available_mem) // not enough memory { - const float multi = (float) avail / req1; + const float multi = 
(float) available_mem / size_per_accel; - accel = multi; + int accel_multi; - for (int i = 1; i <= 4; i++) // this is tmto + for (accel_multi = 1; accel_multi <= 2; accel_multi++) { - if (device_param->device_processors > accel) - { - accel = ((u64) multi << i) & ~3; - } + kernel_accel_new = multi * (1 << accel_multi); + + if (kernel_accel_new >= device_processors) break; + } + + // we need some space for tmps[], ... + + kernel_accel_new -= (1 << accel_multi); + + // clamp if close to device processors -- 10% good? + + if ((kernel_accel_new > device_processors) && ((kernel_accel_new - device_processors) <= (device_processors / 10))) + { + kernel_accel_new = device_processors; } } else { for (int i = 1; i <= 8; i++) { - if ((avail * 2) > (req1 * accel)) + if ((size_per_accel * device_processors * i) < available_mem) { - accel = device_param->device_processors * i; + kernel_accel_new = device_processors * i; } } } } else { - const u64 req1 = 128 * scrypt_r * scrypt_N; - for (int i = 1; i <= 8; i++) { - if (avail > (req1 * accel)) + if ((size_per_accel * device_processors * i) < available_mem) { - accel = device_param->device_processors * i; + kernel_accel_new = device_processors * i; } } } - - lines_pos += snprintf (lines_buf + lines_pos, lines_sz - lines_pos, "%s * %u 1 %u A\n", out_name, user_options->hash_mode, accel); - - hcfree (new_device_name); } + // fix tmto if user allows + + if (tmto == 0) + { + const u32 tmto_start = 1; + const u32 tmto_stop = 5; + + for (u32 tmto_new = tmto_start; tmto_new <= tmto_stop; tmto_new++) + { + if (available_mem > (kernel_accel_new * (size_per_accel >> tmto_new))) + { + tmto = tmto_new; + + break; + } + } + } + + char *new_device_name = hcstrdup (device_param->device_name); + + for (size_t i = 0; i < strlen (new_device_name); i++) + { + if (new_device_name[i] == ' ') new_device_name[i] = '_'; + } + + lines_pos += snprintf (lines_buf + lines_pos, lines_sz - lines_pos, "%s * %u 1 %u A\n", new_device_name, user_options->hash_mode, 
kernel_accel_new); + + hcfree (new_device_name); + return lines_buf; } @@ -179,115 +220,11 @@ u64 module_extra_buffer_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE const u64 scrypt_N = (hashes->salts_buf[0].scrypt_N) ? hashes->salts_buf[0].scrypt_N : SCRYPT_N; const u64 scrypt_r = (hashes->salts_buf[0].scrypt_r) ? hashes->salts_buf[0].scrypt_r : SCRYPT_R; - const u64 kernel_power_max = ((OPTS_TYPE & OPTS_TYPE_MP_MULTI_DISABLE) ? 1 : device_param->device_processors) * device_param->kernel_threads_max * device_param->kernel_accel_max; + const u64 req1 = 128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra); - u64 tmto_start = 0; - u64 tmto_stop = 4; + u64 size_scrypt = req1 * device_param->kernel_accel_max; - if (user_options->scrypt_tmto_chgd == true) - { - tmto_start = user_options->scrypt_tmto; - tmto_stop = user_options->scrypt_tmto; - } - - // size_pws - - const u64 size_pws = kernel_power_max * sizeof (pw_t); - - const u64 size_pws_amp = size_pws; - - // size_pws_comp - - const u64 size_pws_comp = kernel_power_max * (sizeof (u32) * 64); - - // size_pws_idx - - const u64 size_pws_idx = (kernel_power_max + 1) * sizeof (pw_idx_t); - - // size_tmps - - const u64 size_tmps = kernel_power_max * hashconfig->tmp_size; - - // size_hooks - - const u64 size_hooks = kernel_power_max * hashconfig->hook_size; - - u64 size_pws_pre = 4; - u64 size_pws_base = 4; - - if (user_options->slow_candidates == true) - { - // size_pws_pre - - size_pws_pre = kernel_power_max * sizeof (pw_pre_t); - - // size_pws_base - - size_pws_base = kernel_power_max * sizeof (pw_pre_t); - } - - // sometimes device_available_mem and device_maxmem_alloc reported back from the opencl runtime are a bit inaccurate. - // let's add some extra space just to be sure. 
- // now depends on the kernel-accel value (where scrypt and similar benefits), but also hard minimum 64mb and maximum 1024mb limit - - u64 EXTRA_SPACE = (1024ULL * 1024ULL) * device_param->kernel_accel_max; - - EXTRA_SPACE = MAX (EXTRA_SPACE, ( 64ULL * 1024ULL * 1024ULL)); - EXTRA_SPACE = MIN (EXTRA_SPACE, (1024ULL * 1024ULL * 1024ULL)); - - const u64 scrypt_extra_space - = device_param->size_bfs - + device_param->size_combs - + device_param->size_digests - + device_param->size_esalts - + device_param->size_markov_css - + device_param->size_plains - + device_param->size_results - + device_param->size_root_css - + device_param->size_rules - + device_param->size_rules_c - + device_param->size_salts - + device_param->size_shown - + device_param->size_tm - + device_param->size_st_digests - + device_param->size_st_salts - + device_param->size_st_esalts - + size_pws - + size_pws_amp - + size_pws_comp - + size_pws_idx - + size_tmps - + size_hooks - + size_pws_pre - + size_pws_base - + EXTRA_SPACE; - - bool not_enough_memory = true; - - u64 size_scrypt = 0; - - u64 tmto; - - for (tmto = tmto_start; tmto <= tmto_stop; tmto++) - { - size_scrypt = (128ULL * scrypt_r) * scrypt_N; - - size_scrypt /= 1ull << tmto; - - size_scrypt *= kernel_power_max; - - if ((size_scrypt / 4) > device_param->device_maxmem_alloc) continue; - - if ((size_scrypt + scrypt_extra_space) > device_param->device_available_mem) continue; - - not_enough_memory = false; - - break; - } - - if (not_enough_memory == true) return -1; - - return size_scrypt; + return size_scrypt / (1 << tmto); } u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) @@ -488,7 +425,7 @@ void module_init (module_ctx_t *module_ctx) module_ctx->module_kernel_loops_max = module_kernel_loops_max; module_ctx->module_kernel_loops_min = module_kernel_loops_min; module_ctx->module_kernel_threads_max = 
module_kernel_threads_max; - module_ctx->module_kernel_threads_min = MODULE_DEFAULT; + module_ctx->module_kernel_threads_min = module_kernel_threads_min; module_ctx->module_kern_type = module_kern_type; module_ctx->module_kern_type_dynamic = MODULE_DEFAULT; module_ctx->module_opti_type = module_opti_type; diff --git a/src/modules/module_15700.c b/src/modules/module_15700.c index c7a357dd0..063106d2f 100644 --- a/src/modules/module_15700.c +++ b/src/modules/module_15700.c @@ -56,6 +56,8 @@ typedef struct ethereum_scrypt static const char *SIGNATURE_ETHEREUM_SCRYPT = "$ethereum$s"; +static const u32 SCRYPT_THREADS = 4; + static const u64 SCRYPT_N = 262144; static const u64 SCRYPT_R = 8; static const u64 SCRYPT_P = 1; @@ -74,9 +76,16 @@ u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_ return kernel_loops_max; } +u32 module_kernel_threads_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) +{ + const u32 kernel_threads_min = (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : SCRYPT_THREADS; + + return kernel_threads_min; +} + u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { - const u32 kernel_threads_max = 4; + const u32 kernel_threads_max = (user_options->kernel_threads_chgd == true) ? 
user_options->kernel_threads : SCRYPT_THREADS; return kernel_threads_max; } @@ -98,90 +107,122 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con return pw_max; } -const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes) +u32 tmto = 0; + +const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes, const u32 device_id, const u32 kernel_accel) { + // preprocess tmto in case user has overridden + // it's important to set to 0 otherwise so we can postprocess tmto in that case + + tmto = (user_options->scrypt_tmto_chgd == true) ? user_options->scrypt_tmto : 0; + // we enforce the same configuration for all hashes, so this should be fine const u64 scrypt_N = (hashes->salts_buf[0].scrypt_N) ? hashes->salts_buf[0].scrypt_N : SCRYPT_N; const u64 scrypt_r = (hashes->salts_buf[0].scrypt_r) ? 
hashes->salts_buf[0].scrypt_r : SCRYPT_R; - const u64 req1 = 128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra); + const u64 size_per_accel = (128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra)) >> tmto; int lines_sz = 4096; char *lines_buf = hcmalloc (lines_sz); int lines_pos = 0; - for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++) + hc_device_param_t *device_param = &backend_ctx->devices_param[device_id]; + + const u32 device_processors = device_param->device_processors; + + const u64 available_mem = MIN (device_param->device_available_mem, (device_param->device_maxmem_alloc * 4)); + + u32 kernel_accel_new = device_processors; + + if (kernel_accel) { - hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx]; + // from command line or tuning db has priority - if (device_param->skipped == true) continue; - - const u64 avail = MIN (device_param->device_available_mem, (device_param->device_maxmem_alloc * 4)) - (2 * req1); - - char *new_device_name = hcstrdup (device_param->device_name); - - for (size_t i = 0; i < strlen (new_device_name); i++) - { - if (new_device_name[i] == ' ') new_device_name[i] = '_'; - } - - char *out_name = new_device_name; - - if (memcmp (new_device_name, "AMD_", 4) == 0) out_name += 4; - if (memcmp (new_device_name, "NVIDIA_", 7) == 0) out_name += 7; - - // ok, try to find a nice accel programmatically - - u32 accel = device_param->device_processors; + kernel_accel_new = user_options->kernel_accel; + } + else + { + // find a nice kernel_accel programmatically if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) { - // expect to change any of this - - if (avail < (req1 * accel)) // not enough memory + if ((size_per_accel * device_processors) > available_mem) // not enough memory { - const float multi = (float) avail / req1; + const float multi = 
(float) available_mem / size_per_accel; - accel = multi; + int accel_multi; - for (int i = 1; i <= 4; i++) // this is tmto + for (accel_multi = 1; accel_multi <= 2; accel_multi++) { - if (device_param->device_processors > accel) - { - accel = ((u64) multi << i) & ~3; - } + kernel_accel_new = multi * (1 << accel_multi); + + if (kernel_accel_new >= device_processors) break; + } + + // we need some space for tmps[], ... + + kernel_accel_new -= (1 << accel_multi); + + // clamp if close to device processors -- 10% good? + + if ((kernel_accel_new > device_processors) && ((kernel_accel_new - device_processors) <= (device_processors / 10))) + { + kernel_accel_new = device_processors; } } else { for (int i = 1; i <= 8; i++) { - if ((avail * 2) > (req1 * accel)) + if ((size_per_accel * device_processors * i) < available_mem) { - accel = device_param->device_processors * i; + kernel_accel_new = device_processors * i; } } } } else { - const u64 req1 = 128 * scrypt_r * scrypt_N; - for (int i = 1; i <= 8; i++) { - if (avail > (req1 * accel)) + if ((size_per_accel * device_processors * i) < available_mem) { - accel = device_param->device_processors * i; + kernel_accel_new = device_processors * i; } } } - - lines_pos += snprintf (lines_buf + lines_pos, lines_sz - lines_pos, "%s * %u 1 %u A\n", out_name, user_options->hash_mode, accel); - - hcfree (new_device_name); } + // fix tmto if user allows + + if (tmto == 0) + { + const u32 tmto_start = 1; + const u32 tmto_stop = 5; + + for (u32 tmto_new = tmto_start; tmto_new <= tmto_stop; tmto_new++) + { + if (available_mem > (kernel_accel_new * (size_per_accel >> tmto_new))) + { + tmto = tmto_new; + + break; + } + } + } + + char *new_device_name = hcstrdup (device_param->device_name); + + for (size_t i = 0; i < strlen (new_device_name); i++) + { + if (new_device_name[i] == ' ') new_device_name[i] = '_'; + } + + lines_pos += snprintf (lines_buf + lines_pos, lines_sz - lines_pos, "%s * %u 1 %u A\n", new_device_name, user_options->hash_mode, 
kernel_accel_new); + + hcfree (new_device_name); + return lines_buf; } @@ -193,115 +234,11 @@ u64 module_extra_buffer_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE const u64 scrypt_N = (hashes->salts_buf[0].scrypt_N) ? hashes->salts_buf[0].scrypt_N : SCRYPT_N; const u64 scrypt_r = (hashes->salts_buf[0].scrypt_r) ? hashes->salts_buf[0].scrypt_r : SCRYPT_R; - const u64 kernel_power_max = ((OPTS_TYPE & OPTS_TYPE_MP_MULTI_DISABLE) ? 1 : device_param->device_processors) * device_param->kernel_threads_max * device_param->kernel_accel_max; + const u64 size_per_accel = 128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra); - u64 tmto_start = 0; - u64 tmto_stop = 4; + u64 size_scrypt = size_per_accel * device_param->kernel_accel_max; - if (user_options->scrypt_tmto_chgd == true) - { - tmto_start = user_options->scrypt_tmto; - tmto_stop = user_options->scrypt_tmto; - } - - // size_pws - - const u64 size_pws = kernel_power_max * sizeof (pw_t); - - const u64 size_pws_amp = size_pws; - - // size_pws_comp - - const u64 size_pws_comp = kernel_power_max * (sizeof (u32) * 64); - - // size_pws_idx - - const u64 size_pws_idx = (kernel_power_max + 1) * sizeof (pw_idx_t); - - // size_tmps - - const u64 size_tmps = kernel_power_max * hashconfig->tmp_size; - - // size_hooks - - const u64 size_hooks = kernel_power_max * hashconfig->hook_size; - - u64 size_pws_pre = 4; - u64 size_pws_base = 4; - - if (user_options->slow_candidates == true) - { - // size_pws_pre - - size_pws_pre = kernel_power_max * sizeof (pw_pre_t); - - // size_pws_base - - size_pws_base = kernel_power_max * sizeof (pw_pre_t); - } - - // sometimes device_available_mem and device_maxmem_alloc reported back from the opencl runtime are a bit inaccurate. - // let's add some extra space just to be sure. 
- // now depends on the kernel-accel value (where scrypt and similar benefits), but also hard minimum 64mb and maximum 1024mb limit - - u64 EXTRA_SPACE = (1024ULL * 1024ULL) * device_param->kernel_accel_max; - - EXTRA_SPACE = MAX (EXTRA_SPACE, ( 64ULL * 1024ULL * 1024ULL)); - EXTRA_SPACE = MIN (EXTRA_SPACE, (1024ULL * 1024ULL * 1024ULL)); - - const u64 scrypt_extra_space - = device_param->size_bfs - + device_param->size_combs - + device_param->size_digests - + device_param->size_esalts - + device_param->size_markov_css - + device_param->size_plains - + device_param->size_results - + device_param->size_root_css - + device_param->size_rules - + device_param->size_rules_c - + device_param->size_salts - + device_param->size_shown - + device_param->size_tm - + device_param->size_st_digests - + device_param->size_st_salts - + device_param->size_st_esalts - + size_pws - + size_pws_amp - + size_pws_comp - + size_pws_idx - + size_tmps - + size_hooks - + size_pws_pre - + size_pws_base - + EXTRA_SPACE; - - bool not_enough_memory = true; - - u64 size_scrypt = 0; - - u64 tmto; - - for (tmto = tmto_start; tmto <= tmto_stop; tmto++) - { - size_scrypt = (128ULL * scrypt_r) * scrypt_N; - - size_scrypt /= 1ull << tmto; - - size_scrypt *= kernel_power_max; - - if ((size_scrypt / 4) > device_param->device_maxmem_alloc) continue; - - if ((size_scrypt + scrypt_extra_space) > device_param->device_available_mem) continue; - - not_enough_memory = false; - - break; - } - - if (not_enough_memory == true) return -1; - - return size_scrypt; + return size_scrypt / (1 << tmto); } u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) @@ -587,7 +524,7 @@ void module_init (module_ctx_t *module_ctx) module_ctx->module_kernel_loops_max = module_kernel_loops_max; module_ctx->module_kernel_loops_min = module_kernel_loops_min; module_ctx->module_kernel_threads_max = 
module_kernel_threads_max; - module_ctx->module_kernel_threads_min = MODULE_DEFAULT; + module_ctx->module_kernel_threads_min = module_kernel_threads_min; module_ctx->module_kern_type = module_kern_type; module_ctx->module_kern_type_dynamic = MODULE_DEFAULT; module_ctx->module_opti_type = module_opti_type; diff --git a/src/modules/module_22700.c b/src/modules/module_22700.c index 30c106625..1b9113bd4 100644 --- a/src/modules/module_22700.c +++ b/src/modules/module_22700.c @@ -49,6 +49,8 @@ const char *module_st_pass (MAYBE_UNUSED const hashconfig_t *hashconfig, static const char *SIGNATURE_MULTIBIT = "$multibit$"; +static const u32 SCRYPT_THREADS = 32; + static const u64 SCRYPT_N = 16384; static const u64 SCRYPT_R = 8; static const u64 SCRYPT_P = 1; @@ -67,9 +69,16 @@ u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_ return kernel_loops_max; } +u32 module_kernel_threads_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) +{ + const u32 kernel_threads_min = (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : SCRYPT_THREADS; + + return kernel_threads_min; +} + u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { - const u32 kernel_threads_max = 32; + const u32 kernel_threads_max = (user_options->kernel_threads_chgd == true) ? 
user_options->kernel_threads : SCRYPT_THREADS; return kernel_threads_max; } @@ -84,90 +93,122 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con return pw_max; } -const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes) +u32 tmto = 0; + +const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes, const u32 device_id, const u32 kernel_accel) { + // preprocess tmto in case user has overridden + // it's important to set to 0 otherwise so we can postprocess tmto in that case + + tmto = (user_options->scrypt_tmto_chgd == true) ? user_options->scrypt_tmto : 0; + // we enforce the same configuration for all hashes, so this should be fine const u64 scrypt_N = (hashes->salts_buf[0].scrypt_N) ? hashes->salts_buf[0].scrypt_N : SCRYPT_N; const u64 scrypt_r = (hashes->salts_buf[0].scrypt_r) ? 
hashes->salts_buf[0].scrypt_r : SCRYPT_R; - const u64 req1 = 128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra); + const u64 size_per_accel = (128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra)) >> tmto; int lines_sz = 4096; char *lines_buf = hcmalloc (lines_sz); int lines_pos = 0; - for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++) + hc_device_param_t *device_param = &backend_ctx->devices_param[device_id]; + + const u32 device_processors = device_param->device_processors; + + const u64 available_mem = MIN (device_param->device_available_mem, (device_param->device_maxmem_alloc * 4)); + + u32 kernel_accel_new = device_processors; + + if (kernel_accel) { - hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx]; + // from command line or tuning db has priority - if (device_param->skipped == true) continue; - - const u64 avail = MIN (device_param->device_available_mem, (device_param->device_maxmem_alloc * 4)) - (2 * req1); - - char *new_device_name = hcstrdup (device_param->device_name); - - for (size_t i = 0; i < strlen (new_device_name); i++) - { - if (new_device_name[i] == ' ') new_device_name[i] = '_'; - } - - char *out_name = new_device_name; - - if (memcmp (new_device_name, "AMD_", 4) == 0) out_name += 4; - if (memcmp (new_device_name, "NVIDIA_", 7) == 0) out_name += 7; - - // ok, try to find a nice accel programmatically - - u32 accel = device_param->device_processors; + kernel_accel_new = user_options->kernel_accel; + } + else + { + // find a nice kernel_accel programmatically if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) { - // expect to change any of this - - if (avail < (req1 * accel)) // not enough memory + if ((size_per_accel * device_processors) > available_mem) // not enough memory { - const float multi = (float) avail / req1; + const float multi = 
(float) available_mem / size_per_accel; - accel = multi; + int accel_multi; - for (int i = 1; i <= 4; i++) // this is tmto + for (accel_multi = 1; accel_multi <= 2; accel_multi++) { - if (device_param->device_processors > accel) - { - accel = ((u64) multi << i) & ~3; - } + kernel_accel_new = multi * (1 << accel_multi); + + if (kernel_accel_new >= device_processors) break; + } + + // we need some space for tmps[], ... + + kernel_accel_new -= (1 << accel_multi); + + // clamp if close to device processors -- 10% good? + + if ((kernel_accel_new > device_processors) && ((kernel_accel_new - device_processors) <= (device_processors / 10))) + { + kernel_accel_new = device_processors; } } else { for (int i = 1; i <= 8; i++) { - if ((avail * 2) > (req1 * accel)) + if ((size_per_accel * device_processors * i) < available_mem) { - accel = device_param->device_processors * i; + kernel_accel_new = device_processors * i; } } } } else { - const u64 req1 = 128 * scrypt_r * scrypt_N; - for (int i = 1; i <= 8; i++) { - if (avail > (req1 * accel)) + if ((size_per_accel * device_processors * i) < available_mem) { - accel = device_param->device_processors * i; + kernel_accel_new = device_processors * i; } } } - - lines_pos += snprintf (lines_buf + lines_pos, lines_sz - lines_pos, "%s * %u 1 %u A\n", out_name, user_options->hash_mode, accel); - - hcfree (new_device_name); } + // fix tmto if user allows + + if (tmto == 0) + { + const u32 tmto_start = 1; + const u32 tmto_stop = 5; + + for (u32 tmto_new = tmto_start; tmto_new <= tmto_stop; tmto_new++) + { + if (available_mem > (kernel_accel_new * (size_per_accel >> tmto_new))) + { + tmto = tmto_new; + + break; + } + } + } + + char *new_device_name = hcstrdup (device_param->device_name); + + for (size_t i = 0; i < strlen (new_device_name); i++) + { + if (new_device_name[i] == ' ') new_device_name[i] = '_'; + } + + lines_pos += snprintf (lines_buf + lines_pos, lines_sz - lines_pos, "%s * %u 1 %u A\n", new_device_name, user_options->hash_mode, 
kernel_accel_new); + + hcfree (new_device_name); + return lines_buf; } @@ -179,115 +220,11 @@ u64 module_extra_buffer_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE const u64 scrypt_N = (hashes->salts_buf[0].scrypt_N) ? hashes->salts_buf[0].scrypt_N : SCRYPT_N; const u64 scrypt_r = (hashes->salts_buf[0].scrypt_r) ? hashes->salts_buf[0].scrypt_r : SCRYPT_R; - const u64 kernel_power_max = ((OPTS_TYPE & OPTS_TYPE_MP_MULTI_DISABLE) ? 1 : device_param->device_processors) * device_param->kernel_threads_max * device_param->kernel_accel_max; + const u64 size_per_accel = 128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra); - u64 tmto_start = 0; - u64 tmto_stop = 4; + u64 size_scrypt = size_per_accel * device_param->kernel_accel_max; - if (user_options->scrypt_tmto_chgd == true) - { - tmto_start = user_options->scrypt_tmto; - tmto_stop = user_options->scrypt_tmto; - } - - // size_pws - - const u64 size_pws = kernel_power_max * sizeof (pw_t); - - const u64 size_pws_amp = size_pws; - - // size_pws_comp - - const u64 size_pws_comp = kernel_power_max * (sizeof (u32) * 64); - - // size_pws_idx - - const u64 size_pws_idx = (kernel_power_max + 1) * sizeof (pw_idx_t); - - // size_tmps - - const u64 size_tmps = kernel_power_max * hashconfig->tmp_size; - - // size_hooks - - const u64 size_hooks = kernel_power_max * hashconfig->hook_size; - - u64 size_pws_pre = 4; - u64 size_pws_base = 4; - - if (user_options->slow_candidates == true) - { - // size_pws_pre - - size_pws_pre = kernel_power_max * sizeof (pw_pre_t); - - // size_pws_base - - size_pws_base = kernel_power_max * sizeof (pw_pre_t); - } - - // sometimes device_available_mem and device_maxmem_alloc reported back from the opencl runtime are a bit inaccurate. - // let's add some extra space just to be sure. 
- // now depends on the kernel-accel value (where scrypt and similar benefits), but also hard minimum 64mb and maximum 1024mb limit - - u64 EXTRA_SPACE = (1024ULL * 1024ULL) * device_param->kernel_accel_max; - - EXTRA_SPACE = MAX (EXTRA_SPACE, ( 64ULL * 1024ULL * 1024ULL)); - EXTRA_SPACE = MIN (EXTRA_SPACE, (1024ULL * 1024ULL * 1024ULL)); - - const u64 scrypt_extra_space - = device_param->size_bfs - + device_param->size_combs - + device_param->size_digests - + device_param->size_esalts - + device_param->size_markov_css - + device_param->size_plains - + device_param->size_results - + device_param->size_root_css - + device_param->size_rules - + device_param->size_rules_c - + device_param->size_salts - + device_param->size_shown - + device_param->size_tm - + device_param->size_st_digests - + device_param->size_st_salts - + device_param->size_st_esalts - + size_pws - + size_pws_amp - + size_pws_comp - + size_pws_idx - + size_tmps - + size_hooks - + size_pws_pre - + size_pws_base - + EXTRA_SPACE; - - bool not_enough_memory = true; - - u64 size_scrypt = 0; - - u64 tmto; - - for (tmto = tmto_start; tmto <= tmto_stop; tmto++) - { - size_scrypt = (128ULL * scrypt_r) * scrypt_N; - - size_scrypt /= 1ull << tmto; - - size_scrypt *= kernel_power_max; - - if ((size_scrypt / 4) > device_param->device_maxmem_alloc) continue; - - if ((size_scrypt + scrypt_extra_space) > device_param->device_available_mem) continue; - - not_enough_memory = false; - - break; - } - - if (not_enough_memory == true) return -1; - - return size_scrypt; + return size_scrypt / (1 << tmto); } u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) @@ -526,7 +463,7 @@ void module_init (module_ctx_t *module_ctx) module_ctx->module_kernel_loops_max = module_kernel_loops_max; module_ctx->module_kernel_loops_min = module_kernel_loops_min; module_ctx->module_kernel_threads_max = 
module_kernel_threads_max; - module_ctx->module_kernel_threads_min = MODULE_DEFAULT; + module_ctx->module_kernel_threads_min = module_kernel_threads_min; module_ctx->module_kern_type = module_kern_type; module_ctx->module_kern_type_dynamic = MODULE_DEFAULT; module_ctx->module_opti_type = module_opti_type; diff --git a/src/modules/module_24000.c b/src/modules/module_24000.c index 62217f2d8..159acbed0 100644 --- a/src/modules/module_24000.c +++ b/src/modules/module_24000.c @@ -57,27 +57,13 @@ typedef struct bestcrypt_scrypt // 16 is actually a bit low, we may need to change this depending on user response static const char *SIGNATURE_BESTCRYPT_SCRYPT = "$bcve$"; -static const u32 SCRYPT_MAX_ACCEL = 256; -static const u32 SCRYPT_MAX_THREADS = 4; + +static const u32 SCRYPT_THREADS = 16; static const u64 SCRYPT_N = 32768; static const u64 SCRYPT_R = 16; static const u64 SCRYPT_P = 1; -u32 module_kernel_accel_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) -{ - const u32 kernel_accel_min = 1; - - return kernel_accel_min; -} - -u32 module_kernel_accel_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) -{ - const u32 kernel_accel_max = (user_options->kernel_accel_chgd == true) ? 
user_options->kernel_accel : SCRYPT_MAX_ACCEL; - - return kernel_accel_max; -} - u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { const u32 kernel_loops_min = 1; @@ -94,14 +80,14 @@ u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_ u32 module_kernel_threads_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { - const u32 kernel_threads_min = 1; + const u32 kernel_threads_min = (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : SCRYPT_THREADS; return kernel_threads_min; } u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { - const u32 kernel_threads_max = (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : SCRYPT_MAX_THREADS; + const u32 kernel_threads_max = (user_options->kernel_threads_chgd == true) ? 
user_options->kernel_threads : SCRYPT_THREADS; return kernel_threads_max; } @@ -123,90 +109,122 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con return pw_max; } -const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes) +u32 tmto = 0; + +const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes, const u32 device_id, const u32 kernel_accel) { + // preprocess tmto in case user has overridden + // it's important to set to 0 otherwise so we can postprocess tmto in that case + + tmto = (user_options->scrypt_tmto_chgd == true) ? user_options->scrypt_tmto : 0; + // we enforce the same configuration for all hashes, so this should be fine const u64 scrypt_N = (hashes->salts_buf[0].scrypt_N) ? hashes->salts_buf[0].scrypt_N : SCRYPT_N; const u64 scrypt_r = (hashes->salts_buf[0].scrypt_r) ? 
hashes->salts_buf[0].scrypt_r : SCRYPT_R; - const u64 req1 = 128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra); + const u64 size_per_accel = (128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra)) >> tmto; int lines_sz = 4096; char *lines_buf = hcmalloc (lines_sz); int lines_pos = 0; - for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++) + hc_device_param_t *device_param = &backend_ctx->devices_param[device_id]; + + const u32 device_processors = device_param->device_processors; + + const u64 available_mem = MIN (device_param->device_available_mem, (device_param->device_maxmem_alloc * 4)); + + u32 kernel_accel_new = device_processors; + + if (kernel_accel) { - hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx]; + // from command line or tuning db has priority - if (device_param->skipped == true) continue; - - const u64 avail = MIN (device_param->device_available_mem, (device_param->device_maxmem_alloc * 4)) - (2 * req1); - - char *new_device_name = hcstrdup (device_param->device_name); - - for (size_t i = 0; i < strlen (new_device_name); i++) - { - if (new_device_name[i] == ' ') new_device_name[i] = '_'; - } - - char *out_name = new_device_name; - - if (memcmp (new_device_name, "AMD_", 4) == 0) out_name += 4; - if (memcmp (new_device_name, "NVIDIA_", 7) == 0) out_name += 7; - - // ok, try to find a nice accel programmatically - - u32 accel = device_param->device_processors; + kernel_accel_new = user_options->kernel_accel; + } + else + { + // find a nice kernel_accel programmatically if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) { - // expect to change any of this - - if (avail < (req1 * accel)) // not enough memory + if ((size_per_accel * device_processors) > available_mem) // not enough memory { - const float multi = (float) avail / req1; + const float multi = 
(float) available_mem / size_per_accel; - accel = multi; + int accel_multi; - for (int i = 1; i <= 4; i++) // this is tmto + for (accel_multi = 1; accel_multi <= 2; accel_multi++) { - if (device_param->device_processors > accel) - { - accel = ((u64) multi << i) & ~3; - } + kernel_accel_new = multi * (1 << accel_multi); + + if (kernel_accel_new >= device_processors) break; + } + + // we need some space for tmps[], ... + + kernel_accel_new -= (1 << accel_multi); + + // clamp if close to device processors -- 10% good? + + if ((kernel_accel_new > device_processors) && ((kernel_accel_new - device_processors) <= (device_processors / 10))) + { + kernel_accel_new = device_processors; } } else { for (int i = 1; i <= 8; i++) { - if ((avail * 2) > (req1 * accel)) + if ((size_per_accel * device_processors * i) < available_mem) { - accel = device_param->device_processors * i; + kernel_accel_new = device_processors * i; } } } } else { - const u64 req1 = 128 * scrypt_r * scrypt_N; - for (int i = 1; i <= 8; i++) { - if (avail > (req1 * accel)) + if ((size_per_accel * device_processors * i) < available_mem) { - accel = device_param->device_processors * i; + kernel_accel_new = device_processors * i; } } } - - lines_pos += snprintf (lines_buf + lines_pos, lines_sz - lines_pos, "%s * %u 1 %u A\n", out_name, user_options->hash_mode, accel); - - hcfree (new_device_name); } + // fix tmto if user allows + + if (tmto == 0) + { + const u32 tmto_start = 1; + const u32 tmto_stop = 5; + + for (u32 tmto_new = tmto_start; tmto_new <= tmto_stop; tmto_new++) + { + if (available_mem > (kernel_accel_new * (size_per_accel >> tmto_new))) + { + tmto = tmto_new; + + break; + } + } + } + + char *new_device_name = hcstrdup (device_param->device_name); + + for (size_t i = 0; i < strlen (new_device_name); i++) + { + if (new_device_name[i] == ' ') new_device_name[i] = '_'; + } + + lines_pos += snprintf (lines_buf + lines_pos, lines_sz - lines_pos, "%s * %u 1 %u A\n", new_device_name, user_options->hash_mode, 
kernel_accel_new); + + hcfree (new_device_name); + return lines_buf; } @@ -215,121 +233,14 @@ u64 module_extra_buffer_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // we need to set the self-test hash settings to pass the self-test // the decoder for the self-test is called after this function - const u32 scrypt_N = (hashes->salts_buf[0].scrypt_N) ? hashes->salts_buf[0].scrypt_N : SCRYPT_N; - const u32 scrypt_r = (hashes->salts_buf[0].scrypt_r) ? hashes->salts_buf[0].scrypt_r : SCRYPT_R; + const u64 scrypt_N = (hashes->salts_buf[0].scrypt_N) ? hashes->salts_buf[0].scrypt_N : SCRYPT_N; + const u64 scrypt_r = (hashes->salts_buf[0].scrypt_r) ? hashes->salts_buf[0].scrypt_r : SCRYPT_R; - const u64 kernel_power_max = ((OPTS_TYPE & OPTS_TYPE_MP_MULTI_DISABLE) ? 1 : device_param->device_processors) * device_param->kernel_threads_max * device_param->kernel_accel_max; + const u64 size_per_accel = 128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra); - u32 tmto_start = 1; - u32 tmto_stop = 6; + u64 size_scrypt = size_per_accel * device_param->kernel_accel_max; - if (user_options->scrypt_tmto) - { - tmto_start = user_options->scrypt_tmto; - tmto_stop = user_options->scrypt_tmto; - } - - // size_pws - - const u64 size_pws = kernel_power_max * sizeof (pw_t); - - const u64 size_pws_amp = size_pws; - - // size_pws_comp - - const u64 size_pws_comp = kernel_power_max * (sizeof (u32) * 64); - - // size_pws_idx - - const u64 size_pws_idx = (kernel_power_max + 1) * sizeof (pw_idx_t); - - // size_tmps - - const u64 size_tmps = kernel_power_max * hashconfig->tmp_size; - - // size_hooks - - const u64 size_hooks = kernel_power_max * hashconfig->hook_size; - -/* - u64 size_pws_pre = 4; - u64 size_pws_base = 4; - - if (user_options->slow_candidates == true) - { - // size_pws_pre - - size_pws_pre = kernel_power_max * sizeof (pw_pre_t); - - // size_pws_base - - size_pws_base = kernel_power_max * sizeof (pw_pre_t); - } -*/ - - // 
sometimes device_available_mem and device_maxmem_alloc reported back from the opencl runtime are a bit inaccurate. - // let's add some extra space just to be sure. - // now depends on the kernel-accel value (where scrypt and similar benefits), but also hard minimum 64mb and maximum 1024mb limit -/* - u64 EXTRA_SPACE = (1024ULL * 1024ULL) * device_param->kernel_accel_max; - - EXTRA_SPACE = MAX (EXTRA_SPACE, ( 64ULL * 1024ULL * 1024ULL)); - EXTRA_SPACE = MIN (EXTRA_SPACE, (1024ULL * 1024ULL * 1024ULL)); -*/ - const u64 scrypt_extra_space - = device_param->size_bfs - + device_param->size_combs - + device_param->size_digests - + device_param->size_esalts - + device_param->size_markov_css - + device_param->size_plains - + device_param->size_results - + device_param->size_root_css - + device_param->size_rules - + device_param->size_rules_c - + device_param->size_salts - + device_param->size_shown - + device_param->size_tm - + device_param->size_st_digests - + device_param->size_st_salts - + device_param->size_st_esalts - + size_pws - + size_pws_amp - + size_pws_comp - + size_pws_idx - + size_tmps - + size_hooks; -// + size_pws_pre -// + size_pws_base; -/* - + EXTRA_SPACE; -*/ - bool not_enough_memory = true; - - u64 size_scrypt = 0; - - u32 tmto; - - for (tmto = tmto_start; tmto <= tmto_stop; tmto++) - { - size_scrypt = (128ULL * scrypt_r) * scrypt_N; - - size_scrypt /= 1ull << tmto; - - size_scrypt *= kernel_power_max; - - if ((size_scrypt / 4) > device_param->device_maxmem_alloc) continue; - - if ((size_scrypt + scrypt_extra_space) > device_param->device_available_mem) continue; - - not_enough_memory = false; - - break; - } - - if (not_enough_memory == true) return -1; - - return size_scrypt; + return size_scrypt / (1 << tmto); } u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) @@ -593,8 +504,8 @@ void module_init (module_ctx_t *module_ctx) 
module_ctx->module_hook_size = MODULE_DEFAULT; module_ctx->module_jit_build_options = module_jit_build_options; module_ctx->module_jit_cache_disable = MODULE_DEFAULT; - module_ctx->module_kernel_accel_max = module_kernel_accel_max; - module_ctx->module_kernel_accel_min = module_kernel_accel_min; + module_ctx->module_kernel_accel_max = MODULE_DEFAULT; + module_ctx->module_kernel_accel_min = MODULE_DEFAULT; module_ctx->module_kernel_loops_max = module_kernel_loops_max; module_ctx->module_kernel_loops_min = module_kernel_loops_min; module_ctx->module_kernel_threads_max = module_kernel_threads_max; diff --git a/src/modules/module_27700.c b/src/modules/module_27700.c index 089deb5fa..fb3a31fa1 100644 --- a/src/modules/module_27700.c +++ b/src/modules/module_27700.c @@ -49,6 +49,8 @@ const char *module_st_pass (MAYBE_UNUSED const hashconfig_t *hashconfig, static const char *SIGNATURE_MULTIBIT = "$multibit$"; +static const u32 SCRYPT_THREADS = 32; + static const u64 SCRYPT_N = 16384; static const u64 SCRYPT_R = 8; static const u64 SCRYPT_P = 1; @@ -67,9 +69,16 @@ u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_ return kernel_loops_max; } +u32 module_kernel_threads_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) +{ + const u32 kernel_threads_min = (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : SCRYPT_THREADS; + + return kernel_threads_min; +} + u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { - const u32 kernel_threads_max = 32; + const u32 kernel_threads_max = (user_options->kernel_threads_chgd == true) ? 
user_options->kernel_threads : SCRYPT_THREADS; return kernel_threads_max; } @@ -84,90 +93,122 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con return pw_max; } -const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes) +u32 tmto = 0; + +const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes, const u32 device_id, const u32 kernel_accel) { + // preprocess tmto in case user has overridden + // it's important to set to 0 otherwise so we can postprocess tmto in that case + + tmto = (user_options->scrypt_tmto_chgd == true) ? user_options->scrypt_tmto : 0; + // we enforce the same configuration for all hashes, so this should be fine const u64 scrypt_N = (hashes->salts_buf[0].scrypt_N) ? hashes->salts_buf[0].scrypt_N : SCRYPT_N; const u64 scrypt_r = (hashes->salts_buf[0].scrypt_r) ? 
hashes->salts_buf[0].scrypt_r : SCRYPT_R; - const u64 req1 = 128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra); + const u64 size_per_accel = (128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra)) >> tmto; int lines_sz = 4096; char *lines_buf = hcmalloc (lines_sz); int lines_pos = 0; - for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++) + hc_device_param_t *device_param = &backend_ctx->devices_param[device_id]; + + const u32 device_processors = device_param->device_processors; + + const u64 available_mem = MIN (device_param->device_available_mem, (device_param->device_maxmem_alloc * 4)); + + u32 kernel_accel_new = device_processors; + + if (kernel_accel) { - hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx]; + // from command line or tuning db has priority - if (device_param->skipped == true) continue; - - const u64 avail = MIN (device_param->device_available_mem, (device_param->device_maxmem_alloc * 4)) - (2 * req1); - - char *new_device_name = hcstrdup (device_param->device_name); - - for (size_t i = 0; i < strlen (new_device_name); i++) - { - if (new_device_name[i] == ' ') new_device_name[i] = '_'; - } - - char *out_name = new_device_name; - - if (memcmp (new_device_name, "AMD_", 4) == 0) out_name += 4; - if (memcmp (new_device_name, "NVIDIA_", 7) == 0) out_name += 7; - - // ok, try to find a nice accel programmatically - - u32 accel = device_param->device_processors; + kernel_accel_new = user_options->kernel_accel; + } + else + { + // find a nice kernel_accel programmatically if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) { - // expect to change any of this - - if (avail < (req1 * accel)) // not enough memory + if ((size_per_accel * device_processors) > available_mem) // not enough memory { - const float multi = (float) avail / req1; + const float multi = 
(float) available_mem / size_per_accel; - accel = multi; + int accel_multi; - for (int i = 1; i <= 4; i++) // this is tmto + for (accel_multi = 1; accel_multi <= 2; accel_multi++) { - if (device_param->device_processors > accel) - { - accel = ((u64) multi << i) & ~3; - } + kernel_accel_new = multi * (1 << accel_multi); + + if (kernel_accel_new >= device_processors) break; + } + + // we need some space for tmps[], ... + + kernel_accel_new -= (1 << accel_multi); + + // clamp if close to device processors -- 10% good? + + if ((kernel_accel_new > device_processors) && ((kernel_accel_new - device_processors) <= (device_processors / 10))) + { + kernel_accel_new = device_processors; } } else { for (int i = 1; i <= 8; i++) { - if ((avail * 2) > (req1 * accel)) + if ((size_per_accel * device_processors * i) < available_mem) { - accel = device_param->device_processors * i; + kernel_accel_new = device_processors * i; } } } } else { - const u64 req1 = 128 * scrypt_r * scrypt_N; - for (int i = 1; i <= 8; i++) { - if (avail > (req1 * accel)) + if ((size_per_accel * device_processors * i) < available_mem) { - accel = device_param->device_processors * i; + kernel_accel_new = device_processors * i; } } } - - lines_pos += snprintf (lines_buf + lines_pos, lines_sz - lines_pos, "%s * %u 1 %u A\n", out_name, user_options->hash_mode, accel); - - hcfree (new_device_name); } + // fix tmto if user allows + + if (tmto == 0) + { + const u32 tmto_start = 1; + const u32 tmto_stop = 5; + + for (u32 tmto_new = tmto_start; tmto_new <= tmto_stop; tmto_new++) + { + if (available_mem > (kernel_accel_new * (size_per_accel >> tmto_new))) + { + tmto = tmto_new; + + break; + } + } + } + + char *new_device_name = hcstrdup (device_param->device_name); + + for (size_t i = 0; i < strlen (new_device_name); i++) + { + if (new_device_name[i] == ' ') new_device_name[i] = '_'; + } + + lines_pos += snprintf (lines_buf + lines_pos, lines_sz - lines_pos, "%s * %u 1 %u A\n", new_device_name, user_options->hash_mode, 
kernel_accel_new); + + hcfree (new_device_name); + return lines_buf; } @@ -179,115 +220,11 @@ u64 module_extra_buffer_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE const u64 scrypt_N = (hashes->salts_buf[0].scrypt_N) ? hashes->salts_buf[0].scrypt_N : SCRYPT_N; const u64 scrypt_r = (hashes->salts_buf[0].scrypt_r) ? hashes->salts_buf[0].scrypt_r : SCRYPT_R; - const u64 kernel_power_max = ((OPTS_TYPE & OPTS_TYPE_MP_MULTI_DISABLE) ? 1 : device_param->device_processors) * device_param->kernel_threads_max * device_param->kernel_accel_max; + const u64 size_per_accel = 128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra); - u64 tmto_start = 0; - u64 tmto_stop = 4; + u64 size_scrypt = size_per_accel * device_param->kernel_accel_max; - if (user_options->scrypt_tmto_chgd == true) - { - tmto_start = user_options->scrypt_tmto; - tmto_stop = user_options->scrypt_tmto; - } - - // size_pws - - const u64 size_pws = kernel_power_max * sizeof (pw_t); - - const u64 size_pws_amp = size_pws; - - // size_pws_comp - - const u64 size_pws_comp = kernel_power_max * (sizeof (u32) * 64); - - // size_pws_idx - - const u64 size_pws_idx = (kernel_power_max + 1) * sizeof (pw_idx_t); - - // size_tmps - - const u64 size_tmps = kernel_power_max * hashconfig->tmp_size; - - // size_hooks - - const u64 size_hooks = kernel_power_max * hashconfig->hook_size; - - u64 size_pws_pre = 4; - u64 size_pws_base = 4; - - if (user_options->slow_candidates == true) - { - // size_pws_pre - - size_pws_pre = kernel_power_max * sizeof (pw_pre_t); - - // size_pws_base - - size_pws_base = kernel_power_max * sizeof (pw_pre_t); - } - - // sometimes device_available_mem and device_maxmem_alloc reported back from the opencl runtime are a bit inaccurate. - // let's add some extra space just to be sure. 
- // now depends on the kernel-accel value (where scrypt and similar benefits), but also hard minimum 64mb and maximum 1024mb limit - - u64 EXTRA_SPACE = (1024ULL * 1024ULL) * device_param->kernel_accel_max; - - EXTRA_SPACE = MAX (EXTRA_SPACE, ( 64ULL * 1024ULL * 1024ULL)); - EXTRA_SPACE = MIN (EXTRA_SPACE, (1024ULL * 1024ULL * 1024ULL)); - - const u64 scrypt_extra_space - = device_param->size_bfs - + device_param->size_combs - + device_param->size_digests - + device_param->size_esalts - + device_param->size_markov_css - + device_param->size_plains - + device_param->size_results - + device_param->size_root_css - + device_param->size_rules - + device_param->size_rules_c - + device_param->size_salts - + device_param->size_shown - + device_param->size_tm - + device_param->size_st_digests - + device_param->size_st_salts - + device_param->size_st_esalts - + size_pws - + size_pws_amp - + size_pws_comp - + size_pws_idx - + size_tmps - + size_hooks - + size_pws_pre - + size_pws_base - + EXTRA_SPACE; - - bool not_enough_memory = true; - - u64 size_scrypt = 0; - - u64 tmto; - - for (tmto = tmto_start; tmto <= tmto_stop; tmto++) - { - size_scrypt = (128ULL * scrypt_r) * scrypt_N; - - size_scrypt /= 1ull << tmto; - - size_scrypt *= kernel_power_max; - - if ((size_scrypt / 4) > device_param->device_maxmem_alloc) continue; - - if ((size_scrypt + scrypt_extra_space) > device_param->device_available_mem) continue; - - not_enough_memory = false; - - break; - } - - if (not_enough_memory == true) return -1; - - return size_scrypt; + return size_scrypt / (1 << tmto); } u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) @@ -550,7 +487,7 @@ void module_init (module_ctx_t *module_ctx) module_ctx->module_kernel_loops_max = module_kernel_loops_max; module_ctx->module_kernel_loops_min = module_kernel_loops_min; module_ctx->module_kernel_threads_max = 
module_kernel_threads_max; - module_ctx->module_kernel_threads_min = MODULE_DEFAULT; + module_ctx->module_kernel_threads_min = module_kernel_threads_min; module_ctx->module_kern_type = module_kern_type; module_ctx->module_kern_type_dynamic = MODULE_DEFAULT; module_ctx->module_opti_type = module_opti_type; diff --git a/src/modules/module_28200.c b/src/modules/module_28200.c index 86a636adf..52a7adbdd 100644 --- a/src/modules/module_28200.c +++ b/src/modules/module_28200.c @@ -57,6 +57,8 @@ typedef struct exodus static const char *SIGNATURE_EXODUS = "EXODUS"; +static const u32 SCRYPT_THREADS = 32; + static const u64 SCRYPT_N = 16384; static const u64 SCRYPT_R = 8; static const u64 SCRYPT_P = 1; @@ -75,9 +77,16 @@ u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_ return kernel_loops_max; } +u32 module_kernel_threads_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) +{ + const u32 kernel_threads_min = (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : SCRYPT_THREADS; + + return kernel_threads_min; +} + u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { - const u32 kernel_threads_max = 32; + const u32 kernel_threads_max = (user_options->kernel_threads_chgd == true) ? 
user_options->kernel_threads : SCRYPT_THREADS; return kernel_threads_max; } @@ -96,90 +105,122 @@ u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED return esalt_size; } -const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes) +u32 tmto = 0; + +const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes, const u32 device_id, const u32 kernel_accel) { + // preprocess tmto in case user has overridden + // it's important to set to 0 otherwise so we can postprocess tmto in that case + + tmto = (user_options->scrypt_tmto_chgd == true) ? user_options->scrypt_tmto : 0; + // we enforce the same configuration for all hashes, so this should be fine const u64 scrypt_N = (hashes->salts_buf[0].scrypt_N) ? hashes->salts_buf[0].scrypt_N : SCRYPT_N; const u64 scrypt_r = (hashes->salts_buf[0].scrypt_r) ? 
hashes->salts_buf[0].scrypt_r : SCRYPT_R; - const u64 req1 = 128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra); + const u64 size_per_accel = (128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra)) >> tmto; int lines_sz = 4096; char *lines_buf = hcmalloc (lines_sz); int lines_pos = 0; - for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++) + hc_device_param_t *device_param = &backend_ctx->devices_param[device_id]; + + const u32 device_processors = device_param->device_processors; + + const u64 available_mem = MIN (device_param->device_available_mem, (device_param->device_maxmem_alloc * 4)); + + u32 kernel_accel_new = device_processors; + + if (kernel_accel) { - hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx]; + // from command line or tuning db has priority - if (device_param->skipped == true) continue; - - const u64 avail = MIN (device_param->device_available_mem, (device_param->device_maxmem_alloc * 4)) - (2 * req1); - - char *new_device_name = hcstrdup (device_param->device_name); - - for (size_t i = 0; i < strlen (new_device_name); i++) - { - if (new_device_name[i] == ' ') new_device_name[i] = '_'; - } - - char *out_name = new_device_name; - - if (memcmp (new_device_name, "AMD_", 4) == 0) out_name += 4; - if (memcmp (new_device_name, "NVIDIA_", 7) == 0) out_name += 7; - - // ok, try to find a nice accel programmatically - - u32 accel = device_param->device_processors; + kernel_accel_new = user_options->kernel_accel; + } + else + { + // find a nice kernel_accel programmatically if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) { - // expect to change any of this - - if (avail < (req1 * accel)) // not enough memory + if ((size_per_accel * device_processors) > available_mem) // not enough memory { - const float multi = (float) avail / req1; + const float multi = 
(float) available_mem / size_per_accel; - accel = multi; + int accel_multi; - for (int i = 1; i <= 4; i++) // this is tmto + for (accel_multi = 1; accel_multi <= 2; accel_multi++) { - if (device_param->device_processors > accel) - { - accel = ((u64) multi << i) & ~3; - } + kernel_accel_new = multi * (1 << accel_multi); + + if (kernel_accel_new >= device_processors) break; + } + + // we need some space for tmps[], ... + + kernel_accel_new -= (1 << accel_multi); + + // clamp if close to device processors -- 10% good? + + if ((kernel_accel_new > device_processors) && ((kernel_accel_new - device_processors) <= (device_processors / 10))) + { + kernel_accel_new = device_processors; } } else { for (int i = 1; i <= 8; i++) { - if ((avail * 2) > (req1 * accel)) + if ((size_per_accel * device_processors * i) < available_mem) { - accel = device_param->device_processors * i; + kernel_accel_new = device_processors * i; } } } } else { - const u64 req1 = 128 * scrypt_r * scrypt_N; - for (int i = 1; i <= 8; i++) { - if (avail > (req1 * accel)) + if ((size_per_accel * device_processors * i) < available_mem) { - accel = device_param->device_processors * i; + kernel_accel_new = device_processors * i; } } } - - lines_pos += snprintf (lines_buf + lines_pos, lines_sz - lines_pos, "%s * %u 1 %u A\n", out_name, user_options->hash_mode, accel); - - hcfree (new_device_name); } + // fix tmto if user allows + + if (tmto == 0) + { + const u32 tmto_start = 1; + const u32 tmto_stop = 5; + + for (u32 tmto_new = tmto_start; tmto_new <= tmto_stop; tmto_new++) + { + if (available_mem > (kernel_accel_new * (size_per_accel >> tmto_new))) + { + tmto = tmto_new; + + break; + } + } + } + + char *new_device_name = hcstrdup (device_param->device_name); + + for (size_t i = 0; i < strlen (new_device_name); i++) + { + if (new_device_name[i] == ' ') new_device_name[i] = '_'; + } + + lines_pos += snprintf (lines_buf + lines_pos, lines_sz - lines_pos, "%s * %u 1 %u A\n", new_device_name, user_options->hash_mode, 
kernel_accel_new); + + hcfree (new_device_name); + return lines_buf; } @@ -191,115 +232,11 @@ u64 module_extra_buffer_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE const u64 scrypt_N = (hashes->salts_buf[0].scrypt_N) ? hashes->salts_buf[0].scrypt_N : SCRYPT_N; const u64 scrypt_r = (hashes->salts_buf[0].scrypt_r) ? hashes->salts_buf[0].scrypt_r : SCRYPT_R; - const u64 kernel_power_max = ((OPTS_TYPE & OPTS_TYPE_MP_MULTI_DISABLE) ? 1 : device_param->device_processors) * device_param->kernel_threads_max * device_param->kernel_accel_max; + const u64 size_per_accel = 128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra); - u64 tmto_start = 0; - u64 tmto_stop = 4; + u64 size_scrypt = size_per_accel * device_param->kernel_accel_max; - if (user_options->scrypt_tmto_chgd == true) - { - tmto_start = user_options->scrypt_tmto; - tmto_stop = user_options->scrypt_tmto; - } - - // size_pws - - const u64 size_pws = kernel_power_max * sizeof (pw_t); - - const u64 size_pws_amp = size_pws; - - // size_pws_comp - - const u64 size_pws_comp = kernel_power_max * (sizeof (u32) * 64); - - // size_pws_idx - - const u64 size_pws_idx = (kernel_power_max + 1) * sizeof (pw_idx_t); - - // size_tmps - - const u64 size_tmps = kernel_power_max * hashconfig->tmp_size; - - // size_hooks - - const u64 size_hooks = kernel_power_max * hashconfig->hook_size; - - u64 size_pws_pre = 4; - u64 size_pws_base = 4; - - if (user_options->slow_candidates == true) - { - // size_pws_pre - - size_pws_pre = kernel_power_max * sizeof (pw_pre_t); - - // size_pws_base - - size_pws_base = kernel_power_max * sizeof (pw_pre_t); - } - - // sometimes device_available_mem and device_maxmem_alloc reported back from the opencl runtime are a bit inaccurate. - // let's add some extra space just to be sure. 
- // now depends on the kernel-accel value (where scrypt and similar benefits), but also hard minimum 64mb and maximum 1024mb limit - - u64 EXTRA_SPACE = (1024ULL * 1024ULL) * device_param->kernel_accel_max; - - EXTRA_SPACE = MAX (EXTRA_SPACE, ( 64ULL * 1024ULL * 1024ULL)); - EXTRA_SPACE = MIN (EXTRA_SPACE, (1024ULL * 1024ULL * 1024ULL)); - - const u64 scrypt_extra_space - = device_param->size_bfs - + device_param->size_combs - + device_param->size_digests - + device_param->size_esalts - + device_param->size_markov_css - + device_param->size_plains - + device_param->size_results - + device_param->size_root_css - + device_param->size_rules - + device_param->size_rules_c - + device_param->size_salts - + device_param->size_shown - + device_param->size_tm - + device_param->size_st_digests - + device_param->size_st_salts - + device_param->size_st_esalts - + size_pws - + size_pws_amp - + size_pws_comp - + size_pws_idx - + size_tmps - + size_hooks - + size_pws_pre - + size_pws_base - + EXTRA_SPACE; - - bool not_enough_memory = true; - - u64 size_scrypt = 0; - - u64 tmto; - - for (tmto = tmto_start; tmto <= tmto_stop; tmto++) - { - size_scrypt = (128ULL * scrypt_r) * scrypt_N; - - size_scrypt /= 1ull << tmto; - - size_scrypt *= kernel_power_max; - - if ((size_scrypt / 4) > device_param->device_maxmem_alloc) continue; - - if ((size_scrypt + scrypt_extra_space) > device_param->device_available_mem) continue; - - not_enough_memory = false; - - break; - } - - if (not_enough_memory == true) return -1; - - return size_scrypt; + return size_scrypt / (1 << tmto); } u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) @@ -634,7 +571,7 @@ void module_init (module_ctx_t *module_ctx) module_ctx->module_kernel_loops_max = module_kernel_loops_max; module_ctx->module_kernel_loops_min = module_kernel_loops_min; module_ctx->module_kernel_threads_max = 
module_kernel_threads_max; - module_ctx->module_kernel_threads_min = MODULE_DEFAULT; + module_ctx->module_kernel_threads_min = module_kernel_threads_min; module_ctx->module_kern_type = module_kern_type; module_ctx->module_kern_type_dynamic = MODULE_DEFAULT; module_ctx->module_opti_type = module_opti_type; diff --git a/src/modules/module_29800.c b/src/modules/module_29800.c index d1be6be39..633ef1978 100644 --- a/src/modules/module_29800.c +++ b/src/modules/module_29800.c @@ -49,6 +49,8 @@ const char *module_st_pass (MAYBE_UNUSED const hashconfig_t *hashconfig, static const char *SIGNATURE_BISQ = "$bisq$"; +static const u32 SCRYPT_THREADS = 16; + static const u64 SCRYPT_N = 32768; static const u64 SCRYPT_R = 8; static const u64 SCRYPT_P = 6; @@ -67,9 +69,16 @@ u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_ return kernel_loops_max; } +u32 module_kernel_threads_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) +{ + const u32 kernel_threads_min = (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : SCRYPT_THREADS; + + return kernel_threads_min; +} + u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { - const u32 kernel_threads_max = 32; + const u32 kernel_threads_max = (user_options->kernel_threads_chgd == true) ? 
user_options->kernel_threads : SCRYPT_THREADS; return kernel_threads_max; } @@ -91,90 +100,122 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con return pw_max; } -const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes) +u32 tmto = 0; + +const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes, const u32 device_id, const u32 kernel_accel) { + // preprocess tmto in case user has overridden + // it's important to set to 0 otherwise so we can postprocess tmto in that case + + tmto = (user_options->scrypt_tmto_chgd == true) ? user_options->scrypt_tmto : 0; + // we enforce the same configuration for all hashes, so this should be fine const u64 scrypt_N = (hashes->salts_buf[0].scrypt_N) ? hashes->salts_buf[0].scrypt_N : SCRYPT_N; const u64 scrypt_r = (hashes->salts_buf[0].scrypt_r) ? 
hashes->salts_buf[0].scrypt_r : SCRYPT_R; - const u64 req1 = 128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra); + const u64 size_per_accel = (128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra)) >> tmto; int lines_sz = 4096; char *lines_buf = hcmalloc (lines_sz); int lines_pos = 0; - for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++) + hc_device_param_t *device_param = &backend_ctx->devices_param[device_id]; + + const u32 device_processors = device_param->device_processors; + + const u64 available_mem = MIN (device_param->device_available_mem, (device_param->device_maxmem_alloc * 4)); + + u32 kernel_accel_new = device_processors; + + if (kernel_accel) { - hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx]; + // from command line or tuning db has priority - if (device_param->skipped == true) continue; - - const u64 avail = MIN (device_param->device_available_mem, (device_param->device_maxmem_alloc * 4)) - (2 * req1); - - char *new_device_name = hcstrdup (device_param->device_name); - - for (size_t i = 0; i < strlen (new_device_name); i++) - { - if (new_device_name[i] == ' ') new_device_name[i] = '_'; - } - - char *out_name = new_device_name; - - if (memcmp (new_device_name, "AMD_", 4) == 0) out_name += 4; - if (memcmp (new_device_name, "NVIDIA_", 7) == 0) out_name += 7; - - // ok, try to find a nice accel programmatically - - u32 accel = device_param->device_processors; + kernel_accel_new = user_options->kernel_accel; + } + else + { + // find a nice kernel_accel programmatically if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) { - // expect to change any of this - - if (avail < (req1 * accel)) // not enough memory + if ((size_per_accel * device_processors) > available_mem) // not enough memory { - const float multi = (float) avail / req1; + const float multi = 
(float) available_mem / size_per_accel; - accel = multi; + int accel_multi; - for (int i = 1; i <= 4; i++) // this is tmto + for (accel_multi = 1; accel_multi <= 2; accel_multi++) { - if (device_param->device_processors > accel) - { - accel = ((u64) multi << i) & ~3; - } + kernel_accel_new = multi * (1 << accel_multi); + + if (kernel_accel_new >= device_processors) break; + } + + // we need some space for tmps[], ... + + kernel_accel_new -= (1 << accel_multi); + + // clamp if close to device processors -- 10% good? + + if ((kernel_accel_new > device_processors) && ((kernel_accel_new - device_processors) <= (device_processors / 10))) + { + kernel_accel_new = device_processors; } } else { for (int i = 1; i <= 8; i++) { - if ((avail * 2) > (req1 * accel)) + if ((size_per_accel * device_processors * i) < available_mem) { - accel = device_param->device_processors * i; + kernel_accel_new = device_processors * i; } } } } else { - const u64 req1 = 128 * scrypt_r * scrypt_N; - for (int i = 1; i <= 8; i++) { - if (avail > (req1 * accel)) + if ((size_per_accel * device_processors * i) < available_mem) { - accel = device_param->device_processors * i; + kernel_accel_new = device_processors * i; } } } - - lines_pos += snprintf (lines_buf + lines_pos, lines_sz - lines_pos, "%s * %u 1 %u A\n", out_name, user_options->hash_mode, accel); - - hcfree (new_device_name); } + // fix tmto if user allows + + if (tmto == 0) + { + const u32 tmto_start = 1; + const u32 tmto_stop = 5; + + for (u32 tmto_new = tmto_start; tmto_new <= tmto_stop; tmto_new++) + { + if (available_mem > (kernel_accel_new * (size_per_accel >> tmto_new))) + { + tmto = tmto_new; + + break; + } + } + } + + char *new_device_name = hcstrdup (device_param->device_name); + + for (size_t i = 0; i < strlen (new_device_name); i++) + { + if (new_device_name[i] == ' ') new_device_name[i] = '_'; + } + + lines_pos += snprintf (lines_buf + lines_pos, lines_sz - lines_pos, "%s * %u 1 %u A\n", new_device_name, user_options->hash_mode, 
kernel_accel_new); + + hcfree (new_device_name); + return lines_buf; } @@ -186,115 +227,11 @@ u64 module_extra_buffer_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE const u64 scrypt_N = (hashes->salts_buf[0].scrypt_N) ? hashes->salts_buf[0].scrypt_N : SCRYPT_N; const u64 scrypt_r = (hashes->salts_buf[0].scrypt_r) ? hashes->salts_buf[0].scrypt_r : SCRYPT_R; - const u64 kernel_power_max = ((OPTS_TYPE & OPTS_TYPE_MP_MULTI_DISABLE) ? 1 : device_param->device_processors) * device_param->kernel_threads_max * device_param->kernel_accel_max; + const u64 size_per_accel = 128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra); - u64 tmto_start = 0; - u64 tmto_stop = 4; + u64 size_scrypt = size_per_accel * device_param->kernel_accel_max; - if (user_options->scrypt_tmto_chgd == true) - { - tmto_start = user_options->scrypt_tmto; - tmto_stop = user_options->scrypt_tmto; - } - - // size_pws - - const u64 size_pws = kernel_power_max * sizeof (pw_t); - - const u64 size_pws_amp = size_pws; - - // size_pws_comp - - const u64 size_pws_comp = kernel_power_max * (sizeof (u32) * 64); - - // size_pws_idx - - const u64 size_pws_idx = (kernel_power_max + 1) * sizeof (pw_idx_t); - - // size_tmps - - const u64 size_tmps = kernel_power_max * hashconfig->tmp_size; - - // size_hooks - - const u64 size_hooks = kernel_power_max * hashconfig->hook_size; - - u64 size_pws_pre = 4; - u64 size_pws_base = 4; - - if (user_options->slow_candidates == true) - { - // size_pws_pre - - size_pws_pre = kernel_power_max * sizeof (pw_pre_t); - - // size_pws_base - - size_pws_base = kernel_power_max * sizeof (pw_pre_t); - } - - // sometimes device_available_mem and device_maxmem_alloc reported back from the opencl runtime are a bit inaccurate. - // let's add some extra space just to be sure. 
- // now depends on the kernel-accel value (where scrypt and similar benefits), but also hard minimum 64mb and maximum 1024mb limit - - u64 EXTRA_SPACE = (1024ULL * 1024ULL) * device_param->kernel_accel_max; - - EXTRA_SPACE = MAX (EXTRA_SPACE, ( 64ULL * 1024ULL * 1024ULL)); - EXTRA_SPACE = MIN (EXTRA_SPACE, (1024ULL * 1024ULL * 1024ULL)); - - const u64 scrypt_extra_space - = device_param->size_bfs - + device_param->size_combs - + device_param->size_digests - + device_param->size_esalts - + device_param->size_markov_css - + device_param->size_plains - + device_param->size_results - + device_param->size_root_css - + device_param->size_rules - + device_param->size_rules_c - + device_param->size_salts - + device_param->size_shown - + device_param->size_tm - + device_param->size_st_digests - + device_param->size_st_salts - + device_param->size_st_esalts - + size_pws - + size_pws_amp - + size_pws_comp - + size_pws_idx - + size_tmps - + size_hooks - + size_pws_pre - + size_pws_base - + EXTRA_SPACE; - - bool not_enough_memory = true; - - u64 size_scrypt = 0; - - u64 tmto; - - for (tmto = tmto_start; tmto <= tmto_stop; tmto++) - { - size_scrypt = (128ULL * scrypt_r) * scrypt_N; - - size_scrypt /= 1ull << tmto; - - size_scrypt *= kernel_power_max; - - if ((size_scrypt / 4) > device_param->device_maxmem_alloc) continue; - - if ((size_scrypt + scrypt_extra_space) > device_param->device_available_mem) continue; - - not_enough_memory = false; - - break; - } - - if (not_enough_memory == true) return -1; - - return size_scrypt; + return size_scrypt / (1 << tmto); } u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) @@ -557,7 +494,7 @@ void module_init (module_ctx_t *module_ctx) module_ctx->module_kernel_loops_max = module_kernel_loops_max; module_ctx->module_kernel_loops_min = module_kernel_loops_min; module_ctx->module_kernel_threads_max = 
module_kernel_threads_max; - module_ctx->module_kernel_threads_min = MODULE_DEFAULT; + module_ctx->module_kernel_threads_min = module_kernel_threads_min; module_ctx->module_kern_type = module_kern_type; module_ctx->module_kern_type_dynamic = MODULE_DEFAULT; module_ctx->module_opti_type = module_opti_type; diff --git a/src/tuningdb.c b/src/tuningdb.c index 406359ab4..1c5e6cb32 100644 --- a/src/tuningdb.c +++ b/src/tuningdb.c @@ -43,11 +43,6 @@ int sort_by_tuning_db_entry (const void *v1, const void *v2) if (res3 != 0) return (res3); - const int res4 = t1->source - - t2->source; - - if (res4 != 0) return (res4); - return 0; } @@ -118,7 +113,7 @@ int tuning_db_init (hashcat_ctx_t *hashcat_ctx) if (line_buf[0] == '#') continue; - tuning_db_process_line (hashcat_ctx, line_buf, line_num, 1); + tuning_db_process_line (hashcat_ctx, line_buf, line_num); } hcfree (buf); @@ -167,7 +162,7 @@ void tuning_db_destroy (hashcat_ctx_t *hashcat_ctx) memset (tuning_db, 0, sizeof (tuning_db_t)); } -bool tuning_db_process_line (hashcat_ctx_t *hashcat_ctx, const char *line_buf, const int line_num, const int source) +bool tuning_db_process_line (hashcat_ctx_t *hashcat_ctx, const char *line_buf, const int line_num) { tuning_db_t *tuning_db = hashcat_ctx->tuning_db; user_options_extra_t *user_options_extra = hashcat_ctx->user_options_extra; @@ -353,7 +348,6 @@ bool tuning_db_process_line (hashcat_ctx_t *hashcat_ctx, const char *line_buf, c entry->vector_width = vector_width; entry->kernel_accel = kernel_accel; entry->kernel_loops = kernel_loops; - entry->source = source; tuning_db->entry_cnt++; } @@ -430,12 +424,11 @@ static tuning_db_entry_t *tuning_db_search_real (hashcat_ctx_t *hashcat_ctx, con // this will produce all 2^3 combinations required - for (i = 0; i < 16; i++) + for (i = 0; i < 8; i++) { - s.source = (i & 1) ? 2 : 1; + s.device_name = (i & 1) ? "*" : device_name_nospace; s.attack_mode = (i & 2) ? -1 : attack_mode; s.hash_mode = (i & 4) ? 
-1 : hash_mode; - s.device_name = (i & 8) ? "*" : device_name_nospace; entry = (tuning_db_entry_t *) bsearch (&s, tuning_db->entry_buf, tuning_db->entry_cnt, sizeof (tuning_db_entry_t), sort_by_tuning_db_entry); @@ -443,7 +436,7 @@ static tuning_db_entry_t *tuning_db_search_real (hashcat_ctx_t *hashcat_ctx, con // in non-wildcard mode do some additional checks: - if ((i & 8) == 0) + if ((i & 1) == 0) { // in case we have an alias-name diff --git a/src/user_options.c b/src/user_options.c index 217e8d3f3..7dbe6567d 100644 --- a/src/user_options.c +++ b/src/user_options.c @@ -379,8 +379,8 @@ int user_options_getopt (hashcat_ctx_t *hashcat_ctx, int argc, char **argv) case IDX_INCREMENT_MAX: case IDX_HOOK_THREADS: case IDX_BACKEND_DEVICES_VIRTMULTI: - case IDX_BACKEND_DEVICES_VIRTHOST: - case IDX_BACKEND_DEVICES_KEEPFREE: + case IDX_BACKEND_DEVICES_VIRTHOST: + case IDX_BACKEND_DEVICES_KEEPFREE: case IDX_BENCHMARK_MAX: case IDX_BENCHMARK_MIN: #ifdef WITH_BRAIN @@ -816,14 +816,14 @@ int user_options_sanity (hashcat_ctx_t *hashcat_ctx) event_log_error (hashcat_ctx, "Invalid --backend-devices-virthost value specified."); return -1; - } + } if (user_options->backend_devices_keepfree > 100) { event_log_error (hashcat_ctx, "Invalid --backend-devices-keepfree value specified."); return -1; - } + } if (user_options->outfile_format == 0) { @@ -1895,6 +1895,14 @@ void user_options_preprocess (hashcat_ctx_t *hashcat_ctx) } #endif + if (user_options->hwmon == false) + { + // some algorithm, such as SCRYPT, depend on accurate free memory values + // the only way to get them is through low-level APIs such as nvml via hwmon + + user_options->hwmon = true; + } + if (user_options->stdout_flag) { user_options->hwmon = false; @@ -3325,8 +3333,8 @@ void user_options_logger (hashcat_ctx_t *hashcat_ctx) logfile_top_uint64 (user_options->skip); logfile_top_uint (user_options->attack_mode); logfile_top_uint (user_options->backend_devices_virtmulti); - logfile_top_uint 
(user_options->backend_devices_virthost); - logfile_top_uint (user_options->backend_devices_keepfree); + logfile_top_uint (user_options->backend_devices_virthost); + logfile_top_uint (user_options->backend_devices_keepfree); logfile_top_uint (user_options->benchmark); logfile_top_uint (user_options->benchmark_all); logfile_top_uint (user_options->benchmark_max); diff --git a/tunings/Module_08900.hctune b/tunings/Module_08900.hctune index ecaa0e353..46df052b5 100644 --- a/tunings/Module_08900.hctune +++ b/tunings/Module_08900.hctune @@ -24,4 +24,3 @@ # It's better to derive the tuning based on the hash information (handled by the hash-mode plugin). # The tunings from the hash-mode plugin may be slightly off, so if you have better values, you can hardcode them here. - diff --git a/tunings/Module_09300.hctune b/tunings/Module_09300.hctune index 3277390ab..d98505795 100644 --- a/tunings/Module_09300.hctune +++ b/tunings/Module_09300.hctune @@ -19,7 +19,3 @@ #Device Attack Hash Vector Kernel Kernel #Name Mode Type Width Accel Loops -GeForce_RTX_4090 * 9300 1 512 A -ALIAS_AMD_RX6900XT * 9300 1 720 A -ALIAS_AMD_RX7900XTX * 9300 1 840 A - diff --git a/tunings/Module_15700.hctune b/tunings/Module_15700.hctune index c19ae375e..a44bd5a9c 100644 --- a/tunings/Module_15700.hctune +++ b/tunings/Module_15700.hctune @@ -19,7 +19,3 @@ #Device Attack Hash Vector Kernel Kernel #Name Mode Type Width Accel Loops -GeForce_RTX_4090 * 15700 1 180 A -ALIAS_AMD_RX6900XT * 15700 1 56 A -ALIAS_AMD_RX7900XTX * 15700 1 92 A - diff --git a/tunings/Module_22700.hctune b/tunings/Module_22700.hctune index be4cd8a4a..c08bd7a51 100644 --- a/tunings/Module_22700.hctune +++ b/tunings/Module_22700.hctune @@ -19,7 +19,14 @@ #Device Attack Hash Vector Kernel Kernel #Name Mode Type Width Accel Loops -GeForce_RTX_4090 * 22700 1 180 A -ALIAS_AMD_RX6900XT * 22700 1 56 A -ALIAS_AMD_RX7900XTX * 22700 1 92 A +#Leaving this here as a reference +#GeForce_GTX_980 * 22700 1 28 A +#GeForce_GTX_1630 * 22700 1 11 A 
+#GeForce_RTX_2080_Ti * 22700 1 78 A +#GeForce_RTX_3090 * 22700 1 82 A +#GeForce_RTX_4090 * 22700 1 180 A +#ALIAS_AMD_RX480 * 22700 1 28 A +#ALIAS_AMD_Vega64 * 22700 1 28 A +#ALIAS_AMD_RX6900XT * 22700 1 56 A +#ALIAS_AMD_RX7900XTX * 22700 1 92 A diff --git a/tunings/Module_24000.hctune b/tunings/Module_24000.hctune index 71f61fe67..52e4b78bb 100644 --- a/tunings/Module_24000.hctune +++ b/tunings/Module_24000.hctune @@ -19,7 +19,3 @@ #Device Attack Hash Vector Kernel Kernel #Name Mode Type Width Accel Loops -GeForce_RTX_4090 * 24000 1 180 A -ALIAS_AMD_RX6900XT * 24000 1 56 A -ALIAS_AMD_RX7900XTX * 24000 1 92 A - diff --git a/tunings/Module_27700.hctune b/tunings/Module_27700.hctune index 32b5253b4..095c829f6 100644 --- a/tunings/Module_27700.hctune +++ b/tunings/Module_27700.hctune @@ -19,7 +19,3 @@ #Device Attack Hash Vector Kernel Kernel #Name Mode Type Width Accel Loops -GeForce_RTX_4090 * 27700 1 180 A -ALIAS_AMD_RX6900XT * 27700 1 56 A -ALIAS_AMD_RX7900XTX * 27700 1 92 A - diff --git a/tunings/Module_28200.hctune b/tunings/Module_28200.hctune index 50a09b89c..2759beb00 100644 --- a/tunings/Module_28200.hctune +++ b/tunings/Module_28200.hctune @@ -19,7 +19,3 @@ #Device Attack Hash Vector Kernel Kernel #Name Mode Type Width Accel Loops -GeForce_RTX_4090 * 28200 1 180 A -ALIAS_AMD_RX6900XT * 28200 1 56 A -ALIAS_AMD_RX7900XTX * 28200 1 92 A - diff --git a/tunings/Module_29800.hctune b/tunings/Module_29800.hctune index 31bea6286..ce9ebd31d 100644 --- a/tunings/Module_29800.hctune +++ b/tunings/Module_29800.hctune @@ -18,8 +18,3 @@ #Device Attack Hash Vector Kernel Kernel #Name Mode Type Width Accel Loops - -GeForce_RTX_4090 * 29800 1 180 A -ALIAS_AMD_RX6900XT * 29800 1 56 A -ALIAS_AMD_RX7900XTX * 29800 1 92 A -