
Improvements to SCRYPT autotuning strategy

General:

The logic for calculating the SCRYPT workload has been moved
from module_extra_buffer_size() to module_extra_tuningdb_block().
Previously, that function just returned entries from a static
tuning file. Now it computes tuning values on the fly, based on
the device's resources and the SCRYPT parameters. This was
always possible; it just wasn't used that way until now.

After the calculation, the resulting kernel_accel value is
injected into the tuning database as if it had come from a
file. The tmto value is stored internally.

Users can still override kernel-threads, kernel-accel, and
scrypt-tmto via the command line or a tuningdb file.
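
For example, all three can be pinned on the command line like
this (illustrative values; -n/--kernel-accel, -T/--kernel-threads
and --scrypt-tmto are existing hashcat options):

    hashcat -m 22700 -a 0 hash.txt wordlist.txt \
      --kernel-accel=64 --kernel-threads=16 --scrypt-tmto=2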

module_extra_tuningdb_block():

This is now where kernel_accel and tmto are automatically
calculated.

The logic for accel and tmto is now separated and more flexible.
Whether the user relies on defaults, tuningdb entries, or manual
command line overrides, the code tries to make smart choices
based on what's actually available on the device.

First, it tries to find a kernel_accel value that fits into
available memory. It starts with a base value and simulates
tmto=1 or 2 (which is typically a good choice on GPUs).

It also leaves room for other buffers (pws[], tmps[], and so on).
If the result lands within roughly 10% above the device's
processor count, it is clamped to that count.
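
For the memory-constrained GPU case, the search and the clamp
look like this (excerpted from the new module code further down
in this diff):

    if ((size_per_accel * device_processors) > available_mem) // not enough memory
    {
      const float multi = (float) available_mem / size_per_accel;

      int accel_multi;

      // multiplying by 2 or 4 simulates tmto=1 or tmto=2
      for (accel_multi = 1; accel_multi <= 2; accel_multi++)
      {
        kernel_accel_new = multi * (1 << accel_multi);

        if (kernel_accel_new >= device_processors) break;
      }

      // we need some space for tmps[], ...
      kernel_accel_new -= (1 << accel_multi);

      // clamp if close to device processors -- 10% good?
      if ((kernel_accel_new > device_processors) && ((kernel_accel_new - device_processors) <= (device_processors / 10)))
      {
        kernel_accel_new = device_processors;
      }
    }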

This value is then added to the tuning database, so hashcat can pick
it up during startup.

Once that's set, it derives tmto using available memory, thread
count, and the actual SCRYPT parameters.
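
Condensed, that derivation is a linear search, again excerpted
from the module code below (size_per_accel here is
128 * r * N * kernel_threads):

    // pick the smallest tmto whose B[] footprint still fits
    if (tmto == 0) // 0 means the user did not override it
    {
      for (u32 tmto_new = 1; tmto_new <= 5; tmto_new++)
      {
        if (available_mem > (kernel_accel_new * (size_per_accel >> tmto_new)))
        {
          tmto = tmto_new;

          break;
        }
      }
    }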

module_extra_buffer_size():

This function now just returns the size of the SCRYPT B[] buffer,
based on the tmto that was already calculated.
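
Concretely, the returned size reduces to a single expression
(names as used in the diff below):

    size_B = (128 * r * N * kernel_threads * kernel_accel) >> tmto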

kernel_threads:

Defaults are now set to 32 threads in most cases. On AMD GPUs,
64 threads might give a slight performance bump, but 32 is more
consistent and reliable.

For very memory-heavy algorithms (like Ethereum Wallet), it
scales down the thread count.

Here's a rough reference for other SCRYPT-based modes, keyed on
the per-candidate memory requirement (worked numbers follow the
list):

- 64 MiB: 16 threads
- 256 MiB: 4 threads
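
Those sizes follow from the per-candidate footprint of
128 * r * N bytes, using the per-mode constants visible in this
diff:

    scrypt    (N =  16384, r =  8): 128 *  8 *  16384 =  16 MiB -> 32 threads
    BestCrypt (N =  32768, r = 16): 128 * 16 *  32768 =  64 MiB -> 16 threads
    Ethereum  (N = 262144, r =  8): 128 *  8 * 262144 = 256 MiB ->  4 threads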

Tuning files:

All built-in tuningdb entries have been removed, since they
should no longer be necessary. You can still add custom entries
if needed; there's even a commented-out example in the tuningdb
file for mode 22700.
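
Generated and manual entries share the usual tuningdb format:
device name, attack mode, hash mode, vector width, kernel accel,
kernel loops. A hypothetical entry for mode 22700 (device name
and accel value are illustrative) would look like:

    GeForce_RTX_3080 * 22700 1 128 A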

Free memory handling:

Getting the actual amount of free GPU memory is critical for
this strategy to work correctly. Unfortunately, none of the
common GPGPU APIs report reliable numbers, so we now query
low-level interfaces: SYSFS on AMD and NVML on NVIDIA. Support
for both is already in place; ADL support still needs to be
added.

Because of this, hwmon support (which handles those low-level
queries) can no longer be disabled.
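
At startup this boils down to the following, condensed from the
backend.c hunk below (the retry loop that waits for other
processes to release memory is omitted):

    // low-level query: NVML on NVIDIA, sysfs on AMD; returns 0 if unsupported
    const u64 used_bytes = hm_get_memoryused_with_devices_idx (hashcat_ctx, device_id);

    if (used_bytes) device_param->device_available_mem -= used_bytes;
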
Jens Steube 2025-06-09 11:02:34 +02:00
parent c7d96b40e9
commit c87a87f992
28 changed files with 941 additions and 1296 deletions

View File

@@ -161,6 +161,18 @@ typedef enum nvmlGom_enum
* */
#define nvmlClocksThrottleReasonNone 0x0000000000000000LL
/**
* Memory allocation information for a device (v1).
* The total amount is equal to the sum of the amounts of free and used memory.
*/
typedef struct nvmlMemory_st
{
unsigned long long total; //!< Total physical device memory (in bytes)
unsigned long long free; //!< Unallocated device memory (in bytes)
unsigned long long used; //!< Sum of Reserved and Allocated device memory (in bytes).
//!< Note that the driver/GPU always sets aside a small amount of memory for bookkeeping
} nvmlMemory_t;
/*
* End of declarations from nvml.h
**/
@@ -191,6 +203,7 @@ typedef nvmlReturn_t (*NVML_API_CALL NVML_DEVICE_GET_SUPPORTEDCLOCKSTHROTTLEREAS
typedef nvmlReturn_t (*NVML_API_CALL NVML_DEVICE_SET_COMPUTEMODE) (nvmlDevice_t, nvmlComputeMode_t);
typedef nvmlReturn_t (*NVML_API_CALL NVML_DEVICE_SET_OPERATIONMODE) (nvmlDevice_t, nvmlGpuOperationMode_t);
typedef nvmlReturn_t (*NVML_API_CALL NVML_DEVICE_GET_PCIINFO) (nvmlDevice_t, nvmlPciInfo_t *);
typedef nvmlReturn_t (*NVML_API_CALL NVML_DEVICE_GET_MEMORYINFO) (nvmlDevice_t, nvmlMemory_t *);
typedef struct hm_nvml_lib
{
@@ -212,6 +225,7 @@ typedef struct hm_nvml_lib
NVML_DEVICE_GET_CURRENTCLOCKSTHROTTLEREASONS nvmlDeviceGetCurrentClocksThrottleReasons;
NVML_DEVICE_GET_SUPPORTEDCLOCKSTHROTTLEREASONS nvmlDeviceGetSupportedClocksThrottleReasons;
NVML_DEVICE_GET_PCIINFO nvmlDeviceGetPciInfo;
NVML_DEVICE_GET_MEMORYINFO nvmlDeviceGetMemoryInfo;
} hm_nvml_lib_t;
@@ -232,5 +246,6 @@ int hm_NVML_nvmlDeviceGetClockInfo (void *hashcat_ctx, nvmlDevice_t device, nvml
int hm_NVML_nvmlDeviceGetTemperatureThreshold (void *hashcat_ctx, nvmlDevice_t device, nvmlTemperatureThresholds_t thresholdType, unsigned int *temp);
int hm_NVML_nvmlDeviceGetCurrPcieLinkWidth (void *hashcat_ctx, nvmlDevice_t device, unsigned int *currLinkWidth);
int hm_NVML_nvmlDeviceGetPciInfo (void *hashcat_ctx, nvmlDevice_t device, nvmlPciInfo_t *pci);
int hm_NVML_nvmlDeviceGetMemoryInfo (void *hashcat_ctx, nvmlDevice_t device, nvmlMemory_t *mem);
#endif // HC_NVML_H

View File

@@ -34,5 +34,6 @@ int hm_SYSFS_AMDGPU_get_pp_dpm_sclk (void *hashcat_ctx, const int backend_device
int hm_SYSFS_AMDGPU_get_pp_dpm_mclk (void *hashcat_ctx, const int backend_device_idx, int *val);
int hm_SYSFS_AMDGPU_get_pp_dpm_pcie (void *hashcat_ctx, const int backend_device_idx, int *val);
int hm_SYSFS_AMDGPU_get_gpu_busy_percent (void *hashcat_ctx, const int backend_device_idx, int *val);
int hm_SYSFS_AMDGPU_get_mem_info_vram_used (void *hashcat_ctx, const int backend_device_idx, u64 *val);
#endif // HC_EXT_SYSFS_AMDGPU_H

View File

@@ -24,6 +24,7 @@ int hm_get_utilization_with_devices_idx (hashcat_ctx_t *hashcat_ctx, cons
int hm_get_memoryspeed_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int backend_device_idx);
int hm_get_corespeed_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int backend_device_idx);
int hm_get_throttle_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int backend_device_idx);
u64 hm_get_memoryused_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int backend_device_idx);
int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx);
void hwmon_ctx_destroy (hashcat_ctx_t *hashcat_ctx);

View File

@@ -20,7 +20,7 @@ u32 module_dgst_pos2 (MAYBE_UNUSED const hashconfig_t *ha
u32 module_dgst_pos3 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra);
u32 module_dgst_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra);
u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra);
const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes);
const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes, const u32 device_id, const u32 kernel_accel);
u32 module_forced_outfile_format (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra);
u32 module_hash_category (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra);
const char *module_hash_name (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra);

View File

@@ -17,7 +17,7 @@ int sort_by_tuning_db_entry (const void *v1, const void *v2);
int tuning_db_init (hashcat_ctx_t *hashcat_ctx);
void tuning_db_destroy (hashcat_ctx_t *hashcat_ctx);
bool tuning_db_process_line (hashcat_ctx_t *hashcat_ctx, const char *line_buf, const int line_num, const int source);
bool tuning_db_process_line (hashcat_ctx_t *hashcat_ctx, const char *line_buf, const int line_num);
tuning_db_entry_t *tuning_db_search (hashcat_ctx_t *hashcat_ctx, const char *device_name, const cl_device_type device_type, int attack_mode, const int hash_mode);
#endif // HC_TUNINGDB_H

View File

@@ -2067,6 +2067,7 @@ typedef struct hm_attrs
bool threshold_slowdown_get_supported;
bool throttle_get_supported;
bool utilization_get_supported;
bool memoryused_get_supported;
} hm_attrs_t;
@@ -3013,7 +3014,7 @@ typedef struct module_ctx
u32 (*module_dgst_size) (const hashconfig_t *, const user_options_t *, const user_options_extra_t *);
bool (*module_dictstat_disable) (const hashconfig_t *, const user_options_t *, const user_options_extra_t *);
u64 (*module_esalt_size) (const hashconfig_t *, const user_options_t *, const user_options_extra_t *);
const char *(*module_extra_tuningdb_block) (const hashconfig_t *, const user_options_t *, const user_options_extra_t *, const backend_ctx_t *, const hashes_t *);
const char *(*module_extra_tuningdb_block) (const hashconfig_t *, const user_options_t *, const user_options_extra_t *, const backend_ctx_t *, const hashes_t *, const u32, const u32);
u32 (*module_forced_outfile_format) (const hashconfig_t *, const user_options_t *, const user_options_extra_t *);
u32 (*module_hash_category) (const hashconfig_t *, const user_options_t *, const user_options_extra_t *);
const char *(*module_hash_name) (const hashconfig_t *, const user_options_t *, const user_options_extra_t *);

View File

@@ -24,6 +24,7 @@
#include "dynloader.h"
#include "backend.h"
#include "terminal.h"
#include "hwmon.h"
#if defined (__linux__)
static const char *const dri_card0_path = "/dev/dri/card0";
@@ -9649,7 +9650,44 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
if (module_ctx->module_extra_tuningdb_block != MODULE_DEFAULT)
{
const char *extra_tuningdb_block = module_ctx->module_extra_tuningdb_block (hashconfig, user_options, user_options_extra, backend_ctx, hashes);
// We need this because we can't trust CUDA/HIP to give us the real free device memory
// The only way to do so is through low level APIs
for (int i = 0; i < 10; i++)
{
const u64 used_bytes = hm_get_memoryused_with_devices_idx (hashcat_ctx, device_id);
if (used_bytes)
{
if ((used_bytes > (2ULL * 1024 * 1024 * 1024))
|| (used_bytes > (device_param->device_global_mem * 0.5)))
{
event_log_warning (hashcat_ctx, "* Device #%u: Memory usage is too high: %" PRIu64 "/%" PRIu64 ", waiting...", device_id + 1, used_bytes, device_param->device_global_mem);
sleep (1);
continue;
}
device_param->device_available_mem -= used_bytes;
break;
}
else
{
break;
}
}
u32 _kernel_accel = 0;
tuning_db_entry_t *tuningdb_entry = tuning_db_search (hashcat_ctx, device_param->device_name, device_param->opencl_device_type, user_options->attack_mode, hashconfig->hash_mode);
if (tuningdb_entry != NULL) _kernel_accel = tuningdb_entry->kernel_accel;
if (user_options->kernel_accel_chgd == true) _kernel_accel = user_options->kernel_accel;
const char *extra_tuningdb_block = module_ctx->module_extra_tuningdb_block (hashconfig, user_options, user_options_extra, backend_ctx, hashes, device_id, _kernel_accel);
char *lines_buf = hcstrdup (extra_tuningdb_block);
@@ -9669,7 +9707,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
if (next[0] == '#') continue;
tuning_db_process_line (hashcat_ctx, next, line_num, 2);
tuning_db_process_line (hashcat_ctx, next, line_num);
} while ((next = strtok_r ((char *) NULL, "\n", &saveptr)) != NULL);

View File

@@ -149,6 +149,7 @@ int nvml_init (void *hashcat_ctx)
HC_LOAD_FUNC(nvml, nvmlDeviceGetCurrentClocksThrottleReasons, NVML_DEVICE_GET_CURRENTCLOCKSTHROTTLEREASONS, NVML, 0);
HC_LOAD_FUNC(nvml, nvmlDeviceGetSupportedClocksThrottleReasons, NVML_DEVICE_GET_SUPPORTEDCLOCKSTHROTTLEREASONS, NVML, 0);
HC_LOAD_FUNC(nvml, nvmlDeviceGetPciInfo, NVML_DEVICE_GET_PCIINFO, NVML, 0);
HC_LOAD_FUNC(nvml, nvmlDeviceGetMemoryInfo, NVML_DEVICE_GET_MEMORYINFO, NVML, 0);
return 0;
}
@@ -392,3 +393,24 @@ int hm_NVML_nvmlDeviceGetPciInfo (void *hashcat_ctx, nvmlDevice_t device, nvmlPc
return 0;
}
int hm_NVML_nvmlDeviceGetMemoryInfo (void *hashcat_ctx, nvmlDevice_t device, nvmlMemory_t *mem)
{
hwmon_ctx_t *hwmon_ctx = ((hashcat_ctx_t *) hashcat_ctx)->hwmon_ctx;
NVML_PTR *nvml = (NVML_PTR *) hwmon_ctx->hm_nvml;
const nvmlReturn_t nvml_rc = nvml->nvmlDeviceGetMemoryInfo (device, mem);
if (nvml_rc != NVML_SUCCESS)
{
const char *string = hm_NVML_nvmlErrorString (nvml, nvml_rc);
event_log_error (hashcat_ctx, "nvmlDeviceGetMemoryInfo(): %s", string);
return -1;
}
return 0;
}

View File

@@ -441,3 +441,55 @@ int hm_SYSFS_AMDGPU_get_gpu_busy_percent (void *hashcat_ctx, const int backend_d
return 0;
}
int hm_SYSFS_AMDGPU_get_mem_info_vram_used (void *hashcat_ctx, const int backend_device_idx, u64 *val)
{
char *syspath = hm_SYSFS_AMDGPU_get_syspath_device (hashcat_ctx, backend_device_idx);
if (syspath == NULL) return -1;
char *path;
hc_asprintf (&path, "%s/mem_info_vram_used", syspath);
hcfree (syspath);
HCFILE fp;
if (hc_fopen (&fp, path, "r") == false)
{
event_log_error (hashcat_ctx, "%s: %s", path, strerror (errno));
hcfree (path);
return -1;
}
u64 mem_info_vram_used = 0;
while (!hc_feof (&fp))
{
char buf[HCBUFSIZ_TINY];
char *ptr = hc_fgets (buf, sizeof (buf), &fp);
if (ptr == NULL) continue;
size_t len = strlen (ptr);
if (len < 1) continue;
int rc = sscanf (ptr, "%" PRIu64, &mem_info_vram_used);
if (rc == 1) break;
}
hc_fclose (&fp);
*val = mem_info_vram_used;
hcfree (path);
return 0;
}

View File

@@ -1214,6 +1214,60 @@ int hm_get_throttle_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int back
return -1;
}
u64 hm_get_memoryused_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int backend_device_idx)
{
hwmon_ctx_t *hwmon_ctx = hashcat_ctx->hwmon_ctx;
backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
if (hwmon_ctx->enabled == false) return 0;
if (hwmon_ctx->hm_device[backend_device_idx].memoryused_get_supported == false) return 0;
if ((backend_ctx->devices_param[backend_device_idx].is_opencl == true) || (backend_ctx->devices_param[backend_device_idx].is_hip == true) || (backend_ctx->devices_param[backend_device_idx].is_cuda == true))
{
if (backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_GPU)
{
if ((backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD) || (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP))
{
if (hwmon_ctx->hm_sysfs_amdgpu)
{
u64 used = 0;
if (hm_SYSFS_AMDGPU_get_mem_info_vram_used (hashcat_ctx, backend_device_idx, &used) == -1)
{
hwmon_ctx->hm_device[backend_device_idx].memoryused_get_supported = false;
return 0;
}
return used;
}
}
if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_NV)
{
if (hwmon_ctx->hm_nvml)
{
nvmlMemory_t mem;
if (hm_NVML_nvmlDeviceGetMemoryInfo (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].nvml, &mem) == -1)
{
hwmon_ctx->hm_device[backend_device_idx].memoryused_get_supported = false;
return 0;
}
return mem.used;
}
}
}
}
hwmon_ctx->hm_device[backend_device_idx].memoryused_get_supported = false;
return 0;
}
int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
{
bridge_ctx_t *bridge_ctx = hashcat_ctx->bridge_ctx;
@@ -1227,12 +1281,12 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
if (bridge_ctx->enabled == true) backend_devices_cnt = 1;
#if !defined (WITH_HWMON)
return 0;
#endif // WITH_HWMON
//#if !defined (WITH_HWMON)
//return 0;
//#endif // WITH_HWMON
if (user_options->usage > 0) return 0;
if (user_options->backend_info > 0) return 0;
//if (user_options->backend_info > 0) return 0;
if (user_options->hash_info == true) return 0;
if (user_options->keyspace == true) return 0;
@@ -1241,7 +1295,9 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
if (user_options->stdout_flag == true) return 0;
if (user_options->version == true) return 0;
if (user_options->identify == true) return 0;
if (user_options->hwmon == false) return 0;
//we need hwmon support to get free memory per device support
//its a joke, but there's no way around
//if (user_options->hwmon == false) return 0;
hwmon_ctx->hm_device = (hm_attrs_t *) hccalloc (DEVICES_MAX, sizeof (hm_attrs_t));
@@ -1387,6 +1443,7 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
hm_adapters_nvml[device_id].threshold_shutdown_get_supported = true;
hm_adapters_nvml[device_id].threshold_slowdown_get_supported = true;
hm_adapters_nvml[device_id].utilization_get_supported = true;
hm_adapters_nvml[device_id].memoryused_get_supported = true;
}
}
}
@@ -1419,6 +1476,7 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
hm_adapters_nvml[device_id].threshold_shutdown_get_supported = true;
hm_adapters_nvml[device_id].threshold_slowdown_get_supported = true;
hm_adapters_nvml[device_id].utilization_get_supported = true;
hm_adapters_nvml[device_id].memoryused_get_supported = true;
}
}
}
@@ -1640,6 +1698,7 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
hm_adapters_sysfs_amdgpu[device_id].memoryspeed_get_supported = true;
hm_adapters_sysfs_amdgpu[device_id].temperature_get_supported = true;
hm_adapters_sysfs_amdgpu[device_id].utilization_get_supported = true;
hm_adapters_sysfs_amdgpu[device_id].memoryused_get_supported = true;
}
}
}
@@ -1746,6 +1805,7 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
hwmon_ctx->hm_device[backend_devices_idx].threshold_slowdown_get_supported |= hm_adapters_nvml[device_id].threshold_slowdown_get_supported;
hwmon_ctx->hm_device[backend_devices_idx].throttle_get_supported |= hm_adapters_nvml[device_id].throttle_get_supported;
hwmon_ctx->hm_device[backend_devices_idx].utilization_get_supported |= hm_adapters_nvml[device_id].utilization_get_supported;
hwmon_ctx->hm_device[backend_devices_idx].memoryused_get_supported |= hm_adapters_nvml[device_id].memoryused_get_supported;
}
if (hwmon_ctx->hm_nvapi)
@@ -1875,6 +1935,7 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
hwmon_ctx->hm_device[backend_devices_idx].threshold_slowdown_get_supported |= hm_adapters_sysfs_amdgpu[device_id].threshold_slowdown_get_supported;
hwmon_ctx->hm_device[backend_devices_idx].throttle_get_supported |= hm_adapters_sysfs_amdgpu[device_id].throttle_get_supported;
hwmon_ctx->hm_device[backend_devices_idx].utilization_get_supported |= hm_adapters_sysfs_amdgpu[device_id].utilization_get_supported;
hwmon_ctx->hm_device[backend_devices_idx].memoryused_get_supported |= hm_adapters_sysfs_amdgpu[device_id].memoryused_get_supported;
}
}
@@ -1895,6 +1956,7 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
hwmon_ctx->hm_device[backend_devices_idx].threshold_slowdown_get_supported |= hm_adapters_nvml[device_id].threshold_slowdown_get_supported;
hwmon_ctx->hm_device[backend_devices_idx].throttle_get_supported |= hm_adapters_nvml[device_id].throttle_get_supported;
hwmon_ctx->hm_device[backend_devices_idx].utilization_get_supported |= hm_adapters_nvml[device_id].utilization_get_supported;
hwmon_ctx->hm_device[backend_devices_idx].memoryused_get_supported |= hm_adapters_nvml[device_id].memoryused_get_supported;
}
if (hwmon_ctx->hm_nvapi)
@@ -1927,6 +1989,7 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
hm_get_threshold_slowdown_with_devices_idx (hashcat_ctx, backend_devices_idx);
hm_get_throttle_with_devices_idx (hashcat_ctx, backend_devices_idx);
hm_get_utilization_with_devices_idx (hashcat_ctx, backend_devices_idx);
hm_get_memoryused_with_devices_idx (hashcat_ctx, backend_devices_idx);
}
FREE_ADAPTERS;

View File

@@ -49,6 +49,8 @@ const char *module_st_pass (MAYBE_UNUSED const hashconfig_t *hashconfig,
static const char *SIGNATURE_SCRYPT = "SCRYPT";
static const u32 SCRYPT_THREADS = 32;
static const u64 SCRYPT_N = 16384;
static const u64 SCRYPT_R = 8;
static const u64 SCRYPT_P = 1;
@@ -67,9 +69,16 @@ u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_
return kernel_loops_max;
}
u32 module_kernel_threads_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u32 kernel_threads_min = (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : SCRYPT_THREADS;
return kernel_threads_min;
}
u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u32 kernel_threads_max = 32;
const u32 kernel_threads_max = (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : SCRYPT_THREADS;
return kernel_threads_max;
}
@@ -84,26 +93,110 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
return pw_max;
}
const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes)
u32 tmto = 0;
const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes, const u32 device_id, const u32 kernel_accel)
{
// preprocess tmto in case user has overridden
// it's important to set to 0 otherwise so we can postprocess tmto in that case
tmto = (user_options->scrypt_tmto_chgd == true) ? user_options->scrypt_tmto : 0;
// we enforce the same configuration for all hashes, so this should be fine
const u64 scrypt_N = (hashes->salts_buf[0].scrypt_N) ? hashes->salts_buf[0].scrypt_N : SCRYPT_N;
const u64 scrypt_r = (hashes->salts_buf[0].scrypt_r) ? hashes->salts_buf[0].scrypt_r : SCRYPT_R;
const u64 req1 = 128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra);
const u64 size_per_accel = (128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra)) >> tmto;
int lines_sz = 4096;
char *lines_buf = hcmalloc (lines_sz);
int lines_pos = 0;
for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++)
hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
const u32 device_processors = device_param->device_processors;
const u64 available_mem = MIN (device_param->device_available_mem, (device_param->device_maxmem_alloc * 4));
u32 kernel_accel_new = device_processors;
if (kernel_accel)
{
hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
// from command line or tuning db has priority
if (device_param->skipped == true) continue;
kernel_accel_new = user_options->kernel_accel;
}
else
{
// find a nice kernel_accel programmatically
const u64 avail = MIN (device_param->device_available_mem, (device_param->device_maxmem_alloc * 4)) - (2 * req1);
if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
{
if ((size_per_accel * device_processors) > available_mem) // not enough memory
{
const float multi = (float) available_mem / size_per_accel;
int accel_multi;
for (accel_multi = 1; accel_multi <= 2; accel_multi++)
{
kernel_accel_new = multi * (1 << accel_multi);
if (kernel_accel_new >= device_processors) break;
}
// we need some space for tmps[], ...
kernel_accel_new -= (1 << accel_multi);
// clamp if close to device processors -- 10% good?
if ((kernel_accel_new > device_processors) && ((kernel_accel_new - device_processors) <= (device_processors / 10)))
{
kernel_accel_new = device_processors;
}
}
else
{
for (int i = 1; i <= 8; i++)
{
if ((size_per_accel * device_processors * i) < available_mem)
{
kernel_accel_new = device_processors * i;
}
}
}
}
else
{
for (int i = 1; i <= 8; i++)
{
if ((size_per_accel * device_processors * i) < available_mem)
{
kernel_accel_new = device_processors * i;
}
}
}
}
// fix tmto if user allows
if (tmto == 0)
{
const u32 tmto_start = 1;
const u32 tmto_stop = 5;
for (u32 tmto_new = tmto_start; tmto_new <= tmto_stop; tmto_new++)
{
if (available_mem > (kernel_accel_new * (size_per_accel >> tmto_new)))
{
tmto = tmto_new;
break;
}
}
}
char *new_device_name = hcstrdup (device_param->device_name);
@@ -112,61 +205,9 @@ const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashco
if (new_device_name[i] == ' ') new_device_name[i] = '_';
}
char *out_name = new_device_name;
if (memcmp (new_device_name, "AMD_", 4) == 0) out_name += 4;
if (memcmp (new_device_name, "NVIDIA_", 7) == 0) out_name += 7;
// ok, try to find a nice accel programmatically
u32 accel = device_param->device_processors;
if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
{
// expect to change any of this
if (avail < (req1 * accel)) // not enough memory
{
const float multi = (float) avail / req1;
accel = multi;
for (int i = 1; i <= 4; i++) // this is tmto
{
if (device_param->device_processors > accel)
{
accel = ((u64) multi << i) & ~3;
}
}
}
else
{
for (int i = 1; i <= 8; i++)
{
if ((avail * 2) > (req1 * accel))
{
accel = device_param->device_processors * i;
}
}
}
}
else
{
const u64 req1 = 128 * scrypt_r * scrypt_N;
for (int i = 1; i <= 8; i++)
{
if (avail > (req1 * accel))
{
accel = device_param->device_processors * i;
}
}
}
lines_pos += snprintf (lines_buf + lines_pos, lines_sz - lines_pos, "%s * %u 1 %u A\n", out_name, user_options->hash_mode, accel);
lines_pos += snprintf (lines_buf + lines_pos, lines_sz - lines_pos, "%s * %u 1 %u A\n", new_device_name, user_options->hash_mode, kernel_accel_new);
hcfree (new_device_name);
}
return lines_buf;
}
@@ -179,115 +220,11 @@ u64 module_extra_buffer_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE
const u64 scrypt_N = (hashes->salts_buf[0].scrypt_N) ? hashes->salts_buf[0].scrypt_N : SCRYPT_N;
const u64 scrypt_r = (hashes->salts_buf[0].scrypt_r) ? hashes->salts_buf[0].scrypt_r : SCRYPT_R;
const u64 kernel_power_max = ((OPTS_TYPE & OPTS_TYPE_MP_MULTI_DISABLE) ? 1 : device_param->device_processors) * device_param->kernel_threads_max * device_param->kernel_accel_max;
const u64 size_per_accel = 128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra);
u64 tmto_start = 0;
u64 tmto_stop = 4;
u64 size_scrypt = size_per_accel * device_param->kernel_accel_max;
if (user_options->scrypt_tmto_chgd == true)
{
tmto_start = user_options->scrypt_tmto;
tmto_stop = user_options->scrypt_tmto;
}
// size_pws
const u64 size_pws = kernel_power_max * sizeof (pw_t);
const u64 size_pws_amp = size_pws;
// size_pws_comp
const u64 size_pws_comp = kernel_power_max * (sizeof (u32) * 64);
// size_pws_idx
const u64 size_pws_idx = (kernel_power_max + 1) * sizeof (pw_idx_t);
// size_tmps
const u64 size_tmps = kernel_power_max * hashconfig->tmp_size;
// size_hooks
const u64 size_hooks = kernel_power_max * hashconfig->hook_size;
u64 size_pws_pre = 4;
u64 size_pws_base = 4;
if (user_options->slow_candidates == true)
{
// size_pws_pre
size_pws_pre = kernel_power_max * sizeof (pw_pre_t);
// size_pws_base
size_pws_base = kernel_power_max * sizeof (pw_pre_t);
}
// sometimes device_available_mem and device_maxmem_alloc reported back from the opencl runtime are a bit inaccurate.
// let's add some extra space just to be sure.
// now depends on the kernel-accel value (where scrypt and similar benefits), but also hard minimum 64mb and maximum 1024mb limit
u64 EXTRA_SPACE = (1024ULL * 1024ULL) * device_param->kernel_accel_max;
EXTRA_SPACE = MAX (EXTRA_SPACE, ( 64ULL * 1024ULL * 1024ULL));
EXTRA_SPACE = MIN (EXTRA_SPACE, (1024ULL * 1024ULL * 1024ULL));
const u64 scrypt_extra_space
= device_param->size_bfs
+ device_param->size_combs
+ device_param->size_digests
+ device_param->size_esalts
+ device_param->size_markov_css
+ device_param->size_plains
+ device_param->size_results
+ device_param->size_root_css
+ device_param->size_rules
+ device_param->size_rules_c
+ device_param->size_salts
+ device_param->size_shown
+ device_param->size_tm
+ device_param->size_st_digests
+ device_param->size_st_salts
+ device_param->size_st_esalts
+ size_pws
+ size_pws_amp
+ size_pws_comp
+ size_pws_idx
+ size_tmps
+ size_hooks
+ size_pws_pre
+ size_pws_base
+ EXTRA_SPACE;
bool not_enough_memory = true;
u64 size_scrypt = 0;
u64 tmto;
for (tmto = tmto_start; tmto <= tmto_stop; tmto++)
{
size_scrypt = (128ULL * scrypt_r) * scrypt_N;
size_scrypt /= 1ull << tmto;
size_scrypt *= kernel_power_max;
if ((size_scrypt / 4) > device_param->device_maxmem_alloc) continue;
if ((size_scrypt + scrypt_extra_space) > device_param->device_available_mem) continue;
not_enough_memory = false;
break;
}
if (not_enough_memory == true) return -1;
return size_scrypt;
return size_scrypt / (1 << tmto);
}
u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
@@ -527,7 +464,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_kernel_loops_max = module_kernel_loops_max;
module_ctx->module_kernel_loops_min = module_kernel_loops_min;
module_ctx->module_kernel_threads_max = module_kernel_threads_max;
module_ctx->module_kernel_threads_min = MODULE_DEFAULT;
module_ctx->module_kernel_threads_min = module_kernel_threads_min;
module_ctx->module_kern_type = module_kern_type;
module_ctx->module_kern_type_dynamic = MODULE_DEFAULT;
module_ctx->module_opti_type = module_opti_type;

View File

@@ -49,6 +49,8 @@ const char *module_st_pass (MAYBE_UNUSED const hashconfig_t *hashconfig,
static const char *SIGNATURE_CISCO9 = "$9$";
static const u32 SCRYPT_THREADS = 32;
static const u64 SCRYPT_N = 16384;
static const u64 SCRYPT_R = 1;
static const u64 SCRYPT_P = 1;
@@ -67,9 +69,16 @@ u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_
return kernel_loops_max;
}
u32 module_kernel_threads_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u32 kernel_threads_min = (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : SCRYPT_THREADS;
return kernel_threads_min;
}
u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u32 kernel_threads_max = 32;
const u32 kernel_threads_max = (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : SCRYPT_THREADS;
return kernel_threads_max;
}
@@ -84,26 +93,110 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
return pw_max;
}
const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes)
u32 tmto = 0;
const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes, const u32 device_id, const u32 kernel_accel)
{
// preprocess tmto in case user has overridden
// it's important to set to 0 otherwise so we can postprocess tmto in that case
tmto = (user_options->scrypt_tmto_chgd == true) ? user_options->scrypt_tmto : 0;
// we enforce the same configuration for all hashes, so this should be fine
const u64 scrypt_N = (hashes->salts_buf[0].scrypt_N) ? hashes->salts_buf[0].scrypt_N : SCRYPT_N;
const u64 scrypt_r = (hashes->salts_buf[0].scrypt_r) ? hashes->salts_buf[0].scrypt_r : SCRYPT_R;
const u64 req1 = 128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra);
const u64 size_per_accel = (128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra)) >> tmto;
int lines_sz = 4096;
char *lines_buf = hcmalloc (lines_sz);
int lines_pos = 0;
for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++)
hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
const u32 device_processors = device_param->device_processors;
const u64 available_mem = MIN (device_param->device_available_mem, (device_param->device_maxmem_alloc * 4));
u32 kernel_accel_new = device_processors;
if (kernel_accel)
{
hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
// from command line or tuning db has priority
if (device_param->skipped == true) continue;
kernel_accel_new = user_options->kernel_accel;
}
else
{
// find a nice kernel_accel programmatically
const u64 avail = MIN (device_param->device_available_mem, (device_param->device_maxmem_alloc * 4)) - (2 * req1);
if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
{
if ((size_per_accel * device_processors) > available_mem) // not enough memory
{
const float multi = (float) available_mem / size_per_accel;
int accel_multi;
for (accel_multi = 1; accel_multi <= 2; accel_multi++)
{
kernel_accel_new = multi * (1 << accel_multi);
if (kernel_accel_new >= device_processors) break;
}
// we need some space for tmps[], ...
kernel_accel_new -= (1 << accel_multi);
// clamp if close to device processors -- 10% good?
if ((kernel_accel_new > device_processors) && ((kernel_accel_new - device_processors) <= (device_processors / 10)))
{
kernel_accel_new = device_processors;
}
}
else
{
for (int i = 1; i <= 8; i++)
{
if ((size_per_accel * device_processors * i) < available_mem)
{
kernel_accel_new = device_processors * i;
}
}
}
}
else
{
for (int i = 1; i <= 8; i++)
{
if ((size_per_accel * device_processors * i) < available_mem)
{
kernel_accel_new = device_processors * i;
}
}
}
}
// fix tmto if user allows
if (tmto == 0)
{
const u32 tmto_start = 1;
const u32 tmto_stop = 5;
for (u32 tmto_new = tmto_start; tmto_new <= tmto_stop; tmto_new++)
{
if (available_mem > (kernel_accel_new * (size_per_accel >> tmto_new)))
{
tmto = tmto_new;
break;
}
}
}
char *new_device_name = hcstrdup (device_param->device_name);
@@ -112,61 +205,9 @@ const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashco
if (new_device_name[i] == ' ') new_device_name[i] = '_';
}
char *out_name = new_device_name;
if (memcmp (new_device_name, "AMD_", 4) == 0) out_name += 4;
if (memcmp (new_device_name, "NVIDIA_", 7) == 0) out_name += 7;
// ok, try to find a nice accel programmatically
u32 accel = device_param->device_processors;
if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
{
// expect to change any of this
if (avail < (req1 * accel)) // not enough memory
{
const float multi = (float) avail / req1;
accel = multi;
for (int i = 1; i <= 4; i++) // this is tmto
{
if (device_param->device_processors > accel)
{
accel = ((u64) multi << i) & ~3;
}
}
}
else
{
for (int i = 1; i <= 8; i++)
{
if ((avail * 2) > (req1 * accel))
{
accel = device_param->device_processors * i;
}
}
}
}
else
{
const u64 req1 = 128 * scrypt_r * scrypt_N;
for (int i = 1; i <= 8; i++)
{
if (avail > (req1 * accel))
{
accel = device_param->device_processors * i;
}
}
}
lines_pos += snprintf (lines_buf + lines_pos, lines_sz - lines_pos, "%s * %u 1 %u A\n", out_name, user_options->hash_mode, accel);
lines_pos += snprintf (lines_buf + lines_pos, lines_sz - lines_pos, "%s * %u 1 %u A\n", new_device_name, user_options->hash_mode, kernel_accel_new);
hcfree (new_device_name);
}
return lines_buf;
}
@@ -179,115 +220,11 @@ u64 module_extra_buffer_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE
const u64 scrypt_N = (hashes->salts_buf[0].scrypt_N) ? hashes->salts_buf[0].scrypt_N : SCRYPT_N;
const u64 scrypt_r = (hashes->salts_buf[0].scrypt_r) ? hashes->salts_buf[0].scrypt_r : SCRYPT_R;
const u64 kernel_power_max = ((OPTS_TYPE & OPTS_TYPE_MP_MULTI_DISABLE) ? 1 : device_param->device_processors) * device_param->kernel_threads_max * device_param->kernel_accel_max;
const u64 req1 = 128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra);
u64 tmto_start = 0;
u64 tmto_stop = 4;
u64 size_scrypt = req1 * device_param->kernel_accel_max;
if (user_options->scrypt_tmto_chgd == true)
{
tmto_start = user_options->scrypt_tmto;
tmto_stop = user_options->scrypt_tmto;
}
// size_pws
const u64 size_pws = kernel_power_max * sizeof (pw_t);
const u64 size_pws_amp = size_pws;
// size_pws_comp
const u64 size_pws_comp = kernel_power_max * (sizeof (u32) * 64);
// size_pws_idx
const u64 size_pws_idx = (kernel_power_max + 1) * sizeof (pw_idx_t);
// size_tmps
const u64 size_tmps = kernel_power_max * hashconfig->tmp_size;
// size_hooks
const u64 size_hooks = kernel_power_max * hashconfig->hook_size;
u64 size_pws_pre = 4;
u64 size_pws_base = 4;
if (user_options->slow_candidates == true)
{
// size_pws_pre
size_pws_pre = kernel_power_max * sizeof (pw_pre_t);
// size_pws_base
size_pws_base = kernel_power_max * sizeof (pw_pre_t);
}
// sometimes device_available_mem and device_maxmem_alloc reported back from the opencl runtime are a bit inaccurate.
// let's add some extra space just to be sure.
// now depends on the kernel-accel value (where scrypt and similar benefits), but also hard minimum 64mb and maximum 1024mb limit
u64 EXTRA_SPACE = (1024ULL * 1024ULL) * device_param->kernel_accel_max;
EXTRA_SPACE = MAX (EXTRA_SPACE, ( 64ULL * 1024ULL * 1024ULL));
EXTRA_SPACE = MIN (EXTRA_SPACE, (1024ULL * 1024ULL * 1024ULL));
const u64 scrypt_extra_space
= device_param->size_bfs
+ device_param->size_combs
+ device_param->size_digests
+ device_param->size_esalts
+ device_param->size_markov_css
+ device_param->size_plains
+ device_param->size_results
+ device_param->size_root_css
+ device_param->size_rules
+ device_param->size_rules_c
+ device_param->size_salts
+ device_param->size_shown
+ device_param->size_tm
+ device_param->size_st_digests
+ device_param->size_st_salts
+ device_param->size_st_esalts
+ size_pws
+ size_pws_amp
+ size_pws_comp
+ size_pws_idx
+ size_tmps
+ size_hooks
+ size_pws_pre
+ size_pws_base
+ EXTRA_SPACE;
bool not_enough_memory = true;
u64 size_scrypt = 0;
u64 tmto;
for (tmto = tmto_start; tmto <= tmto_stop; tmto++)
{
size_scrypt = (128ULL * scrypt_r) * scrypt_N;
size_scrypt /= 1ull << tmto;
size_scrypt *= kernel_power_max;
if ((size_scrypt / 4) > device_param->device_maxmem_alloc) continue;
if ((size_scrypt + scrypt_extra_space) > device_param->device_available_mem) continue;
not_enough_memory = false;
break;
}
if (not_enough_memory == true) return -1;
return size_scrypt;
return size_scrypt / (1 << tmto);
}
u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
@@ -488,7 +425,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_kernel_loops_max = module_kernel_loops_max;
module_ctx->module_kernel_loops_min = module_kernel_loops_min;
module_ctx->module_kernel_threads_max = module_kernel_threads_max;
module_ctx->module_kernel_threads_min = MODULE_DEFAULT;
module_ctx->module_kernel_threads_min = module_kernel_threads_min;
module_ctx->module_kern_type = module_kern_type;
module_ctx->module_kern_type_dynamic = MODULE_DEFAULT;
module_ctx->module_opti_type = module_opti_type;

View File

@@ -56,6 +56,8 @@ typedef struct ethereum_scrypt
static const char *SIGNATURE_ETHEREUM_SCRYPT = "$ethereum$s";
static const u32 SCRYPT_THREADS = 4;
static const u64 SCRYPT_N = 262144;
static const u64 SCRYPT_R = 8;
static const u64 SCRYPT_P = 1;
@@ -74,9 +76,16 @@ u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_
return kernel_loops_max;
}
u32 module_kernel_threads_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u32 kernel_threads_min = (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : SCRYPT_THREADS;
return kernel_threads_min;
}
u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u32 kernel_threads_max = 4;
const u32 kernel_threads_max = (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : SCRYPT_THREADS;
return kernel_threads_max;
}
@@ -98,26 +107,110 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
return pw_max;
}
const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes)
u32 tmto = 0;
const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes, const u32 device_id, const u32 kernel_accel)
{
// preprocess tmto in case user has overridden
// it's important to set to 0 otherwise so we can postprocess tmto in that case
tmto = (user_options->scrypt_tmto_chgd == true) ? user_options->scrypt_tmto : 0;
// we enforce the same configuration for all hashes, so this should be fine
const u64 scrypt_N = (hashes->salts_buf[0].scrypt_N) ? hashes->salts_buf[0].scrypt_N : SCRYPT_N;
const u64 scrypt_r = (hashes->salts_buf[0].scrypt_r) ? hashes->salts_buf[0].scrypt_r : SCRYPT_R;
const u64 req1 = 128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra);
const u64 size_per_accel = (128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra)) >> tmto;
int lines_sz = 4096;
char *lines_buf = hcmalloc (lines_sz);
int lines_pos = 0;
for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++)
hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
const u32 device_processors = device_param->device_processors;
const u64 available_mem = MIN (device_param->device_available_mem, (device_param->device_maxmem_alloc * 4));
u32 kernel_accel_new = device_processors;
if (kernel_accel)
{
hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
// from command line or tuning db has priority
if (device_param->skipped == true) continue;
kernel_accel_new = user_options->kernel_accel;
}
else
{
// find a nice kernel_accel programmatically
const u64 avail = MIN (device_param->device_available_mem, (device_param->device_maxmem_alloc * 4)) - (2 * req1);
if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
{
if ((size_per_accel * device_processors) > available_mem) // not enough memory
{
const float multi = (float) available_mem / size_per_accel;
int accel_multi;
for (accel_multi = 1; accel_multi <= 2; accel_multi++)
{
kernel_accel_new = multi * (1 << accel_multi);
if (kernel_accel_new >= device_processors) break;
}
// we need some space for tmps[], ...
kernel_accel_new -= (1 << accel_multi);
// clamp if close to device processors -- 10% good?
if ((kernel_accel_new > device_processors) && ((kernel_accel_new - device_processors) <= (device_processors / 10)))
{
kernel_accel_new = device_processors;
}
}
else
{
for (int i = 1; i <= 8; i++)
{
if ((size_per_accel * device_processors * i) < available_mem)
{
kernel_accel_new = device_processors * i;
}
}
}
}
else
{
for (int i = 1; i <= 8; i++)
{
if ((size_per_accel * device_processors * i) < available_mem)
{
kernel_accel_new = device_processors * i;
}
}
}
}
// fix tmto if user allows
if (tmto == 0)
{
const u32 tmto_start = 1;
const u32 tmto_stop = 5;
for (u32 tmto_new = tmto_start; tmto_new <= tmto_stop; tmto_new++)
{
if (available_mem > (kernel_accel_new * (size_per_accel >> tmto_new)))
{
tmto = tmto_new;
break;
}
}
}
char *new_device_name = hcstrdup (device_param->device_name);
@@ -126,61 +219,9 @@ const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashco
if (new_device_name[i] == ' ') new_device_name[i] = '_';
}
char *out_name = new_device_name;
if (memcmp (new_device_name, "AMD_", 4) == 0) out_name += 4;
if (memcmp (new_device_name, "NVIDIA_", 7) == 0) out_name += 7;
// ok, try to find a nice accel programmatically
u32 accel = device_param->device_processors;
if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
{
// expect to change any of this
if (avail < (req1 * accel)) // not enough memory
{
const float multi = (float) avail / req1;
accel = multi;
for (int i = 1; i <= 4; i++) // this is tmto
{
if (device_param->device_processors > accel)
{
accel = ((u64) multi << i) & ~3;
}
}
}
else
{
for (int i = 1; i <= 8; i++)
{
if ((avail * 2) > (req1 * accel))
{
accel = device_param->device_processors * i;
}
}
}
}
else
{
const u64 req1 = 128 * scrypt_r * scrypt_N;
for (int i = 1; i <= 8; i++)
{
if (avail > (req1 * accel))
{
accel = device_param->device_processors * i;
}
}
}
lines_pos += snprintf (lines_buf + lines_pos, lines_sz - lines_pos, "%s * %u 1 %u A\n", out_name, user_options->hash_mode, accel);
lines_pos += snprintf (lines_buf + lines_pos, lines_sz - lines_pos, "%s * %u 1 %u A\n", new_device_name, user_options->hash_mode, kernel_accel_new);
hcfree (new_device_name);
}
return lines_buf;
}
@@ -193,115 +234,11 @@ u64 module_extra_buffer_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE
const u64 scrypt_N = (hashes->salts_buf[0].scrypt_N) ? hashes->salts_buf[0].scrypt_N : SCRYPT_N;
const u64 scrypt_r = (hashes->salts_buf[0].scrypt_r) ? hashes->salts_buf[0].scrypt_r : SCRYPT_R;
const u64 kernel_power_max = ((OPTS_TYPE & OPTS_TYPE_MP_MULTI_DISABLE) ? 1 : device_param->device_processors) * device_param->kernel_threads_max * device_param->kernel_accel_max;
const u64 size_per_accel = 128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra);
u64 tmto_start = 0;
u64 tmto_stop = 4;
u64 size_scrypt = size_per_accel * device_param->kernel_accel_max;
if (user_options->scrypt_tmto_chgd == true)
{
tmto_start = user_options->scrypt_tmto;
tmto_stop = user_options->scrypt_tmto;
}
// size_pws
const u64 size_pws = kernel_power_max * sizeof (pw_t);
const u64 size_pws_amp = size_pws;
// size_pws_comp
const u64 size_pws_comp = kernel_power_max * (sizeof (u32) * 64);
// size_pws_idx
const u64 size_pws_idx = (kernel_power_max + 1) * sizeof (pw_idx_t);
// size_tmps
const u64 size_tmps = kernel_power_max * hashconfig->tmp_size;
// size_hooks
const u64 size_hooks = kernel_power_max * hashconfig->hook_size;
u64 size_pws_pre = 4;
u64 size_pws_base = 4;
if (user_options->slow_candidates == true)
{
// size_pws_pre
size_pws_pre = kernel_power_max * sizeof (pw_pre_t);
// size_pws_base
size_pws_base = kernel_power_max * sizeof (pw_pre_t);
}
// sometimes device_available_mem and device_maxmem_alloc reported back from the opencl runtime are a bit inaccurate.
// let's add some extra space just to be sure.
// now depends on the kernel-accel value (where scrypt and similar benefits), but also hard minimum 64mb and maximum 1024mb limit
u64 EXTRA_SPACE = (1024ULL * 1024ULL) * device_param->kernel_accel_max;
EXTRA_SPACE = MAX (EXTRA_SPACE, ( 64ULL * 1024ULL * 1024ULL));
EXTRA_SPACE = MIN (EXTRA_SPACE, (1024ULL * 1024ULL * 1024ULL));
const u64 scrypt_extra_space
= device_param->size_bfs
+ device_param->size_combs
+ device_param->size_digests
+ device_param->size_esalts
+ device_param->size_markov_css
+ device_param->size_plains
+ device_param->size_results
+ device_param->size_root_css
+ device_param->size_rules
+ device_param->size_rules_c
+ device_param->size_salts
+ device_param->size_shown
+ device_param->size_tm
+ device_param->size_st_digests
+ device_param->size_st_salts
+ device_param->size_st_esalts
+ size_pws
+ size_pws_amp
+ size_pws_comp
+ size_pws_idx
+ size_tmps
+ size_hooks
+ size_pws_pre
+ size_pws_base
+ EXTRA_SPACE;
bool not_enough_memory = true;
u64 size_scrypt = 0;
u64 tmto;
for (tmto = tmto_start; tmto <= tmto_stop; tmto++)
{
size_scrypt = (128ULL * scrypt_r) * scrypt_N;
size_scrypt /= 1ull << tmto;
size_scrypt *= kernel_power_max;
if ((size_scrypt / 4) > device_param->device_maxmem_alloc) continue;
if ((size_scrypt + scrypt_extra_space) > device_param->device_available_mem) continue;
not_enough_memory = false;
break;
}
if (not_enough_memory == true) return -1;
return size_scrypt;
return size_scrypt / (1 << tmto);
}
u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
@@ -587,7 +524,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_kernel_loops_max = module_kernel_loops_max;
module_ctx->module_kernel_loops_min = module_kernel_loops_min;
module_ctx->module_kernel_threads_max = module_kernel_threads_max;
module_ctx->module_kernel_threads_min = MODULE_DEFAULT;
module_ctx->module_kernel_threads_min = module_kernel_threads_min;
module_ctx->module_kern_type = module_kern_type;
module_ctx->module_kern_type_dynamic = MODULE_DEFAULT;
module_ctx->module_opti_type = module_opti_type;

View File

@@ -49,6 +49,8 @@ const char *module_st_pass (MAYBE_UNUSED const hashconfig_t *hashconfig,
static const char *SIGNATURE_MULTIBIT = "$multibit$";
static const u32 SCRYPT_THREADS = 32;
static const u64 SCRYPT_N = 16384;
static const u64 SCRYPT_R = 8;
static const u64 SCRYPT_P = 1;
@@ -67,9 +69,16 @@ u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_
return kernel_loops_max;
}
u32 module_kernel_threads_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u32 kernel_threads_min = (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : SCRYPT_THREADS;
return kernel_threads_min;
}
u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u32 kernel_threads_max = 32;
const u32 kernel_threads_max = (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : SCRYPT_THREADS;
return kernel_threads_max;
}
@@ -84,26 +93,110 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
return pw_max;
}
const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes)
u32 tmto = 0;
const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes, const u32 device_id, const u32 kernel_accel)
{
// preprocess tmto in case user has overridden
// it's important to set to 0 otherwise so we can postprocess tmto in that case
tmto = (user_options->scrypt_tmto_chgd == true) ? user_options->scrypt_tmto : 0;
// we enforce the same configuration for all hashes, so this should be fine
const u64 scrypt_N = (hashes->salts_buf[0].scrypt_N) ? hashes->salts_buf[0].scrypt_N : SCRYPT_N;
const u64 scrypt_r = (hashes->salts_buf[0].scrypt_r) ? hashes->salts_buf[0].scrypt_r : SCRYPT_R;
const u64 req1 = 128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra);
const u64 size_per_accel = (128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra)) >> tmto;
int lines_sz = 4096;
char *lines_buf = hcmalloc (lines_sz);
int lines_pos = 0;
for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++)
hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
const u32 device_processors = device_param->device_processors;
const u64 available_mem = MIN (device_param->device_available_mem, (device_param->device_maxmem_alloc * 4));
u32 kernel_accel_new = device_processors;
if (kernel_accel)
{
hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
// from command line or tuning db has priority
if (device_param->skipped == true) continue;
kernel_accel_new = user_options->kernel_accel;
}
else
{
// find a nice kernel_accel programmatically
const u64 avail = MIN (device_param->device_available_mem, (device_param->device_maxmem_alloc * 4)) - (2 * req1);
if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
{
if ((size_per_accel * device_processors) > available_mem) // not enough memory
{
const float multi = (float) available_mem / size_per_accel;
int accel_multi;
for (accel_multi = 1; accel_multi <= 2; accel_multi++)
{
kernel_accel_new = multi * (1 << accel_multi);
if (kernel_accel_new >= device_processors) break;
}
// we need some space for tmps[], ...
kernel_accel_new -= (1 << accel_multi);
// clamp if close to device processors -- 10% good?
if ((kernel_accel_new > device_processors) && ((kernel_accel_new - device_processors) <= (device_processors / 10)))
{
kernel_accel_new = device_processors;
}
}
else
{
for (int i = 1; i <= 8; i++)
{
if ((size_per_accel * device_processors * i) < available_mem)
{
kernel_accel_new = device_processors * i;
}
}
}
}
else
{
for (int i = 1; i <= 8; i++)
{
if ((size_per_accel * device_processors * i) < available_mem)
{
kernel_accel_new = device_processors * i;
}
}
}
}
// fix tmto if user allows
if (tmto == 0)
{
const u32 tmto_start = 1;
const u32 tmto_stop = 5;
for (u32 tmto_new = tmto_start; tmto_new <= tmto_stop; tmto_new++)
{
if (available_mem > (kernel_accel_new * (size_per_accel >> tmto_new)))
{
tmto = tmto_new;
break;
}
}
}
char *new_device_name = hcstrdup (device_param->device_name);
@@ -112,61 +205,9 @@ const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashco
if (new_device_name[i] == ' ') new_device_name[i] = '_';
}
char *out_name = new_device_name;
if (memcmp (new_device_name, "AMD_", 4) == 0) out_name += 4;
if (memcmp (new_device_name, "NVIDIA_", 7) == 0) out_name += 7;
// ok, try to find a nice accel programmatically
u32 accel = device_param->device_processors;
if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
{
// expect to change any of this
if (avail < (req1 * accel)) // not enough memory
{
const float multi = (float) avail / req1;
accel = multi;
for (int i = 1; i <= 4; i++) // this is tmto
{
if (device_param->device_processors > accel)
{
accel = ((u64) multi << i) & ~3;
}
}
}
else
{
for (int i = 1; i <= 8; i++)
{
if ((avail * 2) > (req1 * accel))
{
accel = device_param->device_processors * i;
}
}
}
}
else
{
const u64 req1 = 128 * scrypt_r * scrypt_N;
for (int i = 1; i <= 8; i++)
{
if (avail > (req1 * accel))
{
accel = device_param->device_processors * i;
}
}
}
lines_pos += snprintf (lines_buf + lines_pos, lines_sz - lines_pos, "%s * %u 1 %u A\n", out_name, user_options->hash_mode, accel);
lines_pos += snprintf (lines_buf + lines_pos, lines_sz - lines_pos, "%s * %u 1 %u A\n", new_device_name, user_options->hash_mode, kernel_accel_new);
hcfree (new_device_name);
}
return lines_buf;
}
@@ -179,115 +220,11 @@ u64 module_extra_buffer_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE
const u64 scrypt_N = (hashes->salts_buf[0].scrypt_N) ? hashes->salts_buf[0].scrypt_N : SCRYPT_N;
const u64 scrypt_r = (hashes->salts_buf[0].scrypt_r) ? hashes->salts_buf[0].scrypt_r : SCRYPT_R;
const u64 kernel_power_max = ((OPTS_TYPE & OPTS_TYPE_MP_MULTI_DISABLE) ? 1 : device_param->device_processors) * device_param->kernel_threads_max * device_param->kernel_accel_max;
const u64 size_per_accel = 128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra);
u64 tmto_start = 0;
u64 tmto_stop = 4;
u64 size_scrypt = size_per_accel * device_param->kernel_accel_max;
if (user_options->scrypt_tmto_chgd == true)
{
tmto_start = user_options->scrypt_tmto;
tmto_stop = user_options->scrypt_tmto;
}
// size_pws
const u64 size_pws = kernel_power_max * sizeof (pw_t);
const u64 size_pws_amp = size_pws;
// size_pws_comp
const u64 size_pws_comp = kernel_power_max * (sizeof (u32) * 64);
// size_pws_idx
const u64 size_pws_idx = (kernel_power_max + 1) * sizeof (pw_idx_t);
// size_tmps
const u64 size_tmps = kernel_power_max * hashconfig->tmp_size;
// size_hooks
const u64 size_hooks = kernel_power_max * hashconfig->hook_size;
u64 size_pws_pre = 4;
u64 size_pws_base = 4;
if (user_options->slow_candidates == true)
{
// size_pws_pre
size_pws_pre = kernel_power_max * sizeof (pw_pre_t);
// size_pws_base
size_pws_base = kernel_power_max * sizeof (pw_pre_t);
}
// sometimes device_available_mem and device_maxmem_alloc reported back from the opencl runtime are a bit inaccurate.
// let's add some extra space just to be sure.
// now depends on the kernel-accel value (where scrypt and similar benefits), but also hard minimum 64mb and maximum 1024mb limit
u64 EXTRA_SPACE = (1024ULL * 1024ULL) * device_param->kernel_accel_max;
EXTRA_SPACE = MAX (EXTRA_SPACE, ( 64ULL * 1024ULL * 1024ULL));
EXTRA_SPACE = MIN (EXTRA_SPACE, (1024ULL * 1024ULL * 1024ULL));
const u64 scrypt_extra_space
= device_param->size_bfs
+ device_param->size_combs
+ device_param->size_digests
+ device_param->size_esalts
+ device_param->size_markov_css
+ device_param->size_plains
+ device_param->size_results
+ device_param->size_root_css
+ device_param->size_rules
+ device_param->size_rules_c
+ device_param->size_salts
+ device_param->size_shown
+ device_param->size_tm
+ device_param->size_st_digests
+ device_param->size_st_salts
+ device_param->size_st_esalts
+ size_pws
+ size_pws_amp
+ size_pws_comp
+ size_pws_idx
+ size_tmps
+ size_hooks
+ size_pws_pre
+ size_pws_base
+ EXTRA_SPACE;
bool not_enough_memory = true;
u64 size_scrypt = 0;
u64 tmto;
for (tmto = tmto_start; tmto <= tmto_stop; tmto++)
{
size_scrypt = (128ULL * scrypt_r) * scrypt_N;
size_scrypt /= 1ull << tmto;
size_scrypt *= kernel_power_max;
if ((size_scrypt / 4) > device_param->device_maxmem_alloc) continue;
if ((size_scrypt + scrypt_extra_space) > device_param->device_available_mem) continue;
not_enough_memory = false;
break;
}
if (not_enough_memory == true) return -1;
return size_scrypt;
return size_scrypt / (1 << tmto);
}
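
For scale, with the same assumed numbers as the sketch above (N=16384, r=8, 32 threads, kernel_accel_max=124, tmto=2), this function returns 512 MiB * 124 / 4 = 15.5 GiB for the B[] buffer; every additional tmto step halves that figure at the cost of recomputing the skipped V[] entries inside the kernel.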
u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
@ -526,7 +463,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_kernel_loops_max = module_kernel_loops_max;
module_ctx->module_kernel_loops_min = module_kernel_loops_min;
module_ctx->module_kernel_threads_max = module_kernel_threads_max;
module_ctx->module_kernel_threads_min = MODULE_DEFAULT;
module_ctx->module_kernel_threads_min = module_kernel_threads_min;
module_ctx->module_kern_type = module_kern_type;
module_ctx->module_kern_type_dynamic = MODULE_DEFAULT;
module_ctx->module_opti_type = module_opti_type;

View File

@ -57,27 +57,13 @@ typedef struct bestcrypt_scrypt
// 16 is on the low side; we may need to change this depending on user feedback
static const char *SIGNATURE_BESTCRYPT_SCRYPT = "$bcve$";
static const u32 SCRYPT_MAX_ACCEL = 256;
static const u32 SCRYPT_MAX_THREADS = 4;
static const u32 SCRYPT_THREADS = 16;
static const u64 SCRYPT_N = 32768;
static const u64 SCRYPT_R = 16;
static const u64 SCRYPT_P = 1;
u32 module_kernel_accel_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u32 kernel_accel_min = 1;
return kernel_accel_min;
}
u32 module_kernel_accel_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u32 kernel_accel_max = (user_options->kernel_accel_chgd == true) ? user_options->kernel_accel : SCRYPT_MAX_ACCEL;
return kernel_accel_max;
}
u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u32 kernel_loops_min = 1;
@ -94,14 +80,14 @@ u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_
u32 module_kernel_threads_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u32 kernel_threads_min = 1;
const u32 kernel_threads_min = (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : SCRYPT_THREADS;
return kernel_threads_min;
}
u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u32 kernel_threads_max = (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : SCRYPT_MAX_THREADS;
const u32 kernel_threads_max = (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : SCRYPT_THREADS;
return kernel_threads_max;
}
@ -123,26 +109,110 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
return pw_max;
}
const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes)
u32 tmto = 0;
const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes, const u32 device_id, const u32 kernel_accel)
{
// preprocess tmto in case the user has overridden it
// otherwise it must be set to 0, so we can derive it automatically later on
tmto = (user_options->scrypt_tmto_chgd == true) ? user_options->scrypt_tmto : 0;
// we enforce the same configuration for all hashes, so this should be fine
const u64 scrypt_N = (hashes->salts_buf[0].scrypt_N) ? hashes->salts_buf[0].scrypt_N : SCRYPT_N;
const u64 scrypt_r = (hashes->salts_buf[0].scrypt_r) ? hashes->salts_buf[0].scrypt_r : SCRYPT_R;
const u64 req1 = 128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra);
const u64 size_per_accel = (128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra)) >> tmto;
int lines_sz = 4096;
char *lines_buf = hcmalloc (lines_sz);
int lines_pos = 0;
for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++)
hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
const u32 device_processors = device_param->device_processors;
const u64 available_mem = MIN (device_param->device_available_mem, (device_param->device_maxmem_alloc * 4));
u32 kernel_accel_new = device_processors;
if (kernel_accel)
{
hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
// a value from the command line or the tuning db takes priority
if (device_param->skipped == true) continue;
kernel_accel_new = user_options->kernel_accel;
}
else
{
// find a nice kernel_accel programmatically
const u64 avail = MIN (device_param->device_available_mem, (device_param->device_maxmem_alloc * 4)) - (2 * req1);
if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
{
if ((size_per_accel * device_processors) > available_mem) // not enough memory
{
const float multi = (float) available_mem / size_per_accel;
int accel_multi;
for (accel_multi = 1; accel_multi <= 2; accel_multi++)
{
kernel_accel_new = multi * (1 << accel_multi);
if (kernel_accel_new >= device_processors) break;
}
// back off one step to leave some space for tmps[], etc.
kernel_accel_new -= (1 << accel_multi);
// clamp to the device processor count if we land within ~10% of it
if ((kernel_accel_new > device_processors) && ((kernel_accel_new - device_processors) <= (device_processors / 10)))
{
kernel_accel_new = device_processors;
}
}
else
{
for (int i = 1; i <= 8; i++)
{
if ((size_per_accel * device_processors * i) < available_mem)
{
kernel_accel_new = device_processors * i;
}
}
}
}
else
{
for (int i = 1; i <= 8; i++)
{
if ((size_per_accel * device_processors * i) < available_mem)
{
kernel_accel_new = device_processors * i;
}
}
}
}
// derive tmto automatically unless the user has fixed it
if (tmto == 0)
{
const u32 tmto_start = 1;
const u32 tmto_stop = 5;
for (u32 tmto_new = tmto_start; tmto_new <= tmto_stop; tmto_new++)
{
if (available_mem > (kernel_accel_new * (size_per_accel >> tmto_new)))
{
tmto = tmto_new;
break;
}
}
}
char *new_device_name = hcstrdup (device_param->device_name);
@ -151,61 +221,9 @@ const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashco
if (new_device_name[i] == ' ') new_device_name[i] = '_';
}
char *out_name = new_device_name;
if (memcmp (new_device_name, "AMD_", 4) == 0) out_name += 4;
if (memcmp (new_device_name, "NVIDIA_", 7) == 0) out_name += 7;
// ok, try to find a nice accel programmatically
u32 accel = device_param->device_processors;
if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
{
// expect to change any of this
if (avail < (req1 * accel)) // not enough memory
{
const float multi = (float) avail / req1;
accel = multi;
for (int i = 1; i <= 4; i++) // this is tmto
{
if (device_param->device_processors > accel)
{
accel = ((u64) multi << i) & ~3;
}
}
}
else
{
for (int i = 1; i <= 8; i++)
{
if ((avail * 2) > (req1 * accel))
{
accel = device_param->device_processors * i;
}
}
}
}
else
{
const u64 req1 = 128 * scrypt_r * scrypt_N;
for (int i = 1; i <= 8; i++)
{
if (avail > (req1 * accel))
{
accel = device_param->device_processors * i;
}
}
}
lines_pos += snprintf (lines_buf + lines_pos, lines_sz - lines_pos, "%s * %u 1 %u A\n", out_name, user_options->hash_mode, accel);
lines_pos += snprintf (lines_buf + lines_pos, lines_sz - lines_pos, "%s * %u 1 %u A\n", new_device_name, user_options->hash_mode, kernel_accel_new);
hcfree (new_device_name);
}
return lines_buf;
}
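
To see why this module pins SCRYPT_THREADS at 16: with BestCrypt's parameters (N=32768, r=16), one thread lane needs 128 * 16 * 32768 = 64 MiB of B[], so a single accel unit at 16 threads already costs 1 GiB before any tmto is applied, and the tmto loop above usually has to halve that footprint several times before it fits.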
@ -215,121 +233,14 @@ u64 module_extra_buffer_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE
// we need to set the self-test hash settings to pass the self-test
// the decoder for the self-test is called after this function
const u32 scrypt_N = (hashes->salts_buf[0].scrypt_N) ? hashes->salts_buf[0].scrypt_N : SCRYPT_N;
const u32 scrypt_r = (hashes->salts_buf[0].scrypt_r) ? hashes->salts_buf[0].scrypt_r : SCRYPT_R;
const u64 scrypt_N = (hashes->salts_buf[0].scrypt_N) ? hashes->salts_buf[0].scrypt_N : SCRYPT_N;
const u64 scrypt_r = (hashes->salts_buf[0].scrypt_r) ? hashes->salts_buf[0].scrypt_r : SCRYPT_R;
const u64 kernel_power_max = ((OPTS_TYPE & OPTS_TYPE_MP_MULTI_DISABLE) ? 1 : device_param->device_processors) * device_param->kernel_threads_max * device_param->kernel_accel_max;
const u64 size_per_accel = 128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra);
u32 tmto_start = 1;
u32 tmto_stop = 6;
u64 size_scrypt = size_per_accel * device_param->kernel_accel_max;
if (user_options->scrypt_tmto)
{
tmto_start = user_options->scrypt_tmto;
tmto_stop = user_options->scrypt_tmto;
}
// size_pws
const u64 size_pws = kernel_power_max * sizeof (pw_t);
const u64 size_pws_amp = size_pws;
// size_pws_comp
const u64 size_pws_comp = kernel_power_max * (sizeof (u32) * 64);
// size_pws_idx
const u64 size_pws_idx = (kernel_power_max + 1) * sizeof (pw_idx_t);
// size_tmps
const u64 size_tmps = kernel_power_max * hashconfig->tmp_size;
// size_hooks
const u64 size_hooks = kernel_power_max * hashconfig->hook_size;
/*
u64 size_pws_pre = 4;
u64 size_pws_base = 4;
if (user_options->slow_candidates == true)
{
// size_pws_pre
size_pws_pre = kernel_power_max * sizeof (pw_pre_t);
// size_pws_base
size_pws_base = kernel_power_max * sizeof (pw_pre_t);
}
*/
// sometimes device_available_mem and device_maxmem_alloc reported back from the opencl runtime are a bit inaccurate.
// let's add some extra space just to be sure.
// now depends on the kernel-accel value (where scrypt and similar benefits), but also hard minimum 64mb and maximum 1024mb limit
/*
u64 EXTRA_SPACE = (1024ULL * 1024ULL) * device_param->kernel_accel_max;
EXTRA_SPACE = MAX (EXTRA_SPACE, ( 64ULL * 1024ULL * 1024ULL));
EXTRA_SPACE = MIN (EXTRA_SPACE, (1024ULL * 1024ULL * 1024ULL));
*/
const u64 scrypt_extra_space
= device_param->size_bfs
+ device_param->size_combs
+ device_param->size_digests
+ device_param->size_esalts
+ device_param->size_markov_css
+ device_param->size_plains
+ device_param->size_results
+ device_param->size_root_css
+ device_param->size_rules
+ device_param->size_rules_c
+ device_param->size_salts
+ device_param->size_shown
+ device_param->size_tm
+ device_param->size_st_digests
+ device_param->size_st_salts
+ device_param->size_st_esalts
+ size_pws
+ size_pws_amp
+ size_pws_comp
+ size_pws_idx
+ size_tmps
+ size_hooks;
// + size_pws_pre
// + size_pws_base;
/*
+ EXTRA_SPACE;
*/
bool not_enough_memory = true;
u64 size_scrypt = 0;
u32 tmto;
for (tmto = tmto_start; tmto <= tmto_stop; tmto++)
{
size_scrypt = (128ULL * scrypt_r) * scrypt_N;
size_scrypt /= 1ull << tmto;
size_scrypt *= kernel_power_max;
if ((size_scrypt / 4) > device_param->device_maxmem_alloc) continue;
if ((size_scrypt + scrypt_extra_space) > device_param->device_available_mem) continue;
not_enough_memory = false;
break;
}
if (not_enough_memory == true) return -1;
return size_scrypt;
return size_scrypt / (1 << tmto);
}
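
A rough feel for scrypt_extra_space, using hypothetical values (128 processors, 16 threads, kernel_accel_max = 32, and ignoring OPTS_TYPE_MP_MULTI_DISABLE): kernel_power_max = 128 * 16 * 32 = 65536, so size_pws_comp alone is 65536 * 256 bytes = 16 MiB, and the pws, tmps and hooks buffers scale linearly with the same factor, which is why the tuning pass leaves headroom before sizing B[].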
u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
@ -593,8 +504,8 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_hook_size = MODULE_DEFAULT;
module_ctx->module_jit_build_options = module_jit_build_options;
module_ctx->module_jit_cache_disable = MODULE_DEFAULT;
module_ctx->module_kernel_accel_max = module_kernel_accel_max;
module_ctx->module_kernel_accel_min = module_kernel_accel_min;
module_ctx->module_kernel_accel_max = MODULE_DEFAULT;
module_ctx->module_kernel_accel_min = MODULE_DEFAULT;
module_ctx->module_kernel_loops_max = module_kernel_loops_max;
module_ctx->module_kernel_loops_min = module_kernel_loops_min;
module_ctx->module_kernel_threads_max = module_kernel_threads_max;

View File

@ -49,6 +49,8 @@ const char *module_st_pass (MAYBE_UNUSED const hashconfig_t *hashconfig,
static const char *SIGNATURE_MULTIBIT = "$multibit$";
static const u32 SCRYPT_THREADS = 32;
static const u64 SCRYPT_N = 16384;
static const u64 SCRYPT_R = 8;
static const u64 SCRYPT_P = 1;
@ -67,9 +69,16 @@ u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_
return kernel_loops_max;
}
u32 module_kernel_threads_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u32 kernel_threads_min = (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : SCRYPT_THREADS;
return kernel_threads_min;
}
u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u32 kernel_threads_max = 32;
const u32 kernel_threads_max = (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : SCRYPT_THREADS;
return kernel_threads_max;
}
@ -84,26 +93,110 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
return pw_max;
}
const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes)
u32 tmto = 0;
const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes, const u32 device_id, const u32 kernel_accel)
{
// preprocess tmto in case the user has overridden it
// otherwise it must be set to 0, so we can derive it automatically later on
tmto = (user_options->scrypt_tmto_chgd == true) ? user_options->scrypt_tmto : 0;
// we enforce the same configuration for all hashes, so this should be fine
const u64 scrypt_N = (hashes->salts_buf[0].scrypt_N) ? hashes->salts_buf[0].scrypt_N : SCRYPT_N;
const u64 scrypt_r = (hashes->salts_buf[0].scrypt_r) ? hashes->salts_buf[0].scrypt_r : SCRYPT_R;
const u64 req1 = 128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra);
const u64 size_per_accel = (128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra)) >> tmto;
int lines_sz = 4096;
char *lines_buf = hcmalloc (lines_sz);
int lines_pos = 0;
for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++)
hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
const u32 device_processors = device_param->device_processors;
const u64 available_mem = MIN (device_param->device_available_mem, (device_param->device_maxmem_alloc * 4));
u32 kernel_accel_new = device_processors;
if (kernel_accel)
{
hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
// a value from the command line or the tuning db takes priority
if (device_param->skipped == true) continue;
kernel_accel_new = user_options->kernel_accel;
}
else
{
// find a nice kernel_accel programmatically
const u64 avail = MIN (device_param->device_available_mem, (device_param->device_maxmem_alloc * 4)) - (2 * req1);
if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
{
if ((size_per_accel * device_processors) > available_mem) // not enough memory
{
const float multi = (float) available_mem / size_per_accel;
int accel_multi;
for (accel_multi = 1; accel_multi <= 2; accel_multi++)
{
kernel_accel_new = multi * (1 << accel_multi);
if (kernel_accel_new >= device_processors) break;
}
// back off one step to leave some space for tmps[], etc.
kernel_accel_new -= (1 << accel_multi);
// clamp to the device processor count if we land within ~10% of it
if ((kernel_accel_new > device_processors) && ((kernel_accel_new - device_processors) <= (device_processors / 10)))
{
kernel_accel_new = device_processors;
}
}
else
{
for (int i = 1; i <= 8; i++)
{
if ((size_per_accel * device_processors * i) < available_mem)
{
kernel_accel_new = device_processors * i;
}
}
}
}
else
{
for (int i = 1; i <= 8; i++)
{
if ((size_per_accel * device_processors * i) < available_mem)
{
kernel_accel_new = device_processors * i;
}
}
}
}
// derive tmto automatically unless the user has fixed it
if (tmto == 0)
{
const u32 tmto_start = 1;
const u32 tmto_stop = 5;
for (u32 tmto_new = tmto_start; tmto_new <= tmto_stop; tmto_new++)
{
if (available_mem > (kernel_accel_new * (size_per_accel >> tmto_new)))
{
tmto = tmto_new;
break;
}
}
}
char *new_device_name = hcstrdup (device_param->device_name);
@ -112,61 +205,9 @@ const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashco
if (new_device_name[i] == ' ') new_device_name[i] = '_';
}
char *out_name = new_device_name;
if (memcmp (new_device_name, "AMD_", 4) == 0) out_name += 4;
if (memcmp (new_device_name, "NVIDIA_", 7) == 0) out_name += 7;
// ok, try to find a nice accel programmatically
u32 accel = device_param->device_processors;
if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
{
// expect to change any of this
if (avail < (req1 * accel)) // not enough memory
{
const float multi = (float) avail / req1;
accel = multi;
for (int i = 1; i <= 4; i++) // this is tmto
{
if (device_param->device_processors > accel)
{
accel = ((u64) multi << i) & ~3;
}
}
}
else
{
for (int i = 1; i <= 8; i++)
{
if ((avail * 2) > (req1 * accel))
{
accel = device_param->device_processors * i;
}
}
}
}
else
{
const u64 req1 = 128 * scrypt_r * scrypt_N;
for (int i = 1; i <= 8; i++)
{
if (avail > (req1 * accel))
{
accel = device_param->device_processors * i;
}
}
}
lines_pos += snprintf (lines_buf + lines_pos, lines_sz - lines_pos, "%s * %u 1 %u A\n", out_name, user_options->hash_mode, accel);
lines_pos += snprintf (lines_buf + lines_pos, lines_sz - lines_pos, "%s * %u 1 %u A\n", new_device_name, user_options->hash_mode, kernel_accel_new);
hcfree (new_device_name);
}
return lines_buf;
}
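
Worked example with MultiBit's parameters (N=16384, r=8, 32 threads): size_per_accel = 128 * 8 * 16384 * 32 = 512 MiB. On a hypothetical card with 7 GiB usable and kernel_accel_new = 24, the loop above first tests 24 * 256 MiB = 6 GiB at tmto=1, which fits, so tmto settles at 1; at tmto=0 the same workload would have needed 12 GiB.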
@ -179,115 +220,11 @@ u64 module_extra_buffer_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE
const u64 scrypt_N = (hashes->salts_buf[0].scrypt_N) ? hashes->salts_buf[0].scrypt_N : SCRYPT_N;
const u64 scrypt_r = (hashes->salts_buf[0].scrypt_r) ? hashes->salts_buf[0].scrypt_r : SCRYPT_R;
const u64 kernel_power_max = ((OPTS_TYPE & OPTS_TYPE_MP_MULTI_DISABLE) ? 1 : device_param->device_processors) * device_param->kernel_threads_max * device_param->kernel_accel_max;
const u64 size_per_accel = 128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra);
u64 tmto_start = 0;
u64 tmto_stop = 4;
u64 size_scrypt = size_per_accel * device_param->kernel_accel_max;
if (user_options->scrypt_tmto_chgd == true)
{
tmto_start = user_options->scrypt_tmto;
tmto_stop = user_options->scrypt_tmto;
}
// size_pws
const u64 size_pws = kernel_power_max * sizeof (pw_t);
const u64 size_pws_amp = size_pws;
// size_pws_comp
const u64 size_pws_comp = kernel_power_max * (sizeof (u32) * 64);
// size_pws_idx
const u64 size_pws_idx = (kernel_power_max + 1) * sizeof (pw_idx_t);
// size_tmps
const u64 size_tmps = kernel_power_max * hashconfig->tmp_size;
// size_hooks
const u64 size_hooks = kernel_power_max * hashconfig->hook_size;
u64 size_pws_pre = 4;
u64 size_pws_base = 4;
if (user_options->slow_candidates == true)
{
// size_pws_pre
size_pws_pre = kernel_power_max * sizeof (pw_pre_t);
// size_pws_base
size_pws_base = kernel_power_max * sizeof (pw_pre_t);
}
// sometimes device_available_mem and device_maxmem_alloc reported back from the opencl runtime are a bit inaccurate.
// let's add some extra space just to be sure.
// now depends on the kernel-accel value (where scrypt and similar benefits), but also hard minimum 64mb and maximum 1024mb limit
u64 EXTRA_SPACE = (1024ULL * 1024ULL) * device_param->kernel_accel_max;
EXTRA_SPACE = MAX (EXTRA_SPACE, ( 64ULL * 1024ULL * 1024ULL));
EXTRA_SPACE = MIN (EXTRA_SPACE, (1024ULL * 1024ULL * 1024ULL));
const u64 scrypt_extra_space
= device_param->size_bfs
+ device_param->size_combs
+ device_param->size_digests
+ device_param->size_esalts
+ device_param->size_markov_css
+ device_param->size_plains
+ device_param->size_results
+ device_param->size_root_css
+ device_param->size_rules
+ device_param->size_rules_c
+ device_param->size_salts
+ device_param->size_shown
+ device_param->size_tm
+ device_param->size_st_digests
+ device_param->size_st_salts
+ device_param->size_st_esalts
+ size_pws
+ size_pws_amp
+ size_pws_comp
+ size_pws_idx
+ size_tmps
+ size_hooks
+ size_pws_pre
+ size_pws_base
+ EXTRA_SPACE;
bool not_enough_memory = true;
u64 size_scrypt = 0;
u64 tmto;
for (tmto = tmto_start; tmto <= tmto_stop; tmto++)
{
size_scrypt = (128ULL * scrypt_r) * scrypt_N;
size_scrypt /= 1ull << tmto;
size_scrypt *= kernel_power_max;
if ((size_scrypt / 4) > device_param->device_maxmem_alloc) continue;
if ((size_scrypt + scrypt_extra_space) > device_param->device_available_mem) continue;
not_enough_memory = false;
break;
}
if (not_enough_memory == true) return -1;
return size_scrypt;
return size_scrypt / (1 << tmto);
}
u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
@ -550,7 +487,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_kernel_loops_max = module_kernel_loops_max;
module_ctx->module_kernel_loops_min = module_kernel_loops_min;
module_ctx->module_kernel_threads_max = module_kernel_threads_max;
module_ctx->module_kernel_threads_min = MODULE_DEFAULT;
module_ctx->module_kernel_threads_min = module_kernel_threads_min;
module_ctx->module_kern_type = module_kern_type;
module_ctx->module_kern_type_dynamic = MODULE_DEFAULT;
module_ctx->module_opti_type = module_opti_type;

View File

@ -57,6 +57,8 @@ typedef struct exodus
static const char *SIGNATURE_EXODUS = "EXODUS";
static const u32 SCRYPT_THREADS = 32;
static const u64 SCRYPT_N = 16384;
static const u64 SCRYPT_R = 8;
static const u64 SCRYPT_P = 1;
@ -75,9 +77,16 @@ u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_
return kernel_loops_max;
}
u32 module_kernel_threads_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u32 kernel_threads_min = (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : SCRYPT_THREADS;
return kernel_threads_min;
}
u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u32 kernel_threads_max = 32;
const u32 kernel_threads_max = (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : SCRYPT_THREADS;
return kernel_threads_max;
}
@ -96,26 +105,110 @@ u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED
return esalt_size;
}
const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes)
u32 tmto = 0;
const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes, const u32 device_id, const u32 kernel_accel)
{
// preprocess tmto in case the user has overridden it
// otherwise it must be set to 0, so we can derive it automatically later on
tmto = (user_options->scrypt_tmto_chgd == true) ? user_options->scrypt_tmto : 0;
// we enforce the same configuration for all hashes, so this should be fine
const u64 scrypt_N = (hashes->salts_buf[0].scrypt_N) ? hashes->salts_buf[0].scrypt_N : SCRYPT_N;
const u64 scrypt_r = (hashes->salts_buf[0].scrypt_r) ? hashes->salts_buf[0].scrypt_r : SCRYPT_R;
const u64 req1 = 128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra);
const u64 size_per_accel = (128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra)) >> tmto;
int lines_sz = 4096;
char *lines_buf = hcmalloc (lines_sz);
int lines_pos = 0;
for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++)
hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
const u32 device_processors = device_param->device_processors;
const u64 available_mem = MIN (device_param->device_available_mem, (device_param->device_maxmem_alloc * 4));
u32 kernel_accel_new = device_processors;
if (kernel_accel)
{
hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
// a value from the command line or the tuning db takes priority
if (device_param->skipped == true) continue;
kernel_accel_new = user_options->kernel_accel;
}
else
{
// find a nice kernel_accel programmatically
const u64 avail = MIN (device_param->device_available_mem, (device_param->device_maxmem_alloc * 4)) - (2 * req1);
if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
{
if ((size_per_accel * device_processors) > available_mem) // not enough memory
{
const float multi = (float) available_mem / size_per_accel;
int accel_multi;
for (accel_multi = 1; accel_multi <= 2; accel_multi++)
{
kernel_accel_new = multi * (1 << accel_multi);
if (kernel_accel_new >= device_processors) break;
}
// back off one step to leave some space for tmps[], etc.
kernel_accel_new -= (1 << accel_multi);
// clamp to the device processor count if we land within ~10% of it
if ((kernel_accel_new > device_processors) && ((kernel_accel_new - device_processors) <= (device_processors / 10)))
{
kernel_accel_new = device_processors;
}
}
else
{
for (int i = 1; i <= 8; i++)
{
if ((size_per_accel * device_processors * i) < available_mem)
{
kernel_accel_new = device_processors * i;
}
}
}
}
else
{
for (int i = 1; i <= 8; i++)
{
if ((size_per_accel * device_processors * i) < available_mem)
{
kernel_accel_new = device_processors * i;
}
}
}
}
// derive tmto automatically unless the user has fixed it
if (tmto == 0)
{
const u32 tmto_start = 1;
const u32 tmto_stop = 5;
for (u32 tmto_new = tmto_start; tmto_new <= tmto_stop; tmto_new++)
{
if (available_mem > (kernel_accel_new * (size_per_accel >> tmto_new)))
{
tmto = tmto_new;
break;
}
}
}
char *new_device_name = hcstrdup (device_param->device_name);
@ -124,61 +217,9 @@ const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashco
if (new_device_name[i] == ' ') new_device_name[i] = '_';
}
char *out_name = new_device_name;
if (memcmp (new_device_name, "AMD_", 4) == 0) out_name += 4;
if (memcmp (new_device_name, "NVIDIA_", 7) == 0) out_name += 7;
// ok, try to find a nice accel programmatically
u32 accel = device_param->device_processors;
if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
{
// expect to change any of this
if (avail < (req1 * accel)) // not enough memory
{
const float multi = (float) avail / req1;
accel = multi;
for (int i = 1; i <= 4; i++) // this is tmto
{
if (device_param->device_processors > accel)
{
accel = ((u64) multi << i) & ~3;
}
}
}
else
{
for (int i = 1; i <= 8; i++)
{
if ((avail * 2) > (req1 * accel))
{
accel = device_param->device_processors * i;
}
}
}
}
else
{
const u64 req1 = 128 * scrypt_r * scrypt_N;
for (int i = 1; i <= 8; i++)
{
if (avail > (req1 * accel))
{
accel = device_param->device_processors * i;
}
}
}
lines_pos += snprintf (lines_buf + lines_pos, lines_sz - lines_pos, "%s * %u 1 %u A\n", out_name, user_options->hash_mode, accel);
lines_pos += snprintf (lines_buf + lines_pos, lines_sz - lines_pos, "%s * %u 1 %u A\n", new_device_name, user_options->hash_mode, kernel_accel_new);
hcfree (new_device_name);
}
return lines_buf;
}
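
Note the clamp at the top of this function: the planning budget is MIN (device_available_mem, device_maxmem_alloc * 4), so a device reporting 24 GiB free but a 4 GiB single-allocation limit is planned as if it had 16 GiB, presumably because the B[] buffer can be split across at most four allocations (cf. the size_scrypt / 4 check in the old sizing loop below).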
@ -191,115 +232,11 @@ u64 module_extra_buffer_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE
const u64 scrypt_N = (hashes->salts_buf[0].scrypt_N) ? hashes->salts_buf[0].scrypt_N : SCRYPT_N;
const u64 scrypt_r = (hashes->salts_buf[0].scrypt_r) ? hashes->salts_buf[0].scrypt_r : SCRYPT_R;
const u64 kernel_power_max = ((OPTS_TYPE & OPTS_TYPE_MP_MULTI_DISABLE) ? 1 : device_param->device_processors) * device_param->kernel_threads_max * device_param->kernel_accel_max;
const u64 size_per_accel = 128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra);
u64 tmto_start = 0;
u64 tmto_stop = 4;
u64 size_scrypt = size_per_accel * device_param->kernel_accel_max;
if (user_options->scrypt_tmto_chgd == true)
{
tmto_start = user_options->scrypt_tmto;
tmto_stop = user_options->scrypt_tmto;
}
// size_pws
const u64 size_pws = kernel_power_max * sizeof (pw_t);
const u64 size_pws_amp = size_pws;
// size_pws_comp
const u64 size_pws_comp = kernel_power_max * (sizeof (u32) * 64);
// size_pws_idx
const u64 size_pws_idx = (kernel_power_max + 1) * sizeof (pw_idx_t);
// size_tmps
const u64 size_tmps = kernel_power_max * hashconfig->tmp_size;
// size_hooks
const u64 size_hooks = kernel_power_max * hashconfig->hook_size;
u64 size_pws_pre = 4;
u64 size_pws_base = 4;
if (user_options->slow_candidates == true)
{
// size_pws_pre
size_pws_pre = kernel_power_max * sizeof (pw_pre_t);
// size_pws_base
size_pws_base = kernel_power_max * sizeof (pw_pre_t);
}
// sometimes device_available_mem and device_maxmem_alloc reported back from the opencl runtime are a bit inaccurate.
// let's add some extra space just to be sure.
// now depends on the kernel-accel value (where scrypt and similar benefits), but also hard minimum 64mb and maximum 1024mb limit
u64 EXTRA_SPACE = (1024ULL * 1024ULL) * device_param->kernel_accel_max;
EXTRA_SPACE = MAX (EXTRA_SPACE, ( 64ULL * 1024ULL * 1024ULL));
EXTRA_SPACE = MIN (EXTRA_SPACE, (1024ULL * 1024ULL * 1024ULL));
const u64 scrypt_extra_space
= device_param->size_bfs
+ device_param->size_combs
+ device_param->size_digests
+ device_param->size_esalts
+ device_param->size_markov_css
+ device_param->size_plains
+ device_param->size_results
+ device_param->size_root_css
+ device_param->size_rules
+ device_param->size_rules_c
+ device_param->size_salts
+ device_param->size_shown
+ device_param->size_tm
+ device_param->size_st_digests
+ device_param->size_st_salts
+ device_param->size_st_esalts
+ size_pws
+ size_pws_amp
+ size_pws_comp
+ size_pws_idx
+ size_tmps
+ size_hooks
+ size_pws_pre
+ size_pws_base
+ EXTRA_SPACE;
bool not_enough_memory = true;
u64 size_scrypt = 0;
u64 tmto;
for (tmto = tmto_start; tmto <= tmto_stop; tmto++)
{
size_scrypt = (128ULL * scrypt_r) * scrypt_N;
size_scrypt /= 1ull << tmto;
size_scrypt *= kernel_power_max;
if ((size_scrypt / 4) > device_param->device_maxmem_alloc) continue;
if ((size_scrypt + scrypt_extra_space) > device_param->device_available_mem) continue;
not_enough_memory = false;
break;
}
if (not_enough_memory == true) return -1;
return size_scrypt;
return size_scrypt / (1 << tmto);
}
u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
@ -634,7 +571,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_kernel_loops_max = module_kernel_loops_max;
module_ctx->module_kernel_loops_min = module_kernel_loops_min;
module_ctx->module_kernel_threads_max = module_kernel_threads_max;
module_ctx->module_kernel_threads_min = MODULE_DEFAULT;
module_ctx->module_kernel_threads_min = module_kernel_threads_min;
module_ctx->module_kern_type = module_kern_type;
module_ctx->module_kern_type_dynamic = MODULE_DEFAULT;
module_ctx->module_opti_type = module_opti_type;

View File

@ -49,6 +49,8 @@ const char *module_st_pass (MAYBE_UNUSED const hashconfig_t *hashconfig,
static const char *SIGNATURE_BISQ = "$bisq$";
static const u32 SCRYPT_THREADS = 16;
static const u64 SCRYPT_N = 32768;
static const u64 SCRYPT_R = 8;
static const u64 SCRYPT_P = 6;
@ -67,9 +69,16 @@ u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_
return kernel_loops_max;
}
u32 module_kernel_threads_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u32 kernel_threads_min = (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : SCRYPT_THREADS;
return kernel_threads_min;
}
u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u32 kernel_threads_max = 32;
const u32 kernel_threads_max = (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : SCRYPT_THREADS;
return kernel_threads_max;
}
@ -91,26 +100,110 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
return pw_max;
}
const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes)
u32 tmto = 0;
const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes, const u32 device_id, const u32 kernel_accel)
{
// preprocess tmto in case the user has overridden it
// otherwise it must be set to 0, so we can derive it automatically later on
tmto = (user_options->scrypt_tmto_chgd == true) ? user_options->scrypt_tmto : 0;
// we enforce the same configuration for all hashes, so this should be fine
const u64 scrypt_N = (hashes->salts_buf[0].scrypt_N) ? hashes->salts_buf[0].scrypt_N : SCRYPT_N;
const u64 scrypt_r = (hashes->salts_buf[0].scrypt_r) ? hashes->salts_buf[0].scrypt_r : SCRYPT_R;
const u64 req1 = 128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra);
const u64 size_per_accel = (128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra)) >> tmto;
int lines_sz = 4096;
char *lines_buf = hcmalloc (lines_sz);
int lines_pos = 0;
for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++)
hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
const u32 device_processors = device_param->device_processors;
const u64 available_mem = MIN (device_param->device_available_mem, (device_param->device_maxmem_alloc * 4));
u32 kernel_accel_new = device_processors;
if (kernel_accel)
{
hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
// a value from the command line or the tuning db takes priority
if (device_param->skipped == true) continue;
kernel_accel_new = user_options->kernel_accel;
}
else
{
// find a nice kernel_accel programmatically
const u64 avail = MIN (device_param->device_available_mem, (device_param->device_maxmem_alloc * 4)) - (2 * req1);
if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
{
if ((size_per_accel * device_processors) > available_mem) // not enough memory
{
const float multi = (float) available_mem / size_per_accel;
int accel_multi;
for (accel_multi = 1; accel_multi <= 2; accel_multi++)
{
kernel_accel_new = multi * (1 << accel_multi);
if (kernel_accel_new >= device_processors) break;
}
// back off one step to leave some space for tmps[], etc.
kernel_accel_new -= (1 << accel_multi);
// clamp to the device processor count if we land within ~10% of it
if ((kernel_accel_new > device_processors) && ((kernel_accel_new - device_processors) <= (device_processors / 10)))
{
kernel_accel_new = device_processors;
}
}
else
{
for (int i = 1; i <= 8; i++)
{
if ((size_per_accel * device_processors * i) < available_mem)
{
kernel_accel_new = device_processors * i;
}
}
}
}
else
{
for (int i = 1; i <= 8; i++)
{
if ((size_per_accel * device_processors * i) < available_mem)
{
kernel_accel_new = device_processors * i;
}
}
}
}
// derive tmto automatically unless the user has fixed it
if (tmto == 0)
{
const u32 tmto_start = 1;
const u32 tmto_stop = 5;
for (u32 tmto_new = tmto_start; tmto_new <= tmto_stop; tmto_new++)
{
if (available_mem > (kernel_accel_new * (size_per_accel >> tmto_new)))
{
tmto = tmto_new;
break;
}
}
}
char *new_device_name = hcstrdup (device_param->device_name);
@ -119,61 +212,9 @@ const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashco
if (new_device_name[i] == ' ') new_device_name[i] = '_';
}
char *out_name = new_device_name;
if (memcmp (new_device_name, "AMD_", 4) == 0) out_name += 4;
if (memcmp (new_device_name, "NVIDIA_", 7) == 0) out_name += 7;
// ok, try to find a nice accel programmatically
u32 accel = device_param->device_processors;
if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
{
// expect to change any of this
if (avail < (req1 * accel)) // not enough memory
{
const float multi = (float) avail / req1;
accel = multi;
for (int i = 1; i <= 4; i++) // this is tmto
{
if (device_param->device_processors > accel)
{
accel = ((u64) multi << i) & ~3;
}
}
}
else
{
for (int i = 1; i <= 8; i++)
{
if ((avail * 2) > (req1 * accel))
{
accel = device_param->device_processors * i;
}
}
}
}
else
{
const u64 req1 = 128 * scrypt_r * scrypt_N;
for (int i = 1; i <= 8; i++)
{
if (avail > (req1 * accel))
{
accel = device_param->device_processors * i;
}
}
}
lines_pos += snprintf (lines_buf + lines_pos, lines_sz - lines_pos, "%s * %u 1 %u A\n", out_name, user_options->hash_mode, accel);
lines_pos += snprintf (lines_buf + lines_pos, lines_sz - lines_pos, "%s * %u 1 %u A\n", new_device_name, user_options->hash_mode, kernel_accel_new);
hcfree (new_device_name);
}
return lines_buf;
}
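
With Bisq's parameters (N=32768, r=8) one thread lane needs 128 * 8 * 32768 = 32 MiB, i.e. 512 MiB per accel unit at the 16-thread default. Note that scrypt_p (6 here) never enters the sizing above: only N and r determine the per-lane B[] footprint, since the p iterations can reuse the same scratch memory.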
@ -186,115 +227,11 @@ u64 module_extra_buffer_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE
const u64 scrypt_N = (hashes->salts_buf[0].scrypt_N) ? hashes->salts_buf[0].scrypt_N : SCRYPT_N;
const u64 scrypt_r = (hashes->salts_buf[0].scrypt_r) ? hashes->salts_buf[0].scrypt_r : SCRYPT_R;
const u64 kernel_power_max = ((OPTS_TYPE & OPTS_TYPE_MP_MULTI_DISABLE) ? 1 : device_param->device_processors) * device_param->kernel_threads_max * device_param->kernel_accel_max;
const u64 size_per_accel = 128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra);
u64 tmto_start = 0;
u64 tmto_stop = 4;
u64 size_scrypt = size_per_accel * device_param->kernel_accel_max;
if (user_options->scrypt_tmto_chgd == true)
{
tmto_start = user_options->scrypt_tmto;
tmto_stop = user_options->scrypt_tmto;
}
// size_pws
const u64 size_pws = kernel_power_max * sizeof (pw_t);
const u64 size_pws_amp = size_pws;
// size_pws_comp
const u64 size_pws_comp = kernel_power_max * (sizeof (u32) * 64);
// size_pws_idx
const u64 size_pws_idx = (kernel_power_max + 1) * sizeof (pw_idx_t);
// size_tmps
const u64 size_tmps = kernel_power_max * hashconfig->tmp_size;
// size_hooks
const u64 size_hooks = kernel_power_max * hashconfig->hook_size;
u64 size_pws_pre = 4;
u64 size_pws_base = 4;
if (user_options->slow_candidates == true)
{
// size_pws_pre
size_pws_pre = kernel_power_max * sizeof (pw_pre_t);
// size_pws_base
size_pws_base = kernel_power_max * sizeof (pw_pre_t);
}
// sometimes device_available_mem and device_maxmem_alloc reported back from the opencl runtime are a bit inaccurate.
// let's add some extra space just to be sure.
// now depends on the kernel-accel value (where scrypt and similar benefits), but also hard minimum 64mb and maximum 1024mb limit
u64 EXTRA_SPACE = (1024ULL * 1024ULL) * device_param->kernel_accel_max;
EXTRA_SPACE = MAX (EXTRA_SPACE, ( 64ULL * 1024ULL * 1024ULL));
EXTRA_SPACE = MIN (EXTRA_SPACE, (1024ULL * 1024ULL * 1024ULL));
const u64 scrypt_extra_space
= device_param->size_bfs
+ device_param->size_combs
+ device_param->size_digests
+ device_param->size_esalts
+ device_param->size_markov_css
+ device_param->size_plains
+ device_param->size_results
+ device_param->size_root_css
+ device_param->size_rules
+ device_param->size_rules_c
+ device_param->size_salts
+ device_param->size_shown
+ device_param->size_tm
+ device_param->size_st_digests
+ device_param->size_st_salts
+ device_param->size_st_esalts
+ size_pws
+ size_pws_amp
+ size_pws_comp
+ size_pws_idx
+ size_tmps
+ size_hooks
+ size_pws_pre
+ size_pws_base
+ EXTRA_SPACE;
bool not_enough_memory = true;
u64 size_scrypt = 0;
u64 tmto;
for (tmto = tmto_start; tmto <= tmto_stop; tmto++)
{
size_scrypt = (128ULL * scrypt_r) * scrypt_N;
size_scrypt /= 1ull << tmto;
size_scrypt *= kernel_power_max;
if ((size_scrypt / 4) > device_param->device_maxmem_alloc) continue;
if ((size_scrypt + scrypt_extra_space) > device_param->device_available_mem) continue;
not_enough_memory = false;
break;
}
if (not_enough_memory == true) return -1;
return size_scrypt;
return size_scrypt / (1 << tmto);
}
u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
@ -557,7 +494,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_kernel_loops_max = module_kernel_loops_max;
module_ctx->module_kernel_loops_min = module_kernel_loops_min;
module_ctx->module_kernel_threads_max = module_kernel_threads_max;
module_ctx->module_kernel_threads_min = MODULE_DEFAULT;
module_ctx->module_kernel_threads_min = module_kernel_threads_min;
module_ctx->module_kern_type = module_kern_type;
module_ctx->module_kern_type_dynamic = MODULE_DEFAULT;
module_ctx->module_opti_type = module_opti_type;

View File

@ -43,11 +43,6 @@ int sort_by_tuning_db_entry (const void *v1, const void *v2)
if (res3 != 0) return (res3);
const int res4 = t1->source
- t2->source;
if (res4 != 0) return (res4);
return 0;
}
@ -118,7 +113,7 @@ int tuning_db_init (hashcat_ctx_t *hashcat_ctx)
if (line_buf[0] == '#') continue;
tuning_db_process_line (hashcat_ctx, line_buf, line_num, 1);
tuning_db_process_line (hashcat_ctx, line_buf, line_num);
}
hcfree (buf);
@ -167,7 +162,7 @@ void tuning_db_destroy (hashcat_ctx_t *hashcat_ctx)
memset (tuning_db, 0, sizeof (tuning_db_t));
}
bool tuning_db_process_line (hashcat_ctx_t *hashcat_ctx, const char *line_buf, const int line_num, const int source)
bool tuning_db_process_line (hashcat_ctx_t *hashcat_ctx, const char *line_buf, const int line_num)
{
tuning_db_t *tuning_db = hashcat_ctx->tuning_db;
user_options_extra_t *user_options_extra = hashcat_ctx->user_options_extra;
@ -353,7 +348,6 @@ bool tuning_db_process_line (hashcat_ctx_t *hashcat_ctx, const char *line_buf, c
entry->vector_width = vector_width;
entry->kernel_accel = kernel_accel;
entry->kernel_loops = kernel_loops;
entry->source = source;
tuning_db->entry_cnt++;
}
@ -430,12 +424,11 @@ static tuning_db_entry_t *tuning_db_search_real (hashcat_ctx_t *hashcat_ctx, con
// this will produce all 2^3 combinations required
for (i = 0; i < 16; i++)
for (i = 0; i < 8; i++)
{
s.source = (i & 1) ? 2 : 1;
s.device_name = (i & 1) ? "*" : device_name_nospace;
s.attack_mode = (i & 2) ? -1 : attack_mode;
s.hash_mode = (i & 4) ? -1 : hash_mode;
s.device_name = (i & 8) ? "*" : device_name_nospace;
entry = (tuning_db_entry_t *) bsearch (&s, tuning_db->entry_buf, tuning_db->entry_cnt, sizeof (tuning_db_entry_t), sort_by_tuning_db_entry);
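
With the source field gone, the three remaining wildcard bits give the 2^3 = 8 probes mentioned above, tried from most specific to most generic. A tiny standalone sketch (the device name is hypothetical) that prints the probe order:

#include <stdio.h>

int main (void)
{
  const char *device_name = "GeForce_RTX_4090"; // hypothetical
  const int   attack_mode = 0;
  const int   hash_mode   = 22700;

  for (int i = 0; i < 8; i++)
  {
    const char *dn = (i & 1) ? "*" : device_name; // wildcard device name
    const int   am = (i & 2) ? -1  : attack_mode; // wildcard attack mode
    const int   hm = (i & 4) ? -1  : hash_mode;   // wildcard hash mode

    printf ("probe %d: device=%-18s attack_mode=%2d hash_mode=%6d\n", i, dn, am, hm);
  }

  return 0;
}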
@ -443,7 +436,7 @@ static tuning_db_entry_t *tuning_db_search_real (hashcat_ctx_t *hashcat_ctx, con
// in non-wildcard mode do some additional checks:
if ((i & 8) == 0)
if ((i & 1) == 0)
{
// in case we have an alias-name

View File

@ -1895,6 +1895,14 @@ void user_options_preprocess (hashcat_ctx_t *hashcat_ctx)
}
#endif
if (user_options->hwmon == false)
{
// some algorithms, such as SCRYPT, depend on accurate free memory values
// the only way to get them is through low-level APIs such as NVML, via hwmon
user_options->hwmon = true;
}
if (user_options->stdout_flag)
{
user_options->hwmon = false;

View File

@ -24,4 +24,3 @@
# It's better to derive the tuning based on the hash information (handled by the hash-mode plugin).
# The tunings from the hash-mode plugin may be slightly off, so if you have better values, you can hardcode them here.

View File

@ -19,7 +19,3 @@
#Device Attack Hash Vector Kernel Kernel
#Name Mode Type Width Accel Loops
GeForce_RTX_4090 * 9300 1 512 A
ALIAS_AMD_RX6900XT * 9300 1 720 A
ALIAS_AMD_RX7900XTX * 9300 1 840 A

View File

@ -19,7 +19,3 @@
#Device Attack Hash Vector Kernel Kernel
#Name Mode Type Width Accel Loops
GeForce_RTX_4090 * 15700 1 180 A
ALIAS_AMD_RX6900XT * 15700 1 56 A
ALIAS_AMD_RX7900XTX * 15700 1 92 A

View File

@ -19,7 +19,14 @@
#Device Attack Hash Vector Kernel Kernel
#Name Mode Type Width Accel Loops
GeForce_RTX_4090 * 22700 1 180 A
ALIAS_AMD_RX6900XT * 22700 1 56 A
ALIAS_AMD_RX7900XTX * 22700 1 92 A
#Leaving this here as a reference
#GeForce_GTX_980 * 22700 1 28 A
#GeForce_GTX_1630 * 22700 1 11 A
#GeForce_RTX_2080_Ti * 22700 1 78 A
#GeForce_RTX_3090 * 22700 1 82 A
#GeForce_RTX_4090 * 22700 1 180 A
#ALIAS_AMD_RX480 * 22700 1 28 A
#ALIAS_AMD_Vega64 * 22700 1 28 A
#ALIAS_AMD_RX6900XT * 22700 1 56 A
#ALIAS_AMD_RX7900XTX * 22700 1 92 A

View File

@ -19,7 +19,3 @@
#Device Attack Hash Vector Kernel Kernel
#Name Mode Type Width Accel Loops
GeForce_RTX_4090 * 24000 1 180 A
ALIAS_AMD_RX6900XT * 24000 1 56 A
ALIAS_AMD_RX7900XTX * 24000 1 92 A

View File

@ -19,7 +19,3 @@
#Device Attack Hash Vector Kernel Kernel
#Name Mode Type Width Accel Loops
GeForce_RTX_4090 * 27700 1 180 A
ALIAS_AMD_RX6900XT * 27700 1 56 A
ALIAS_AMD_RX7900XTX * 27700 1 92 A

View File

@ -19,7 +19,3 @@
#Device Attack Hash Vector Kernel Kernel
#Name Mode Type Width Accel Loops
GeForce_RTX_4090 * 28200 1 180 A
ALIAS_AMD_RX6900XT * 28200 1 56 A
ALIAS_AMD_RX7900XTX * 28200 1 92 A

View File

@ -18,8 +18,3 @@
#Device Attack Hash Vector Kernel Kernel
#Name Mode Type Width Accel Loops
GeForce_RTX_4090 * 29800 1 180 A
ALIAS_AMD_RX6900XT * 29800 1 56 A
ALIAS_AMD_RX7900XTX * 29800 1 92 A