diff --git a/include/ext_nvml.h b/include/ext_nvml.h index 02c5d490c..0215e1a32 100644 --- a/include/ext_nvml.h +++ b/include/ext_nvml.h @@ -161,6 +161,18 @@ typedef enum nvmlGom_enum * */ #define nvmlClocksThrottleReasonNone 0x0000000000000000LL +/** + * Memory allocation information for a device (v1). + * The total amount is equal to the sum of the amounts of free and used memory. + */ +typedef struct nvmlMemory_st +{ + unsigned long long total; //!< Total physical device memory (in bytes) + unsigned long long free; //!< Unallocated device memory (in bytes) + unsigned long long used; //!< Sum of Reserved and Allocated device memory (in bytes). + //!< Note that the driver/GPU always sets aside a small amount of memory for bookkeeping +} nvmlMemory_t; + /* * End of declarations from nvml.h **/ @@ -191,6 +203,7 @@ typedef nvmlReturn_t (*NVML_API_CALL NVML_DEVICE_GET_SUPPORTEDCLOCKSTHROTTLEREAS typedef nvmlReturn_t (*NVML_API_CALL NVML_DEVICE_SET_COMPUTEMODE) (nvmlDevice_t, nvmlComputeMode_t); typedef nvmlReturn_t (*NVML_API_CALL NVML_DEVICE_SET_OPERATIONMODE) (nvmlDevice_t, nvmlGpuOperationMode_t); typedef nvmlReturn_t (*NVML_API_CALL NVML_DEVICE_GET_PCIINFO) (nvmlDevice_t, nvmlPciInfo_t *); +typedef nvmlReturn_t (*NVML_API_CALL NVML_DEVICE_GET_MEMORYINFO) (nvmlDevice_t, nvmlMemory_t *); typedef struct hm_nvml_lib { @@ -212,6 +225,7 @@ typedef struct hm_nvml_lib NVML_DEVICE_GET_CURRENTCLOCKSTHROTTLEREASONS nvmlDeviceGetCurrentClocksThrottleReasons; NVML_DEVICE_GET_SUPPORTEDCLOCKSTHROTTLEREASONS nvmlDeviceGetSupportedClocksThrottleReasons; NVML_DEVICE_GET_PCIINFO nvmlDeviceGetPciInfo; + NVML_DEVICE_GET_MEMORYINFO nvmlDeviceGetMemoryInfo; } hm_nvml_lib_t; @@ -232,5 +246,6 @@ int hm_NVML_nvmlDeviceGetClockInfo (void *hashcat_ctx, nvmlDevice_t device, nvml int hm_NVML_nvmlDeviceGetTemperatureThreshold (void *hashcat_ctx, nvmlDevice_t device, nvmlTemperatureThresholds_t thresholdType, unsigned int *temp); int hm_NVML_nvmlDeviceGetCurrPcieLinkWidth (void *hashcat_ctx, nvmlDevice_t device, unsigned int *currLinkWidth); int hm_NVML_nvmlDeviceGetPciInfo (void *hashcat_ctx, nvmlDevice_t device, nvmlPciInfo_t *pci); +int hm_NVML_nvmlDeviceGetMemoryInfo (void *hashcat_ctx, nvmlDevice_t device, nvmlMemory_t *mem); #endif // HC_NVML_H diff --git a/include/ext_sysfs_amdgpu.h b/include/ext_sysfs_amdgpu.h index 50c0dc569..d381d9cec 100644 --- a/include/ext_sysfs_amdgpu.h +++ b/include/ext_sysfs_amdgpu.h @@ -34,5 +34,6 @@ int hm_SYSFS_AMDGPU_get_pp_dpm_sclk (void *hashcat_ctx, const int backend_device int hm_SYSFS_AMDGPU_get_pp_dpm_mclk (void *hashcat_ctx, const int backend_device_idx, int *val); int hm_SYSFS_AMDGPU_get_pp_dpm_pcie (void *hashcat_ctx, const int backend_device_idx, int *val); int hm_SYSFS_AMDGPU_get_gpu_busy_percent (void *hashcat_ctx, const int backend_device_idx, int *val); +int hm_SYSFS_AMDGPU_get_mem_info_vram_used (void *hashcat_ctx, const int backend_device_idx, u64 *val); #endif // HC_EXT_SYSFS_AMDGPU_H diff --git a/include/hwmon.h b/include/hwmon.h index 545e22b2d..3d4bd7940 100644 --- a/include/hwmon.h +++ b/include/hwmon.h @@ -24,6 +24,7 @@ int hm_get_utilization_with_devices_idx (hashcat_ctx_t *hashcat_ctx, cons int hm_get_memoryspeed_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int backend_device_idx); int hm_get_corespeed_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int backend_device_idx); int hm_get_throttle_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int backend_device_idx); +u64 hm_get_memoryused_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int backend_device_idx); int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx); void hwmon_ctx_destroy (hashcat_ctx_t *hashcat_ctx); diff --git a/include/modules.h b/include/modules.h index aed8403ce..713b3f46f 100644 --- a/include/modules.h +++ b/include/modules.h @@ -20,7 +20,7 @@ u32 module_dgst_pos2 (MAYBE_UNUSED const hashconfig_t *ha u32 module_dgst_pos3 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra); u32 module_dgst_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra); u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra); -const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes); +const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes, const u32 device_id, const u32 kernel_accel); u32 module_forced_outfile_format (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra); u32 module_hash_category (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra); const char *module_hash_name (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra); diff --git a/include/tuningdb.h b/include/tuningdb.h index 608a25cfd..b2cafff67 100644 --- a/include/tuningdb.h +++ b/include/tuningdb.h @@ -17,7 +17,7 @@ int sort_by_tuning_db_entry (const void *v1, const void *v2); int tuning_db_init (hashcat_ctx_t *hashcat_ctx); void tuning_db_destroy (hashcat_ctx_t *hashcat_ctx); -bool tuning_db_process_line (hashcat_ctx_t *hashcat_ctx, const char *line_buf, const int line_num, const int source); +bool tuning_db_process_line (hashcat_ctx_t *hashcat_ctx, const char *line_buf, const int line_num); tuning_db_entry_t *tuning_db_search (hashcat_ctx_t *hashcat_ctx, const char *device_name, const cl_device_type device_type, int attack_mode, const int hash_mode); #endif // HC_TUNINGDB_H diff --git a/include/types.h b/include/types.h index d02c1b783..8f265ab14 100644 --- a/include/types.h +++ b/include/types.h @@ -2067,6 +2067,7 @@ typedef struct hm_attrs bool threshold_slowdown_get_supported; bool throttle_get_supported; bool utilization_get_supported; + bool memoryused_get_supported; } hm_attrs_t; @@ -3013,7 +3014,7 @@ typedef struct module_ctx u32 (*module_dgst_size) (const hashconfig_t *, const user_options_t *, const user_options_extra_t *); bool (*module_dictstat_disable) (const hashconfig_t *, const user_options_t *, const user_options_extra_t *); u64 (*module_esalt_size) (const hashconfig_t *, const user_options_t *, const user_options_extra_t *); - const char *(*module_extra_tuningdb_block) (const hashconfig_t *, const user_options_t *, const user_options_extra_t *, const backend_ctx_t *, const hashes_t *); + const char *(*module_extra_tuningdb_block) (const hashconfig_t *, const user_options_t *, const user_options_extra_t *, const backend_ctx_t *, const hashes_t *, const u32, const u32); u32 (*module_forced_outfile_format) (const hashconfig_t *, const user_options_t *, const user_options_extra_t *); u32 (*module_hash_category) (const hashconfig_t *, const user_options_t *, const user_options_extra_t *); const char *(*module_hash_name) (const hashconfig_t *, const user_options_t *, const user_options_extra_t *); diff --git a/src/backend.c b/src/backend.c index c5b95a659..80d1bfd60 100644 --- a/src/backend.c +++ b/src/backend.c @@ -24,6 +24,7 @@ #include "dynloader.h" #include "backend.h" #include "terminal.h" +#include "hwmon.h" #if defined (__linux__) static const char *const dri_card0_path = "/dev/dri/card0"; @@ -9649,7 +9650,44 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) if (module_ctx->module_extra_tuningdb_block != MODULE_DEFAULT) { - const char *extra_tuningdb_block = module_ctx->module_extra_tuningdb_block (hashconfig, user_options, user_options_extra, backend_ctx, hashes); + // We need this because we can't trust CUDA/HIP to give us the real free device memory + // The only way to do so is through low level APIs + + for (int i = 0; i < 10; i++) + { + const u64 used_bytes = hm_get_memoryused_with_devices_idx (hashcat_ctx, device_id); + + if (used_bytes) + { + if ((used_bytes > (2ULL * 1024 * 1024 * 1024)) + || (used_bytes > (device_param->device_global_mem * 0.5))) + { + event_log_warning (hashcat_ctx, "* Device #%u: Memory usage is too high: %" PRIu64 "/%" PRIu64 ", waiting...", device_id + 1, used_bytes, device_param->device_global_mem); + + sleep (1); + + continue; + } + + device_param->device_available_mem -= used_bytes; + + break; + } + else + { + break; + } + } + + u32 _kernel_accel = 0; + + tuning_db_entry_t *tuningdb_entry = tuning_db_search (hashcat_ctx, device_param->device_name, device_param->opencl_device_type, user_options->attack_mode, hashconfig->hash_mode); + + if (tuningdb_entry != NULL) _kernel_accel = tuningdb_entry->kernel_accel; + + if (user_options->kernel_accel_chgd == true) _kernel_accel = user_options->kernel_accel; + + const char *extra_tuningdb_block = module_ctx->module_extra_tuningdb_block (hashconfig, user_options, user_options_extra, backend_ctx, hashes, device_id, _kernel_accel); char *lines_buf = hcstrdup (extra_tuningdb_block); @@ -9669,7 +9707,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) if (next[0] == '#') continue; - tuning_db_process_line (hashcat_ctx, next, line_num, 2); + tuning_db_process_line (hashcat_ctx, next, line_num); } while ((next = strtok_r ((char *) NULL, "\n", &saveptr)) != NULL); diff --git a/src/ext_nvml.c b/src/ext_nvml.c index 25911df14..e6d49cd08 100644 --- a/src/ext_nvml.c +++ b/src/ext_nvml.c @@ -149,6 +149,7 @@ int nvml_init (void *hashcat_ctx) HC_LOAD_FUNC(nvml, nvmlDeviceGetCurrentClocksThrottleReasons, NVML_DEVICE_GET_CURRENTCLOCKSTHROTTLEREASONS, NVML, 0); HC_LOAD_FUNC(nvml, nvmlDeviceGetSupportedClocksThrottleReasons, NVML_DEVICE_GET_SUPPORTEDCLOCKSTHROTTLEREASONS, NVML, 0); HC_LOAD_FUNC(nvml, nvmlDeviceGetPciInfo, NVML_DEVICE_GET_PCIINFO, NVML, 0); + HC_LOAD_FUNC(nvml, nvmlDeviceGetMemoryInfo, NVML_DEVICE_GET_MEMORYINFO, NVML, 0); return 0; } @@ -392,3 +393,24 @@ int hm_NVML_nvmlDeviceGetPciInfo (void *hashcat_ctx, nvmlDevice_t device, nvmlPc return 0; } + +int hm_NVML_nvmlDeviceGetMemoryInfo (void *hashcat_ctx, nvmlDevice_t device, nvmlMemory_t *mem) +{ + hwmon_ctx_t *hwmon_ctx = ((hashcat_ctx_t *) hashcat_ctx)->hwmon_ctx; + + NVML_PTR *nvml = (NVML_PTR *) hwmon_ctx->hm_nvml; + + const nvmlReturn_t nvml_rc = nvml->nvmlDeviceGetMemoryInfo (device, mem); + + if (nvml_rc != NVML_SUCCESS) + { + const char *string = hm_NVML_nvmlErrorString (nvml, nvml_rc); + + event_log_error (hashcat_ctx, "nvmlDeviceGetMemoryInfo(): %s", string); + + return -1; + } + + return 0; +} + diff --git a/src/ext_sysfs_amdgpu.c b/src/ext_sysfs_amdgpu.c index 1aa53b210..70f071649 100644 --- a/src/ext_sysfs_amdgpu.c +++ b/src/ext_sysfs_amdgpu.c @@ -441,3 +441,55 @@ int hm_SYSFS_AMDGPU_get_gpu_busy_percent (void *hashcat_ctx, const int backend_d return 0; } + +int hm_SYSFS_AMDGPU_get_mem_info_vram_used (void *hashcat_ctx, const int backend_device_idx, u64 *val) +{ + char *syspath = hm_SYSFS_AMDGPU_get_syspath_device (hashcat_ctx, backend_device_idx); + + if (syspath == NULL) return -1; + + char *path; + + hc_asprintf (&path, "%s/mem_info_vram_used", syspath); + + hcfree (syspath); + + HCFILE fp; + + if (hc_fopen (&fp, path, "r") == false) + { + event_log_error (hashcat_ctx, "%s: %s", path, strerror (errno)); + + hcfree (path); + + return -1; + } + + u64 mem_info_vram_used = 0; + + while (!hc_feof (&fp)) + { + char buf[HCBUFSIZ_TINY]; + + char *ptr = hc_fgets (buf, sizeof (buf), &fp); + + if (ptr == NULL) continue; + + size_t len = strlen (ptr); + + if (len < 1) continue; + + int rc = sscanf (ptr, "%" PRIu64, &mem_info_vram_used); + + if (rc == 1) break; + } + + hc_fclose (&fp); + + *val = mem_info_vram_used; + + hcfree (path); + + return 0; +} + diff --git a/src/hwmon.c b/src/hwmon.c index a0f24c644..4f5264b3d 100644 --- a/src/hwmon.c +++ b/src/hwmon.c @@ -1214,6 +1214,60 @@ int hm_get_throttle_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int back return -1; } +u64 hm_get_memoryused_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int backend_device_idx) +{ + hwmon_ctx_t *hwmon_ctx = hashcat_ctx->hwmon_ctx; + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + if (hwmon_ctx->enabled == false) return 0; + + if (hwmon_ctx->hm_device[backend_device_idx].memoryused_get_supported == false) return 0; + + if ((backend_ctx->devices_param[backend_device_idx].is_opencl == true) || (backend_ctx->devices_param[backend_device_idx].is_hip == true) || (backend_ctx->devices_param[backend_device_idx].is_cuda == true)) + { + if (backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_GPU) + { + if ((backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD) || (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)) + { + if (hwmon_ctx->hm_sysfs_amdgpu) + { + u64 used = 0; + + if (hm_SYSFS_AMDGPU_get_mem_info_vram_used (hashcat_ctx, backend_device_idx, &used) == -1) + { + hwmon_ctx->hm_device[backend_device_idx].memoryused_get_supported = false; + + return 0; + } + + return used; + } + } + + if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_NV) + { + if (hwmon_ctx->hm_nvml) + { + nvmlMemory_t mem; + + if (hm_NVML_nvmlDeviceGetMemoryInfo (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].nvml, &mem) == -1) + { + hwmon_ctx->hm_device[backend_device_idx].memoryused_get_supported = false; + + return 0; + } + + return mem.used; + } + } + } + } + + hwmon_ctx->hm_device[backend_device_idx].memoryused_get_supported = false; + + return 0; +} + int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx) { bridge_ctx_t *bridge_ctx = hashcat_ctx->bridge_ctx; @@ -1227,12 +1281,12 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx) if (bridge_ctx->enabled == true) backend_devices_cnt = 1; - #if !defined (WITH_HWMON) - return 0; - #endif // WITH_HWMON + //#if !defined (WITH_HWMON) + //return 0; + //#endif // WITH_HWMON if (user_options->usage > 0) return 0; - if (user_options->backend_info > 0) return 0; + //if (user_options->backend_info > 0) return 0; if (user_options->hash_info == true) return 0; if (user_options->keyspace == true) return 0; @@ -1241,7 +1295,9 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx) if (user_options->stdout_flag == true) return 0; if (user_options->version == true) return 0; if (user_options->identify == true) return 0; - if (user_options->hwmon == false) return 0; + //we need hwmon support to get free memory per device support + //its a joke, but there's no way around + //if (user_options->hwmon == false) return 0; hwmon_ctx->hm_device = (hm_attrs_t *) hccalloc (DEVICES_MAX, sizeof (hm_attrs_t)); @@ -1387,6 +1443,7 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx) hm_adapters_nvml[device_id].threshold_shutdown_get_supported = true; hm_adapters_nvml[device_id].threshold_slowdown_get_supported = true; hm_adapters_nvml[device_id].utilization_get_supported = true; + hm_adapters_nvml[device_id].memoryused_get_supported = true; } } } @@ -1419,6 +1476,7 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx) hm_adapters_nvml[device_id].threshold_shutdown_get_supported = true; hm_adapters_nvml[device_id].threshold_slowdown_get_supported = true; hm_adapters_nvml[device_id].utilization_get_supported = true; + hm_adapters_nvml[device_id].memoryused_get_supported = true; } } } @@ -1640,6 +1698,7 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx) hm_adapters_sysfs_amdgpu[device_id].memoryspeed_get_supported = true; hm_adapters_sysfs_amdgpu[device_id].temperature_get_supported = true; hm_adapters_sysfs_amdgpu[device_id].utilization_get_supported = true; + hm_adapters_sysfs_amdgpu[device_id].memoryused_get_supported = true; } } } @@ -1746,6 +1805,7 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx) hwmon_ctx->hm_device[backend_devices_idx].threshold_slowdown_get_supported |= hm_adapters_nvml[device_id].threshold_slowdown_get_supported; hwmon_ctx->hm_device[backend_devices_idx].throttle_get_supported |= hm_adapters_nvml[device_id].throttle_get_supported; hwmon_ctx->hm_device[backend_devices_idx].utilization_get_supported |= hm_adapters_nvml[device_id].utilization_get_supported; + hwmon_ctx->hm_device[backend_devices_idx].memoryused_get_supported |= hm_adapters_nvml[device_id].memoryused_get_supported; } if (hwmon_ctx->hm_nvapi) @@ -1875,6 +1935,7 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx) hwmon_ctx->hm_device[backend_devices_idx].threshold_slowdown_get_supported |= hm_adapters_sysfs_amdgpu[device_id].threshold_slowdown_get_supported; hwmon_ctx->hm_device[backend_devices_idx].throttle_get_supported |= hm_adapters_sysfs_amdgpu[device_id].throttle_get_supported; hwmon_ctx->hm_device[backend_devices_idx].utilization_get_supported |= hm_adapters_sysfs_amdgpu[device_id].utilization_get_supported; + hwmon_ctx->hm_device[backend_devices_idx].memoryused_get_supported |= hm_adapters_sysfs_amdgpu[device_id].memoryused_get_supported; } } @@ -1895,6 +1956,7 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx) hwmon_ctx->hm_device[backend_devices_idx].threshold_slowdown_get_supported |= hm_adapters_nvml[device_id].threshold_slowdown_get_supported; hwmon_ctx->hm_device[backend_devices_idx].throttle_get_supported |= hm_adapters_nvml[device_id].throttle_get_supported; hwmon_ctx->hm_device[backend_devices_idx].utilization_get_supported |= hm_adapters_nvml[device_id].utilization_get_supported; + hwmon_ctx->hm_device[backend_devices_idx].memoryused_get_supported |= hm_adapters_nvml[device_id].memoryused_get_supported; } if (hwmon_ctx->hm_nvapi) @@ -1927,6 +1989,7 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx) hm_get_threshold_slowdown_with_devices_idx (hashcat_ctx, backend_devices_idx); hm_get_throttle_with_devices_idx (hashcat_ctx, backend_devices_idx); hm_get_utilization_with_devices_idx (hashcat_ctx, backend_devices_idx); + hm_get_memoryused_with_devices_idx (hashcat_ctx, backend_devices_idx); } FREE_ADAPTERS; diff --git a/src/modules/module_08900.c b/src/modules/module_08900.c index 0865e8575..42fd456be 100644 --- a/src/modules/module_08900.c +++ b/src/modules/module_08900.c @@ -49,6 +49,8 @@ const char *module_st_pass (MAYBE_UNUSED const hashconfig_t *hashconfig, static const char *SIGNATURE_SCRYPT = "SCRYPT"; +static const u32 SCRYPT_THREADS = 32; + static const u64 SCRYPT_N = 16384; static const u64 SCRYPT_R = 8; static const u64 SCRYPT_P = 1; @@ -67,9 +69,16 @@ u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_ return kernel_loops_max; } +u32 module_kernel_threads_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) +{ + const u32 kernel_threads_min = (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : SCRYPT_THREADS; + + return kernel_threads_min; +} + u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { - const u32 kernel_threads_max = 32; + const u32 kernel_threads_max = (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : SCRYPT_THREADS; return kernel_threads_max; } @@ -84,90 +93,122 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con return pw_max; } -const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes) +u32 tmto = 0; + +const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes, const u32 device_id, const u32 kernel_accel) { + // preprocess tmto in case user has overridden + // it's important to set to 0 otherwise so we can postprocess tmto in that case + + tmto = (user_options->scrypt_tmto_chgd == true) ? user_options->scrypt_tmto : 0; + // we enforce the same configuration for all hashes, so this should be fine const u64 scrypt_N = (hashes->salts_buf[0].scrypt_N) ? hashes->salts_buf[0].scrypt_N : SCRYPT_N; const u64 scrypt_r = (hashes->salts_buf[0].scrypt_r) ? hashes->salts_buf[0].scrypt_r : SCRYPT_R; - const u64 req1 = 128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra); + const u64 size_per_accel = (128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra)) >> tmto; int lines_sz = 4096; char *lines_buf = hcmalloc (lines_sz); int lines_pos = 0; - for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++) + hc_device_param_t *device_param = &backend_ctx->devices_param[device_id]; + + const u32 device_processors = device_param->device_processors; + + const u64 available_mem = MIN (device_param->device_available_mem, (device_param->device_maxmem_alloc * 4)); + + u32 kernel_accel_new = device_processors; + + if (kernel_accel) { - hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx]; + // from command line or tuning db has priority - if (device_param->skipped == true) continue; - - const u64 avail = MIN (device_param->device_available_mem, (device_param->device_maxmem_alloc * 4)) - (2 * req1); - - char *new_device_name = hcstrdup (device_param->device_name); - - for (size_t i = 0; i < strlen (new_device_name); i++) - { - if (new_device_name[i] == ' ') new_device_name[i] = '_'; - } - - char *out_name = new_device_name; - - if (memcmp (new_device_name, "AMD_", 4) == 0) out_name += 4; - if (memcmp (new_device_name, "NVIDIA_", 7) == 0) out_name += 7; - - // ok, try to find a nice accel programmatically - - u32 accel = device_param->device_processors; + kernel_accel_new = user_options->kernel_accel; + } + else + { + // find a nice kernel_accel programmatically if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) { - // expect to change any of this - - if (avail < (req1 * accel)) // not enough memory + if ((size_per_accel * device_processors) > available_mem) // not enough memory { - const float multi = (float) avail / req1; + const float multi = (float) available_mem / size_per_accel; - accel = multi; + int accel_multi; - for (int i = 1; i <= 4; i++) // this is tmto + for (accel_multi = 1; accel_multi <= 2; accel_multi++) { - if (device_param->device_processors > accel) - { - accel = ((u64) multi << i) & ~3; - } + kernel_accel_new = multi * (1 << accel_multi); + + if (kernel_accel_new >= device_processors) break; + } + + // we need some space for tmps[], ... + + kernel_accel_new -= (1 << accel_multi); + + // clamp if close to device processors -- 10% good? + + if ((kernel_accel_new > device_processors) && ((kernel_accel_new - device_processors) <= (device_processors / 10))) + { + kernel_accel_new = device_processors; } } else { for (int i = 1; i <= 8; i++) { - if ((avail * 2) > (req1 * accel)) + if ((size_per_accel * device_processors * i) < available_mem) { - accel = device_param->device_processors * i; + kernel_accel_new = device_processors * i; } } } } else { - const u64 req1 = 128 * scrypt_r * scrypt_N; - for (int i = 1; i <= 8; i++) { - if (avail > (req1 * accel)) + if ((size_per_accel * device_processors * i) < available_mem) { - accel = device_param->device_processors * i; + kernel_accel_new = device_processors * i; } } } - - lines_pos += snprintf (lines_buf + lines_pos, lines_sz - lines_pos, "%s * %u 1 %u A\n", out_name, user_options->hash_mode, accel); - - hcfree (new_device_name); } + // fix tmto if user allows + + if (tmto == 0) + { + const u32 tmto_start = 1; + const u32 tmto_stop = 5; + + for (u32 tmto_new = tmto_start; tmto_new <= tmto_stop; tmto_new++) + { + if (available_mem > (kernel_accel_new * (size_per_accel >> tmto_new))) + { + tmto = tmto_new; + + break; + } + } + } + + char *new_device_name = hcstrdup (device_param->device_name); + + for (size_t i = 0; i < strlen (new_device_name); i++) + { + if (new_device_name[i] == ' ') new_device_name[i] = '_'; + } + + lines_pos += snprintf (lines_buf + lines_pos, lines_sz - lines_pos, "%s * %u 1 %u A\n", new_device_name, user_options->hash_mode, kernel_accel_new); + + hcfree (new_device_name); + return lines_buf; } @@ -179,115 +220,11 @@ u64 module_extra_buffer_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE const u64 scrypt_N = (hashes->salts_buf[0].scrypt_N) ? hashes->salts_buf[0].scrypt_N : SCRYPT_N; const u64 scrypt_r = (hashes->salts_buf[0].scrypt_r) ? hashes->salts_buf[0].scrypt_r : SCRYPT_R; - const u64 kernel_power_max = ((OPTS_TYPE & OPTS_TYPE_MP_MULTI_DISABLE) ? 1 : device_param->device_processors) * device_param->kernel_threads_max * device_param->kernel_accel_max; + const u64 size_per_accel = 128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra); - u64 tmto_start = 0; - u64 tmto_stop = 4; + u64 size_scrypt = size_per_accel * device_param->kernel_accel_max; - if (user_options->scrypt_tmto_chgd == true) - { - tmto_start = user_options->scrypt_tmto; - tmto_stop = user_options->scrypt_tmto; - } - - // size_pws - - const u64 size_pws = kernel_power_max * sizeof (pw_t); - - const u64 size_pws_amp = size_pws; - - // size_pws_comp - - const u64 size_pws_comp = kernel_power_max * (sizeof (u32) * 64); - - // size_pws_idx - - const u64 size_pws_idx = (kernel_power_max + 1) * sizeof (pw_idx_t); - - // size_tmps - - const u64 size_tmps = kernel_power_max * hashconfig->tmp_size; - - // size_hooks - - const u64 size_hooks = kernel_power_max * hashconfig->hook_size; - - u64 size_pws_pre = 4; - u64 size_pws_base = 4; - - if (user_options->slow_candidates == true) - { - // size_pws_pre - - size_pws_pre = kernel_power_max * sizeof (pw_pre_t); - - // size_pws_base - - size_pws_base = kernel_power_max * sizeof (pw_pre_t); - } - - // sometimes device_available_mem and device_maxmem_alloc reported back from the opencl runtime are a bit inaccurate. - // let's add some extra space just to be sure. - // now depends on the kernel-accel value (where scrypt and similar benefits), but also hard minimum 64mb and maximum 1024mb limit - - u64 EXTRA_SPACE = (1024ULL * 1024ULL) * device_param->kernel_accel_max; - - EXTRA_SPACE = MAX (EXTRA_SPACE, ( 64ULL * 1024ULL * 1024ULL)); - EXTRA_SPACE = MIN (EXTRA_SPACE, (1024ULL * 1024ULL * 1024ULL)); - - const u64 scrypt_extra_space - = device_param->size_bfs - + device_param->size_combs - + device_param->size_digests - + device_param->size_esalts - + device_param->size_markov_css - + device_param->size_plains - + device_param->size_results - + device_param->size_root_css - + device_param->size_rules - + device_param->size_rules_c - + device_param->size_salts - + device_param->size_shown - + device_param->size_tm - + device_param->size_st_digests - + device_param->size_st_salts - + device_param->size_st_esalts - + size_pws - + size_pws_amp - + size_pws_comp - + size_pws_idx - + size_tmps - + size_hooks - + size_pws_pre - + size_pws_base - + EXTRA_SPACE; - - bool not_enough_memory = true; - - u64 size_scrypt = 0; - - u64 tmto; - - for (tmto = tmto_start; tmto <= tmto_stop; tmto++) - { - size_scrypt = (128ULL * scrypt_r) * scrypt_N; - - size_scrypt /= 1ull << tmto; - - size_scrypt *= kernel_power_max; - - if ((size_scrypt / 4) > device_param->device_maxmem_alloc) continue; - - if ((size_scrypt + scrypt_extra_space) > device_param->device_available_mem) continue; - - not_enough_memory = false; - - break; - } - - if (not_enough_memory == true) return -1; - - return size_scrypt; + return size_scrypt / (1 << tmto); } u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) @@ -527,7 +464,7 @@ void module_init (module_ctx_t *module_ctx) module_ctx->module_kernel_loops_max = module_kernel_loops_max; module_ctx->module_kernel_loops_min = module_kernel_loops_min; module_ctx->module_kernel_threads_max = module_kernel_threads_max; - module_ctx->module_kernel_threads_min = MODULE_DEFAULT; + module_ctx->module_kernel_threads_min = module_kernel_threads_min; module_ctx->module_kern_type = module_kern_type; module_ctx->module_kern_type_dynamic = MODULE_DEFAULT; module_ctx->module_opti_type = module_opti_type; diff --git a/src/modules/module_09300.c b/src/modules/module_09300.c index 8f92e7fce..4f0f5bbb5 100644 --- a/src/modules/module_09300.c +++ b/src/modules/module_09300.c @@ -49,6 +49,8 @@ const char *module_st_pass (MAYBE_UNUSED const hashconfig_t *hashconfig, static const char *SIGNATURE_CISCO9 = "$9$"; +static const u32 SCRYPT_THREADS = 32; + static const u64 SCRYPT_N = 16384; static const u64 SCRYPT_R = 1; static const u64 SCRYPT_P = 1; @@ -67,9 +69,16 @@ u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_ return kernel_loops_max; } +u32 module_kernel_threads_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) +{ + const u32 kernel_threads_min = (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : SCRYPT_THREADS; + + return kernel_threads_min; +} + u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { - const u32 kernel_threads_max = 32; + const u32 kernel_threads_max = (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : SCRYPT_THREADS; return kernel_threads_max; } @@ -84,90 +93,122 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con return pw_max; } -const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes) +u32 tmto = 0; + +const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes, const u32 device_id, const u32 kernel_accel) { + // preprocess tmto in case user has overridden + // it's important to set to 0 otherwise so we can postprocess tmto in that case + + tmto = (user_options->scrypt_tmto_chgd == true) ? user_options->scrypt_tmto : 0; + // we enforce the same configuration for all hashes, so this should be fine const u64 scrypt_N = (hashes->salts_buf[0].scrypt_N) ? hashes->salts_buf[0].scrypt_N : SCRYPT_N; const u64 scrypt_r = (hashes->salts_buf[0].scrypt_r) ? hashes->salts_buf[0].scrypt_r : SCRYPT_R; - const u64 req1 = 128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra); + const u64 size_per_accel = (128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra)) >> tmto; int lines_sz = 4096; char *lines_buf = hcmalloc (lines_sz); int lines_pos = 0; - for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++) + hc_device_param_t *device_param = &backend_ctx->devices_param[device_id]; + + const u32 device_processors = device_param->device_processors; + + const u64 available_mem = MIN (device_param->device_available_mem, (device_param->device_maxmem_alloc * 4)); + + u32 kernel_accel_new = device_processors; + + if (kernel_accel) { - hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx]; + // from command line or tuning db has priority - if (device_param->skipped == true) continue; - - const u64 avail = MIN (device_param->device_available_mem, (device_param->device_maxmem_alloc * 4)) - (2 * req1); - - char *new_device_name = hcstrdup (device_param->device_name); - - for (size_t i = 0; i < strlen (new_device_name); i++) - { - if (new_device_name[i] == ' ') new_device_name[i] = '_'; - } - - char *out_name = new_device_name; - - if (memcmp (new_device_name, "AMD_", 4) == 0) out_name += 4; - if (memcmp (new_device_name, "NVIDIA_", 7) == 0) out_name += 7; - - // ok, try to find a nice accel programmatically - - u32 accel = device_param->device_processors; + kernel_accel_new = user_options->kernel_accel; + } + else + { + // find a nice kernel_accel programmatically if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) { - // expect to change any of this - - if (avail < (req1 * accel)) // not enough memory + if ((size_per_accel * device_processors) > available_mem) // not enough memory { - const float multi = (float) avail / req1; + const float multi = (float) available_mem / size_per_accel; - accel = multi; + int accel_multi; - for (int i = 1; i <= 4; i++) // this is tmto + for (accel_multi = 1; accel_multi <= 2; accel_multi++) { - if (device_param->device_processors > accel) - { - accel = ((u64) multi << i) & ~3; - } + kernel_accel_new = multi * (1 << accel_multi); + + if (kernel_accel_new >= device_processors) break; + } + + // we need some space for tmps[], ... + + kernel_accel_new -= (1 << accel_multi); + + // clamp if close to device processors -- 10% good? + + if ((kernel_accel_new > device_processors) && ((kernel_accel_new - device_processors) <= (device_processors / 10))) + { + kernel_accel_new = device_processors; } } else { for (int i = 1; i <= 8; i++) { - if ((avail * 2) > (req1 * accel)) + if ((size_per_accel * device_processors * i) < available_mem) { - accel = device_param->device_processors * i; + kernel_accel_new = device_processors * i; } } } } else { - const u64 req1 = 128 * scrypt_r * scrypt_N; - for (int i = 1; i <= 8; i++) { - if (avail > (req1 * accel)) + if ((size_per_accel * device_processors * i) < available_mem) { - accel = device_param->device_processors * i; + kernel_accel_new = device_processors * i; } } } - - lines_pos += snprintf (lines_buf + lines_pos, lines_sz - lines_pos, "%s * %u 1 %u A\n", out_name, user_options->hash_mode, accel); - - hcfree (new_device_name); } + // fix tmto if user allows + + if (tmto == 0) + { + const u32 tmto_start = 1; + const u32 tmto_stop = 5; + + for (u32 tmto_new = tmto_start; tmto_new <= tmto_stop; tmto_new++) + { + if (available_mem > (kernel_accel_new * (size_per_accel >> tmto_new))) + { + tmto = tmto_new; + + break; + } + } + } + + char *new_device_name = hcstrdup (device_param->device_name); + + for (size_t i = 0; i < strlen (new_device_name); i++) + { + if (new_device_name[i] == ' ') new_device_name[i] = '_'; + } + + lines_pos += snprintf (lines_buf + lines_pos, lines_sz - lines_pos, "%s * %u 1 %u A\n", new_device_name, user_options->hash_mode, kernel_accel_new); + + hcfree (new_device_name); + return lines_buf; } @@ -179,115 +220,11 @@ u64 module_extra_buffer_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE const u64 scrypt_N = (hashes->salts_buf[0].scrypt_N) ? hashes->salts_buf[0].scrypt_N : SCRYPT_N; const u64 scrypt_r = (hashes->salts_buf[0].scrypt_r) ? hashes->salts_buf[0].scrypt_r : SCRYPT_R; - const u64 kernel_power_max = ((OPTS_TYPE & OPTS_TYPE_MP_MULTI_DISABLE) ? 1 : device_param->device_processors) * device_param->kernel_threads_max * device_param->kernel_accel_max; + const u64 req1 = 128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra); - u64 tmto_start = 0; - u64 tmto_stop = 4; + u64 size_scrypt = req1 * device_param->kernel_accel_max; - if (user_options->scrypt_tmto_chgd == true) - { - tmto_start = user_options->scrypt_tmto; - tmto_stop = user_options->scrypt_tmto; - } - - // size_pws - - const u64 size_pws = kernel_power_max * sizeof (pw_t); - - const u64 size_pws_amp = size_pws; - - // size_pws_comp - - const u64 size_pws_comp = kernel_power_max * (sizeof (u32) * 64); - - // size_pws_idx - - const u64 size_pws_idx = (kernel_power_max + 1) * sizeof (pw_idx_t); - - // size_tmps - - const u64 size_tmps = kernel_power_max * hashconfig->tmp_size; - - // size_hooks - - const u64 size_hooks = kernel_power_max * hashconfig->hook_size; - - u64 size_pws_pre = 4; - u64 size_pws_base = 4; - - if (user_options->slow_candidates == true) - { - // size_pws_pre - - size_pws_pre = kernel_power_max * sizeof (pw_pre_t); - - // size_pws_base - - size_pws_base = kernel_power_max * sizeof (pw_pre_t); - } - - // sometimes device_available_mem and device_maxmem_alloc reported back from the opencl runtime are a bit inaccurate. - // let's add some extra space just to be sure. - // now depends on the kernel-accel value (where scrypt and similar benefits), but also hard minimum 64mb and maximum 1024mb limit - - u64 EXTRA_SPACE = (1024ULL * 1024ULL) * device_param->kernel_accel_max; - - EXTRA_SPACE = MAX (EXTRA_SPACE, ( 64ULL * 1024ULL * 1024ULL)); - EXTRA_SPACE = MIN (EXTRA_SPACE, (1024ULL * 1024ULL * 1024ULL)); - - const u64 scrypt_extra_space - = device_param->size_bfs - + device_param->size_combs - + device_param->size_digests - + device_param->size_esalts - + device_param->size_markov_css - + device_param->size_plains - + device_param->size_results - + device_param->size_root_css - + device_param->size_rules - + device_param->size_rules_c - + device_param->size_salts - + device_param->size_shown - + device_param->size_tm - + device_param->size_st_digests - + device_param->size_st_salts - + device_param->size_st_esalts - + size_pws - + size_pws_amp - + size_pws_comp - + size_pws_idx - + size_tmps - + size_hooks - + size_pws_pre - + size_pws_base - + EXTRA_SPACE; - - bool not_enough_memory = true; - - u64 size_scrypt = 0; - - u64 tmto; - - for (tmto = tmto_start; tmto <= tmto_stop; tmto++) - { - size_scrypt = (128ULL * scrypt_r) * scrypt_N; - - size_scrypt /= 1ull << tmto; - - size_scrypt *= kernel_power_max; - - if ((size_scrypt / 4) > device_param->device_maxmem_alloc) continue; - - if ((size_scrypt + scrypt_extra_space) > device_param->device_available_mem) continue; - - not_enough_memory = false; - - break; - } - - if (not_enough_memory == true) return -1; - - return size_scrypt; + return size_scrypt / (1 << tmto); } u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) @@ -488,7 +425,7 @@ void module_init (module_ctx_t *module_ctx) module_ctx->module_kernel_loops_max = module_kernel_loops_max; module_ctx->module_kernel_loops_min = module_kernel_loops_min; module_ctx->module_kernel_threads_max = module_kernel_threads_max; - module_ctx->module_kernel_threads_min = MODULE_DEFAULT; + module_ctx->module_kernel_threads_min = module_kernel_threads_min; module_ctx->module_kern_type = module_kern_type; module_ctx->module_kern_type_dynamic = MODULE_DEFAULT; module_ctx->module_opti_type = module_opti_type; diff --git a/src/modules/module_15700.c b/src/modules/module_15700.c index c7a357dd0..063106d2f 100644 --- a/src/modules/module_15700.c +++ b/src/modules/module_15700.c @@ -56,6 +56,8 @@ typedef struct ethereum_scrypt static const char *SIGNATURE_ETHEREUM_SCRYPT = "$ethereum$s"; +static const u32 SCRYPT_THREADS = 4; + static const u64 SCRYPT_N = 262144; static const u64 SCRYPT_R = 8; static const u64 SCRYPT_P = 1; @@ -74,9 +76,16 @@ u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_ return kernel_loops_max; } +u32 module_kernel_threads_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) +{ + const u32 kernel_threads_min = (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : SCRYPT_THREADS; + + return kernel_threads_min; +} + u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { - const u32 kernel_threads_max = 4; + const u32 kernel_threads_max = (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : SCRYPT_THREADS; return kernel_threads_max; } @@ -98,90 +107,122 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con return pw_max; } -const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes) +u32 tmto = 0; + +const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes, const u32 device_id, const u32 kernel_accel) { + // preprocess tmto in case user has overridden + // it's important to set to 0 otherwise so we can postprocess tmto in that case + + tmto = (user_options->scrypt_tmto_chgd == true) ? user_options->scrypt_tmto : 0; + // we enforce the same configuration for all hashes, so this should be fine const u64 scrypt_N = (hashes->salts_buf[0].scrypt_N) ? hashes->salts_buf[0].scrypt_N : SCRYPT_N; const u64 scrypt_r = (hashes->salts_buf[0].scrypt_r) ? hashes->salts_buf[0].scrypt_r : SCRYPT_R; - const u64 req1 = 128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra); + const u64 size_per_accel = (128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra)) >> tmto; int lines_sz = 4096; char *lines_buf = hcmalloc (lines_sz); int lines_pos = 0; - for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++) + hc_device_param_t *device_param = &backend_ctx->devices_param[device_id]; + + const u32 device_processors = device_param->device_processors; + + const u64 available_mem = MIN (device_param->device_available_mem, (device_param->device_maxmem_alloc * 4)); + + u32 kernel_accel_new = device_processors; + + if (kernel_accel) { - hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx]; + // from command line or tuning db has priority - if (device_param->skipped == true) continue; - - const u64 avail = MIN (device_param->device_available_mem, (device_param->device_maxmem_alloc * 4)) - (2 * req1); - - char *new_device_name = hcstrdup (device_param->device_name); - - for (size_t i = 0; i < strlen (new_device_name); i++) - { - if (new_device_name[i] == ' ') new_device_name[i] = '_'; - } - - char *out_name = new_device_name; - - if (memcmp (new_device_name, "AMD_", 4) == 0) out_name += 4; - if (memcmp (new_device_name, "NVIDIA_", 7) == 0) out_name += 7; - - // ok, try to find a nice accel programmatically - - u32 accel = device_param->device_processors; + kernel_accel_new = user_options->kernel_accel; + } + else + { + // find a nice kernel_accel programmatically if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) { - // expect to change any of this - - if (avail < (req1 * accel)) // not enough memory + if ((size_per_accel * device_processors) > available_mem) // not enough memory { - const float multi = (float) avail / req1; + const float multi = (float) available_mem / size_per_accel; - accel = multi; + int accel_multi; - for (int i = 1; i <= 4; i++) // this is tmto + for (accel_multi = 1; accel_multi <= 2; accel_multi++) { - if (device_param->device_processors > accel) - { - accel = ((u64) multi << i) & ~3; - } + kernel_accel_new = multi * (1 << accel_multi); + + if (kernel_accel_new >= device_processors) break; + } + + // we need some space for tmps[], ... + + kernel_accel_new -= (1 << accel_multi); + + // clamp if close to device processors -- 10% good? + + if ((kernel_accel_new > device_processors) && ((kernel_accel_new - device_processors) <= (device_processors / 10))) + { + kernel_accel_new = device_processors; } } else { for (int i = 1; i <= 8; i++) { - if ((avail * 2) > (req1 * accel)) + if ((size_per_accel * device_processors * i) < available_mem) { - accel = device_param->device_processors * i; + kernel_accel_new = device_processors * i; } } } } else { - const u64 req1 = 128 * scrypt_r * scrypt_N; - for (int i = 1; i <= 8; i++) { - if (avail > (req1 * accel)) + if ((size_per_accel * device_processors * i) < available_mem) { - accel = device_param->device_processors * i; + kernel_accel_new = device_processors * i; } } } - - lines_pos += snprintf (lines_buf + lines_pos, lines_sz - lines_pos, "%s * %u 1 %u A\n", out_name, user_options->hash_mode, accel); - - hcfree (new_device_name); } + // fix tmto if user allows + + if (tmto == 0) + { + const u32 tmto_start = 1; + const u32 tmto_stop = 5; + + for (u32 tmto_new = tmto_start; tmto_new <= tmto_stop; tmto_new++) + { + if (available_mem > (kernel_accel_new * (size_per_accel >> tmto_new))) + { + tmto = tmto_new; + + break; + } + } + } + + char *new_device_name = hcstrdup (device_param->device_name); + + for (size_t i = 0; i < strlen (new_device_name); i++) + { + if (new_device_name[i] == ' ') new_device_name[i] = '_'; + } + + lines_pos += snprintf (lines_buf + lines_pos, lines_sz - lines_pos, "%s * %u 1 %u A\n", new_device_name, user_options->hash_mode, kernel_accel_new); + + hcfree (new_device_name); + return lines_buf; } @@ -193,115 +234,11 @@ u64 module_extra_buffer_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE const u64 scrypt_N = (hashes->salts_buf[0].scrypt_N) ? hashes->salts_buf[0].scrypt_N : SCRYPT_N; const u64 scrypt_r = (hashes->salts_buf[0].scrypt_r) ? hashes->salts_buf[0].scrypt_r : SCRYPT_R; - const u64 kernel_power_max = ((OPTS_TYPE & OPTS_TYPE_MP_MULTI_DISABLE) ? 1 : device_param->device_processors) * device_param->kernel_threads_max * device_param->kernel_accel_max; + const u64 size_per_accel = 128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra); - u64 tmto_start = 0; - u64 tmto_stop = 4; + u64 size_scrypt = size_per_accel * device_param->kernel_accel_max; - if (user_options->scrypt_tmto_chgd == true) - { - tmto_start = user_options->scrypt_tmto; - tmto_stop = user_options->scrypt_tmto; - } - - // size_pws - - const u64 size_pws = kernel_power_max * sizeof (pw_t); - - const u64 size_pws_amp = size_pws; - - // size_pws_comp - - const u64 size_pws_comp = kernel_power_max * (sizeof (u32) * 64); - - // size_pws_idx - - const u64 size_pws_idx = (kernel_power_max + 1) * sizeof (pw_idx_t); - - // size_tmps - - const u64 size_tmps = kernel_power_max * hashconfig->tmp_size; - - // size_hooks - - const u64 size_hooks = kernel_power_max * hashconfig->hook_size; - - u64 size_pws_pre = 4; - u64 size_pws_base = 4; - - if (user_options->slow_candidates == true) - { - // size_pws_pre - - size_pws_pre = kernel_power_max * sizeof (pw_pre_t); - - // size_pws_base - - size_pws_base = kernel_power_max * sizeof (pw_pre_t); - } - - // sometimes device_available_mem and device_maxmem_alloc reported back from the opencl runtime are a bit inaccurate. - // let's add some extra space just to be sure. - // now depends on the kernel-accel value (where scrypt and similar benefits), but also hard minimum 64mb and maximum 1024mb limit - - u64 EXTRA_SPACE = (1024ULL * 1024ULL) * device_param->kernel_accel_max; - - EXTRA_SPACE = MAX (EXTRA_SPACE, ( 64ULL * 1024ULL * 1024ULL)); - EXTRA_SPACE = MIN (EXTRA_SPACE, (1024ULL * 1024ULL * 1024ULL)); - - const u64 scrypt_extra_space - = device_param->size_bfs - + device_param->size_combs - + device_param->size_digests - + device_param->size_esalts - + device_param->size_markov_css - + device_param->size_plains - + device_param->size_results - + device_param->size_root_css - + device_param->size_rules - + device_param->size_rules_c - + device_param->size_salts - + device_param->size_shown - + device_param->size_tm - + device_param->size_st_digests - + device_param->size_st_salts - + device_param->size_st_esalts - + size_pws - + size_pws_amp - + size_pws_comp - + size_pws_idx - + size_tmps - + size_hooks - + size_pws_pre - + size_pws_base - + EXTRA_SPACE; - - bool not_enough_memory = true; - - u64 size_scrypt = 0; - - u64 tmto; - - for (tmto = tmto_start; tmto <= tmto_stop; tmto++) - { - size_scrypt = (128ULL * scrypt_r) * scrypt_N; - - size_scrypt /= 1ull << tmto; - - size_scrypt *= kernel_power_max; - - if ((size_scrypt / 4) > device_param->device_maxmem_alloc) continue; - - if ((size_scrypt + scrypt_extra_space) > device_param->device_available_mem) continue; - - not_enough_memory = false; - - break; - } - - if (not_enough_memory == true) return -1; - - return size_scrypt; + return size_scrypt / (1 << tmto); } u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) @@ -587,7 +524,7 @@ void module_init (module_ctx_t *module_ctx) module_ctx->module_kernel_loops_max = module_kernel_loops_max; module_ctx->module_kernel_loops_min = module_kernel_loops_min; module_ctx->module_kernel_threads_max = module_kernel_threads_max; - module_ctx->module_kernel_threads_min = MODULE_DEFAULT; + module_ctx->module_kernel_threads_min = module_kernel_threads_min; module_ctx->module_kern_type = module_kern_type; module_ctx->module_kern_type_dynamic = MODULE_DEFAULT; module_ctx->module_opti_type = module_opti_type; diff --git a/src/modules/module_22700.c b/src/modules/module_22700.c index 30c106625..1b9113bd4 100644 --- a/src/modules/module_22700.c +++ b/src/modules/module_22700.c @@ -49,6 +49,8 @@ const char *module_st_pass (MAYBE_UNUSED const hashconfig_t *hashconfig, static const char *SIGNATURE_MULTIBIT = "$multibit$"; +static const u32 SCRYPT_THREADS = 32; + static const u64 SCRYPT_N = 16384; static const u64 SCRYPT_R = 8; static const u64 SCRYPT_P = 1; @@ -67,9 +69,16 @@ u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_ return kernel_loops_max; } +u32 module_kernel_threads_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) +{ + const u32 kernel_threads_min = (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : SCRYPT_THREADS; + + return kernel_threads_min; +} + u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { - const u32 kernel_threads_max = 32; + const u32 kernel_threads_max = (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : SCRYPT_THREADS; return kernel_threads_max; } @@ -84,90 +93,122 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con return pw_max; } -const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes) +u32 tmto = 0; + +const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes, const u32 device_id, const u32 kernel_accel) { + // preprocess tmto in case user has overridden + // it's important to set to 0 otherwise so we can postprocess tmto in that case + + tmto = (user_options->scrypt_tmto_chgd == true) ? user_options->scrypt_tmto : 0; + // we enforce the same configuration for all hashes, so this should be fine const u64 scrypt_N = (hashes->salts_buf[0].scrypt_N) ? hashes->salts_buf[0].scrypt_N : SCRYPT_N; const u64 scrypt_r = (hashes->salts_buf[0].scrypt_r) ? hashes->salts_buf[0].scrypt_r : SCRYPT_R; - const u64 req1 = 128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra); + const u64 size_per_accel = (128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra)) >> tmto; int lines_sz = 4096; char *lines_buf = hcmalloc (lines_sz); int lines_pos = 0; - for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++) + hc_device_param_t *device_param = &backend_ctx->devices_param[device_id]; + + const u32 device_processors = device_param->device_processors; + + const u64 available_mem = MIN (device_param->device_available_mem, (device_param->device_maxmem_alloc * 4)); + + u32 kernel_accel_new = device_processors; + + if (kernel_accel) { - hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx]; + // from command line or tuning db has priority - if (device_param->skipped == true) continue; - - const u64 avail = MIN (device_param->device_available_mem, (device_param->device_maxmem_alloc * 4)) - (2 * req1); - - char *new_device_name = hcstrdup (device_param->device_name); - - for (size_t i = 0; i < strlen (new_device_name); i++) - { - if (new_device_name[i] == ' ') new_device_name[i] = '_'; - } - - char *out_name = new_device_name; - - if (memcmp (new_device_name, "AMD_", 4) == 0) out_name += 4; - if (memcmp (new_device_name, "NVIDIA_", 7) == 0) out_name += 7; - - // ok, try to find a nice accel programmatically - - u32 accel = device_param->device_processors; + kernel_accel_new = user_options->kernel_accel; + } + else + { + // find a nice kernel_accel programmatically if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) { - // expect to change any of this - - if (avail < (req1 * accel)) // not enough memory + if ((size_per_accel * device_processors) > available_mem) // not enough memory { - const float multi = (float) avail / req1; + const float multi = (float) available_mem / size_per_accel; - accel = multi; + int accel_multi; - for (int i = 1; i <= 4; i++) // this is tmto + for (accel_multi = 1; accel_multi <= 2; accel_multi++) { - if (device_param->device_processors > accel) - { - accel = ((u64) multi << i) & ~3; - } + kernel_accel_new = multi * (1 << accel_multi); + + if (kernel_accel_new >= device_processors) break; + } + + // we need some space for tmps[], ... + + kernel_accel_new -= (1 << accel_multi); + + // clamp if close to device processors -- 10% good? + + if ((kernel_accel_new > device_processors) && ((kernel_accel_new - device_processors) <= (device_processors / 10))) + { + kernel_accel_new = device_processors; } } else { for (int i = 1; i <= 8; i++) { - if ((avail * 2) > (req1 * accel)) + if ((size_per_accel * device_processors * i) < available_mem) { - accel = device_param->device_processors * i; + kernel_accel_new = device_processors * i; } } } } else { - const u64 req1 = 128 * scrypt_r * scrypt_N; - for (int i = 1; i <= 8; i++) { - if (avail > (req1 * accel)) + if ((size_per_accel * device_processors * i) < available_mem) { - accel = device_param->device_processors * i; + kernel_accel_new = device_processors * i; } } } - - lines_pos += snprintf (lines_buf + lines_pos, lines_sz - lines_pos, "%s * %u 1 %u A\n", out_name, user_options->hash_mode, accel); - - hcfree (new_device_name); } + // fix tmto if user allows + + if (tmto == 0) + { + const u32 tmto_start = 1; + const u32 tmto_stop = 5; + + for (u32 tmto_new = tmto_start; tmto_new <= tmto_stop; tmto_new++) + { + if (available_mem > (kernel_accel_new * (size_per_accel >> tmto_new))) + { + tmto = tmto_new; + + break; + } + } + } + + char *new_device_name = hcstrdup (device_param->device_name); + + for (size_t i = 0; i < strlen (new_device_name); i++) + { + if (new_device_name[i] == ' ') new_device_name[i] = '_'; + } + + lines_pos += snprintf (lines_buf + lines_pos, lines_sz - lines_pos, "%s * %u 1 %u A\n", new_device_name, user_options->hash_mode, kernel_accel_new); + + hcfree (new_device_name); + return lines_buf; } @@ -179,115 +220,11 @@ u64 module_extra_buffer_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE const u64 scrypt_N = (hashes->salts_buf[0].scrypt_N) ? hashes->salts_buf[0].scrypt_N : SCRYPT_N; const u64 scrypt_r = (hashes->salts_buf[0].scrypt_r) ? hashes->salts_buf[0].scrypt_r : SCRYPT_R; - const u64 kernel_power_max = ((OPTS_TYPE & OPTS_TYPE_MP_MULTI_DISABLE) ? 1 : device_param->device_processors) * device_param->kernel_threads_max * device_param->kernel_accel_max; + const u64 size_per_accel = 128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra); - u64 tmto_start = 0; - u64 tmto_stop = 4; + u64 size_scrypt = size_per_accel * device_param->kernel_accel_max; - if (user_options->scrypt_tmto_chgd == true) - { - tmto_start = user_options->scrypt_tmto; - tmto_stop = user_options->scrypt_tmto; - } - - // size_pws - - const u64 size_pws = kernel_power_max * sizeof (pw_t); - - const u64 size_pws_amp = size_pws; - - // size_pws_comp - - const u64 size_pws_comp = kernel_power_max * (sizeof (u32) * 64); - - // size_pws_idx - - const u64 size_pws_idx = (kernel_power_max + 1) * sizeof (pw_idx_t); - - // size_tmps - - const u64 size_tmps = kernel_power_max * hashconfig->tmp_size; - - // size_hooks - - const u64 size_hooks = kernel_power_max * hashconfig->hook_size; - - u64 size_pws_pre = 4; - u64 size_pws_base = 4; - - if (user_options->slow_candidates == true) - { - // size_pws_pre - - size_pws_pre = kernel_power_max * sizeof (pw_pre_t); - - // size_pws_base - - size_pws_base = kernel_power_max * sizeof (pw_pre_t); - } - - // sometimes device_available_mem and device_maxmem_alloc reported back from the opencl runtime are a bit inaccurate. - // let's add some extra space just to be sure. - // now depends on the kernel-accel value (where scrypt and similar benefits), but also hard minimum 64mb and maximum 1024mb limit - - u64 EXTRA_SPACE = (1024ULL * 1024ULL) * device_param->kernel_accel_max; - - EXTRA_SPACE = MAX (EXTRA_SPACE, ( 64ULL * 1024ULL * 1024ULL)); - EXTRA_SPACE = MIN (EXTRA_SPACE, (1024ULL * 1024ULL * 1024ULL)); - - const u64 scrypt_extra_space - = device_param->size_bfs - + device_param->size_combs - + device_param->size_digests - + device_param->size_esalts - + device_param->size_markov_css - + device_param->size_plains - + device_param->size_results - + device_param->size_root_css - + device_param->size_rules - + device_param->size_rules_c - + device_param->size_salts - + device_param->size_shown - + device_param->size_tm - + device_param->size_st_digests - + device_param->size_st_salts - + device_param->size_st_esalts - + size_pws - + size_pws_amp - + size_pws_comp - + size_pws_idx - + size_tmps - + size_hooks - + size_pws_pre - + size_pws_base - + EXTRA_SPACE; - - bool not_enough_memory = true; - - u64 size_scrypt = 0; - - u64 tmto; - - for (tmto = tmto_start; tmto <= tmto_stop; tmto++) - { - size_scrypt = (128ULL * scrypt_r) * scrypt_N; - - size_scrypt /= 1ull << tmto; - - size_scrypt *= kernel_power_max; - - if ((size_scrypt / 4) > device_param->device_maxmem_alloc) continue; - - if ((size_scrypt + scrypt_extra_space) > device_param->device_available_mem) continue; - - not_enough_memory = false; - - break; - } - - if (not_enough_memory == true) return -1; - - return size_scrypt; + return size_scrypt / (1 << tmto); } u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) @@ -526,7 +463,7 @@ void module_init (module_ctx_t *module_ctx) module_ctx->module_kernel_loops_max = module_kernel_loops_max; module_ctx->module_kernel_loops_min = module_kernel_loops_min; module_ctx->module_kernel_threads_max = module_kernel_threads_max; - module_ctx->module_kernel_threads_min = MODULE_DEFAULT; + module_ctx->module_kernel_threads_min = module_kernel_threads_min; module_ctx->module_kern_type = module_kern_type; module_ctx->module_kern_type_dynamic = MODULE_DEFAULT; module_ctx->module_opti_type = module_opti_type; diff --git a/src/modules/module_24000.c b/src/modules/module_24000.c index 62217f2d8..159acbed0 100644 --- a/src/modules/module_24000.c +++ b/src/modules/module_24000.c @@ -57,27 +57,13 @@ typedef struct bestcrypt_scrypt // 16 is actually a bit low, we may need to change this depending on user response static const char *SIGNATURE_BESTCRYPT_SCRYPT = "$bcve$"; -static const u32 SCRYPT_MAX_ACCEL = 256; -static const u32 SCRYPT_MAX_THREADS = 4; + +static const u32 SCRYPT_THREADS = 16; static const u64 SCRYPT_N = 32768; static const u64 SCRYPT_R = 16; static const u64 SCRYPT_P = 1; -u32 module_kernel_accel_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) -{ - const u32 kernel_accel_min = 1; - - return kernel_accel_min; -} - -u32 module_kernel_accel_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) -{ - const u32 kernel_accel_max = (user_options->kernel_accel_chgd == true) ? user_options->kernel_accel : SCRYPT_MAX_ACCEL; - - return kernel_accel_max; -} - u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { const u32 kernel_loops_min = 1; @@ -94,14 +80,14 @@ u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_ u32 module_kernel_threads_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { - const u32 kernel_threads_min = 1; + const u32 kernel_threads_min = (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : SCRYPT_THREADS; return kernel_threads_min; } u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { - const u32 kernel_threads_max = (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : SCRYPT_MAX_THREADS; + const u32 kernel_threads_max = (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : SCRYPT_THREADS; return kernel_threads_max; } @@ -123,90 +109,122 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con return pw_max; } -const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes) +u32 tmto = 0; + +const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes, const u32 device_id, const u32 kernel_accel) { + // preprocess tmto in case user has overridden + // it's important to set to 0 otherwise so we can postprocess tmto in that case + + tmto = (user_options->scrypt_tmto_chgd == true) ? user_options->scrypt_tmto : 0; + // we enforce the same configuration for all hashes, so this should be fine const u64 scrypt_N = (hashes->salts_buf[0].scrypt_N) ? hashes->salts_buf[0].scrypt_N : SCRYPT_N; const u64 scrypt_r = (hashes->salts_buf[0].scrypt_r) ? hashes->salts_buf[0].scrypt_r : SCRYPT_R; - const u64 req1 = 128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra); + const u64 size_per_accel = (128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra)) >> tmto; int lines_sz = 4096; char *lines_buf = hcmalloc (lines_sz); int lines_pos = 0; - for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++) + hc_device_param_t *device_param = &backend_ctx->devices_param[device_id]; + + const u32 device_processors = device_param->device_processors; + + const u64 available_mem = MIN (device_param->device_available_mem, (device_param->device_maxmem_alloc * 4)); + + u32 kernel_accel_new = device_processors; + + if (kernel_accel) { - hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx]; + // from command line or tuning db has priority - if (device_param->skipped == true) continue; - - const u64 avail = MIN (device_param->device_available_mem, (device_param->device_maxmem_alloc * 4)) - (2 * req1); - - char *new_device_name = hcstrdup (device_param->device_name); - - for (size_t i = 0; i < strlen (new_device_name); i++) - { - if (new_device_name[i] == ' ') new_device_name[i] = '_'; - } - - char *out_name = new_device_name; - - if (memcmp (new_device_name, "AMD_", 4) == 0) out_name += 4; - if (memcmp (new_device_name, "NVIDIA_", 7) == 0) out_name += 7; - - // ok, try to find a nice accel programmatically - - u32 accel = device_param->device_processors; + kernel_accel_new = user_options->kernel_accel; + } + else + { + // find a nice kernel_accel programmatically if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) { - // expect to change any of this - - if (avail < (req1 * accel)) // not enough memory + if ((size_per_accel * device_processors) > available_mem) // not enough memory { - const float multi = (float) avail / req1; + const float multi = (float) available_mem / size_per_accel; - accel = multi; + int accel_multi; - for (int i = 1; i <= 4; i++) // this is tmto + for (accel_multi = 1; accel_multi <= 2; accel_multi++) { - if (device_param->device_processors > accel) - { - accel = ((u64) multi << i) & ~3; - } + kernel_accel_new = multi * (1 << accel_multi); + + if (kernel_accel_new >= device_processors) break; + } + + // we need some space for tmps[], ... + + kernel_accel_new -= (1 << accel_multi); + + // clamp if close to device processors -- 10% good? + + if ((kernel_accel_new > device_processors) && ((kernel_accel_new - device_processors) <= (device_processors / 10))) + { + kernel_accel_new = device_processors; } } else { for (int i = 1; i <= 8; i++) { - if ((avail * 2) > (req1 * accel)) + if ((size_per_accel * device_processors * i) < available_mem) { - accel = device_param->device_processors * i; + kernel_accel_new = device_processors * i; } } } } else { - const u64 req1 = 128 * scrypt_r * scrypt_N; - for (int i = 1; i <= 8; i++) { - if (avail > (req1 * accel)) + if ((size_per_accel * device_processors * i) < available_mem) { - accel = device_param->device_processors * i; + kernel_accel_new = device_processors * i; } } } - - lines_pos += snprintf (lines_buf + lines_pos, lines_sz - lines_pos, "%s * %u 1 %u A\n", out_name, user_options->hash_mode, accel); - - hcfree (new_device_name); } + // fix tmto if user allows + + if (tmto == 0) + { + const u32 tmto_start = 1; + const u32 tmto_stop = 5; + + for (u32 tmto_new = tmto_start; tmto_new <= tmto_stop; tmto_new++) + { + if (available_mem > (kernel_accel_new * (size_per_accel >> tmto_new))) + { + tmto = tmto_new; + + break; + } + } + } + + char *new_device_name = hcstrdup (device_param->device_name); + + for (size_t i = 0; i < strlen (new_device_name); i++) + { + if (new_device_name[i] == ' ') new_device_name[i] = '_'; + } + + lines_pos += snprintf (lines_buf + lines_pos, lines_sz - lines_pos, "%s * %u 1 %u A\n", new_device_name, user_options->hash_mode, kernel_accel_new); + + hcfree (new_device_name); + return lines_buf; } @@ -215,121 +233,14 @@ u64 module_extra_buffer_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // we need to set the self-test hash settings to pass the self-test // the decoder for the self-test is called after this function - const u32 scrypt_N = (hashes->salts_buf[0].scrypt_N) ? hashes->salts_buf[0].scrypt_N : SCRYPT_N; - const u32 scrypt_r = (hashes->salts_buf[0].scrypt_r) ? hashes->salts_buf[0].scrypt_r : SCRYPT_R; + const u64 scrypt_N = (hashes->salts_buf[0].scrypt_N) ? hashes->salts_buf[0].scrypt_N : SCRYPT_N; + const u64 scrypt_r = (hashes->salts_buf[0].scrypt_r) ? hashes->salts_buf[0].scrypt_r : SCRYPT_R; - const u64 kernel_power_max = ((OPTS_TYPE & OPTS_TYPE_MP_MULTI_DISABLE) ? 1 : device_param->device_processors) * device_param->kernel_threads_max * device_param->kernel_accel_max; + const u64 size_per_accel = 128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra); - u32 tmto_start = 1; - u32 tmto_stop = 6; + u64 size_scrypt = size_per_accel * device_param->kernel_accel_max; - if (user_options->scrypt_tmto) - { - tmto_start = user_options->scrypt_tmto; - tmto_stop = user_options->scrypt_tmto; - } - - // size_pws - - const u64 size_pws = kernel_power_max * sizeof (pw_t); - - const u64 size_pws_amp = size_pws; - - // size_pws_comp - - const u64 size_pws_comp = kernel_power_max * (sizeof (u32) * 64); - - // size_pws_idx - - const u64 size_pws_idx = (kernel_power_max + 1) * sizeof (pw_idx_t); - - // size_tmps - - const u64 size_tmps = kernel_power_max * hashconfig->tmp_size; - - // size_hooks - - const u64 size_hooks = kernel_power_max * hashconfig->hook_size; - -/* - u64 size_pws_pre = 4; - u64 size_pws_base = 4; - - if (user_options->slow_candidates == true) - { - // size_pws_pre - - size_pws_pre = kernel_power_max * sizeof (pw_pre_t); - - // size_pws_base - - size_pws_base = kernel_power_max * sizeof (pw_pre_t); - } -*/ - - // sometimes device_available_mem and device_maxmem_alloc reported back from the opencl runtime are a bit inaccurate. - // let's add some extra space just to be sure. - // now depends on the kernel-accel value (where scrypt and similar benefits), but also hard minimum 64mb and maximum 1024mb limit -/* - u64 EXTRA_SPACE = (1024ULL * 1024ULL) * device_param->kernel_accel_max; - - EXTRA_SPACE = MAX (EXTRA_SPACE, ( 64ULL * 1024ULL * 1024ULL)); - EXTRA_SPACE = MIN (EXTRA_SPACE, (1024ULL * 1024ULL * 1024ULL)); -*/ - const u64 scrypt_extra_space - = device_param->size_bfs - + device_param->size_combs - + device_param->size_digests - + device_param->size_esalts - + device_param->size_markov_css - + device_param->size_plains - + device_param->size_results - + device_param->size_root_css - + device_param->size_rules - + device_param->size_rules_c - + device_param->size_salts - + device_param->size_shown - + device_param->size_tm - + device_param->size_st_digests - + device_param->size_st_salts - + device_param->size_st_esalts - + size_pws - + size_pws_amp - + size_pws_comp - + size_pws_idx - + size_tmps - + size_hooks; -// + size_pws_pre -// + size_pws_base; -/* - + EXTRA_SPACE; -*/ - bool not_enough_memory = true; - - u64 size_scrypt = 0; - - u32 tmto; - - for (tmto = tmto_start; tmto <= tmto_stop; tmto++) - { - size_scrypt = (128ULL * scrypt_r) * scrypt_N; - - size_scrypt /= 1ull << tmto; - - size_scrypt *= kernel_power_max; - - if ((size_scrypt / 4) > device_param->device_maxmem_alloc) continue; - - if ((size_scrypt + scrypt_extra_space) > device_param->device_available_mem) continue; - - not_enough_memory = false; - - break; - } - - if (not_enough_memory == true) return -1; - - return size_scrypt; + return size_scrypt / (1 << tmto); } u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) @@ -593,8 +504,8 @@ void module_init (module_ctx_t *module_ctx) module_ctx->module_hook_size = MODULE_DEFAULT; module_ctx->module_jit_build_options = module_jit_build_options; module_ctx->module_jit_cache_disable = MODULE_DEFAULT; - module_ctx->module_kernel_accel_max = module_kernel_accel_max; - module_ctx->module_kernel_accel_min = module_kernel_accel_min; + module_ctx->module_kernel_accel_max = MODULE_DEFAULT; + module_ctx->module_kernel_accel_min = MODULE_DEFAULT; module_ctx->module_kernel_loops_max = module_kernel_loops_max; module_ctx->module_kernel_loops_min = module_kernel_loops_min; module_ctx->module_kernel_threads_max = module_kernel_threads_max; diff --git a/src/modules/module_27700.c b/src/modules/module_27700.c index 089deb5fa..fb3a31fa1 100644 --- a/src/modules/module_27700.c +++ b/src/modules/module_27700.c @@ -49,6 +49,8 @@ const char *module_st_pass (MAYBE_UNUSED const hashconfig_t *hashconfig, static const char *SIGNATURE_MULTIBIT = "$multibit$"; +static const u32 SCRYPT_THREADS = 32; + static const u64 SCRYPT_N = 16384; static const u64 SCRYPT_R = 8; static const u64 SCRYPT_P = 1; @@ -67,9 +69,16 @@ u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_ return kernel_loops_max; } +u32 module_kernel_threads_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) +{ + const u32 kernel_threads_min = (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : SCRYPT_THREADS; + + return kernel_threads_min; +} + u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { - const u32 kernel_threads_max = 32; + const u32 kernel_threads_max = (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : SCRYPT_THREADS; return kernel_threads_max; } @@ -84,90 +93,122 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con return pw_max; } -const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes) +u32 tmto = 0; + +const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes, const u32 device_id, const u32 kernel_accel) { + // preprocess tmto in case user has overridden + // it's important to set to 0 otherwise so we can postprocess tmto in that case + + tmto = (user_options->scrypt_tmto_chgd == true) ? user_options->scrypt_tmto : 0; + // we enforce the same configuration for all hashes, so this should be fine const u64 scrypt_N = (hashes->salts_buf[0].scrypt_N) ? hashes->salts_buf[0].scrypt_N : SCRYPT_N; const u64 scrypt_r = (hashes->salts_buf[0].scrypt_r) ? hashes->salts_buf[0].scrypt_r : SCRYPT_R; - const u64 req1 = 128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra); + const u64 size_per_accel = (128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra)) >> tmto; int lines_sz = 4096; char *lines_buf = hcmalloc (lines_sz); int lines_pos = 0; - for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++) + hc_device_param_t *device_param = &backend_ctx->devices_param[device_id]; + + const u32 device_processors = device_param->device_processors; + + const u64 available_mem = MIN (device_param->device_available_mem, (device_param->device_maxmem_alloc * 4)); + + u32 kernel_accel_new = device_processors; + + if (kernel_accel) { - hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx]; + // from command line or tuning db has priority - if (device_param->skipped == true) continue; - - const u64 avail = MIN (device_param->device_available_mem, (device_param->device_maxmem_alloc * 4)) - (2 * req1); - - char *new_device_name = hcstrdup (device_param->device_name); - - for (size_t i = 0; i < strlen (new_device_name); i++) - { - if (new_device_name[i] == ' ') new_device_name[i] = '_'; - } - - char *out_name = new_device_name; - - if (memcmp (new_device_name, "AMD_", 4) == 0) out_name += 4; - if (memcmp (new_device_name, "NVIDIA_", 7) == 0) out_name += 7; - - // ok, try to find a nice accel programmatically - - u32 accel = device_param->device_processors; + kernel_accel_new = user_options->kernel_accel; + } + else + { + // find a nice kernel_accel programmatically if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) { - // expect to change any of this - - if (avail < (req1 * accel)) // not enough memory + if ((size_per_accel * device_processors) > available_mem) // not enough memory { - const float multi = (float) avail / req1; + const float multi = (float) available_mem / size_per_accel; - accel = multi; + int accel_multi; - for (int i = 1; i <= 4; i++) // this is tmto + for (accel_multi = 1; accel_multi <= 2; accel_multi++) { - if (device_param->device_processors > accel) - { - accel = ((u64) multi << i) & ~3; - } + kernel_accel_new = multi * (1 << accel_multi); + + if (kernel_accel_new >= device_processors) break; + } + + // we need some space for tmps[], ... + + kernel_accel_new -= (1 << accel_multi); + + // clamp if close to device processors -- 10% good? + + if ((kernel_accel_new > device_processors) && ((kernel_accel_new - device_processors) <= (device_processors / 10))) + { + kernel_accel_new = device_processors; } } else { for (int i = 1; i <= 8; i++) { - if ((avail * 2) > (req1 * accel)) + if ((size_per_accel * device_processors * i) < available_mem) { - accel = device_param->device_processors * i; + kernel_accel_new = device_processors * i; } } } } else { - const u64 req1 = 128 * scrypt_r * scrypt_N; - for (int i = 1; i <= 8; i++) { - if (avail > (req1 * accel)) + if ((size_per_accel * device_processors * i) < available_mem) { - accel = device_param->device_processors * i; + kernel_accel_new = device_processors * i; } } } - - lines_pos += snprintf (lines_buf + lines_pos, lines_sz - lines_pos, "%s * %u 1 %u A\n", out_name, user_options->hash_mode, accel); - - hcfree (new_device_name); } + // fix tmto if user allows + + if (tmto == 0) + { + const u32 tmto_start = 1; + const u32 tmto_stop = 5; + + for (u32 tmto_new = tmto_start; tmto_new <= tmto_stop; tmto_new++) + { + if (available_mem > (kernel_accel_new * (size_per_accel >> tmto_new))) + { + tmto = tmto_new; + + break; + } + } + } + + char *new_device_name = hcstrdup (device_param->device_name); + + for (size_t i = 0; i < strlen (new_device_name); i++) + { + if (new_device_name[i] == ' ') new_device_name[i] = '_'; + } + + lines_pos += snprintf (lines_buf + lines_pos, lines_sz - lines_pos, "%s * %u 1 %u A\n", new_device_name, user_options->hash_mode, kernel_accel_new); + + hcfree (new_device_name); + return lines_buf; } @@ -179,115 +220,11 @@ u64 module_extra_buffer_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE const u64 scrypt_N = (hashes->salts_buf[0].scrypt_N) ? hashes->salts_buf[0].scrypt_N : SCRYPT_N; const u64 scrypt_r = (hashes->salts_buf[0].scrypt_r) ? hashes->salts_buf[0].scrypt_r : SCRYPT_R; - const u64 kernel_power_max = ((OPTS_TYPE & OPTS_TYPE_MP_MULTI_DISABLE) ? 1 : device_param->device_processors) * device_param->kernel_threads_max * device_param->kernel_accel_max; + const u64 size_per_accel = 128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra); - u64 tmto_start = 0; - u64 tmto_stop = 4; + u64 size_scrypt = size_per_accel * device_param->kernel_accel_max; - if (user_options->scrypt_tmto_chgd == true) - { - tmto_start = user_options->scrypt_tmto; - tmto_stop = user_options->scrypt_tmto; - } - - // size_pws - - const u64 size_pws = kernel_power_max * sizeof (pw_t); - - const u64 size_pws_amp = size_pws; - - // size_pws_comp - - const u64 size_pws_comp = kernel_power_max * (sizeof (u32) * 64); - - // size_pws_idx - - const u64 size_pws_idx = (kernel_power_max + 1) * sizeof (pw_idx_t); - - // size_tmps - - const u64 size_tmps = kernel_power_max * hashconfig->tmp_size; - - // size_hooks - - const u64 size_hooks = kernel_power_max * hashconfig->hook_size; - - u64 size_pws_pre = 4; - u64 size_pws_base = 4; - - if (user_options->slow_candidates == true) - { - // size_pws_pre - - size_pws_pre = kernel_power_max * sizeof (pw_pre_t); - - // size_pws_base - - size_pws_base = kernel_power_max * sizeof (pw_pre_t); - } - - // sometimes device_available_mem and device_maxmem_alloc reported back from the opencl runtime are a bit inaccurate. - // let's add some extra space just to be sure. - // now depends on the kernel-accel value (where scrypt and similar benefits), but also hard minimum 64mb and maximum 1024mb limit - - u64 EXTRA_SPACE = (1024ULL * 1024ULL) * device_param->kernel_accel_max; - - EXTRA_SPACE = MAX (EXTRA_SPACE, ( 64ULL * 1024ULL * 1024ULL)); - EXTRA_SPACE = MIN (EXTRA_SPACE, (1024ULL * 1024ULL * 1024ULL)); - - const u64 scrypt_extra_space - = device_param->size_bfs - + device_param->size_combs - + device_param->size_digests - + device_param->size_esalts - + device_param->size_markov_css - + device_param->size_plains - + device_param->size_results - + device_param->size_root_css - + device_param->size_rules - + device_param->size_rules_c - + device_param->size_salts - + device_param->size_shown - + device_param->size_tm - + device_param->size_st_digests - + device_param->size_st_salts - + device_param->size_st_esalts - + size_pws - + size_pws_amp - + size_pws_comp - + size_pws_idx - + size_tmps - + size_hooks - + size_pws_pre - + size_pws_base - + EXTRA_SPACE; - - bool not_enough_memory = true; - - u64 size_scrypt = 0; - - u64 tmto; - - for (tmto = tmto_start; tmto <= tmto_stop; tmto++) - { - size_scrypt = (128ULL * scrypt_r) * scrypt_N; - - size_scrypt /= 1ull << tmto; - - size_scrypt *= kernel_power_max; - - if ((size_scrypt / 4) > device_param->device_maxmem_alloc) continue; - - if ((size_scrypt + scrypt_extra_space) > device_param->device_available_mem) continue; - - not_enough_memory = false; - - break; - } - - if (not_enough_memory == true) return -1; - - return size_scrypt; + return size_scrypt / (1 << tmto); } u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) @@ -550,7 +487,7 @@ void module_init (module_ctx_t *module_ctx) module_ctx->module_kernel_loops_max = module_kernel_loops_max; module_ctx->module_kernel_loops_min = module_kernel_loops_min; module_ctx->module_kernel_threads_max = module_kernel_threads_max; - module_ctx->module_kernel_threads_min = MODULE_DEFAULT; + module_ctx->module_kernel_threads_min = module_kernel_threads_min; module_ctx->module_kern_type = module_kern_type; module_ctx->module_kern_type_dynamic = MODULE_DEFAULT; module_ctx->module_opti_type = module_opti_type; diff --git a/src/modules/module_28200.c b/src/modules/module_28200.c index 86a636adf..52a7adbdd 100644 --- a/src/modules/module_28200.c +++ b/src/modules/module_28200.c @@ -57,6 +57,8 @@ typedef struct exodus static const char *SIGNATURE_EXODUS = "EXODUS"; +static const u32 SCRYPT_THREADS = 32; + static const u64 SCRYPT_N = 16384; static const u64 SCRYPT_R = 8; static const u64 SCRYPT_P = 1; @@ -75,9 +77,16 @@ u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_ return kernel_loops_max; } +u32 module_kernel_threads_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) +{ + const u32 kernel_threads_min = (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : SCRYPT_THREADS; + + return kernel_threads_min; +} + u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { - const u32 kernel_threads_max = 32; + const u32 kernel_threads_max = (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : SCRYPT_THREADS; return kernel_threads_max; } @@ -96,90 +105,122 @@ u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED return esalt_size; } -const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes) +u32 tmto = 0; + +const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes, const u32 device_id, const u32 kernel_accel) { + // preprocess tmto in case user has overridden + // it's important to set to 0 otherwise so we can postprocess tmto in that case + + tmto = (user_options->scrypt_tmto_chgd == true) ? user_options->scrypt_tmto : 0; + // we enforce the same configuration for all hashes, so this should be fine const u64 scrypt_N = (hashes->salts_buf[0].scrypt_N) ? hashes->salts_buf[0].scrypt_N : SCRYPT_N; const u64 scrypt_r = (hashes->salts_buf[0].scrypt_r) ? hashes->salts_buf[0].scrypt_r : SCRYPT_R; - const u64 req1 = 128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra); + const u64 size_per_accel = (128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra)) >> tmto; int lines_sz = 4096; char *lines_buf = hcmalloc (lines_sz); int lines_pos = 0; - for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++) + hc_device_param_t *device_param = &backend_ctx->devices_param[device_id]; + + const u32 device_processors = device_param->device_processors; + + const u64 available_mem = MIN (device_param->device_available_mem, (device_param->device_maxmem_alloc * 4)); + + u32 kernel_accel_new = device_processors; + + if (kernel_accel) { - hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx]; + // from command line or tuning db has priority - if (device_param->skipped == true) continue; - - const u64 avail = MIN (device_param->device_available_mem, (device_param->device_maxmem_alloc * 4)) - (2 * req1); - - char *new_device_name = hcstrdup (device_param->device_name); - - for (size_t i = 0; i < strlen (new_device_name); i++) - { - if (new_device_name[i] == ' ') new_device_name[i] = '_'; - } - - char *out_name = new_device_name; - - if (memcmp (new_device_name, "AMD_", 4) == 0) out_name += 4; - if (memcmp (new_device_name, "NVIDIA_", 7) == 0) out_name += 7; - - // ok, try to find a nice accel programmatically - - u32 accel = device_param->device_processors; + kernel_accel_new = user_options->kernel_accel; + } + else + { + // find a nice kernel_accel programmatically if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) { - // expect to change any of this - - if (avail < (req1 * accel)) // not enough memory + if ((size_per_accel * device_processors) > available_mem) // not enough memory { - const float multi = (float) avail / req1; + const float multi = (float) available_mem / size_per_accel; - accel = multi; + int accel_multi; - for (int i = 1; i <= 4; i++) // this is tmto + for (accel_multi = 1; accel_multi <= 2; accel_multi++) { - if (device_param->device_processors > accel) - { - accel = ((u64) multi << i) & ~3; - } + kernel_accel_new = multi * (1 << accel_multi); + + if (kernel_accel_new >= device_processors) break; + } + + // we need some space for tmps[], ... + + kernel_accel_new -= (1 << accel_multi); + + // clamp if close to device processors -- 10% good? + + if ((kernel_accel_new > device_processors) && ((kernel_accel_new - device_processors) <= (device_processors / 10))) + { + kernel_accel_new = device_processors; } } else { for (int i = 1; i <= 8; i++) { - if ((avail * 2) > (req1 * accel)) + if ((size_per_accel * device_processors * i) < available_mem) { - accel = device_param->device_processors * i; + kernel_accel_new = device_processors * i; } } } } else { - const u64 req1 = 128 * scrypt_r * scrypt_N; - for (int i = 1; i <= 8; i++) { - if (avail > (req1 * accel)) + if ((size_per_accel * device_processors * i) < available_mem) { - accel = device_param->device_processors * i; + kernel_accel_new = device_processors * i; } } } - - lines_pos += snprintf (lines_buf + lines_pos, lines_sz - lines_pos, "%s * %u 1 %u A\n", out_name, user_options->hash_mode, accel); - - hcfree (new_device_name); } + // fix tmto if user allows + + if (tmto == 0) + { + const u32 tmto_start = 1; + const u32 tmto_stop = 5; + + for (u32 tmto_new = tmto_start; tmto_new <= tmto_stop; tmto_new++) + { + if (available_mem > (kernel_accel_new * (size_per_accel >> tmto_new))) + { + tmto = tmto_new; + + break; + } + } + } + + char *new_device_name = hcstrdup (device_param->device_name); + + for (size_t i = 0; i < strlen (new_device_name); i++) + { + if (new_device_name[i] == ' ') new_device_name[i] = '_'; + } + + lines_pos += snprintf (lines_buf + lines_pos, lines_sz - lines_pos, "%s * %u 1 %u A\n", new_device_name, user_options->hash_mode, kernel_accel_new); + + hcfree (new_device_name); + return lines_buf; } @@ -191,115 +232,11 @@ u64 module_extra_buffer_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE const u64 scrypt_N = (hashes->salts_buf[0].scrypt_N) ? hashes->salts_buf[0].scrypt_N : SCRYPT_N; const u64 scrypt_r = (hashes->salts_buf[0].scrypt_r) ? hashes->salts_buf[0].scrypt_r : SCRYPT_R; - const u64 kernel_power_max = ((OPTS_TYPE & OPTS_TYPE_MP_MULTI_DISABLE) ? 1 : device_param->device_processors) * device_param->kernel_threads_max * device_param->kernel_accel_max; + const u64 size_per_accel = 128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra); - u64 tmto_start = 0; - u64 tmto_stop = 4; + u64 size_scrypt = size_per_accel * device_param->kernel_accel_max; - if (user_options->scrypt_tmto_chgd == true) - { - tmto_start = user_options->scrypt_tmto; - tmto_stop = user_options->scrypt_tmto; - } - - // size_pws - - const u64 size_pws = kernel_power_max * sizeof (pw_t); - - const u64 size_pws_amp = size_pws; - - // size_pws_comp - - const u64 size_pws_comp = kernel_power_max * (sizeof (u32) * 64); - - // size_pws_idx - - const u64 size_pws_idx = (kernel_power_max + 1) * sizeof (pw_idx_t); - - // size_tmps - - const u64 size_tmps = kernel_power_max * hashconfig->tmp_size; - - // size_hooks - - const u64 size_hooks = kernel_power_max * hashconfig->hook_size; - - u64 size_pws_pre = 4; - u64 size_pws_base = 4; - - if (user_options->slow_candidates == true) - { - // size_pws_pre - - size_pws_pre = kernel_power_max * sizeof (pw_pre_t); - - // size_pws_base - - size_pws_base = kernel_power_max * sizeof (pw_pre_t); - } - - // sometimes device_available_mem and device_maxmem_alloc reported back from the opencl runtime are a bit inaccurate. - // let's add some extra space just to be sure. - // now depends on the kernel-accel value (where scrypt and similar benefits), but also hard minimum 64mb and maximum 1024mb limit - - u64 EXTRA_SPACE = (1024ULL * 1024ULL) * device_param->kernel_accel_max; - - EXTRA_SPACE = MAX (EXTRA_SPACE, ( 64ULL * 1024ULL * 1024ULL)); - EXTRA_SPACE = MIN (EXTRA_SPACE, (1024ULL * 1024ULL * 1024ULL)); - - const u64 scrypt_extra_space - = device_param->size_bfs - + device_param->size_combs - + device_param->size_digests - + device_param->size_esalts - + device_param->size_markov_css - + device_param->size_plains - + device_param->size_results - + device_param->size_root_css - + device_param->size_rules - + device_param->size_rules_c - + device_param->size_salts - + device_param->size_shown - + device_param->size_tm - + device_param->size_st_digests - + device_param->size_st_salts - + device_param->size_st_esalts - + size_pws - + size_pws_amp - + size_pws_comp - + size_pws_idx - + size_tmps - + size_hooks - + size_pws_pre - + size_pws_base - + EXTRA_SPACE; - - bool not_enough_memory = true; - - u64 size_scrypt = 0; - - u64 tmto; - - for (tmto = tmto_start; tmto <= tmto_stop; tmto++) - { - size_scrypt = (128ULL * scrypt_r) * scrypt_N; - - size_scrypt /= 1ull << tmto; - - size_scrypt *= kernel_power_max; - - if ((size_scrypt / 4) > device_param->device_maxmem_alloc) continue; - - if ((size_scrypt + scrypt_extra_space) > device_param->device_available_mem) continue; - - not_enough_memory = false; - - break; - } - - if (not_enough_memory == true) return -1; - - return size_scrypt; + return size_scrypt / (1 << tmto); } u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) @@ -634,7 +571,7 @@ void module_init (module_ctx_t *module_ctx) module_ctx->module_kernel_loops_max = module_kernel_loops_max; module_ctx->module_kernel_loops_min = module_kernel_loops_min; module_ctx->module_kernel_threads_max = module_kernel_threads_max; - module_ctx->module_kernel_threads_min = MODULE_DEFAULT; + module_ctx->module_kernel_threads_min = module_kernel_threads_min; module_ctx->module_kern_type = module_kern_type; module_ctx->module_kern_type_dynamic = MODULE_DEFAULT; module_ctx->module_opti_type = module_opti_type; diff --git a/src/modules/module_29800.c b/src/modules/module_29800.c index d1be6be39..633ef1978 100644 --- a/src/modules/module_29800.c +++ b/src/modules/module_29800.c @@ -49,6 +49,8 @@ const char *module_st_pass (MAYBE_UNUSED const hashconfig_t *hashconfig, static const char *SIGNATURE_BISQ = "$bisq$"; +static const u32 SCRYPT_THREADS = 16; + static const u64 SCRYPT_N = 32768; static const u64 SCRYPT_R = 8; static const u64 SCRYPT_P = 6; @@ -67,9 +69,16 @@ u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_ return kernel_loops_max; } +u32 module_kernel_threads_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) +{ + const u32 kernel_threads_min = (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : SCRYPT_THREADS; + + return kernel_threads_min; +} + u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { - const u32 kernel_threads_max = 32; + const u32 kernel_threads_max = (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : SCRYPT_THREADS; return kernel_threads_max; } @@ -91,90 +100,122 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con return pw_max; } -const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes) +u32 tmto = 0; + +const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes, const u32 device_id, const u32 kernel_accel) { + // preprocess tmto in case user has overridden + // it's important to set to 0 otherwise so we can postprocess tmto in that case + + tmto = (user_options->scrypt_tmto_chgd == true) ? user_options->scrypt_tmto : 0; + // we enforce the same configuration for all hashes, so this should be fine const u64 scrypt_N = (hashes->salts_buf[0].scrypt_N) ? hashes->salts_buf[0].scrypt_N : SCRYPT_N; const u64 scrypt_r = (hashes->salts_buf[0].scrypt_r) ? hashes->salts_buf[0].scrypt_r : SCRYPT_R; - const u64 req1 = 128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra); + const u64 size_per_accel = (128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra)) >> tmto; int lines_sz = 4096; char *lines_buf = hcmalloc (lines_sz); int lines_pos = 0; - for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++) + hc_device_param_t *device_param = &backend_ctx->devices_param[device_id]; + + const u32 device_processors = device_param->device_processors; + + const u64 available_mem = MIN (device_param->device_available_mem, (device_param->device_maxmem_alloc * 4)); + + u32 kernel_accel_new = device_processors; + + if (kernel_accel) { - hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx]; + // from command line or tuning db has priority - if (device_param->skipped == true) continue; - - const u64 avail = MIN (device_param->device_available_mem, (device_param->device_maxmem_alloc * 4)) - (2 * req1); - - char *new_device_name = hcstrdup (device_param->device_name); - - for (size_t i = 0; i < strlen (new_device_name); i++) - { - if (new_device_name[i] == ' ') new_device_name[i] = '_'; - } - - char *out_name = new_device_name; - - if (memcmp (new_device_name, "AMD_", 4) == 0) out_name += 4; - if (memcmp (new_device_name, "NVIDIA_", 7) == 0) out_name += 7; - - // ok, try to find a nice accel programmatically - - u32 accel = device_param->device_processors; + kernel_accel_new = user_options->kernel_accel; + } + else + { + // find a nice kernel_accel programmatically if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) { - // expect to change any of this - - if (avail < (req1 * accel)) // not enough memory + if ((size_per_accel * device_processors) > available_mem) // not enough memory { - const float multi = (float) avail / req1; + const float multi = (float) available_mem / size_per_accel; - accel = multi; + int accel_multi; - for (int i = 1; i <= 4; i++) // this is tmto + for (accel_multi = 1; accel_multi <= 2; accel_multi++) { - if (device_param->device_processors > accel) - { - accel = ((u64) multi << i) & ~3; - } + kernel_accel_new = multi * (1 << accel_multi); + + if (kernel_accel_new >= device_processors) break; + } + + // we need some space for tmps[], ... + + kernel_accel_new -= (1 << accel_multi); + + // clamp if close to device processors -- 10% good? + + if ((kernel_accel_new > device_processors) && ((kernel_accel_new - device_processors) <= (device_processors / 10))) + { + kernel_accel_new = device_processors; } } else { for (int i = 1; i <= 8; i++) { - if ((avail * 2) > (req1 * accel)) + if ((size_per_accel * device_processors * i) < available_mem) { - accel = device_param->device_processors * i; + kernel_accel_new = device_processors * i; } } } } else { - const u64 req1 = 128 * scrypt_r * scrypt_N; - for (int i = 1; i <= 8; i++) { - if (avail > (req1 * accel)) + if ((size_per_accel * device_processors * i) < available_mem) { - accel = device_param->device_processors * i; + kernel_accel_new = device_processors * i; } } } - - lines_pos += snprintf (lines_buf + lines_pos, lines_sz - lines_pos, "%s * %u 1 %u A\n", out_name, user_options->hash_mode, accel); - - hcfree (new_device_name); } + // fix tmto if user allows + + if (tmto == 0) + { + const u32 tmto_start = 1; + const u32 tmto_stop = 5; + + for (u32 tmto_new = tmto_start; tmto_new <= tmto_stop; tmto_new++) + { + if (available_mem > (kernel_accel_new * (size_per_accel >> tmto_new))) + { + tmto = tmto_new; + + break; + } + } + } + + char *new_device_name = hcstrdup (device_param->device_name); + + for (size_t i = 0; i < strlen (new_device_name); i++) + { + if (new_device_name[i] == ' ') new_device_name[i] = '_'; + } + + lines_pos += snprintf (lines_buf + lines_pos, lines_sz - lines_pos, "%s * %u 1 %u A\n", new_device_name, user_options->hash_mode, kernel_accel_new); + + hcfree (new_device_name); + return lines_buf; } @@ -186,115 +227,11 @@ u64 module_extra_buffer_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE const u64 scrypt_N = (hashes->salts_buf[0].scrypt_N) ? hashes->salts_buf[0].scrypt_N : SCRYPT_N; const u64 scrypt_r = (hashes->salts_buf[0].scrypt_r) ? hashes->salts_buf[0].scrypt_r : SCRYPT_R; - const u64 kernel_power_max = ((OPTS_TYPE & OPTS_TYPE_MP_MULTI_DISABLE) ? 1 : device_param->device_processors) * device_param->kernel_threads_max * device_param->kernel_accel_max; + const u64 size_per_accel = 128 * scrypt_r * scrypt_N * module_kernel_threads_max (hashconfig, user_options, user_options_extra); - u64 tmto_start = 0; - u64 tmto_stop = 4; + u64 size_scrypt = size_per_accel * device_param->kernel_accel_max; - if (user_options->scrypt_tmto_chgd == true) - { - tmto_start = user_options->scrypt_tmto; - tmto_stop = user_options->scrypt_tmto; - } - - // size_pws - - const u64 size_pws = kernel_power_max * sizeof (pw_t); - - const u64 size_pws_amp = size_pws; - - // size_pws_comp - - const u64 size_pws_comp = kernel_power_max * (sizeof (u32) * 64); - - // size_pws_idx - - const u64 size_pws_idx = (kernel_power_max + 1) * sizeof (pw_idx_t); - - // size_tmps - - const u64 size_tmps = kernel_power_max * hashconfig->tmp_size; - - // size_hooks - - const u64 size_hooks = kernel_power_max * hashconfig->hook_size; - - u64 size_pws_pre = 4; - u64 size_pws_base = 4; - - if (user_options->slow_candidates == true) - { - // size_pws_pre - - size_pws_pre = kernel_power_max * sizeof (pw_pre_t); - - // size_pws_base - - size_pws_base = kernel_power_max * sizeof (pw_pre_t); - } - - // sometimes device_available_mem and device_maxmem_alloc reported back from the opencl runtime are a bit inaccurate. - // let's add some extra space just to be sure. - // now depends on the kernel-accel value (where scrypt and similar benefits), but also hard minimum 64mb and maximum 1024mb limit - - u64 EXTRA_SPACE = (1024ULL * 1024ULL) * device_param->kernel_accel_max; - - EXTRA_SPACE = MAX (EXTRA_SPACE, ( 64ULL * 1024ULL * 1024ULL)); - EXTRA_SPACE = MIN (EXTRA_SPACE, (1024ULL * 1024ULL * 1024ULL)); - - const u64 scrypt_extra_space - = device_param->size_bfs - + device_param->size_combs - + device_param->size_digests - + device_param->size_esalts - + device_param->size_markov_css - + device_param->size_plains - + device_param->size_results - + device_param->size_root_css - + device_param->size_rules - + device_param->size_rules_c - + device_param->size_salts - + device_param->size_shown - + device_param->size_tm - + device_param->size_st_digests - + device_param->size_st_salts - + device_param->size_st_esalts - + size_pws - + size_pws_amp - + size_pws_comp - + size_pws_idx - + size_tmps - + size_hooks - + size_pws_pre - + size_pws_base - + EXTRA_SPACE; - - bool not_enough_memory = true; - - u64 size_scrypt = 0; - - u64 tmto; - - for (tmto = tmto_start; tmto <= tmto_stop; tmto++) - { - size_scrypt = (128ULL * scrypt_r) * scrypt_N; - - size_scrypt /= 1ull << tmto; - - size_scrypt *= kernel_power_max; - - if ((size_scrypt / 4) > device_param->device_maxmem_alloc) continue; - - if ((size_scrypt + scrypt_extra_space) > device_param->device_available_mem) continue; - - not_enough_memory = false; - - break; - } - - if (not_enough_memory == true) return -1; - - return size_scrypt; + return size_scrypt / (1 << tmto); } u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) @@ -557,7 +494,7 @@ void module_init (module_ctx_t *module_ctx) module_ctx->module_kernel_loops_max = module_kernel_loops_max; module_ctx->module_kernel_loops_min = module_kernel_loops_min; module_ctx->module_kernel_threads_max = module_kernel_threads_max; - module_ctx->module_kernel_threads_min = MODULE_DEFAULT; + module_ctx->module_kernel_threads_min = module_kernel_threads_min; module_ctx->module_kern_type = module_kern_type; module_ctx->module_kern_type_dynamic = MODULE_DEFAULT; module_ctx->module_opti_type = module_opti_type; diff --git a/src/tuningdb.c b/src/tuningdb.c index 406359ab4..1c5e6cb32 100644 --- a/src/tuningdb.c +++ b/src/tuningdb.c @@ -43,11 +43,6 @@ int sort_by_tuning_db_entry (const void *v1, const void *v2) if (res3 != 0) return (res3); - const int res4 = t1->source - - t2->source; - - if (res4 != 0) return (res4); - return 0; } @@ -118,7 +113,7 @@ int tuning_db_init (hashcat_ctx_t *hashcat_ctx) if (line_buf[0] == '#') continue; - tuning_db_process_line (hashcat_ctx, line_buf, line_num, 1); + tuning_db_process_line (hashcat_ctx, line_buf, line_num); } hcfree (buf); @@ -167,7 +162,7 @@ void tuning_db_destroy (hashcat_ctx_t *hashcat_ctx) memset (tuning_db, 0, sizeof (tuning_db_t)); } -bool tuning_db_process_line (hashcat_ctx_t *hashcat_ctx, const char *line_buf, const int line_num, const int source) +bool tuning_db_process_line (hashcat_ctx_t *hashcat_ctx, const char *line_buf, const int line_num) { tuning_db_t *tuning_db = hashcat_ctx->tuning_db; user_options_extra_t *user_options_extra = hashcat_ctx->user_options_extra; @@ -353,7 +348,6 @@ bool tuning_db_process_line (hashcat_ctx_t *hashcat_ctx, const char *line_buf, c entry->vector_width = vector_width; entry->kernel_accel = kernel_accel; entry->kernel_loops = kernel_loops; - entry->source = source; tuning_db->entry_cnt++; } @@ -430,12 +424,11 @@ static tuning_db_entry_t *tuning_db_search_real (hashcat_ctx_t *hashcat_ctx, con // this will produce all 2^3 combinations required - for (i = 0; i < 16; i++) + for (i = 0; i < 8; i++) { - s.source = (i & 1) ? 2 : 1; + s.device_name = (i & 1) ? "*" : device_name_nospace; s.attack_mode = (i & 2) ? -1 : attack_mode; s.hash_mode = (i & 4) ? -1 : hash_mode; - s.device_name = (i & 8) ? "*" : device_name_nospace; entry = (tuning_db_entry_t *) bsearch (&s, tuning_db->entry_buf, tuning_db->entry_cnt, sizeof (tuning_db_entry_t), sort_by_tuning_db_entry); @@ -443,7 +436,7 @@ static tuning_db_entry_t *tuning_db_search_real (hashcat_ctx_t *hashcat_ctx, con // in non-wildcard mode do some additional checks: - if ((i & 8) == 0) + if ((i & 1) == 0) { // in case we have an alias-name diff --git a/src/user_options.c b/src/user_options.c index 217e8d3f3..7dbe6567d 100644 --- a/src/user_options.c +++ b/src/user_options.c @@ -379,8 +379,8 @@ int user_options_getopt (hashcat_ctx_t *hashcat_ctx, int argc, char **argv) case IDX_INCREMENT_MAX: case IDX_HOOK_THREADS: case IDX_BACKEND_DEVICES_VIRTMULTI: - case IDX_BACKEND_DEVICES_VIRTHOST: - case IDX_BACKEND_DEVICES_KEEPFREE: + case IDX_BACKEND_DEVICES_VIRTHOST: + case IDX_BACKEND_DEVICES_KEEPFREE: case IDX_BENCHMARK_MAX: case IDX_BENCHMARK_MIN: #ifdef WITH_BRAIN @@ -816,14 +816,14 @@ int user_options_sanity (hashcat_ctx_t *hashcat_ctx) event_log_error (hashcat_ctx, "Invalid --backend-devices-virthost value specified."); return -1; - } + } if (user_options->backend_devices_keepfree > 100) { event_log_error (hashcat_ctx, "Invalid --backend-devices-keepfree value specified."); return -1; - } + } if (user_options->outfile_format == 0) { @@ -1895,6 +1895,14 @@ void user_options_preprocess (hashcat_ctx_t *hashcat_ctx) } #endif + if (user_options->hwmon == false) + { + // some algorithm, such as SCRYPT, depend on accurate free memory values + // the only way to get them is through low-level APIs such as nvml via hwmon + + user_options->hwmon = true; + } + if (user_options->stdout_flag) { user_options->hwmon = false; @@ -3325,8 +3333,8 @@ void user_options_logger (hashcat_ctx_t *hashcat_ctx) logfile_top_uint64 (user_options->skip); logfile_top_uint (user_options->attack_mode); logfile_top_uint (user_options->backend_devices_virtmulti); - logfile_top_uint (user_options->backend_devices_virthost); - logfile_top_uint (user_options->backend_devices_keepfree); + logfile_top_uint (user_options->backend_devices_virthost); + logfile_top_uint (user_options->backend_devices_keepfree); logfile_top_uint (user_options->benchmark); logfile_top_uint (user_options->benchmark_all); logfile_top_uint (user_options->benchmark_max); diff --git a/tunings/Module_08900.hctune b/tunings/Module_08900.hctune index ecaa0e353..46df052b5 100644 --- a/tunings/Module_08900.hctune +++ b/tunings/Module_08900.hctune @@ -24,4 +24,3 @@ # It's better to derive the tuning based on the hash information (handled by the hash-mode plugin). # The tunings from the hash-mode plugin may be slightly off, so if you have better values, you can hardcode them here. - diff --git a/tunings/Module_09300.hctune b/tunings/Module_09300.hctune index 3277390ab..d98505795 100644 --- a/tunings/Module_09300.hctune +++ b/tunings/Module_09300.hctune @@ -19,7 +19,3 @@ #Device Attack Hash Vector Kernel Kernel #Name Mode Type Width Accel Loops -GeForce_RTX_4090 * 9300 1 512 A -ALIAS_AMD_RX6900XT * 9300 1 720 A -ALIAS_AMD_RX7900XTX * 9300 1 840 A - diff --git a/tunings/Module_15700.hctune b/tunings/Module_15700.hctune index c19ae375e..a44bd5a9c 100644 --- a/tunings/Module_15700.hctune +++ b/tunings/Module_15700.hctune @@ -19,7 +19,3 @@ #Device Attack Hash Vector Kernel Kernel #Name Mode Type Width Accel Loops -GeForce_RTX_4090 * 15700 1 180 A -ALIAS_AMD_RX6900XT * 15700 1 56 A -ALIAS_AMD_RX7900XTX * 15700 1 92 A - diff --git a/tunings/Module_22700.hctune b/tunings/Module_22700.hctune index be4cd8a4a..c08bd7a51 100644 --- a/tunings/Module_22700.hctune +++ b/tunings/Module_22700.hctune @@ -19,7 +19,14 @@ #Device Attack Hash Vector Kernel Kernel #Name Mode Type Width Accel Loops -GeForce_RTX_4090 * 22700 1 180 A -ALIAS_AMD_RX6900XT * 22700 1 56 A -ALIAS_AMD_RX7900XTX * 22700 1 92 A +#Leaving this here as a reference +#GeForce_GTX_980 * 22700 1 28 A +#GeForce_GTX_1630 * 22700 1 11 A +#GeForce_RTX_2080_Ti * 22700 1 78 A +#GeForce_RTX_3090 * 22700 1 82 A +#GeForce_RTX_4090 * 22700 1 180 A +#ALIAS_AMD_RX480 * 22700 1 28 A +#ALIAS_AMD_Vega64 * 22700 1 28 A +#ALIAS_AMD_RX6900XT * 22700 1 56 A +#ALIAS_AMD_RX7900XTX * 22700 1 92 A diff --git a/tunings/Module_24000.hctune b/tunings/Module_24000.hctune index 71f61fe67..52e4b78bb 100644 --- a/tunings/Module_24000.hctune +++ b/tunings/Module_24000.hctune @@ -19,7 +19,3 @@ #Device Attack Hash Vector Kernel Kernel #Name Mode Type Width Accel Loops -GeForce_RTX_4090 * 24000 1 180 A -ALIAS_AMD_RX6900XT * 24000 1 56 A -ALIAS_AMD_RX7900XTX * 24000 1 92 A - diff --git a/tunings/Module_27700.hctune b/tunings/Module_27700.hctune index 32b5253b4..095c829f6 100644 --- a/tunings/Module_27700.hctune +++ b/tunings/Module_27700.hctune @@ -19,7 +19,3 @@ #Device Attack Hash Vector Kernel Kernel #Name Mode Type Width Accel Loops -GeForce_RTX_4090 * 27700 1 180 A -ALIAS_AMD_RX6900XT * 27700 1 56 A -ALIAS_AMD_RX7900XTX * 27700 1 92 A - diff --git a/tunings/Module_28200.hctune b/tunings/Module_28200.hctune index 50a09b89c..2759beb00 100644 --- a/tunings/Module_28200.hctune +++ b/tunings/Module_28200.hctune @@ -19,7 +19,3 @@ #Device Attack Hash Vector Kernel Kernel #Name Mode Type Width Accel Loops -GeForce_RTX_4090 * 28200 1 180 A -ALIAS_AMD_RX6900XT * 28200 1 56 A -ALIAS_AMD_RX7900XTX * 28200 1 92 A - diff --git a/tunings/Module_29800.hctune b/tunings/Module_29800.hctune index 31bea6286..ce9ebd31d 100644 --- a/tunings/Module_29800.hctune +++ b/tunings/Module_29800.hctune @@ -18,8 +18,3 @@ #Device Attack Hash Vector Kernel Kernel #Name Mode Type Width Accel Loops - -GeForce_RTX_4090 * 29800 1 180 A -ALIAS_AMD_RX6900XT * 29800 1 56 A -ALIAS_AMD_RX7900XTX * 29800 1 92 A -