2016-09-15 02:29:22 +00:00
|
|
|
/**
 * Author......: See docs/credits.txt
 * License.....: MIT
 */
|
|
|
|
|
|
|
|
#include "common.h"
|
|
|
|
#include "types.h"
|
|
|
|
#include "logging.h"
|
|
|
|
#include "memory.h"
|
|
|
|
#include "interface.h"
|
|
|
|
#include "timer.h"
|
|
|
|
#include "ext_OpenCL.h"
|
|
|
|
#include "ext_ADL.h"
|
|
|
|
#include "ext_nvapi.h"
|
|
|
|
#include "ext_nvml.h"
|
|
|
|
#include "ext_xnvctrl.h"
|
|
|
|
#include "mpsp.h"
|
|
|
|
#include "rp_cpu.h"
|
2016-09-15 14:02:52 +00:00
|
|
|
#include "tuningdb.h"
|
2016-09-20 11:18:47 +00:00
|
|
|
#include "thread.h"
|
2016-09-15 02:29:22 +00:00
|
|
|
#include "opencl.h"
|
2016-09-15 14:02:52 +00:00
|
|
|
#include "hwmon.h"
|
|
|
|
#include "restore.h"
|
2016-09-30 07:25:51 +00:00
|
|
|
#include "hashes.h"
|
2016-09-15 02:29:22 +00:00
|
|
|
#include "outfile.h"
|
|
|
|
#include "potfile.h"
|
|
|
|
#include "debugfile.h"
|
|
|
|
#include "loopback.h"
|
|
|
|
#include "data.h"
|
|
|
|
#include "status.h"
|
|
|
|
#include "shared.h"
|
|
|
|
#include "terminal.h"
|
|
|
|
#include "monitor.h"
|
|
|
|
|
|
|
|
// Global context container, defined in another translation unit (hence extern).
// thread_monitor() reads all of its context pointers (user options, hashes,
// OpenCL, hwmon, status, ...) from this object.
extern hc_global_data_t data;
|
|
|
|
|
|
|
|
void *thread_monitor (void *p)
|
|
|
|
{
|
2016-09-24 23:02:44 +00:00
|
|
|
restore_ctx_t *restore_ctx = data.restore_ctx;
|
2016-09-22 10:45:48 +00:00
|
|
|
user_options_t *user_options = data.user_options;
|
|
|
|
user_options_extra_t *user_options_extra = data.user_options_extra;
|
|
|
|
hashconfig_t *hashconfig = data.hashconfig;
|
|
|
|
hashes_t *hashes = data.hashes;
|
2016-09-29 13:19:12 +00:00
|
|
|
cpt_ctx_t *cpt_ctx = data.cpt_ctx;
|
2016-09-27 16:32:09 +00:00
|
|
|
straight_ctx_t *straight_ctx = data.straight_ctx;
|
|
|
|
combinator_ctx_t *combinator_ctx = data.combinator_ctx;
|
2016-09-25 23:18:00 +00:00
|
|
|
mask_ctx_t *mask_ctx = data.mask_ctx;
|
2016-09-27 16:32:09 +00:00
|
|
|
opencl_ctx_t *opencl_ctx = data.opencl_ctx;
|
2016-09-28 20:28:44 +00:00
|
|
|
hwmon_ctx_t *hwmon_ctx = data.hwmon_ctx;
|
2016-09-29 20:27:04 +00:00
|
|
|
status_ctx_t *status_ctx = data.status_ctx;
|
2016-09-22 09:56:06 +00:00
|
|
|
|
|
|
|
bool runtime_check = false;
|
|
|
|
bool remove_check = false;
|
|
|
|
bool status_check = false;
|
|
|
|
bool restore_check = false;
|
|
|
|
bool hwmon_check = false;
|
|
|
|
|
|
|
|
const int sleep_time = 1;
|
|
|
|
const int temp_threshold = 1; // degrees celcius
|
|
|
|
const int fan_speed_min = 15; // in percentage
|
|
|
|
const int fan_speed_max = 100;
|
|
|
|
|
|
|
|
if (user_options->runtime)
|
2016-09-15 02:29:22 +00:00
|
|
|
{
|
2016-09-22 09:56:06 +00:00
|
|
|
runtime_check = true;
|
2016-09-15 02:29:22 +00:00
|
|
|
}
|
|
|
|
|
2016-09-22 09:56:06 +00:00
|
|
|
if (user_options->restore_timer)
|
2016-09-15 02:29:22 +00:00
|
|
|
{
|
2016-09-22 09:56:06 +00:00
|
|
|
restore_check = true;
|
2016-09-15 02:29:22 +00:00
|
|
|
}
|
|
|
|
|
2016-09-22 09:56:06 +00:00
|
|
|
if ((user_options->remove == true) && (hashes->hashlist_mode == HL_MODE_FILE))
|
2016-09-15 02:29:22 +00:00
|
|
|
{
|
2016-09-22 09:56:06 +00:00
|
|
|
remove_check = true;
|
2016-09-15 02:29:22 +00:00
|
|
|
}
|
|
|
|
|
2016-09-22 09:56:06 +00:00
|
|
|
if (user_options->status == true)
|
2016-09-15 02:29:22 +00:00
|
|
|
{
|
2016-09-22 09:56:06 +00:00
|
|
|
status_check = true;
|
2016-09-15 02:29:22 +00:00
|
|
|
}
|
|
|
|
|
2016-09-22 09:56:06 +00:00
|
|
|
if (user_options->gpu_temp_disable == false)
|
2016-09-15 02:29:22 +00:00
|
|
|
{
|
2016-09-22 09:56:06 +00:00
|
|
|
hwmon_check = true;
|
2016-09-15 02:29:22 +00:00
|
|
|
}
|
|
|
|
|
2016-09-22 09:56:06 +00:00
|
|
|
if ((runtime_check == false) && (remove_check == false) && (status_check == false) && (restore_check == false) && (hwmon_check == false))
|
2016-09-15 02:29:22 +00:00
|
|
|
{
|
|
|
|
return (p);
|
|
|
|
}
|
|
|
|
|
2016-09-22 09:56:06 +00:00
|
|
|
// these variables are mainly used for fan control
|
|
|
|
|
|
|
|
int *fan_speed_chgd = (int *) mycalloc (opencl_ctx->devices_cnt, sizeof (int));
|
|
|
|
|
|
|
|
// temperature controller "loopback" values
|
|
|
|
|
|
|
|
int *temp_diff_old = (int *) mycalloc (opencl_ctx->devices_cnt, sizeof (int));
|
|
|
|
int *temp_diff_sum = (int *) mycalloc (opencl_ctx->devices_cnt, sizeof (int));
|
|
|
|
|
|
|
|
time_t last_temp_check_time;
|
|
|
|
|
|
|
|
time (&last_temp_check_time);
|
|
|
|
|
|
|
|
u32 slowdown_warnings = 0;
|
|
|
|
|
|
|
|
u32 restore_left = user_options->restore_timer;
|
|
|
|
u32 remove_left = user_options->remove_timer;
|
|
|
|
u32 status_left = user_options->status_timer;
|
|
|
|
|
2016-09-29 21:49:33 +00:00
|
|
|
while (status_ctx->shutdown_inner == false)
|
2016-09-15 02:29:22 +00:00
|
|
|
{
|
|
|
|
hc_sleep (sleep_time);
|
|
|
|
|
2016-09-29 21:25:29 +00:00
|
|
|
if (status_ctx->devices_status == STATUS_INIT) continue;
|
2016-09-15 02:29:22 +00:00
|
|
|
|
2016-09-22 09:56:06 +00:00
|
|
|
if (hwmon_check == true)
|
2016-09-15 02:29:22 +00:00
|
|
|
{
|
2016-09-29 22:04:12 +00:00
|
|
|
hc_thread_mutex_lock (status_ctx->mux_hwmon);
|
2016-09-15 02:29:22 +00:00
|
|
|
|
2016-09-15 14:02:52 +00:00
|
|
|
for (uint device_id = 0; device_id < opencl_ctx->devices_cnt; device_id++)
|
2016-09-15 02:29:22 +00:00
|
|
|
{
|
2016-09-15 14:02:52 +00:00
|
|
|
hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
|
2016-09-15 02:29:22 +00:00
|
|
|
|
|
|
|
if (device_param->skipped) continue;
|
|
|
|
|
|
|
|
if (device_param->device_vendor_id == VENDOR_ID_NV)
|
|
|
|
{
|
2016-09-28 20:28:44 +00:00
|
|
|
if (hwmon_ctx->hm_nvapi)
|
2016-09-15 02:29:22 +00:00
|
|
|
{
|
|
|
|
NV_GPU_PERF_POLICIES_INFO_PARAMS_V1 perfPolicies_info;
|
|
|
|
NV_GPU_PERF_POLICIES_STATUS_PARAMS_V1 perfPolicies_status;
|
|
|
|
|
|
|
|
memset (&perfPolicies_info, 0, sizeof (NV_GPU_PERF_POLICIES_INFO_PARAMS_V1));
|
|
|
|
memset (&perfPolicies_status, 0, sizeof (NV_GPU_PERF_POLICIES_STATUS_PARAMS_V1));
|
|
|
|
|
|
|
|
perfPolicies_info.version = MAKE_NVAPI_VERSION (NV_GPU_PERF_POLICIES_INFO_PARAMS_V1, 1);
|
|
|
|
perfPolicies_status.version = MAKE_NVAPI_VERSION (NV_GPU_PERF_POLICIES_STATUS_PARAMS_V1, 1);
|
|
|
|
|
2016-09-28 20:28:44 +00:00
|
|
|
hm_NvAPI_GPU_GetPerfPoliciesInfo (hwmon_ctx->hm_nvapi, hwmon_ctx->hm_device[device_id].nvapi, &perfPolicies_info);
|
2016-09-15 02:29:22 +00:00
|
|
|
|
|
|
|
perfPolicies_status.info_value = perfPolicies_info.info_value;
|
|
|
|
|
2016-09-28 20:28:44 +00:00
|
|
|
hm_NvAPI_GPU_GetPerfPoliciesStatus (hwmon_ctx->hm_nvapi, hwmon_ctx->hm_device[device_id].nvapi, &perfPolicies_status);
|
2016-09-15 02:29:22 +00:00
|
|
|
|
|
|
|
if (perfPolicies_status.throttle & 2)
|
|
|
|
{
|
|
|
|
if (slowdown_warnings < 3)
|
|
|
|
{
|
2016-09-22 09:56:06 +00:00
|
|
|
if (user_options->quiet == false) clear_prompt ();
|
2016-09-15 02:29:22 +00:00
|
|
|
|
|
|
|
log_info ("WARNING: Drivers temperature threshold hit on GPU #%d, expect performance to drop...", device_id + 1);
|
|
|
|
|
|
|
|
if (slowdown_warnings == 2)
|
|
|
|
{
|
|
|
|
log_info ("");
|
|
|
|
}
|
|
|
|
|
2016-09-22 09:56:06 +00:00
|
|
|
if (user_options->quiet == false) send_prompt ();
|
2016-09-15 02:29:22 +00:00
|
|
|
|
|
|
|
slowdown_warnings++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
slowdown_warnings = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-09-29 22:04:12 +00:00
|
|
|
hc_thread_mutex_unlock (status_ctx->mux_hwmon);
|
2016-09-15 02:29:22 +00:00
|
|
|
}
|
|
|
|
|
2016-09-22 09:56:06 +00:00
|
|
|
if (hwmon_check == true)
|
2016-09-15 02:29:22 +00:00
|
|
|
{
|
2016-09-29 22:04:12 +00:00
|
|
|
hc_thread_mutex_lock (status_ctx->mux_hwmon);
|
2016-09-15 02:29:22 +00:00
|
|
|
|
|
|
|
time_t temp_check_time;
|
|
|
|
|
|
|
|
time (&temp_check_time);
|
|
|
|
|
|
|
|
uint Ta = temp_check_time - last_temp_check_time; // set Ta = sleep_time; is not good enough (see --remove etc)
|
|
|
|
|
|
|
|
if (Ta == 0) Ta = 1;
|
|
|
|
|
2016-09-15 14:02:52 +00:00
|
|
|
for (uint device_id = 0; device_id < opencl_ctx->devices_cnt; device_id++)
|
2016-09-15 02:29:22 +00:00
|
|
|
{
|
2016-09-15 14:02:52 +00:00
|
|
|
hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
|
2016-09-15 02:29:22 +00:00
|
|
|
|
|
|
|
if (device_param->skipped) continue;
|
|
|
|
|
2016-09-15 14:02:52 +00:00
|
|
|
if ((opencl_ctx->devices_param[device_id].device_type & CL_DEVICE_TYPE_GPU) == 0) continue;
|
2016-09-15 02:29:22 +00:00
|
|
|
|
2016-09-28 20:28:44 +00:00
|
|
|
const int temperature = hm_get_temperature_with_device_id (hwmon_ctx, opencl_ctx, device_id);
|
2016-09-15 02:29:22 +00:00
|
|
|
|
2016-09-22 09:56:06 +00:00
|
|
|
if (temperature > (int) user_options->gpu_temp_abort)
|
2016-09-15 02:29:22 +00:00
|
|
|
{
|
|
|
|
log_error ("ERROR: Temperature limit on GPU %d reached, aborting...", device_id + 1);
|
|
|
|
|
2016-09-29 21:25:29 +00:00
|
|
|
myabort (status_ctx);
|
2016-09-15 02:29:22 +00:00
|
|
|
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2016-09-22 09:56:06 +00:00
|
|
|
const u32 gpu_temp_retain = user_options->gpu_temp_retain;
|
2016-09-15 02:29:22 +00:00
|
|
|
|
|
|
|
if (gpu_temp_retain)
|
|
|
|
{
|
2016-09-28 20:28:44 +00:00
|
|
|
if (hwmon_ctx->hm_device[device_id].fan_set_supported == 1)
|
2016-09-15 02:29:22 +00:00
|
|
|
{
|
|
|
|
int temp_cur = temperature;
|
|
|
|
|
|
|
|
int temp_diff_new = gpu_temp_retain - temp_cur;
|
|
|
|
|
|
|
|
temp_diff_sum[device_id] = temp_diff_sum[device_id] + temp_diff_new;
|
|
|
|
|
|
|
|
// calculate Ta value (time difference in seconds between the last check and this check)
|
|
|
|
|
|
|
|
last_temp_check_time = temp_check_time;
|
|
|
|
|
|
|
|
float Kp = 1.8f;
|
|
|
|
float Ki = 0.005f;
|
|
|
|
float Kd = 6;
|
|
|
|
|
|
|
|
// PID controller (3-term controller: proportional - Kp, integral - Ki, derivative - Kd)
|
|
|
|
|
|
|
|
int fan_diff_required = (int) (Kp * (float)temp_diff_new + Ki * Ta * (float)temp_diff_sum[device_id] + Kd * ((float)(temp_diff_new - temp_diff_old[device_id])) / Ta);
|
|
|
|
|
|
|
|
if (abs (fan_diff_required) >= temp_threshold)
|
|
|
|
{
|
2016-09-28 20:28:44 +00:00
|
|
|
const int fan_speed_cur = hm_get_fanspeed_with_device_id (hwmon_ctx, opencl_ctx, device_id);
|
2016-09-15 02:29:22 +00:00
|
|
|
|
|
|
|
int fan_speed_level = fan_speed_cur;
|
|
|
|
|
|
|
|
if (fan_speed_chgd[device_id] == 0) fan_speed_level = temp_cur;
|
|
|
|
|
|
|
|
int fan_speed_new = fan_speed_level - fan_diff_required;
|
|
|
|
|
|
|
|
if (fan_speed_new > fan_speed_max) fan_speed_new = fan_speed_max;
|
|
|
|
if (fan_speed_new < fan_speed_min) fan_speed_new = fan_speed_min;
|
|
|
|
|
|
|
|
if (fan_speed_new != fan_speed_cur)
|
|
|
|
{
|
|
|
|
int freely_change_fan_speed = (fan_speed_chgd[device_id] == 1);
|
|
|
|
int fan_speed_must_change = (fan_speed_new > fan_speed_cur);
|
|
|
|
|
|
|
|
if ((freely_change_fan_speed == 1) || (fan_speed_must_change == 1))
|
|
|
|
{
|
|
|
|
if (device_param->device_vendor_id == VENDOR_ID_AMD)
|
|
|
|
{
|
2016-09-28 20:28:44 +00:00
|
|
|
hm_set_fanspeed_with_device_id_adl (hwmon_ctx, device_id, fan_speed_new, 1);
|
2016-09-15 02:29:22 +00:00
|
|
|
}
|
|
|
|
else if (device_param->device_vendor_id == VENDOR_ID_NV)
|
|
|
|
{
|
|
|
|
#if defined (_WIN)
|
2016-09-28 20:28:44 +00:00
|
|
|
hm_set_fanspeed_with_device_id_nvapi (hwmon_ctx, device_id, fan_speed_new, 1);
|
2016-09-15 02:29:22 +00:00
|
|
|
#endif
|
|
|
|
|
|
|
|
#if defined (__linux__)
|
2016-09-28 20:28:44 +00:00
|
|
|
hm_set_fanspeed_with_device_id_xnvctrl (hwmon_ctx, device_id, fan_speed_new);
|
2016-09-15 02:29:22 +00:00
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
|
|
|
fan_speed_chgd[device_id] = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
temp_diff_old[device_id] = temp_diff_new;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-09-29 22:04:12 +00:00
|
|
|
hc_thread_mutex_unlock (status_ctx->mux_hwmon);
|
2016-09-15 02:29:22 +00:00
|
|
|
}
|
|
|
|
|
2016-09-22 09:56:06 +00:00
|
|
|
if (restore_check == true)
|
2016-09-15 02:29:22 +00:00
|
|
|
{
|
|
|
|
restore_left--;
|
|
|
|
|
2016-09-22 09:56:06 +00:00
|
|
|
if (restore_left == 0)
|
2016-09-15 02:29:22 +00:00
|
|
|
{
|
2016-09-24 23:02:44 +00:00
|
|
|
if (user_options->restore_disable == 0) cycle_restore (restore_ctx, opencl_ctx);
|
2016-09-15 02:29:22 +00:00
|
|
|
|
2016-09-22 09:56:06 +00:00
|
|
|
restore_left = user_options->restore_timer;
|
2016-09-15 02:29:22 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-09-29 21:49:33 +00:00
|
|
|
if ((runtime_check == true) && (status_ctx->runtime_start > 0))
|
2016-09-15 02:29:22 +00:00
|
|
|
{
|
2016-09-29 21:49:33 +00:00
|
|
|
double ms_paused = status_ctx->ms_paused;
|
2016-09-15 02:29:22 +00:00
|
|
|
|
2016-09-29 21:25:29 +00:00
|
|
|
if (status_ctx->devices_status == STATUS_PAUSED)
|
2016-09-15 02:29:22 +00:00
|
|
|
{
|
|
|
|
double ms_paused_tmp = 0;
|
|
|
|
|
2016-09-29 21:49:33 +00:00
|
|
|
hc_timer_get (status_ctx->timer_paused, ms_paused_tmp);
|
2016-09-15 02:29:22 +00:00
|
|
|
|
|
|
|
ms_paused += ms_paused_tmp;
|
|
|
|
}
|
|
|
|
|
|
|
|
time_t runtime_cur;
|
|
|
|
|
|
|
|
time (&runtime_cur);
|
|
|
|
|
2016-09-29 21:49:33 +00:00
|
|
|
int runtime_left = status_ctx->proc_start + user_options->runtime + status_ctx->prepare_time + (ms_paused / 1000) - runtime_cur;
|
2016-09-15 02:29:22 +00:00
|
|
|
|
|
|
|
if (runtime_left <= 0)
|
|
|
|
{
|
2016-09-22 09:56:06 +00:00
|
|
|
if (user_options->benchmark == false)
|
2016-09-15 02:29:22 +00:00
|
|
|
{
|
2016-09-22 09:56:06 +00:00
|
|
|
if (user_options->quiet == false) log_info ("\nNOTE: Runtime limit reached, aborting...\n");
|
2016-09-15 02:29:22 +00:00
|
|
|
}
|
|
|
|
|
2016-09-29 21:25:29 +00:00
|
|
|
myabort (status_ctx);
|
2016-09-15 02:29:22 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-09-22 09:56:06 +00:00
|
|
|
if (remove_check == true)
|
2016-09-15 02:29:22 +00:00
|
|
|
{
|
|
|
|
remove_left--;
|
|
|
|
|
2016-09-22 09:56:06 +00:00
|
|
|
if (remove_left == 0)
|
2016-09-15 02:29:22 +00:00
|
|
|
{
|
2016-09-16 15:01:18 +00:00
|
|
|
if (hashes->digests_saved != hashes->digests_done)
|
2016-09-15 02:29:22 +00:00
|
|
|
{
|
2016-09-16 15:01:18 +00:00
|
|
|
hashes->digests_saved = hashes->digests_done;
|
2016-09-15 02:29:22 +00:00
|
|
|
|
2016-09-22 09:56:06 +00:00
|
|
|
save_hash (user_options, hashconfig, hashes);
|
2016-09-15 02:29:22 +00:00
|
|
|
}
|
|
|
|
|
2016-09-22 09:56:06 +00:00
|
|
|
remove_left = user_options->remove_timer;
|
2016-09-15 02:29:22 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-09-22 09:56:06 +00:00
|
|
|
if (status_check == true)
|
2016-09-15 02:29:22 +00:00
|
|
|
{
|
|
|
|
status_left--;
|
|
|
|
|
2016-09-22 09:56:06 +00:00
|
|
|
if (status_left == 0)
|
2016-09-15 02:29:22 +00:00
|
|
|
{
|
2016-09-29 22:04:12 +00:00
|
|
|
hc_thread_mutex_lock (status_ctx->mux_display);
|
2016-09-15 02:29:22 +00:00
|
|
|
|
2016-09-22 09:56:06 +00:00
|
|
|
if (user_options->quiet == false) clear_prompt ();
|
2016-09-15 02:29:22 +00:00
|
|
|
|
2016-09-22 09:56:06 +00:00
|
|
|
if (user_options->quiet == false) log_info ("");
|
2016-09-15 02:29:22 +00:00
|
|
|
|
2016-09-29 20:27:04 +00:00
|
|
|
status_display (status_ctx, opencl_ctx, hwmon_ctx, hashconfig, hashes, cpt_ctx, restore_ctx, user_options, user_options_extra, straight_ctx, combinator_ctx, mask_ctx);
|
2016-09-15 02:29:22 +00:00
|
|
|
|
2016-09-22 09:56:06 +00:00
|
|
|
if (user_options->quiet == false) log_info ("");
|
2016-09-15 02:29:22 +00:00
|
|
|
|
2016-09-29 22:04:12 +00:00
|
|
|
hc_thread_mutex_unlock (status_ctx->mux_display);
|
2016-09-15 02:29:22 +00:00
|
|
|
|
2016-09-22 09:56:06 +00:00
|
|
|
status_left = user_options->status_timer;
|
2016-09-15 02:29:22 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
myfree (fan_speed_chgd);
|
|
|
|
|
|
|
|
myfree (temp_diff_old);
|
|
|
|
myfree (temp_diff_sum);
|
|
|
|
|
|
|
|
p = NULL;
|
|
|
|
|
|
|
|
return (p);
|
|
|
|
}
|