Balancing the workload turns out to be very efficient

pull/332/head
jsteube 8 years ago
parent aefd3b03a3
commit dca9683b3b

@ -2914,32 +2914,24 @@ static void autotune (hc_device_param_t *device_param)
} }
} }
// sometimes we're in a bad situation where the algorithm is so slow that we cannot // balancing the workload turns out to be very efficient
// create enough kernel_accel to do both: keep the gpu busy and stay below target_ms.
// however, we need to have a minimum kernel_accel and kernel_loops of 32.
// luckily, at this level of workload, it became a linear function
if (kernel_accel < 32 || kernel_loops < 32) const u32 kernel_power_balance = kernel_accel * kernel_loops;
{
const u32 kernel_power = kernel_accel * kernel_loops;
// find sqrt
u32 sqrtv; u32 sqrtv;
for (sqrtv = 1; sqrtv < 0x100000; sqrtv++) for (sqrtv = 1; sqrtv < 0x100000; sqrtv++)
{ {
if ((sqrtv * sqrtv) >= kernel_power) break; if ((sqrtv * sqrtv) >= kernel_power_balance) break;
} }
const u32 kernel_accel_try = sqrtv; const u32 kernel_accel_try = sqrtv;
const u32 kernel_loops_try = sqrtv; const u32 kernel_loops_try = sqrtv;
if ((kernel_accel_try <= kernel_accel_max) && (kernel_loops_try >= kernel_loops_min)) if ((kernel_accel_try <= kernel_accel_max) && (kernel_loops_try >= kernel_loops_min))
{ {
kernel_accel = kernel_accel_try; kernel_accel = kernel_accel_try;
kernel_loops = kernel_loops_try; kernel_loops = kernel_loops_try;
}
} }
// reset fake words // reset fake words

Loading…
Cancel
Save