Allow numbers that are not powers of 2 in autotune

pull/352/merge
Jens Steube 8 years ago
parent 9f45ceb128
commit 964a901f2a

@@ -2979,10 +2979,13 @@ static void autotune (hc_device_param_t *device_param)
 if ((kernel_loops_min < kernel_loops_max) && (kernel_accel_min < kernel_accel_max))
 {
-for (u32 f = 2; f < 1024; f++)
+u32 kernel_accel_orig = kernel_accel;
+u32 kernel_loops_orig = kernel_loops;
+for (u32 f = 1; f < 1024; f++)
 {
-const u32 kernel_accel_try = kernel_accel * f;
-const u32 kernel_loops_try = kernel_loops / f;
+const u32 kernel_accel_try = (float) kernel_accel_orig * f;
+const u32 kernel_loops_try = (float) kernel_loops_orig / f;
 if (kernel_accel_try > kernel_accel_max) break;
 if (kernel_loops_try < kernel_loops_min) break;
@@ -2991,6 +2994,8 @@ static void autotune (hc_device_param_t *device_param)
if (diff_new > diff) break;
diff_new = diff;
double exec_ms = try_run (device_param, kernel_accel_try, kernel_loops_try);
for (int i = 0; i < VERIFIER_CNT; i++)

Loading…
Cancel
Save