diff --git a/OpenCL/inc_vendor.cl b/OpenCL/inc_vendor.cl index fbd03c986..f3093aa46 100644 --- a/OpenCL/inc_vendor.cl +++ b/OpenCL/inc_vendor.cl @@ -3,10 +3,6 @@ * License.....: MIT */ -//fails on intel opencl sdk -//#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable -//#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable - /** * device type */ @@ -64,14 +60,28 @@ /** * Unrolling is generally enabled, for all device types and hash modes * There's a few exception when it's better not to unroll + * Some algorithms run into too much register pressure due to loop unrolling */ -// Some algorithms run into too much register pressure due to loop unrolling +// generic vendors: those algos have shown that they produce better results on both amd and nv when not unrolled +// so we can assume they will produce better results on other vendors as well -#ifdef IS_NV -#ifdef IS_GPU - -#if KERN_TYPE == 1500 +#if KERN_TYPE == 1420 +#undef _unroll +#endif +#if KERN_TYPE == 1450 +#undef _unroll +#endif +#if KERN_TYPE == 1460 +#undef _unroll +#endif +#if KERN_TYPE == 1720 +#undef _unroll +#endif +#if KERN_TYPE == 1750 +#undef _unroll +#endif +#if KERN_TYPE == 1760 #undef _unroll #endif #if KERN_TYPE == 1800 @@ -104,21 +114,12 @@ #if KERN_TYPE == 7400 #undef _unroll #endif -#if KERN_TYPE == 7900 -#undef _unroll -#endif #if KERN_TYPE == 8200 #undef _unroll #endif #if KERN_TYPE == 8900 #undef _unroll #endif -#if KERN_TYPE == 10400 -#undef _unroll -#endif -#if KERN_TYPE == 10500 -#undef _unroll -#endif #if KERN_TYPE == 10700 #undef _unroll #endif @@ -128,9 +129,6 @@ #if KERN_TYPE == 12300 #undef _unroll #endif -#if KERN_TYPE == 12400 -#undef _unroll -#endif #if KERN_TYPE == 12900 #undef _unroll #endif @@ -152,100 +150,6 @@ #if KERN_TYPE == 13753 #undef _unroll #endif -#if KERN_TYPE == 14000 -#undef _unroll -#endif -#if KERN_TYPE == 14100 -#undef _unroll -#endif - -#endif -#endif - -#ifdef IS_AMD -#ifdef IS_GPU - -#if KERN_TYPE == 1450 -#undef _unroll -#endif -#if KERN_TYPE == 1460 -#undef _unroll -#endif -#if KERN_TYPE == 1700 -#undef _unroll -#endif -#if KERN_TYPE == 1710 -#undef _unroll -#endif -#if KERN_TYPE == 1720 -#undef _unroll -#endif -#if KERN_TYPE == 1800 -#undef _unroll -#endif -#if KERN_TYPE == 3000 -#undef _unroll -#endif -#if KERN_TYPE == 5200 -#undef _unroll -#endif -#if KERN_TYPE == 6221 -#undef _unroll -#endif -#if KERN_TYPE == 6222 -#undef _unroll -#endif -#if KERN_TYPE == 6223 -#undef _unroll -#endif -#if KERN_TYPE == 6400 -#undef _unroll -#endif -#if KERN_TYPE == 6800 -#undef _unroll -#endif -#if KERN_TYPE == 7400 -#undef _unroll -#endif -#if KERN_TYPE == 8000 -#undef _unroll -#endif -#if KERN_TYPE == 8200 -#undef _unroll -#endif -#if KERN_TYPE == 8900 -#undef _unroll -#endif -#if KERN_TYPE == 10400 -#undef _unroll -#endif -#if KERN_TYPE == 10410 -#undef _unroll -#endif -#if KERN_TYPE == 10700 -#undef _unroll -#endif -#if KERN_TYPE == 10800 -#undef _unroll -#endif -#if KERN_TYPE == 10900 -#undef _unroll -#endif -#if KERN_TYPE == 12800 -#undef _unroll -#endif -#if KERN_TYPE == 12900 -#undef _unroll -#endif -#if KERN_TYPE == 13721 -#undef _unroll -#endif -#if KERN_TYPE == 13722 -#undef _unroll -#endif -#if KERN_TYPE == 13723 -#undef _unroll -#endif #if KERN_TYPE == 13800 #undef _unroll #endif @@ -256,33 +160,48 @@ #undef _unroll #endif +// nvidia specific + +#ifdef IS_NV +#ifdef IS_GPU + +#if KERN_TYPE == 1500 +#undef _unroll +#endif +#if KERN_TYPE == 10500 +#undef _unroll +#endif + #endif #endif -// Some algorithms break due to loop unrolling, it's unknown why, probably compiler bugs +// amd specific #ifdef IS_AMD #ifdef IS_GPU -#if KERN_TYPE == 1750 +#if KERN_TYPE == 1700 #undef _unroll #endif -#if KERN_TYPE == 1760 +#if KERN_TYPE == 1710 #undef _unroll #endif -#if KERN_TYPE == 6500 +#if KERN_TYPE == 5200 #undef _unroll #endif -#if KERN_TYPE == 7100 +#if KERN_TYPE == 8000 #undef _unroll #endif -#if KERN_TYPE == 9600 +#if KERN_TYPE == 10400 #undef _unroll #endif -#if KERN_TYPE == 12200 +#if KERN_TYPE == 10410 #undef _unroll #endif -#if KERN_TYPE == 12300 +#if KERN_TYPE == 10800 +#undef _unroll +#endif +#if KERN_TYPE == 12800 #undef _unroll #endif