2015-12-04 14:47:52 +00:00
|
|
|
/**
|
|
|
|
* Author......: Jens Steube <jens.steube@gmail.com>
|
|
|
|
* License.....: MIT
|
|
|
|
*/
|
|
|
|
|
2016-05-10 08:22:13 +00:00
|
|
|
// Fails on the Intel OpenCL SDK
|
|
|
|
//#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable
|
|
|
|
//#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable
|
2016-05-09 19:32:12 +00:00
|
|
|
|
|
|
|
/**
|
|
|
|
* device type
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
 * Numeric codes that DEVICE_TYPE is compared against.
 * NOTE(review): the values look like OpenCL's CL_DEVICE_TYPE_* bits - confirm.
 */
#define DEVICE_TYPE_CPU   2
#define DEVICE_TYPE_GPU   4
#define DEVICE_TYPE_ACCEL 8

/*
 * Turn the DEVICE_TYPE value into at most one IS_* flag.
 * DEVICE_TYPE is not defined in this section; if it matches none of the
 * codes above, no flag is defined.
 */
#if   (DEVICE_TYPE == DEVICE_TYPE_CPU)
  #define IS_CPU
#elif (DEVICE_TYPE == DEVICE_TYPE_GPU)
  #define IS_GPU
#elif (DEVICE_TYPE == DEVICE_TYPE_ACCEL)
  #define IS_ACCEL
#endif
|
|
|
|
|
2015-12-04 14:47:52 +00:00
|
|
|
/**
|
|
|
|
* vendor specific
|
|
|
|
*/
|
|
|
|
|
2016-05-09 19:32:12 +00:00
|
|
|
/*
 * Turn the VENDOR_ID bit value into a vendor flag.
 * Every listed vendor except AMD and NV also gets IS_GENERIC, and so does
 * any VENDOR_ID value that is not listed here.
 */
#if   (VENDOR_ID == (1 << 0))
  #define IS_AMD
  //#define IS_GENERIC
#elif (VENDOR_ID == (1 << 1))
  #define IS_APPLE
  #define IS_GENERIC
#elif (VENDOR_ID == (1 << 2))
  #define IS_INTEL_BEIGNET
  #define IS_GENERIC
#elif (VENDOR_ID == (1 << 3))
  #define IS_INTEL_SDK
  #define IS_GENERIC
#elif (VENDOR_ID == (1 << 4))
  #define IS_MESA
  #define IS_GENERIC
#elif (VENDOR_ID == (1 << 5))
  #define IS_NV
  //#define IS_GENERIC
#elif (VENDOR_ID == (1 << 6))
  #define IS_POCL
  #define IS_GENERIC
#else
  #define IS_GENERIC
#endif
|
|
|
|
|
2015-12-04 14:47:52 +00:00
|
|
|
/**
|
|
|
|
* AMD specific
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* Enable the two AMD media-ops extensions when IS_AMD is defined. */
#if defined(IS_AMD)
  #pragma OPENCL EXTENSION cl_amd_media_ops  : enable
  #pragma OPENCL EXTENSION cl_amd_media_ops2 : enable
#endif
|
|
|
|
|
2015-12-15 11:04:22 +00:00
|
|
|
/**
|
2016-05-09 19:32:12 +00:00
|
|
|
* Unrolling is generally enabled, for all device types and hash modes
|
|
|
|
* There are a few exceptions where it's better not to unroll
|
2015-12-15 11:04:22 +00:00
|
|
|
*/
|
2015-12-04 14:47:52 +00:00
|
|
|
|
2016-05-09 19:32:12 +00:00
|
|
|
// Some algorithms run into too much register pressure due to loop unrolling
|
|
|
|
|
2015-12-04 14:47:52 +00:00
|
|
|
/*
 * NV GPUs: remove the _unroll definition for the kernel types listed below.
 * Applies only when both IS_NV and IS_GPU are defined.
 */
#if defined(IS_NV) && defined(IS_GPU)
  #if (KERN_TYPE ==  1500) || (KERN_TYPE ==  1800) || (KERN_TYPE ==  3000) || \
      (KERN_TYPE ==  6221) || (KERN_TYPE ==  6222) || (KERN_TYPE ==  6223) || \
      (KERN_TYPE ==  6500) || (KERN_TYPE ==  7100) || (KERN_TYPE ==  7400) || \
      (KERN_TYPE ==  7900) || (KERN_TYPE ==  8200) || (KERN_TYPE == 10400) || \
      (KERN_TYPE == 10500) || (KERN_TYPE == 10700) || (KERN_TYPE == 12300) || \
      (KERN_TYPE == 12400)
    #undef _unroll
  #endif
#endif
|
|
|
|
|
|
|
|
/*
 * AMD GPUs: remove the _unroll definition for the kernel types listed below.
 * Applies only when both IS_AMD and IS_GPU are defined.
 */
#if defined(IS_AMD) && defined(IS_GPU)
  #if (KERN_TYPE ==  3200) || (KERN_TYPE ==  5200) || (KERN_TYPE ==  6100) || \
      (KERN_TYPE ==  6221) || (KERN_TYPE ==  6222) || (KERN_TYPE ==  6223) || \
      (KERN_TYPE ==  6400) || (KERN_TYPE ==  6500) || (KERN_TYPE ==  6800) || \
      (KERN_TYPE ==  7100) || (KERN_TYPE ==  7400) || (KERN_TYPE ==  8000) || \
      (KERN_TYPE ==  8200) || (KERN_TYPE == 10900) || (KERN_TYPE == 11600) || \
      (KERN_TYPE == 12300) || (KERN_TYPE == 12800) || (KERN_TYPE == 12900) || \
      (KERN_TYPE == 13000)
    #undef _unroll
  #endif
#endif
|
|
|
|
|
|
|
|
// Some algorithms break when loops are unrolled; the cause is unknown, probably compiler bugs
|
|
|
|
// Can overlap with above cases
|
|
|
|
|
|
|
|
/*
 * AMD GPUs: second list of kernel types for which the _unroll definition is
 * removed. Some entries repeat kernel types handled earlier in the file;
 * repeating #undef is harmless.
 * Applies only when both IS_AMD and IS_GPU are defined.
 */
#if defined(IS_AMD) && defined(IS_GPU)
  #if (KERN_TYPE ==  1750) || (KERN_TYPE ==  1760) || (KERN_TYPE ==  6500) || \
      (KERN_TYPE ==  7100) || (KERN_TYPE ==  9600) || (KERN_TYPE == 12200) || \
      (KERN_TYPE == 12300)
    #undef _unroll
  #endif
#endif
|