2015-12-04 14:47:52 +00:00
|
|
|
/**
 * Author......: See docs/credits.txt
 * License.....: MIT
 */
|
|
|
|
|
2016-05-09 19:32:12 +00:00
|
|
|
/**
 * device type
 *
 * DEVICE_TYPE is expected to be supplied from outside this file and is
 * compared against the three constants below. Exactly one of IS_CPU,
 * IS_GPU or IS_ACCEL is defined on a match; none is defined otherwise.
 */

#define DEVICE_TYPE_CPU   2
#define DEVICE_TYPE_GPU   4
#define DEVICE_TYPE_ACCEL 8

// REAL_SHM is defined for GPU devices only; CPU and ACCEL leave it undefined
#if   DEVICE_TYPE == DEVICE_TYPE_CPU
#define IS_CPU
#elif DEVICE_TYPE == DEVICE_TYPE_GPU
#define IS_GPU
#define REAL_SHM
#elif DEVICE_TYPE == DEVICE_TYPE_ACCEL
#define IS_ACCEL
#endif

// Address space qualifiers derived from REAL_SHM:
//   with REAL_SHM:    SHM_TYPE and SCR_TYPE are both __local
//   without REAL_SHM: SHM_TYPE is __constant and SCR_TYPE expands to nothing
#ifdef REAL_SHM
#define SHM_TYPE __local
#define SCR_TYPE __local
#else
#define SHM_TYPE __constant
#define SCR_TYPE
#endif
|
|
|
|
|
2015-12-04 14:47:52 +00:00
|
|
|
/**
 * vendor specific
 *
 * VENDOR_ID is expected to be supplied from outside this file as a single
 * bit. Bit to macro mapping:
 *
 *   1 << 0  IS_AMD            (also defines AMD_GCN, see below)
 *   1 << 1  IS_APPLE          + IS_GENERIC
 *   1 << 2  IS_INTEL_BEIGNET  + IS_GENERIC
 *   1 << 3  IS_INTEL_SDK      + IS_GENERIC
 *   1 << 4  IS_MESA           + IS_GENERIC
 *   1 << 5  IS_NV
 *   1 << 6  IS_POCL           + IS_GENERIC
 *   other   IS_GENERIC
 */

#if VENDOR_ID == (1 << 0)
#define IS_AMD
// AMD_GCN is 0 when AMD_ROCM is 0; otherwise it is derived from the
// __gfxNNN__ macro that is defined, falling back to 0 for unknown targets
#if AMD_ROCM == 0
#define AMD_GCN 0
#elif defined __gfx600__ || defined __gfx601__
#define AMD_GCN 1
#elif defined __gfx700__ || defined __gfx701__ || defined __gfx702__ || defined __gfx703__
#define AMD_GCN 2
#elif defined __gfx800__ || defined __gfx801__ || defined __gfx802__ || defined __gfx803__ || defined __gfx804__ || defined __gfx810__
#define AMD_GCN 3
// According to AMD docs, GCN 3 and 4 are the same
#elif defined __gfx900__ || defined __gfx901__ || defined __gfx902__ || defined __gfx903__
#define AMD_GCN 5
#else
#define AMD_GCN 0
#endif
#elif VENDOR_ID == (1 << 1)
#define IS_APPLE
#define IS_GENERIC
#elif VENDOR_ID == (1 << 2)
#define IS_INTEL_BEIGNET
#define IS_GENERIC
#elif VENDOR_ID == (1 << 3)
#define IS_INTEL_SDK
#define IS_GENERIC
#elif VENDOR_ID == (1 << 4)
#define IS_MESA
#define IS_GENERIC
#elif VENDOR_ID == (1 << 5)
#define IS_NV
#elif VENDOR_ID == (1 << 6)
#define IS_POCL
#define IS_GENERIC
#else
#define IS_GENERIC
#endif
|
|
|
|
|
2018-02-06 18:12:24 +00:00
|
|
|
/**
 * function declarations can have a large influence depending on the opencl runtime
 *
 * DECLSPEC expands to nothing when IS_NV is defined and to `inline`
 * for every other vendor.
 */

#ifdef IS_NV
#define DECLSPEC
#else
#define DECLSPEC inline
#endif
|
2018-02-06 18:12:24 +00:00
|
|
|
|
2015-12-04 14:47:52 +00:00
|
|
|
/**
 * AMD specific
 *
 * Enables the cl_amd_media_ops and cl_amd_media_ops2 OpenCL extensions
 * when IS_AMD is defined.
 */

#ifdef IS_AMD
#pragma OPENCL EXTENSION cl_amd_media_ops : enable
#pragma OPENCL EXTENSION cl_amd_media_ops2 : enable
#endif
|
|
|
|
|
2015-12-15 11:04:22 +00:00
|
|
|
/**
 * Unrolling is generally enabled, for all device types and hash modes
 * There are a few exceptions where it's better not to unroll
 * Some algorithms run into too much register pressure due to loop unrolling
 */

// generic vendors: those algos have shown that they produce better results on both amd and nv when not unrolled
// so we can assume they will produce better results on other vendors as well
//
// _unroll is removed when KERN_TYPE equals any mode listed below.
// To add an exception, append one more "|| KERN_TYPE == N" line.

#if KERN_TYPE ==  1420 \
 || KERN_TYPE ==  1450 \
 || KERN_TYPE ==  1460 \
 || KERN_TYPE ==  1720 \
 || KERN_TYPE ==  1750 \
 || KERN_TYPE ==  1760 \
 || KERN_TYPE ==  1800 \
 || KERN_TYPE ==  6221 \
 || KERN_TYPE ==  6222 \
 || KERN_TYPE ==  6223 \
 || KERN_TYPE ==  6400 \
 || KERN_TYPE ==  6500 \
 || KERN_TYPE ==  6800 \
 || KERN_TYPE ==  7100 \
 || KERN_TYPE ==  7400 \
 || KERN_TYPE ==  7900 \
 || KERN_TYPE ==  8900 \
 || KERN_TYPE == 10700 \
 || KERN_TYPE == 13721 \
 || KERN_TYPE == 13722 \
 || KERN_TYPE == 13723 \
 || KERN_TYPE == 13751 \
 || KERN_TYPE == 13752 \
 || KERN_TYPE == 13753 \
 || KERN_TYPE == 13800 \
 || KERN_TYPE == 15700
#undef _unroll
#endif
|
2016-01-23 14:32:31 +00:00
|
|
|
|
2016-11-13 19:58:28 +00:00
|
|
|
// nvidia specific
//
// Additional modes for which _unroll is removed, applied only when both
// IS_NV and IS_GPU are defined.

#if defined IS_NV && defined IS_GPU
#if KERN_TYPE ==  1500 \
 || KERN_TYPE ==  3000 \
 || KERN_TYPE == 14000
#undef _unroll
#endif
#endif
|
2016-11-13 19:58:28 +00:00
|
|
|
|
|
|
|
// amd specific
//
// Additional modes for which _unroll is removed, applied only when both
// IS_AMD and IS_GPU are defined.

#if defined IS_AMD && defined IS_GPU
#if KERN_TYPE ==  8000 \
 || KERN_TYPE ==  8200 \
 || KERN_TYPE == 12300 \
 || KERN_TYPE == 14100 \
 || KERN_TYPE == 15300 \
 || KERN_TYPE == 15900
#undef _unroll
#endif
#endif
|
2016-11-14 09:50:47 +00:00
|
|
|
|
|
|
|
// apple specific
//
// Additional mode for which _unroll is removed, applied whenever IS_APPLE
// is defined (no device type restriction).

#ifdef IS_APPLE
#if KERN_TYPE == 5000
#undef _unroll
#endif
#endif
|