From a5bc4e7f714acbf81ecdb3f6a0d16f69ffc491b1 Mon Sep 17 00:00:00 2001 From: Jens Steube Date: Fri, 25 Jul 2025 10:57:13 +0200 Subject: [PATCH] Removed unused macros in OpenCL/inc_vendor.h. Re-enabled USE_BITSELECT for Intel GPUs. Optimize vector version of hc_swap32() to allow using USE_SWIZZLE based technique on OpenCL in case USE_BITSELECT or USE_ROTATE is not set. --- OpenCL/inc_common.cl | 37 +++++++++++++++++++++++++++++++++---- OpenCL/inc_vendor.h | 10 +--------- 2 files changed, 34 insertions(+), 13 deletions(-) diff --git a/OpenCL/inc_common.cl b/OpenCL/inc_common.cl index 8e4b8b391..ef923452e 100644 --- a/OpenCL/inc_common.cl +++ b/OpenCL/inc_common.cl @@ -1089,10 +1089,39 @@ DECLSPEC u32x hc_swap32 (const u32x v) rotate (v, make_u32x ( 8)), make_u32x (0x00ff00ff)); #else - r = ((v & make_u32x (0xff000000)) >> 24) - | ((v & make_u32x (0x00ff0000)) >> 8) - | ((v & make_u32x (0x0000ff00)) << 8) - | ((v & make_u32x (0x000000ff)) << 24); + + #if VECT_SIZE == 1 + r = hc_swap32_S (v); + #endif + + #if VECT_SIZE >= 2 + r.s0 = hc_swap32_S (v.s0); + r.s1 = hc_swap32_S (v.s1); + #endif + + #if VECT_SIZE >= 4 + r.s2 = hc_swap32_S (v.s2); + r.s3 = hc_swap32_S (v.s3); + #endif + + #if VECT_SIZE >= 8 + r.s4 = hc_swap32_S (v.s4); + r.s5 = hc_swap32_S (v.s5); + r.s6 = hc_swap32_S (v.s6); + r.s7 = hc_swap32_S (v.s7); + #endif + + #if VECT_SIZE >= 16 + r.s8 = hc_swap32_S (v.s8); + r.s9 = hc_swap32_S (v.s9); + r.sa = hc_swap32_S (v.sa); + r.sb = hc_swap32_S (v.sb); + r.sc = hc_swap32_S (v.sc); + r.sd = hc_swap32_S (v.sd); + r.se = hc_swap32_S (v.se); + r.sf = hc_swap32_S (v.sf); + #endif + #endif #endif diff --git a/OpenCL/inc_vendor.h b/OpenCL/inc_vendor.h index b23c36d14..aaa38fe96 100644 --- a/OpenCL/inc_vendor.h +++ b/OpenCL/inc_vendor.h @@ -186,16 +186,8 @@ using namespace metal; #define USE_ROTATE #endif -#ifdef IS_INTEL_SDK -#ifdef IS_CPU -#define USE_BITSELECT -#define USE_ROTATE -#endif -#endif - #ifdef IS_OPENCL -//Slow on Intel -//#define USE_BITSELECT +#define 
USE_BITSELECT #define USE_ROTATE #define USE_SWIZZLE #endif