mirror of
https://github.com/hashcat/hashcat.git
synced 2025-07-24 15:38:47 +00:00
Refactored HIP kernel code for improved performance and cleanup
- Replaced inline asm in hc_byte_perm() with __builtin_amdgcn_perm() - Replaced inline asm in hc_bytealign() with __builtin_amdgcn_alignbyte() - Defined HC_INLINE as default for HIP, significantly boosting kernel performance of pure kernels - Removed IS_ROCM from inc_vendor.h as it's no longer needed - Removed backend-specific code from several hash-modes and inc_rp_optimized.cl, as hc_bytealign_S() is now available on all backends
This commit is contained in:
parent
5ffbc5edc4
commit
06344910a4
31862
OpenCL/inc_common.cl
31862
OpenCL/inc_common.cl
File diff suppressed because it is too large
Load Diff
@ -24,7 +24,7 @@ DECLSPEC u64 blake2b_rot16_S (const u64 a)
|
||||
|
||||
return out.v64;
|
||||
|
||||
#elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1
|
||||
#elif (defined IS_AMD || defined IS_HIP)
|
||||
|
||||
vconv64_t in;
|
||||
|
||||
@ -98,7 +98,7 @@ DECLSPEC u64 blake2b_rot24_S (const u64 a)
|
||||
|
||||
return out.v64;
|
||||
|
||||
#elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1
|
||||
#elif (defined IS_AMD || defined IS_HIP)
|
||||
|
||||
vconv64_t in;
|
||||
|
||||
|
@ -77,7 +77,7 @@ DECLSPEC u32 blake2s_rot08_S (const u32 a)
|
||||
|
||||
return out.v32;
|
||||
|
||||
#elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1
|
||||
#elif (defined IS_AMD || defined IS_HIP)
|
||||
|
||||
vconv32_t in;
|
||||
|
||||
|
@ -781,7 +781,6 @@ DECLSPEC void append_block8_optimized (const u32 offset, PRIVATE_AS u32 *buf0, P
|
||||
|
||||
const int offset_switch = offset / 4;
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
const u32 src_r00 = src_r0[0];
|
||||
const u32 src_r01 = src_r0[1];
|
||||
const u32 src_r02 = src_r0[2];
|
||||
@ -882,123 +881,6 @@ DECLSPEC void append_block8_optimized (const u32 offset, PRIVATE_AS u32 *buf0, P
|
||||
s0 = 0;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||
|
||||
const int offset_mod_4 = offset & 3;
|
||||
|
||||
const int offset_minus_4 = 4 - offset_mod_4;
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if (defined IS_AMD || defined IS_HIP)
|
||||
const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
|
||||
#endif
|
||||
|
||||
const u32 src_r00 = src_r0[0];
|
||||
const u32 src_r01 = src_r0[1];
|
||||
const u32 src_r02 = src_r0[2];
|
||||
const u32 src_r03 = src_r0[3];
|
||||
const u32 src_r10 = src_r1[0];
|
||||
const u32 src_r11 = src_r1[1];
|
||||
const u32 src_r12 = src_r1[2];
|
||||
const u32 src_r13 = src_r1[3];
|
||||
|
||||
switch (offset_switch)
|
||||
{
|
||||
case 0:
|
||||
s7 = hc_byte_perm_S (src_r12, src_r13, selector);
|
||||
s6 = hc_byte_perm_S (src_r11, src_r12, selector);
|
||||
s5 = hc_byte_perm_S (src_r10, src_r11, selector);
|
||||
s4 = hc_byte_perm_S (src_r03, src_r10, selector);
|
||||
s3 = hc_byte_perm_S (src_r02, src_r03, selector);
|
||||
s2 = hc_byte_perm_S (src_r01, src_r02, selector);
|
||||
s1 = hc_byte_perm_S (src_r00, src_r01, selector);
|
||||
s0 = hc_byte_perm_S ( 0, src_r00, selector);
|
||||
break;
|
||||
|
||||
case 1:
|
||||
s7 = hc_byte_perm_S (src_r11, src_r12, selector);
|
||||
s6 = hc_byte_perm_S (src_r10, src_r11, selector);
|
||||
s5 = hc_byte_perm_S (src_r03, src_r10, selector);
|
||||
s4 = hc_byte_perm_S (src_r02, src_r03, selector);
|
||||
s3 = hc_byte_perm_S (src_r01, src_r02, selector);
|
||||
s2 = hc_byte_perm_S (src_r00, src_r01, selector);
|
||||
s1 = hc_byte_perm_S ( 0, src_r00, selector);
|
||||
s0 = 0;
|
||||
break;
|
||||
|
||||
case 2:
|
||||
s7 = hc_byte_perm_S (src_r10, src_r11, selector);
|
||||
s6 = hc_byte_perm_S (src_r03, src_r10, selector);
|
||||
s5 = hc_byte_perm_S (src_r02, src_r03, selector);
|
||||
s4 = hc_byte_perm_S (src_r01, src_r02, selector);
|
||||
s3 = hc_byte_perm_S (src_r00, src_r01, selector);
|
||||
s2 = hc_byte_perm_S ( 0, src_r00, selector);
|
||||
s1 = 0;
|
||||
s0 = 0;
|
||||
break;
|
||||
|
||||
case 3:
|
||||
s7 = hc_byte_perm_S (src_r03, src_r10, selector);
|
||||
s6 = hc_byte_perm_S (src_r02, src_r03, selector);
|
||||
s5 = hc_byte_perm_S (src_r01, src_r02, selector);
|
||||
s4 = hc_byte_perm_S (src_r00, src_r01, selector);
|
||||
s3 = hc_byte_perm_S ( 0, src_r00, selector);
|
||||
s2 = 0;
|
||||
s1 = 0;
|
||||
s0 = 0;
|
||||
|
||||
break;
|
||||
|
||||
case 4:
|
||||
s7 = hc_byte_perm_S (src_r02, src_r03, selector);
|
||||
s6 = hc_byte_perm_S (src_r01, src_r02, selector);
|
||||
s5 = hc_byte_perm_S (src_r00, src_r01, selector);
|
||||
s4 = hc_byte_perm_S ( 0, src_r00, selector);
|
||||
s3 = 0;
|
||||
s2 = 0;
|
||||
s1 = 0;
|
||||
s0 = 0;
|
||||
break;
|
||||
|
||||
case 5:
|
||||
s7 = hc_byte_perm_S (src_r01, src_r02, selector);
|
||||
s6 = hc_byte_perm_S (src_r00, src_r01, selector);
|
||||
s5 = hc_byte_perm_S ( 0, src_r00, selector);
|
||||
s4 = 0;
|
||||
s3 = 0;
|
||||
s2 = 0;
|
||||
s1 = 0;
|
||||
s0 = 0;
|
||||
break;
|
||||
|
||||
case 6:
|
||||
s7 = hc_byte_perm_S (src_r00, src_r01, selector);
|
||||
s6 = hc_byte_perm_S ( 0, src_r00, selector);
|
||||
s5 = 0;
|
||||
s4 = 0;
|
||||
s3 = 0;
|
||||
s2 = 0;
|
||||
s1 = 0;
|
||||
s0 = 0;
|
||||
break;
|
||||
|
||||
case 7:
|
||||
s7 = hc_byte_perm_S ( 0, src_r00, selector);
|
||||
s6 = 0;
|
||||
s5 = 0;
|
||||
s4 = 0;
|
||||
s3 = 0;
|
||||
s2 = 0;
|
||||
s1 = 0;
|
||||
s0 = 0;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
|
||||
buf0[0] = src_l0[0] | s0;
|
||||
buf0[1] = src_l0[1] | s1;
|
||||
|
@ -121,10 +121,6 @@ using namespace metal;
|
||||
#define IS_GENERIC
|
||||
#endif
|
||||
|
||||
#if defined IS_AMD && HAS_VPERM == 1
|
||||
#define IS_ROCM
|
||||
#endif
|
||||
|
||||
#define LOCAL_MEM_TYPE_LOCAL 1
|
||||
#define LOCAL_MEM_TYPE_GLOBAL 2
|
||||
|
||||
@ -159,7 +155,7 @@ using namespace metal;
|
||||
#elif defined IS_CUDA
|
||||
#define DECLSPEC __device__
|
||||
#elif defined IS_HIP
|
||||
#define DECLSPEC __device__
|
||||
#define DECLSPEC __device__ HC_INLINE
|
||||
#else
|
||||
#define DECLSPEC
|
||||
#endif
|
||||
@ -190,11 +186,6 @@ using namespace metal;
|
||||
#define USE_ROTATE
|
||||
#endif
|
||||
|
||||
#ifdef IS_ROCM
|
||||
#define USE_BITSELECT
|
||||
#define USE_ROTATE
|
||||
#endif
|
||||
|
||||
#ifdef IS_INTEL_SDK
|
||||
#ifdef IS_CPU
|
||||
//#define USE_BITSELECT
|
||||
|
@ -32,43 +32,16 @@ DECLSPEC void memcat16 (PRIVATE_AS u32 *block0, PRIVATE_AS u32 *block1, PRIVATE_
|
||||
u32 tmp3;
|
||||
u32 tmp4;
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
u32 in0 = append[0];
|
||||
u32 in1 = append[1];
|
||||
u32 in2 = append[2];
|
||||
u32 in3 = append[3];
|
||||
|
||||
tmp0 = hc_bytealign ( 0, in0, offset);
|
||||
tmp1 = hc_bytealign (in0, in1, offset);
|
||||
tmp2 = hc_bytealign (in1, in2, offset);
|
||||
tmp3 = hc_bytealign (in2, in3, offset);
|
||||
tmp4 = hc_bytealign (in3, 0, offset);
|
||||
#endif
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||
const int offset_mod_4 = offset & 3;
|
||||
|
||||
const int offset_minus_4 = 4 - offset_mod_4;
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if (defined IS_AMD || defined IS_HIP)
|
||||
const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
|
||||
#endif
|
||||
|
||||
u32 in0 = append[0];
|
||||
u32 in1 = append[1];
|
||||
u32 in2 = append[2];
|
||||
u32 in3 = append[3];
|
||||
|
||||
tmp0 = hc_byte_perm ( 0, in0, selector);
|
||||
tmp1 = hc_byte_perm (in0, in1, selector);
|
||||
tmp2 = hc_byte_perm (in1, in2, selector);
|
||||
tmp3 = hc_byte_perm (in2, in3, selector);
|
||||
tmp4 = hc_byte_perm (in3, 0, selector);
|
||||
#endif
|
||||
tmp0 = hc_bytealign_S ( 0, in0, offset);
|
||||
tmp1 = hc_bytealign_S (in0, in1, offset);
|
||||
tmp2 = hc_bytealign_S (in1, in2, offset);
|
||||
tmp3 = hc_bytealign_S (in2, in3, offset);
|
||||
tmp4 = hc_bytealign_S (in3, 0, offset);
|
||||
|
||||
const u32 div = offset / 4;
|
||||
|
||||
@ -145,45 +118,17 @@ DECLSPEC void memcat16_x80 (PRIVATE_AS u32 *block0, PRIVATE_AS u32 *block1, PRIV
|
||||
u32 tmp3;
|
||||
u32 tmp4;
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
u32 in0 = append[0];
|
||||
u32 in1 = append[1];
|
||||
u32 in2 = append[2];
|
||||
u32 in3 = append[3];
|
||||
u32 in4 = 0x80;
|
||||
|
||||
tmp0 = hc_bytealign ( 0, in0, offset);
|
||||
tmp1 = hc_bytealign (in0, in1, offset);
|
||||
tmp2 = hc_bytealign (in1, in2, offset);
|
||||
tmp3 = hc_bytealign (in2, in3, offset);
|
||||
tmp4 = hc_bytealign (in3, in4, offset);
|
||||
#endif
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||
const int offset_mod_4 = offset & 3;
|
||||
|
||||
const int offset_minus_4 = 4 - offset_mod_4;
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if (defined IS_AMD || defined IS_HIP)
|
||||
const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
|
||||
#endif
|
||||
|
||||
u32 in0 = append[0];
|
||||
u32 in1 = append[1];
|
||||
u32 in2 = append[2];
|
||||
u32 in3 = append[3];
|
||||
u32 in4 = 0x80;
|
||||
|
||||
tmp0 = hc_byte_perm ( 0, in0, selector);
|
||||
tmp1 = hc_byte_perm (in0, in1, selector);
|
||||
tmp2 = hc_byte_perm (in1, in2, selector);
|
||||
tmp3 = hc_byte_perm (in2, in3, selector);
|
||||
tmp4 = hc_byte_perm (in3, in4, selector);
|
||||
#endif
|
||||
tmp0 = hc_bytealign_S ( 0, in0, offset);
|
||||
tmp1 = hc_bytealign_S (in0, in1, offset);
|
||||
tmp2 = hc_bytealign_S (in1, in2, offset);
|
||||
tmp3 = hc_bytealign_S (in2, in3, offset);
|
||||
tmp4 = hc_bytealign_S (in3, in4, offset);
|
||||
|
||||
const u32 div = offset / 4;
|
||||
|
||||
@ -258,35 +203,12 @@ DECLSPEC void memcat8 (PRIVATE_AS u32 *block0, PRIVATE_AS u32 *block1, PRIVATE_A
|
||||
u32 tmp1;
|
||||
u32 tmp2;
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
u32 in0 = append[0];
|
||||
u32 in1 = append[1];
|
||||
|
||||
tmp0 = hc_bytealign ( 0, in0, offset);
|
||||
tmp1 = hc_bytealign (in0, in1, offset);
|
||||
tmp2 = hc_bytealign (in1, 0, offset);
|
||||
#endif
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||
const int offset_mod_4 = offset & 3;
|
||||
|
||||
const int offset_minus_4 = 4 - offset_mod_4;
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if (defined IS_AMD || defined IS_HIP)
|
||||
const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
|
||||
#endif
|
||||
|
||||
u32 in0 = append[0];
|
||||
u32 in1 = append[1];
|
||||
|
||||
tmp0 = hc_byte_perm ( 0, in0, selector);
|
||||
tmp1 = hc_byte_perm (in0, in1, selector);
|
||||
tmp2 = hc_byte_perm (in1, 0, selector);
|
||||
#endif
|
||||
tmp0 = hc_bytealign_S ( 0, in0, offset);
|
||||
tmp1 = hc_bytealign_S (in0, in1, offset);
|
||||
tmp2 = hc_bytealign_S (in1, 0, offset);
|
||||
|
||||
const u32 div = offset / 4;
|
||||
|
||||
|
@ -31,44 +31,17 @@ DECLSPEC void memcat16 (PRIVATE_AS u32 *block0, PRIVATE_AS u32 *block1, PRIVATE_
|
||||
u32 tmp3;
|
||||
u32 tmp4;
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
u32 in0 = append[0];
|
||||
u32 in1 = append[1];
|
||||
u32 in2 = append[2];
|
||||
u32 in3 = append[3];
|
||||
|
||||
tmp0 = hc_bytealign ( 0, in0, offset);
|
||||
tmp1 = hc_bytealign (in0, in1, offset);
|
||||
tmp2 = hc_bytealign (in1, in2, offset);
|
||||
tmp3 = hc_bytealign (in2, in3, offset);
|
||||
tmp4 = hc_bytealign (in3, 0, offset);
|
||||
#endif
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||
const int offset_mod_4 = offset & 3;
|
||||
|
||||
const int offset_minus_4 = 4 - offset_mod_4;
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if (defined IS_AMD || defined IS_HIP)
|
||||
const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
|
||||
#endif
|
||||
|
||||
u32 in0 = append[0];
|
||||
u32 in1 = append[1];
|
||||
u32 in2 = append[2];
|
||||
u32 in3 = append[3];
|
||||
|
||||
tmp0 = hc_byte_perm ( 0, in0, selector);
|
||||
tmp1 = hc_byte_perm (in0, in1, selector);
|
||||
tmp2 = hc_byte_perm (in1, in2, selector);
|
||||
tmp3 = hc_byte_perm (in2, in3, selector);
|
||||
tmp4 = hc_byte_perm (in3, 0, selector);
|
||||
#endif
|
||||
|
||||
tmp0 = hc_bytealign_S ( 0, in0, offset);
|
||||
tmp1 = hc_bytealign_S (in0, in1, offset);
|
||||
tmp2 = hc_bytealign_S (in1, in2, offset);
|
||||
tmp3 = hc_bytealign_S (in2, in3, offset);
|
||||
tmp4 = hc_bytealign_S (in3, 0, offset);
|
||||
|
||||
const u32 div = offset / 4;
|
||||
|
||||
switch (div)
|
||||
@ -144,45 +117,17 @@ DECLSPEC void memcat16_x80 (PRIVATE_AS u32 *block0, PRIVATE_AS u32 *block1, PRIV
|
||||
u32 tmp3;
|
||||
u32 tmp4;
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
u32 in0 = append[0];
|
||||
u32 in1 = append[1];
|
||||
u32 in2 = append[2];
|
||||
u32 in3 = append[3];
|
||||
u32 in4 = 0x80;
|
||||
|
||||
tmp0 = hc_bytealign ( 0, in0, offset);
|
||||
tmp1 = hc_bytealign (in0, in1, offset);
|
||||
tmp2 = hc_bytealign (in1, in2, offset);
|
||||
tmp3 = hc_bytealign (in2, in3, offset);
|
||||
tmp4 = hc_bytealign (in3, in4, offset);
|
||||
#endif
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||
const int offset_mod_4 = offset & 3;
|
||||
|
||||
const int offset_minus_4 = 4 - offset_mod_4;
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if (defined IS_AMD || defined IS_HIP)
|
||||
const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
|
||||
#endif
|
||||
|
||||
u32 in0 = append[0];
|
||||
u32 in1 = append[1];
|
||||
u32 in2 = append[2];
|
||||
u32 in3 = append[3];
|
||||
u32 in4 = 0x80;
|
||||
|
||||
tmp0 = hc_byte_perm ( 0, in0, selector);
|
||||
tmp1 = hc_byte_perm (in0, in1, selector);
|
||||
tmp2 = hc_byte_perm (in1, in2, selector);
|
||||
tmp3 = hc_byte_perm (in2, in3, selector);
|
||||
tmp4 = hc_byte_perm (in3, in4, selector);
|
||||
#endif
|
||||
tmp0 = hc_bytealign_S ( 0, in0, offset);
|
||||
tmp1 = hc_bytealign_S (in0, in1, offset);
|
||||
tmp2 = hc_bytealign_S (in1, in2, offset);
|
||||
tmp3 = hc_bytealign_S (in2, in3, offset);
|
||||
tmp4 = hc_bytealign_S (in3, in4, offset);
|
||||
|
||||
const u32 div = offset / 4;
|
||||
|
||||
@ -257,35 +202,12 @@ DECLSPEC void memcat8 (PRIVATE_AS u32 *block0, PRIVATE_AS u32 *block1, PRIVATE_A
|
||||
u32 tmp1;
|
||||
u32 tmp2;
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
u32 in0 = append[0];
|
||||
u32 in1 = append[1];
|
||||
|
||||
tmp0 = hc_bytealign ( 0, in0, offset);
|
||||
tmp1 = hc_bytealign (in0, in1, offset);
|
||||
tmp2 = hc_bytealign (in1, 0, offset);
|
||||
#endif
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||
const int offset_mod_4 = offset & 3;
|
||||
|
||||
const int offset_minus_4 = 4 - offset_mod_4;
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if (defined IS_AMD || defined IS_HIP)
|
||||
const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
|
||||
#endif
|
||||
|
||||
u32 in0 = append[0];
|
||||
u32 in1 = append[1];
|
||||
|
||||
tmp0 = hc_byte_perm ( 0, in0, selector);
|
||||
tmp1 = hc_byte_perm (in0, in1, selector);
|
||||
tmp2 = hc_byte_perm (in1, 0, selector);
|
||||
#endif
|
||||
tmp0 = hc_bytealign_S ( 0, in0, offset);
|
||||
tmp1 = hc_bytealign_S (in0, in1, offset);
|
||||
tmp2 = hc_bytealign_S (in1, 0, offset);
|
||||
|
||||
const u32 div = offset / 4;
|
||||
|
||||
|
@ -231,47 +231,18 @@ DECLSPEC void append_salt (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u3
|
||||
u32 tmp4;
|
||||
u32 tmp5;
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
u32 in0 = append[0];
|
||||
u32 in1 = append[1];
|
||||
u32 in2 = append[2];
|
||||
u32 in3 = append[3];
|
||||
u32 in4 = append[4];
|
||||
|
||||
tmp0 = hc_bytealign ( 0, in0, offset);
|
||||
tmp1 = hc_bytealign (in0, in1, offset);
|
||||
tmp2 = hc_bytealign (in1, in2, offset);
|
||||
tmp3 = hc_bytealign (in2, in3, offset);
|
||||
tmp4 = hc_bytealign (in3, in4, offset);
|
||||
tmp5 = hc_bytealign (in4, 0, offset);
|
||||
#endif
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||
const int offset_mod_4 = offset & 3;
|
||||
|
||||
const int offset_minus_4 = 4 - offset_mod_4;
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if (defined IS_AMD || defined IS_HIP)
|
||||
const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
|
||||
#endif
|
||||
|
||||
u32 in0 = append[0];
|
||||
u32 in1 = append[1];
|
||||
u32 in2 = append[2];
|
||||
u32 in3 = append[3];
|
||||
u32 in4 = append[4];
|
||||
|
||||
tmp0 = hc_byte_perm ( 0, in0, selector);
|
||||
tmp1 = hc_byte_perm (in0, in1, selector);
|
||||
tmp2 = hc_byte_perm (in1, in2, selector);
|
||||
tmp3 = hc_byte_perm (in2, in3, selector);
|
||||
tmp4 = hc_byte_perm (in3, in4, selector);
|
||||
tmp5 = hc_byte_perm (in4, 0, selector);
|
||||
#endif
|
||||
tmp0 = hc_bytealign_S ( 0, in0, offset);
|
||||
tmp1 = hc_bytealign_S (in0, in1, offset);
|
||||
tmp2 = hc_bytealign_S (in1, in2, offset);
|
||||
tmp3 = hc_bytealign_S (in2, in3, offset);
|
||||
tmp4 = hc_bytealign_S (in3, in4, offset);
|
||||
tmp5 = hc_bytealign_S (in4, 0, offset);
|
||||
|
||||
const u32 div = offset / 4;
|
||||
|
||||
|
@ -28,43 +28,16 @@ DECLSPEC void memcat16 (PRIVATE_AS u32 *block0, PRIVATE_AS u32 *block1, PRIVATE_
|
||||
u32 tmp3;
|
||||
u32 tmp4;
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
u32 in0 = append[0];
|
||||
u32 in1 = append[1];
|
||||
u32 in2 = append[2];
|
||||
u32 in3 = append[3];
|
||||
|
||||
tmp0 = hc_bytealign ( 0, in0, offset);
|
||||
tmp1 = hc_bytealign (in0, in1, offset);
|
||||
tmp2 = hc_bytealign (in1, in2, offset);
|
||||
tmp3 = hc_bytealign (in2, in3, offset);
|
||||
tmp4 = hc_bytealign (in3, 0, offset);
|
||||
#endif
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||
const int offset_mod_4 = offset & 3;
|
||||
|
||||
const int offset_minus_4 = 4 - offset_mod_4;
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if (defined IS_AMD || defined IS_HIP)
|
||||
const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
|
||||
#endif
|
||||
|
||||
u32 in0 = append[0];
|
||||
u32 in1 = append[1];
|
||||
u32 in2 = append[2];
|
||||
u32 in3 = append[3];
|
||||
|
||||
tmp0 = hc_byte_perm ( 0, in0, selector);
|
||||
tmp1 = hc_byte_perm (in0, in1, selector);
|
||||
tmp2 = hc_byte_perm (in1, in2, selector);
|
||||
tmp3 = hc_byte_perm (in2, in3, selector);
|
||||
tmp4 = hc_byte_perm (in3, 0, selector);
|
||||
#endif
|
||||
tmp0 = hc_bytealign_S ( 0, in0, offset);
|
||||
tmp1 = hc_bytealign_S (in0, in1, offset);
|
||||
tmp2 = hc_bytealign_S (in1, in2, offset);
|
||||
tmp3 = hc_bytealign_S (in2, in3, offset);
|
||||
tmp4 = hc_bytealign_S (in3, 0, offset);
|
||||
|
||||
const u32 div = offset / 4;
|
||||
|
||||
@ -140,47 +113,18 @@ DECLSPEC void memcat16_x80 (PRIVATE_AS u32 *block0, PRIVATE_AS u32 *block1, PRIV
|
||||
u32 tmp2;
|
||||
u32 tmp3;
|
||||
u32 tmp4;
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
u32 in0 = append[0];
|
||||
u32 in1 = append[1];
|
||||
u32 in2 = append[2];
|
||||
u32 in3 = append[3];
|
||||
u32 in4 = 0x80;
|
||||
|
||||
tmp0 = hc_bytealign ( 0, in0, offset);
|
||||
tmp1 = hc_bytealign (in0, in1, offset);
|
||||
tmp2 = hc_bytealign (in1, in2, offset);
|
||||
tmp3 = hc_bytealign (in2, in3, offset);
|
||||
tmp4 = hc_bytealign (in3, in4, offset);
|
||||
#endif
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||
const int offset_mod_4 = offset & 3;
|
||||
|
||||
const int offset_minus_4 = 4 - offset_mod_4;
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if (defined IS_AMD || defined IS_HIP)
|
||||
const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
|
||||
#endif
|
||||
|
||||
u32 in0 = append[0];
|
||||
u32 in1 = append[1];
|
||||
u32 in2 = append[2];
|
||||
u32 in3 = append[3];
|
||||
u32 in4 = 0x80;
|
||||
|
||||
tmp0 = hc_byte_perm ( 0, in0, selector);
|
||||
tmp1 = hc_byte_perm (in0, in1, selector);
|
||||
tmp2 = hc_byte_perm (in1, in2, selector);
|
||||
tmp3 = hc_byte_perm (in2, in3, selector);
|
||||
tmp4 = hc_byte_perm (in3, in4, selector);
|
||||
#endif
|
||||
|
||||
tmp0 = hc_bytealign_S ( 0, in0, offset);
|
||||
tmp1 = hc_bytealign_S (in0, in1, offset);
|
||||
tmp2 = hc_bytealign_S (in1, in2, offset);
|
||||
tmp3 = hc_bytealign_S (in2, in3, offset);
|
||||
tmp4 = hc_bytealign_S (in3, in4, offset);
|
||||
|
||||
const u32 div = offset / 4;
|
||||
|
||||
switch (div)
|
||||
@ -254,35 +198,12 @@ DECLSPEC void memcat8 (PRIVATE_AS u32 *block0, PRIVATE_AS u32 *block1, PRIVATE_A
|
||||
u32 tmp1;
|
||||
u32 tmp2;
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
u32 in0 = append[0];
|
||||
u32 in1 = append[1];
|
||||
|
||||
tmp0 = hc_bytealign ( 0, in0, offset);
|
||||
tmp1 = hc_bytealign (in0, in1, offset);
|
||||
tmp2 = hc_bytealign (in1, 0, offset);
|
||||
#endif
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||
const int offset_mod_4 = offset & 3;
|
||||
|
||||
const int offset_minus_4 = 4 - offset_mod_4;
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if (defined IS_AMD || defined IS_HIP)
|
||||
const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
|
||||
#endif
|
||||
|
||||
u32 in0 = append[0];
|
||||
u32 in1 = append[1];
|
||||
|
||||
tmp0 = hc_byte_perm ( 0, in0, selector);
|
||||
tmp1 = hc_byte_perm (in0, in1, selector);
|
||||
tmp2 = hc_byte_perm (in1, 0, selector);
|
||||
#endif
|
||||
tmp0 = hc_bytealign_S ( 0, in0, offset);
|
||||
tmp1 = hc_bytealign_S (in0, in1, offset);
|
||||
tmp2 = hc_bytealign_S (in1, 0, offset);
|
||||
|
||||
const u32 div = offset / 4;
|
||||
|
||||
|
@ -45,30 +45,11 @@ DECLSPEC u32 memcat16 (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS const
|
||||
u32 in2 = append[2];
|
||||
u32 in3 = append[3];
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
const u32 tmp0 = hc_bytealign_be ( 0, in0, offset);
|
||||
const u32 tmp1 = hc_bytealign_be (in0, in1, offset);
|
||||
const u32 tmp2 = hc_bytealign_be (in1, in2, offset);
|
||||
const u32 tmp3 = hc_bytealign_be (in2, in3, offset);
|
||||
const u32 tmp4 = hc_bytealign_be (in3, 0, offset);
|
||||
#endif
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if (defined IS_AMD || defined IS_HIP)
|
||||
const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
|
||||
#endif
|
||||
|
||||
const u32 tmp0 = hc_byte_perm_S (in0, 0, selector);
|
||||
const u32 tmp1 = hc_byte_perm_S (in1, in0, selector);
|
||||
const u32 tmp2 = hc_byte_perm_S (in2, in1, selector);
|
||||
const u32 tmp3 = hc_byte_perm_S (in3, in2, selector);
|
||||
const u32 tmp4 = hc_byte_perm_S (0, in3, selector);
|
||||
#endif
|
||||
const u32 tmp0 = hc_bytealign_be_S ( 0, in0, offset);
|
||||
const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset);
|
||||
const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset);
|
||||
const u32 tmp3 = hc_bytealign_be_S (in2, in3, offset);
|
||||
const u32 tmp4 = hc_bytealign_be_S (in3, 0, offset);
|
||||
|
||||
switch (offset / 4)
|
||||
{
|
||||
@ -172,30 +153,11 @@ DECLSPEC u32 memcat16c (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS cons
|
||||
u32 in2 = append[2];
|
||||
u32 in3 = append[3];
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
const u32 tmp0 = hc_bytealign_be ( 0, in0, offset);
|
||||
const u32 tmp1 = hc_bytealign_be (in0, in1, offset);
|
||||
const u32 tmp2 = hc_bytealign_be (in1, in2, offset);
|
||||
const u32 tmp3 = hc_bytealign_be (in2, in3, offset);
|
||||
const u32 tmp4 = hc_bytealign_be (in3, 0, offset);
|
||||
#endif
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if (defined IS_AMD || defined IS_HIP)
|
||||
const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
|
||||
#endif
|
||||
|
||||
const u32 tmp0 = hc_byte_perm_S (in0, 0, selector);
|
||||
const u32 tmp1 = hc_byte_perm_S (in1, in0, selector);
|
||||
const u32 tmp2 = hc_byte_perm_S (in2, in1, selector);
|
||||
const u32 tmp3 = hc_byte_perm_S (in3, in2, selector);
|
||||
const u32 tmp4 = hc_byte_perm_S (0, in3, selector);
|
||||
#endif
|
||||
const u32 tmp0 = hc_bytealign_be_S ( 0, in0, offset);
|
||||
const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset);
|
||||
const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset);
|
||||
const u32 tmp3 = hc_bytealign_be_S (in2, in3, offset);
|
||||
const u32 tmp4 = hc_bytealign_be_S (in3, 0, offset);
|
||||
|
||||
u32 carry[4] = { 0 };
|
||||
|
||||
@ -336,32 +298,12 @@ DECLSPEC u32 memcat16s (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS cons
|
||||
u32 in3 = append[3];
|
||||
u32 in4 = append[4];
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
const u32 tmp0 = hc_bytealign_be ( 0, in0, offset);
|
||||
const u32 tmp1 = hc_bytealign_be (in0, in1, offset);
|
||||
const u32 tmp2 = hc_bytealign_be (in1, in2, offset);
|
||||
const u32 tmp3 = hc_bytealign_be (in2, in3, offset);
|
||||
const u32 tmp4 = hc_bytealign_be (in3, in4, offset);
|
||||
const u32 tmp5 = hc_bytealign_be (in4, 0, offset);
|
||||
#endif
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if (defined IS_AMD || defined IS_HIP)
|
||||
const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
|
||||
#endif
|
||||
|
||||
const u32 tmp0 = hc_byte_perm_S (in0, 0, selector);
|
||||
const u32 tmp1 = hc_byte_perm_S (in1, in0, selector);
|
||||
const u32 tmp2 = hc_byte_perm_S (in2, in1, selector);
|
||||
const u32 tmp3 = hc_byte_perm_S (in3, in2, selector);
|
||||
const u32 tmp4 = hc_byte_perm_S (in4, in3, selector);
|
||||
const u32 tmp5 = hc_byte_perm_S (0, in4, selector);
|
||||
#endif
|
||||
const u32 tmp0 = hc_bytealign_be_S ( 0, in0, offset);
|
||||
const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset);
|
||||
const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset);
|
||||
const u32 tmp3 = hc_bytealign_be_S (in2, in3, offset);
|
||||
const u32 tmp4 = hc_bytealign_be_S (in3, in4, offset);
|
||||
const u32 tmp5 = hc_bytealign_be_S (in4, 0, offset);
|
||||
|
||||
switch (offset / 4)
|
||||
{
|
||||
@ -477,32 +419,12 @@ DECLSPEC u32 memcat16sc (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS con
|
||||
u32 in3 = append[3];
|
||||
u32 in4 = append[4];
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
const u32 tmp0 = hc_bytealign_be ( 0, in0, offset);
|
||||
const u32 tmp1 = hc_bytealign_be (in0, in1, offset);
|
||||
const u32 tmp2 = hc_bytealign_be (in1, in2, offset);
|
||||
const u32 tmp3 = hc_bytealign_be (in2, in3, offset);
|
||||
const u32 tmp4 = hc_bytealign_be (in3, in4, offset);
|
||||
const u32 tmp5 = hc_bytealign_be (in4, 0, offset);
|
||||
#endif
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if (defined IS_AMD || defined IS_HIP)
|
||||
const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
|
||||
#endif
|
||||
|
||||
const u32 tmp0 = hc_byte_perm_S (in0, 0, selector);
|
||||
const u32 tmp1 = hc_byte_perm_S (in1, in0, selector);
|
||||
const u32 tmp2 = hc_byte_perm_S (in2, in1, selector);
|
||||
const u32 tmp3 = hc_byte_perm_S (in3, in2, selector);
|
||||
const u32 tmp4 = hc_byte_perm_S (in4, in3, selector);
|
||||
const u32 tmp5 = hc_byte_perm_S (0, in4, selector);
|
||||
#endif
|
||||
const u32 tmp0 = hc_bytealign_be_S ( 0, in0, offset);
|
||||
const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset);
|
||||
const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset);
|
||||
const u32 tmp3 = hc_bytealign_be_S (in2, in3, offset);
|
||||
const u32 tmp4 = hc_bytealign_be_S (in3, in4, offset);
|
||||
const u32 tmp5 = hc_bytealign_be_S (in4, 0, offset);
|
||||
|
||||
u32 carry[5] = { 0 };
|
||||
|
||||
@ -784,30 +706,11 @@ DECLSPEC u32 memcat20 (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS const
|
||||
u32 in2 = append[2];
|
||||
u32 in3 = append[3];
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
const u32 tmp0 = hc_bytealign_be_S ( 0, in0, offset);
|
||||
const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset);
|
||||
const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset);
|
||||
const u32 tmp3 = hc_bytealign_be_S (in2, in3, offset);
|
||||
const u32 tmp4 = hc_bytealign_be_S (in3, 0, offset);
|
||||
#endif
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if (defined IS_AMD || defined IS_HIP)
|
||||
const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
|
||||
#endif
|
||||
|
||||
const u32 tmp0 = hc_byte_perm_S (in0, 0, selector);
|
||||
const u32 tmp1 = hc_byte_perm_S (in1, in0, selector);
|
||||
const u32 tmp2 = hc_byte_perm_S (in2, in1, selector);
|
||||
const u32 tmp3 = hc_byte_perm_S (in3, in2, selector);
|
||||
const u32 tmp4 = hc_byte_perm_S (0, in3, selector);
|
||||
#endif
|
||||
|
||||
switch (offset / 4)
|
||||
{
|
||||
@ -950,30 +853,11 @@ DECLSPEC u32 memcat20_x80 (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS c
|
||||
u32 in3 = append[3];
|
||||
u32 in4 = 0x80000000;
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
const u32 tmp0 = hc_bytealign_be_S ( 0, in0, offset);
|
||||
const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset);
|
||||
const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset);
|
||||
const u32 tmp3 = hc_bytealign_be_S (in2, in3, offset);
|
||||
const u32 tmp4 = hc_bytealign_be_S (in3, in4, offset);
|
||||
#endif
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if (defined IS_AMD || defined IS_HIP)
|
||||
const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
|
||||
#endif
|
||||
|
||||
const u32 tmp0 = hc_byte_perm_S (in0, 0, selector);
|
||||
const u32 tmp1 = hc_byte_perm_S (in1, in0, selector);
|
||||
const u32 tmp2 = hc_byte_perm_S (in2, in1, selector);
|
||||
const u32 tmp3 = hc_byte_perm_S (in3, in2, selector);
|
||||
const u32 tmp4 = hc_byte_perm_S (in4, in3, selector);
|
||||
#endif
|
||||
|
||||
switch (offset / 4)
|
||||
{
|
||||
@ -1116,32 +1000,12 @@ DECLSPEC u32 memcat24 (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS const
|
||||
u32 in3 = append[3];
|
||||
u32 in4 = append[4];
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
const u32 tmp0 = hc_bytealign_be_S ( 0, in0, offset);
|
||||
const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset);
|
||||
const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset);
|
||||
const u32 tmp3 = hc_bytealign_be_S (in2, in3, offset);
|
||||
const u32 tmp4 = hc_bytealign_be_S (in3, in4, offset);
|
||||
const u32 tmp5 = hc_bytealign_be_S (in4, 0, offset);
|
||||
#endif
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if (defined IS_AMD || defined IS_HIP)
|
||||
const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
|
||||
#endif
|
||||
|
||||
const u32 tmp0 = hc_byte_perm_S (in0, 0, selector);
|
||||
const u32 tmp1 = hc_byte_perm_S (in1, in0, selector);
|
||||
const u32 tmp2 = hc_byte_perm_S (in2, in1, selector);
|
||||
const u32 tmp3 = hc_byte_perm_S (in3, in2, selector);
|
||||
const u32 tmp4 = hc_byte_perm_S (in4, in3, selector);
|
||||
const u32 tmp5 = hc_byte_perm_S (0, in4, selector);
|
||||
#endif
|
||||
|
||||
switch (offset / 4)
|
||||
{
|
||||
|
@ -234,34 +234,13 @@ DECLSPEC void make_sc (LOCAL_AS u32 *sc, PRIVATE_AS const u32 *pw, const u32 pw_
|
||||
|
||||
u32 i;
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
for (i = 0; i < pd; i++) sc[idx++] = pw[i];
|
||||
sc[idx++] = pw[i]
|
||||
| hc_bytealign_be (bl[0], 0, pm4);
|
||||
for (i = 1; i < bd; i++) sc[idx++] = hc_bytealign_be (bl[i], bl[i - 1], pm4);
|
||||
sc[idx++] = hc_bytealign_be (sc[0], bl[i - 1], pm4);
|
||||
for (i = 1; i < 4; i++) sc[idx++] = hc_bytealign_be (sc[i], sc[i - 1], pm4);
|
||||
sc[idx++] = hc_bytealign_be ( 0, sc[i - 1], pm4);
|
||||
#endif
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> ((pm4 & 3) * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if (defined IS_AMD || defined IS_HIP)
|
||||
const int selector = l32_from_64_S (0x0706050403020100UL >> ((pm4 & 3) * 8));
|
||||
#endif
|
||||
|
||||
for (i = 0; i < pd; i++) sc[idx++] = pw[i];
|
||||
sc[idx++] = pw[i]
|
||||
| hc_byte_perm ( 0, bl[0], selector);
|
||||
for (i = 1; i < bd; i++) sc[idx++] = hc_byte_perm (bl[i - 1], bl[i], selector);
|
||||
sc[idx++] = hc_byte_perm (bl[i - 1], sc[0], selector);
|
||||
for (i = 1; i < 4; i++) sc[idx++] = hc_byte_perm (sc[i - 1], sc[i], selector);
|
||||
sc[idx++] = hc_byte_perm (sc[i - 1], 0, selector);
|
||||
#endif
|
||||
| hc_bytealign_be_S (bl[0], 0, pm4);
|
||||
for (i = 1; i < bd; i++) sc[idx++] = hc_bytealign_be_S (bl[i], bl[i - 1], pm4);
|
||||
sc[idx++] = hc_bytealign_be_S (sc[0], bl[i - 1], pm4);
|
||||
for (i = 1; i < 4; i++) sc[idx++] = hc_bytealign_be_S (sc[i], sc[i - 1], pm4);
|
||||
sc[idx++] = hc_bytealign_be_S ( 0, sc[i - 1], pm4);
|
||||
}
|
||||
}
|
||||
|
||||
@ -272,27 +251,10 @@ DECLSPEC void make_pt_with_offset (PRIVATE_AS u32 *pt, const u32 offset, LOCAL_A
|
||||
const u32 om = m % 4;
|
||||
const u32 od = m / 4;
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
pt[0] = hc_bytealign_be (sc[od + 1], sc[od + 0], om);
|
||||
pt[1] = hc_bytealign_be (sc[od + 2], sc[od + 1], om);
|
||||
pt[2] = hc_bytealign_be (sc[od + 3], sc[od + 2], om);
|
||||
pt[3] = hc_bytealign_be (sc[od + 4], sc[od + 3], om);
|
||||
#endif
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> ((om & 3) * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if (defined IS_AMD || defined IS_HIP)
|
||||
const int selector = l32_from_64_S (0x0706050403020100UL >> ((om & 3) * 8));
|
||||
#endif
|
||||
pt[0] = hc_byte_perm (sc[od + 0], sc[od + 1], selector);
|
||||
pt[1] = hc_byte_perm (sc[od + 1], sc[od + 2], selector);
|
||||
pt[2] = hc_byte_perm (sc[od + 2], sc[od + 3], selector);
|
||||
pt[3] = hc_byte_perm (sc[od + 3], sc[od + 4], selector);
|
||||
#endif
|
||||
pt[0] = hc_bytealign_be_S (sc[od + 1], sc[od + 0], om);
|
||||
pt[1] = hc_bytealign_be_S (sc[od + 2], sc[od + 1], om);
|
||||
pt[2] = hc_bytealign_be_S (sc[od + 3], sc[od + 2], om);
|
||||
pt[3] = hc_bytealign_be_S (sc[od + 4], sc[od + 3], om);
|
||||
}
|
||||
|
||||
DECLSPEC void make_w_with_offset (PRIVATE_AS ctx_t *ctx, const u32 W_len, const u32 offset, LOCAL_AS const u32 *sc, const u32 pwbl_len, PRIVATE_AS u32 *iv, PRIVATE_AS const u32 *ks, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4)
|
||||
|
@ -42,24 +42,8 @@ DECLSPEC void memcat8c_be (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u3
|
||||
u32 tmp0;
|
||||
u32 tmp1;
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
tmp0 = hc_bytealign_be (0, append, func_len);
|
||||
tmp1 = hc_bytealign_be (append, 0, func_len);
|
||||
#endif
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> ((func_len & 3) * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if (defined IS_AMD || defined IS_HIP)
|
||||
const int selector = l32_from_64_S (0x0706050403020100UL >> ((func_len & 3) * 8));
|
||||
#endif
|
||||
|
||||
tmp0 = hc_byte_perm (append, 0, selector);
|
||||
tmp1 = hc_byte_perm (0, append, selector);
|
||||
#endif
|
||||
tmp0 = hc_bytealign_be_S (0, append, func_len);
|
||||
tmp1 = hc_bytealign_be_S (append, 0, func_len);
|
||||
|
||||
u32 carry = 0;
|
||||
|
||||
|
@ -37,24 +37,8 @@ DECLSPEC void memcat8c_be (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u3
|
||||
u32 tmp0;
|
||||
u32 tmp1;
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
tmp0 = hc_bytealign_be (0, append, func_len);
|
||||
tmp1 = hc_bytealign_be (append, 0, func_len);
|
||||
#endif
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> ((func_len & 3) * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if (defined IS_AMD || defined IS_HIP)
|
||||
const int selector = l32_from_64_S (0x0706050403020100UL >> ((func_len & 3) * 8));
|
||||
#endif
|
||||
|
||||
tmp0 = hc_byte_perm (append, 0, selector);
|
||||
tmp1 = hc_byte_perm (0, append, selector);
|
||||
#endif
|
||||
tmp0 = hc_bytealign_be_S (0, append, func_len);
|
||||
tmp1 = hc_bytealign_be_S (append, 0, func_len);
|
||||
|
||||
u32 carry = 0;
|
||||
|
||||
|
@ -51,7 +51,6 @@ DECLSPEC void memcat64c_be (PRIVATE_AS u32x *block, const u32 offset, PRIVATE_AS
|
||||
u32x tmp15;
|
||||
u32x tmp16;
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
tmp00 = hc_bytealign_be ( 0, carry[ 0], offset);
|
||||
tmp01 = hc_bytealign_be (carry[ 0], carry[ 1], offset);
|
||||
tmp02 = hc_bytealign_be (carry[ 1], carry[ 2], offset);
|
||||
@ -69,36 +68,6 @@ DECLSPEC void memcat64c_be (PRIVATE_AS u32x *block, const u32 offset, PRIVATE_AS
|
||||
tmp14 = hc_bytealign_be (carry[13], carry[14], offset);
|
||||
tmp15 = hc_bytealign_be (carry[14], carry[15], offset);
|
||||
tmp16 = hc_bytealign_be (carry[15], 0, offset);
|
||||
#endif
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if (defined IS_AMD || defined IS_HIP)
|
||||
const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
|
||||
#endif
|
||||
|
||||
tmp00 = hc_byte_perm (carry[ 0], 0, selector);
|
||||
tmp01 = hc_byte_perm (carry[ 1], carry[ 0], selector);
|
||||
tmp02 = hc_byte_perm (carry[ 2], carry[ 1], selector);
|
||||
tmp03 = hc_byte_perm (carry[ 3], carry[ 2], selector);
|
||||
tmp04 = hc_byte_perm (carry[ 4], carry[ 3], selector);
|
||||
tmp05 = hc_byte_perm (carry[ 5], carry[ 4], selector);
|
||||
tmp06 = hc_byte_perm (carry[ 6], carry[ 5], selector);
|
||||
tmp07 = hc_byte_perm (carry[ 7], carry[ 6], selector);
|
||||
tmp08 = hc_byte_perm (carry[ 8], carry[ 7], selector);
|
||||
tmp09 = hc_byte_perm (carry[ 9], carry[ 8], selector);
|
||||
tmp10 = hc_byte_perm (carry[10], carry[ 9], selector);
|
||||
tmp11 = hc_byte_perm (carry[11], carry[10], selector);
|
||||
tmp12 = hc_byte_perm (carry[12], carry[11], selector);
|
||||
tmp13 = hc_byte_perm (carry[13], carry[12], selector);
|
||||
tmp14 = hc_byte_perm (carry[14], carry[13], selector);
|
||||
tmp15 = hc_byte_perm (carry[15], carry[14], selector);
|
||||
tmp16 = hc_byte_perm ( 0, carry[15], selector);
|
||||
#endif
|
||||
|
||||
carry[ 0] = 0;
|
||||
carry[ 1] = 0;
|
||||
|
@ -49,7 +49,6 @@ DECLSPEC void memcat64c_be (PRIVATE_AS u32x *block, const u32 offset, PRIVATE_AS
|
||||
u32x tmp15;
|
||||
u32x tmp16;
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
tmp00 = hc_bytealign_be ( 0, carry[ 0], offset);
|
||||
tmp01 = hc_bytealign_be (carry[ 0], carry[ 1], offset);
|
||||
tmp02 = hc_bytealign_be (carry[ 1], carry[ 2], offset);
|
||||
@ -67,36 +66,6 @@ DECLSPEC void memcat64c_be (PRIVATE_AS u32x *block, const u32 offset, PRIVATE_AS
|
||||
tmp14 = hc_bytealign_be (carry[13], carry[14], offset);
|
||||
tmp15 = hc_bytealign_be (carry[14], carry[15], offset);
|
||||
tmp16 = hc_bytealign_be (carry[15], 0, offset);
|
||||
#endif
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if (defined IS_AMD || defined IS_HIP)
|
||||
const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
|
||||
#endif
|
||||
|
||||
tmp00 = hc_byte_perm (carry[ 0], 0, selector);
|
||||
tmp01 = hc_byte_perm (carry[ 1], carry[ 0], selector);
|
||||
tmp02 = hc_byte_perm (carry[ 2], carry[ 1], selector);
|
||||
tmp03 = hc_byte_perm (carry[ 3], carry[ 2], selector);
|
||||
tmp04 = hc_byte_perm (carry[ 4], carry[ 3], selector);
|
||||
tmp05 = hc_byte_perm (carry[ 5], carry[ 4], selector);
|
||||
tmp06 = hc_byte_perm (carry[ 6], carry[ 5], selector);
|
||||
tmp07 = hc_byte_perm (carry[ 7], carry[ 6], selector);
|
||||
tmp08 = hc_byte_perm (carry[ 8], carry[ 7], selector);
|
||||
tmp09 = hc_byte_perm (carry[ 9], carry[ 8], selector);
|
||||
tmp10 = hc_byte_perm (carry[10], carry[ 9], selector);
|
||||
tmp11 = hc_byte_perm (carry[11], carry[10], selector);
|
||||
tmp12 = hc_byte_perm (carry[12], carry[11], selector);
|
||||
tmp13 = hc_byte_perm (carry[13], carry[12], selector);
|
||||
tmp14 = hc_byte_perm (carry[14], carry[13], selector);
|
||||
tmp15 = hc_byte_perm (carry[15], carry[14], selector);
|
||||
tmp16 = hc_byte_perm ( 0, carry[15], selector);
|
||||
#endif
|
||||
|
||||
carry[ 0] = 0;
|
||||
carry[ 1] = 0;
|
||||
|
@ -48,7 +48,6 @@ DECLSPEC void memcat64c_be (PRIVATE_AS u32x *block, const u32 offset, PRIVATE_AS
|
||||
u32x tmp15;
|
||||
u32x tmp16;
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
tmp00 = hc_bytealign_be ( 0, carry[ 0], offset);
|
||||
tmp01 = hc_bytealign_be (carry[ 0], carry[ 1], offset);
|
||||
tmp02 = hc_bytealign_be (carry[ 1], carry[ 2], offset);
|
||||
@ -66,36 +65,6 @@ DECLSPEC void memcat64c_be (PRIVATE_AS u32x *block, const u32 offset, PRIVATE_AS
|
||||
tmp14 = hc_bytealign_be (carry[13], carry[14], offset);
|
||||
tmp15 = hc_bytealign_be (carry[14], carry[15], offset);
|
||||
tmp16 = hc_bytealign_be (carry[15], 0, offset);
|
||||
#endif
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if (defined IS_AMD || defined IS_HIP)
|
||||
const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
|
||||
#endif
|
||||
|
||||
tmp00 = hc_byte_perm (carry[ 0], 0, selector);
|
||||
tmp01 = hc_byte_perm (carry[ 1], carry[ 0], selector);
|
||||
tmp02 = hc_byte_perm (carry[ 2], carry[ 1], selector);
|
||||
tmp03 = hc_byte_perm (carry[ 3], carry[ 2], selector);
|
||||
tmp04 = hc_byte_perm (carry[ 4], carry[ 3], selector);
|
||||
tmp05 = hc_byte_perm (carry[ 5], carry[ 4], selector);
|
||||
tmp06 = hc_byte_perm (carry[ 6], carry[ 5], selector);
|
||||
tmp07 = hc_byte_perm (carry[ 7], carry[ 6], selector);
|
||||
tmp08 = hc_byte_perm (carry[ 8], carry[ 7], selector);
|
||||
tmp09 = hc_byte_perm (carry[ 9], carry[ 8], selector);
|
||||
tmp10 = hc_byte_perm (carry[10], carry[ 9], selector);
|
||||
tmp11 = hc_byte_perm (carry[11], carry[10], selector);
|
||||
tmp12 = hc_byte_perm (carry[12], carry[11], selector);
|
||||
tmp13 = hc_byte_perm (carry[13], carry[12], selector);
|
||||
tmp14 = hc_byte_perm (carry[14], carry[13], selector);
|
||||
tmp15 = hc_byte_perm (carry[15], carry[14], selector);
|
||||
tmp16 = hc_byte_perm ( 0, carry[15], selector);
|
||||
#endif
|
||||
|
||||
carry[ 0] = 0;
|
||||
carry[ 1] = 0;
|
||||
|
@ -42,31 +42,6 @@ typedef struct gpg_tmp
|
||||
|
||||
} gpg_tmp_t;
|
||||
|
||||
|
||||
DECLSPEC u32 hc_bytealign_le_S (const u32 a, const u32 b, const int c)
|
||||
{
|
||||
const int c_mod_4 = c & 3;
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
const u32 r = l32_from_64_S ((v64_from_v32ab_S (b, a) >> (c_mod_4 * 8)));
|
||||
#endif
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> (c_mod_4 * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if (defined IS_AMD || defined IS_HIP)
|
||||
const int selector = l32_from_64_S (0x0706050403020100UL >> (c_mod_4 * 8));
|
||||
#endif
|
||||
|
||||
const u32 r = hc_byte_perm (b, a, selector);
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
DECLSPEC void memcat_le_S (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS const u32 *append, u32 len)
|
||||
{
|
||||
const u32 start_index = (offset - 1) >> 2;
|
||||
@ -74,11 +49,11 @@ DECLSPEC void memcat_le_S (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS c
|
||||
const int off_mod_4 = offset & 3;
|
||||
const int off_minus_4 = 4 - off_mod_4;
|
||||
|
||||
block[start_index] |= hc_bytealign_le_S (append[0], 0, off_minus_4);
|
||||
block[start_index] |= hc_bytealign_be_S (append[0], 0, off_minus_4);
|
||||
|
||||
for (u32 idx = 1; idx < count; idx++)
|
||||
{
|
||||
block[start_index + idx] = hc_bytealign_le_S (append[idx], append[idx - 1], off_minus_4);
|
||||
block[start_index + idx] = hc_bytealign_be_S (append[idx], append[idx - 1], off_minus_4);
|
||||
}
|
||||
}
|
||||
|
||||
@ -188,11 +163,11 @@ DECLSPEC int check_decoded_data (PRIVATE_AS u32 *decoded_data, const u32 decoded
|
||||
|
||||
u32 expected_sha1[5];
|
||||
|
||||
expected_sha1[0] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 1], decoded_data[sha1_u32_off + 0], sha1_byte_off);
|
||||
expected_sha1[1] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 2], decoded_data[sha1_u32_off + 1], sha1_byte_off);
|
||||
expected_sha1[2] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 3], decoded_data[sha1_u32_off + 2], sha1_byte_off);
|
||||
expected_sha1[3] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 4], decoded_data[sha1_u32_off + 3], sha1_byte_off);
|
||||
expected_sha1[4] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 5], decoded_data[sha1_u32_off + 4], sha1_byte_off);
|
||||
expected_sha1[0] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 1], decoded_data[sha1_u32_off + 0], sha1_byte_off);
|
||||
expected_sha1[1] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 2], decoded_data[sha1_u32_off + 1], sha1_byte_off);
|
||||
expected_sha1[2] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 3], decoded_data[sha1_u32_off + 2], sha1_byte_off);
|
||||
expected_sha1[3] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 4], decoded_data[sha1_u32_off + 3], sha1_byte_off);
|
||||
expected_sha1[4] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 5], decoded_data[sha1_u32_off + 4], sha1_byte_off);
|
||||
|
||||
memzero_le_S (decoded_data, sha1_byte_off, 384 * sizeof(u32));
|
||||
|
||||
|
@ -47,31 +47,6 @@ typedef struct gpg_tmp
|
||||
|
||||
} gpg_tmp_t;
|
||||
|
||||
|
||||
DECLSPEC u32 hc_bytealign_le_S (const u32 a, const u32 b, const int c)
|
||||
{
|
||||
const int c_mod_4 = c & 3;
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
const u32 r = l32_from_64_S ((v64_from_v32ab_S (b, a) >> (c_mod_4 * 8)));
|
||||
#endif
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> (c_mod_4 * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if (defined IS_AMD || defined IS_HIP)
|
||||
const int selector = l32_from_64_S (0x0706050403020100UL >> (c_mod_4 * 8));
|
||||
#endif
|
||||
|
||||
const u32 r = hc_byte_perm (b, a, selector);
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
DECLSPEC void memcat_le_S (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS const u32 *append, u32 len)
|
||||
{
|
||||
const u32 start_index = (offset - 1) >> 2;
|
||||
@ -79,11 +54,11 @@ DECLSPEC void memcat_le_S (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS c
|
||||
const int off_mod_4 = offset & 3;
|
||||
const int off_minus_4 = 4 - off_mod_4;
|
||||
|
||||
block[start_index] |= hc_bytealign_le_S (append[0], 0, off_minus_4);
|
||||
block[start_index] |= hc_bytealign_be_S (append[0], 0, off_minus_4);
|
||||
|
||||
for (u32 idx = 1; idx < count; idx++)
|
||||
{
|
||||
block[start_index + idx] = hc_bytealign_le_S (append[idx], append[idx - 1], off_minus_4);
|
||||
block[start_index + idx] = hc_bytealign_be_S (append[idx], append[idx - 1], off_minus_4);
|
||||
}
|
||||
}
|
||||
|
||||
@ -193,11 +168,11 @@ DECLSPEC int check_decoded_data (PRIVATE_AS u32 *decoded_data, const u32 decoded
|
||||
|
||||
u32 expected_sha1[5];
|
||||
|
||||
expected_sha1[0] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 1], decoded_data[sha1_u32_off + 0], sha1_byte_off);
|
||||
expected_sha1[1] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 2], decoded_data[sha1_u32_off + 1], sha1_byte_off);
|
||||
expected_sha1[2] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 3], decoded_data[sha1_u32_off + 2], sha1_byte_off);
|
||||
expected_sha1[3] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 4], decoded_data[sha1_u32_off + 3], sha1_byte_off);
|
||||
expected_sha1[4] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 5], decoded_data[sha1_u32_off + 4], sha1_byte_off);
|
||||
expected_sha1[0] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 1], decoded_data[sha1_u32_off + 0], sha1_byte_off);
|
||||
expected_sha1[1] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 2], decoded_data[sha1_u32_off + 1], sha1_byte_off);
|
||||
expected_sha1[2] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 3], decoded_data[sha1_u32_off + 2], sha1_byte_off);
|
||||
expected_sha1[3] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 4], decoded_data[sha1_u32_off + 3], sha1_byte_off);
|
||||
expected_sha1[4] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 5], decoded_data[sha1_u32_off + 4], sha1_byte_off);
|
||||
|
||||
memzero_le_S (decoded_data, sha1_byte_off, 384 * sizeof(u32));
|
||||
|
||||
|
@ -43,31 +43,6 @@ typedef struct gpg_tmp
|
||||
|
||||
} gpg_tmp_t;
|
||||
|
||||
|
||||
DECLSPEC u32 hc_bytealign_le_S (const u32 a, const u32 b, const int c)
|
||||
{
|
||||
const int c_mod_4 = c & 3;
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
const u32 r = l32_from_64_S ((v64_from_v32ab_S (b, a) >> (c_mod_4 * 8)));
|
||||
#endif
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> (c_mod_4 * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if (defined IS_AMD || defined IS_HIP)
|
||||
const int selector = l32_from_64_S (0x0706050403020100UL >> (c_mod_4 * 8));
|
||||
#endif
|
||||
|
||||
const u32 r = hc_byte_perm (b, a, selector);
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
DECLSPEC void memcat_le_S (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS const u32 *append, u32 len)
|
||||
{
|
||||
const u32 start_index = (offset - 1) >> 2;
|
||||
@ -75,11 +50,11 @@ DECLSPEC void memcat_le_S (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS c
|
||||
const int off_mod_4 = offset & 3;
|
||||
const int off_minus_4 = 4 - off_mod_4;
|
||||
|
||||
block[start_index] |= hc_bytealign_le_S (append[0], 0, off_minus_4);
|
||||
block[start_index] |= hc_bytealign_be_S (append[0], 0, off_minus_4);
|
||||
|
||||
for (u32 idx = 1; idx < count; idx++)
|
||||
{
|
||||
block[start_index + idx] = hc_bytealign_le_S (append[idx], append[idx - 1], off_minus_4);
|
||||
block[start_index + idx] = hc_bytealign_be_S (append[idx], append[idx - 1], off_minus_4);
|
||||
}
|
||||
}
|
||||
|
||||
@ -189,11 +164,11 @@ DECLSPEC int check_decoded_data (PRIVATE_AS u32 *decoded_data, const u32 decoded
|
||||
|
||||
u32 expected_sha1[5];
|
||||
|
||||
expected_sha1[0] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 1], decoded_data[sha1_u32_off + 0], sha1_byte_off);
|
||||
expected_sha1[1] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 2], decoded_data[sha1_u32_off + 1], sha1_byte_off);
|
||||
expected_sha1[2] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 3], decoded_data[sha1_u32_off + 2], sha1_byte_off);
|
||||
expected_sha1[3] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 4], decoded_data[sha1_u32_off + 3], sha1_byte_off);
|
||||
expected_sha1[4] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 5], decoded_data[sha1_u32_off + 4], sha1_byte_off);
|
||||
expected_sha1[0] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 1], decoded_data[sha1_u32_off + 0], sha1_byte_off);
|
||||
expected_sha1[1] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 2], decoded_data[sha1_u32_off + 1], sha1_byte_off);
|
||||
expected_sha1[2] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 3], decoded_data[sha1_u32_off + 2], sha1_byte_off);
|
||||
expected_sha1[3] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 4], decoded_data[sha1_u32_off + 3], sha1_byte_off);
|
||||
expected_sha1[4] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 5], decoded_data[sha1_u32_off + 4], sha1_byte_off);
|
||||
|
||||
memzero_le_S (decoded_data, sha1_byte_off, 384 * sizeof(u32));
|
||||
|
||||
|
@ -43,31 +43,6 @@ typedef struct gpg_tmp
|
||||
|
||||
} gpg_tmp_t;
|
||||
|
||||
|
||||
DECLSPEC u32 hc_bytealign_le_S (const u32 a, const u32 b, const int c)
|
||||
{
|
||||
const int c_mod_4 = c & 3;
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
const u32 r = l32_from_64_S ((v64_from_v32ab_S (b, a) >> (c_mod_4 * 8)));
|
||||
#endif
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> (c_mod_4 * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if (defined IS_AMD || defined IS_HIP)
|
||||
const int selector = l32_from_64_S (0x0706050403020100UL >> (c_mod_4 * 8));
|
||||
#endif
|
||||
|
||||
const u32 r = hc_byte_perm (b, a, selector);
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
DECLSPEC void memcat_le_S (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS const u32 *append, u32 len)
|
||||
{
|
||||
const u32 start_index = (offset - 1) >> 2;
|
||||
@ -75,11 +50,11 @@ DECLSPEC void memcat_le_S (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS c
|
||||
const int off_mod_4 = offset & 3;
|
||||
const int off_minus_4 = 4 - off_mod_4;
|
||||
|
||||
block[start_index] |= hc_bytealign_le_S (append[0], 0, off_minus_4);
|
||||
block[start_index] |= hc_bytealign_be_S (append[0], 0, off_minus_4);
|
||||
|
||||
for (u32 idx = 1; idx < count; idx++)
|
||||
{
|
||||
block[start_index + idx] = hc_bytealign_le_S (append[idx], append[idx - 1], off_minus_4);
|
||||
block[start_index + idx] = hc_bytealign_be_S (append[idx], append[idx - 1], off_minus_4);
|
||||
}
|
||||
}
|
||||
|
||||
@ -168,11 +143,11 @@ DECLSPEC int check_decoded_data (PRIVATE_AS u32 *decoded_data, const u32 decoded
|
||||
|
||||
u32 expected_sha1[5];
|
||||
|
||||
expected_sha1[0] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 1], decoded_data[sha1_u32_off + 0], sha1_byte_off);
|
||||
expected_sha1[1] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 2], decoded_data[sha1_u32_off + 1], sha1_byte_off);
|
||||
expected_sha1[2] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 3], decoded_data[sha1_u32_off + 2], sha1_byte_off);
|
||||
expected_sha1[3] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 4], decoded_data[sha1_u32_off + 3], sha1_byte_off);
|
||||
expected_sha1[4] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 5], decoded_data[sha1_u32_off + 4], sha1_byte_off);
|
||||
expected_sha1[0] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 1], decoded_data[sha1_u32_off + 0], sha1_byte_off);
|
||||
expected_sha1[1] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 2], decoded_data[sha1_u32_off + 1], sha1_byte_off);
|
||||
expected_sha1[2] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 3], decoded_data[sha1_u32_off + 2], sha1_byte_off);
|
||||
expected_sha1[3] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 4], decoded_data[sha1_u32_off + 3], sha1_byte_off);
|
||||
expected_sha1[4] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 5], decoded_data[sha1_u32_off + 4], sha1_byte_off);
|
||||
|
||||
|
||||
|
||||
|
@ -145,24 +145,8 @@ DECLSPEC void memcat8c_be (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u3
|
||||
u32 tmp0;
|
||||
u32 tmp1;
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
tmp0 = hc_bytealign_be (0, append, func_len);
|
||||
tmp1 = hc_bytealign_be (append, 0, func_len);
|
||||
#endif
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> ((func_len & 3) * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if (defined IS_AMD || defined IS_HIP)
|
||||
const int selector = l32_from_64_S (0x0706050403020100UL >> ((func_len & 3) * 8));
|
||||
#endif
|
||||
|
||||
tmp0 = hc_byte_perm (append, 0, selector);
|
||||
tmp1 = hc_byte_perm (0, append, selector);
|
||||
#endif
|
||||
tmp0 = hc_bytealign_be_S (0, append, func_len);
|
||||
tmp1 = hc_bytealign_be_S (append, 0, func_len);
|
||||
|
||||
u32 carry = 0;
|
||||
|
||||
|
@ -58,24 +58,8 @@ DECLSPEC void memcat8c_be (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u3
|
||||
u32 tmp0;
|
||||
u32 tmp1;
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
tmp0 = hc_bytealign_be (0, append, func_len);
|
||||
tmp1 = hc_bytealign_be (append, 0, func_len);
|
||||
#endif
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> ((func_len & 3) * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if (defined IS_AMD || defined IS_HIP)
|
||||
const int selector = l32_from_64_S (0x0706050403020100UL >> ((func_len & 3) * 8));
|
||||
#endif
|
||||
|
||||
tmp0 = hc_byte_perm (append, 0, selector);
|
||||
tmp1 = hc_byte_perm (0, append, selector);
|
||||
#endif
|
||||
tmp0 = hc_bytealign_be_S (0, append, func_len);
|
||||
tmp1 = hc_bytealign_be_S (append, 0, func_len);
|
||||
|
||||
u32 carry = 0;
|
||||
|
||||
|
@ -29,7 +29,6 @@ DECLSPEC void shift_buffer_by_offset (PRIVATE_AS u32 *w0, const u32 offset)
|
||||
{
|
||||
const int offset_switch = offset / 4;
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
switch (offset_switch)
|
||||
{
|
||||
case 0:
|
||||
@ -67,56 +66,6 @@ DECLSPEC void shift_buffer_by_offset (PRIVATE_AS u32 *w0, const u32 offset)
|
||||
w0[0] = 0;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if (defined IS_AMD || defined IS_HIP)
|
||||
const int selector = l32_from_64_S(0x0706050403020100UL >> ((offset & 3) * 8));
|
||||
#endif
|
||||
|
||||
switch (offset_switch)
|
||||
{
|
||||
case 0:
|
||||
w0[3] = hc_byte_perm_S (w0[3], w0[2], selector);
|
||||
w0[2] = hc_byte_perm_S (w0[2], w0[1], selector);
|
||||
w0[1] = hc_byte_perm_S (w0[1], w0[0], selector);
|
||||
w0[0] = hc_byte_perm_S (w0[0], 0, selector);
|
||||
break;
|
||||
|
||||
case 1:
|
||||
w0[3] = hc_byte_perm_S (w0[2], w0[1], selector);
|
||||
w0[2] = hc_byte_perm_S (w0[1], w0[0], selector);
|
||||
w0[1] = hc_byte_perm_S (w0[0], 0, selector);
|
||||
w0[0] = 0;
|
||||
break;
|
||||
|
||||
case 2:
|
||||
w0[3] = hc_byte_perm_S (w0[1], w0[0], selector);
|
||||
w0[2] = hc_byte_perm_S (w0[0], 0, selector);
|
||||
w0[1] = 0;
|
||||
w0[0] = 0;
|
||||
break;
|
||||
|
||||
case 3:
|
||||
w0[3] = hc_byte_perm_S (w0[0], 0, selector);
|
||||
w0[2] = 0;
|
||||
w0[1] = 0;
|
||||
w0[0] = 0;
|
||||
break;
|
||||
|
||||
default:
|
||||
w0[3] = 0;
|
||||
w0[2] = 0;
|
||||
w0[1] = 0;
|
||||
w0[0] = 0;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
DECLSPEC void aes256_scrt_format (PRIVATE_AS u32 *aes_ks, PRIVATE_AS u32 *pw, const u32 pw_len, PRIVATE_AS u32 *hash, PRIVATE_AS u32 *out, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4)
|
||||
|
@ -31,7 +31,6 @@ DECLSPEC void shift_buffer_by_offset (PRIVATE_AS u32 *w0, const u32 offset)
|
||||
{
|
||||
const int offset_switch = offset / 4;
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
switch (offset_switch)
|
||||
{
|
||||
case 0:
|
||||
@ -69,56 +68,6 @@ DECLSPEC void shift_buffer_by_offset (PRIVATE_AS u32 *w0, const u32 offset)
|
||||
w0[0] = 0;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if (defined IS_AMD || defined IS_HIP)
|
||||
const int selector = l32_from_64_S(0x0706050403020100UL >> ((offset & 3) * 8));
|
||||
#endif
|
||||
|
||||
switch (offset_switch)
|
||||
{
|
||||
case 0:
|
||||
w0[3] = hc_byte_perm_S (w0[3], w0[2], selector);
|
||||
w0[2] = hc_byte_perm_S (w0[2], w0[1], selector);
|
||||
w0[1] = hc_byte_perm_S (w0[1], w0[0], selector);
|
||||
w0[0] = hc_byte_perm_S (w0[0], 0, selector);
|
||||
break;
|
||||
|
||||
case 1:
|
||||
w0[3] = hc_byte_perm_S (w0[2], w0[1], selector);
|
||||
w0[2] = hc_byte_perm_S (w0[1], w0[0], selector);
|
||||
w0[1] = hc_byte_perm_S (w0[0], 0, selector);
|
||||
w0[0] = 0;
|
||||
break;
|
||||
|
||||
case 2:
|
||||
w0[3] = hc_byte_perm_S (w0[1], w0[0], selector);
|
||||
w0[2] = hc_byte_perm_S (w0[0], 0, selector);
|
||||
w0[1] = 0;
|
||||
w0[0] = 0;
|
||||
break;
|
||||
|
||||
case 3:
|
||||
w0[3] = hc_byte_perm_S (w0[0], 0, selector);
|
||||
w0[2] = 0;
|
||||
w0[1] = 0;
|
||||
w0[0] = 0;
|
||||
break;
|
||||
|
||||
default:
|
||||
w0[3] = 0;
|
||||
w0[2] = 0;
|
||||
w0[1] = 0;
|
||||
w0[0] = 0;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
DECLSPEC void aes256_scrt_format (PRIVATE_AS u32 *aes_ks, PRIVATE_AS u32 *pw, const u32 pw_len, PRIVATE_AS u32 *hash, PRIVATE_AS u32 *out, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4)
|
||||
|
@ -26,7 +26,6 @@ DECLSPEC void shift_buffer_by_offset (PRIVATE_AS u32 *w0, const u32 offset)
|
||||
{
|
||||
const int offset_switch = offset / 4;
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
switch (offset_switch)
|
||||
{
|
||||
case 0:
|
||||
@ -64,56 +63,6 @@ DECLSPEC void shift_buffer_by_offset (PRIVATE_AS u32 *w0, const u32 offset)
|
||||
w0[0] = 0;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if (defined IS_AMD || defined IS_HIP)
|
||||
const int selector = l32_from_64_S(0x0706050403020100UL >> ((offset & 3) * 8));
|
||||
#endif
|
||||
|
||||
switch (offset_switch)
|
||||
{
|
||||
case 0:
|
||||
w0[3] = hc_byte_perm_S (w0[3], w0[2], selector);
|
||||
w0[2] = hc_byte_perm_S (w0[2], w0[1], selector);
|
||||
w0[1] = hc_byte_perm_S (w0[1], w0[0], selector);
|
||||
w0[0] = hc_byte_perm_S (w0[0], 0, selector);
|
||||
break;
|
||||
|
||||
case 1:
|
||||
w0[3] = hc_byte_perm_S (w0[2], w0[1], selector);
|
||||
w0[2] = hc_byte_perm_S (w0[1], w0[0], selector);
|
||||
w0[1] = hc_byte_perm_S (w0[0], 0, selector);
|
||||
w0[0] = 0;
|
||||
break;
|
||||
|
||||
case 2:
|
||||
w0[3] = hc_byte_perm_S (w0[1], w0[0], selector);
|
||||
w0[2] = hc_byte_perm_S (w0[0], 0, selector);
|
||||
w0[1] = 0;
|
||||
w0[0] = 0;
|
||||
break;
|
||||
|
||||
case 3:
|
||||
w0[3] = hc_byte_perm_S (w0[0], 0, selector);
|
||||
w0[2] = 0;
|
||||
w0[1] = 0;
|
||||
w0[0] = 0;
|
||||
break;
|
||||
|
||||
default:
|
||||
w0[3] = 0;
|
||||
w0[2] = 0;
|
||||
w0[1] = 0;
|
||||
w0[0] = 0;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
DECLSPEC void aes256_scrt_format (PRIVATE_AS u32 *aes_ks, PRIVATE_AS u32 *pw, const u32 pw_len, PRIVATE_AS u32 *hash, PRIVATE_AS u32 *out, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4)
|
||||
|
@ -31,7 +31,6 @@ DECLSPEC void shift_buffer_by_offset (PRIVATE_AS u32 *w0, const u32 offset)
|
||||
{
|
||||
const int offset_switch = offset / 4;
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
switch (offset_switch)
|
||||
{
|
||||
case 0:
|
||||
@ -69,56 +68,6 @@ DECLSPEC void shift_buffer_by_offset (PRIVATE_AS u32 *w0, const u32 offset)
|
||||
w0[0] = 0;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if (defined IS_AMD || defined IS_HIP)
|
||||
const int selector = l32_from_64_S(0x0706050403020100UL >> ((offset & 3) * 8));
|
||||
#endif
|
||||
|
||||
switch (offset_switch)
|
||||
{
|
||||
case 0:
|
||||
w0[3] = hc_byte_perm_S (w0[3], w0[2], selector);
|
||||
w0[2] = hc_byte_perm_S (w0[2], w0[1], selector);
|
||||
w0[1] = hc_byte_perm_S (w0[1], w0[0], selector);
|
||||
w0[0] = hc_byte_perm_S (w0[0], 0, selector);
|
||||
break;
|
||||
|
||||
case 1:
|
||||
w0[3] = hc_byte_perm_S (w0[2], w0[1], selector);
|
||||
w0[2] = hc_byte_perm_S (w0[1], w0[0], selector);
|
||||
w0[1] = hc_byte_perm_S (w0[0], 0, selector);
|
||||
w0[0] = 0;
|
||||
break;
|
||||
|
||||
case 2:
|
||||
w0[3] = hc_byte_perm_S (w0[1], w0[0], selector);
|
||||
w0[2] = hc_byte_perm_S (w0[0], 0, selector);
|
||||
w0[1] = 0;
|
||||
w0[0] = 0;
|
||||
break;
|
||||
|
||||
case 3:
|
||||
w0[3] = hc_byte_perm_S (w0[0], 0, selector);
|
||||
w0[2] = 0;
|
||||
w0[1] = 0;
|
||||
w0[0] = 0;
|
||||
break;
|
||||
|
||||
default:
|
||||
w0[3] = 0;
|
||||
w0[2] = 0;
|
||||
w0[1] = 0;
|
||||
w0[0] = 0;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
DECLSPEC void aes256_scrt_format (PRIVATE_AS u32 *aes_ks, PRIVATE_AS u32 *pw, const u32 pw_len, PRIVATE_AS u32 *hash, PRIVATE_AS u32 *out, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4)
|
||||
|
@ -28,7 +28,6 @@ DECLSPEC void shift_buffer_by_offset (PRIVATE_AS u32 *w0, const u32 offset)
|
||||
{
|
||||
const int offset_switch = offset / 4;
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
switch (offset_switch)
|
||||
{
|
||||
case 0:
|
||||
@ -66,56 +65,6 @@ DECLSPEC void shift_buffer_by_offset (PRIVATE_AS u32 *w0, const u32 offset)
|
||||
w0[0] = 0;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if (defined IS_AMD || defined IS_HIP)
|
||||
const int selector = l32_from_64_S(0x0706050403020100UL >> ((offset & 3) * 8));
|
||||
#endif
|
||||
|
||||
switch (offset_switch)
|
||||
{
|
||||
case 0:
|
||||
w0[3] = hc_byte_perm_S (w0[3], w0[2], selector);
|
||||
w0[2] = hc_byte_perm_S (w0[2], w0[1], selector);
|
||||
w0[1] = hc_byte_perm_S (w0[1], w0[0], selector);
|
||||
w0[0] = hc_byte_perm_S (w0[0], 0, selector);
|
||||
break;
|
||||
|
||||
case 1:
|
||||
w0[3] = hc_byte_perm_S (w0[2], w0[1], selector);
|
||||
w0[2] = hc_byte_perm_S (w0[1], w0[0], selector);
|
||||
w0[1] = hc_byte_perm_S (w0[0], 0, selector);
|
||||
w0[0] = 0;
|
||||
break;
|
||||
|
||||
case 2:
|
||||
w0[3] = hc_byte_perm_S (w0[1], w0[0], selector);
|
||||
w0[2] = hc_byte_perm_S (w0[0], 0, selector);
|
||||
w0[1] = 0;
|
||||
w0[0] = 0;
|
||||
break;
|
||||
|
||||
case 3:
|
||||
w0[3] = hc_byte_perm_S (w0[0], 0, selector);
|
||||
w0[2] = 0;
|
||||
w0[1] = 0;
|
||||
w0[0] = 0;
|
||||
break;
|
||||
|
||||
default:
|
||||
w0[3] = 0;
|
||||
w0[2] = 0;
|
||||
w0[1] = 0;
|
||||
w0[0] = 0;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
DECLSPEC void aes256_scrt_format (PRIVATE_AS u32 *aes_ks, PRIVATE_AS u32 *pw, const u32 pw_len, PRIVATE_AS u32 *hash, PRIVATE_AS u32 *out, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4)
|
||||
|
@ -31,7 +31,6 @@ DECLSPEC void shift_buffer_by_offset (PRIVATE_AS u32 *w0, const u32 offset)
|
||||
{
|
||||
const int offset_switch = offset / 4;
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
switch (offset_switch)
|
||||
{
|
||||
case 0:
|
||||
@ -69,56 +68,6 @@ DECLSPEC void shift_buffer_by_offset (PRIVATE_AS u32 *w0, const u32 offset)
|
||||
w0[0] = 0;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if (defined IS_AMD || defined IS_HIP)
|
||||
const int selector = l32_from_64_S(0x0706050403020100UL >> ((offset & 3) * 8));
|
||||
#endif
|
||||
|
||||
switch (offset_switch)
|
||||
{
|
||||
case 0:
|
||||
w0[3] = hc_byte_perm_S (w0[3], w0[2], selector);
|
||||
w0[2] = hc_byte_perm_S (w0[2], w0[1], selector);
|
||||
w0[1] = hc_byte_perm_S (w0[1], w0[0], selector);
|
||||
w0[0] = hc_byte_perm_S (w0[0], 0, selector);
|
||||
break;
|
||||
|
||||
case 1:
|
||||
w0[3] = hc_byte_perm_S (w0[2], w0[1], selector);
|
||||
w0[2] = hc_byte_perm_S (w0[1], w0[0], selector);
|
||||
w0[1] = hc_byte_perm_S (w0[0], 0, selector);
|
||||
w0[0] = 0;
|
||||
break;
|
||||
|
||||
case 2:
|
||||
w0[3] = hc_byte_perm_S (w0[1], w0[0], selector);
|
||||
w0[2] = hc_byte_perm_S (w0[0], 0, selector);
|
||||
w0[1] = 0;
|
||||
w0[0] = 0;
|
||||
break;
|
||||
|
||||
case 3:
|
||||
w0[3] = hc_byte_perm_S (w0[0], 0, selector);
|
||||
w0[2] = 0;
|
||||
w0[1] = 0;
|
||||
w0[0] = 0;
|
||||
break;
|
||||
|
||||
default:
|
||||
w0[3] = 0;
|
||||
w0[2] = 0;
|
||||
w0[1] = 0;
|
||||
w0[0] = 0;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
DECLSPEC void aes256_scrt_format (PRIVATE_AS u32 *aes_ks, PRIVATE_AS u32 *pw, const u32 pw_len, PRIVATE_AS u32 *hash, PRIVATE_AS u32 *out, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4)
|
||||
|
Loading…
Reference in New Issue
Block a user