mirror of https://github.com/hashcat/hashcat.git synced 2025-07-24 15:38:47 +00:00

Refactored HIP kernel code for improved performance and cleanup

- Replaced inline asm in hc_byte_perm() with __builtin_amdgcn_perm()
- Replaced inline asm in hc_bytealign() with __builtin_amdgcn_alignbyte()
- Defined HC_INLINE as default for HIP, significantly boosting kernel performance of pure kernels
- Removed IS_ROCM from inc_vendor.h as it's no longer needed
- Removed backend-specific code from several hash-modes and inc_rp_optimized.cl, as hc_bytealign_S() is now available on all backends
Jens Steube 2025-07-10 13:31:00 +02:00
parent 5ffbc5edc4
commit 06344910a4
28 changed files with 324 additions and 32902 deletions
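
For context, here is a minimal illustrative sketch (not the verbatim code from this diff) of the pattern the first two bullets describe: the HIP scalar helpers expressed with the compiler builtins instead of inline asm. Assumptions: the builtin operand order shown below mirrors the operand order of the inline asm it replaces, and DECLSPEC expands to `__device__ HC_INLINE` on HIP as shown in the inc_vendor.h hunk further down.

#if defined IS_HIP

DECLSPEC u32 hc_byte_perm_S (const u32 a, const u32 b, const int c)
{
  // maps to v_perm_b32: builds each result byte from the {b:a} byte pair
  // according to the selector c
  return __builtin_amdgcn_perm (b, a, c);
}

DECLSPEC u32 hc_bytealign_S (const u32 a, const u32 b, const int c)
{
  // maps to v_alignbyte_b32: extracts 32 bits from the 64-bit value {a:b}
  // shifted right by (c & 3) bytes, matching the generic fallback path
  return __builtin_amdgcn_alignbyte (a, b, c & 3);
}

#endif // IS_HIP
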

File diff suppressed because it is too large

View File

@ -24,7 +24,7 @@ DECLSPEC u64 blake2b_rot16_S (const u64 a)
return out.v64;
#elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1
#elif (defined IS_AMD || defined IS_HIP)
vconv64_t in;
@ -98,7 +98,7 @@ DECLSPEC u64 blake2b_rot24_S (const u64 a)
return out.v64;
#elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1
#elif (defined IS_AMD || defined IS_HIP)
vconv64_t in;

View File

@ -77,7 +77,7 @@ DECLSPEC u32 blake2s_rot08_S (const u32 a)
return out.v32;
#elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1
#elif (defined IS_AMD || defined IS_HIP)
vconv32_t in;

View File

@ -781,7 +781,6 @@ DECLSPEC void append_block8_optimized (const u32 offset, PRIVATE_AS u32 *buf0, P
const int offset_switch = offset / 4;
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
const u32 src_r00 = src_r0[0];
const u32 src_r01 = src_r0[1];
const u32 src_r02 = src_r0[2];
@ -882,123 +881,6 @@ DECLSPEC void append_block8_optimized (const u32 offset, PRIVATE_AS u32 *buf0, P
s0 = 0;
break;
}
#endif
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
const int offset_mod_4 = offset & 3;
const int offset_minus_4 = 4 - offset_mod_4;
#if defined IS_NV
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
#endif
const u32 src_r00 = src_r0[0];
const u32 src_r01 = src_r0[1];
const u32 src_r02 = src_r0[2];
const u32 src_r03 = src_r0[3];
const u32 src_r10 = src_r1[0];
const u32 src_r11 = src_r1[1];
const u32 src_r12 = src_r1[2];
const u32 src_r13 = src_r1[3];
switch (offset_switch)
{
case 0:
s7 = hc_byte_perm_S (src_r12, src_r13, selector);
s6 = hc_byte_perm_S (src_r11, src_r12, selector);
s5 = hc_byte_perm_S (src_r10, src_r11, selector);
s4 = hc_byte_perm_S (src_r03, src_r10, selector);
s3 = hc_byte_perm_S (src_r02, src_r03, selector);
s2 = hc_byte_perm_S (src_r01, src_r02, selector);
s1 = hc_byte_perm_S (src_r00, src_r01, selector);
s0 = hc_byte_perm_S ( 0, src_r00, selector);
break;
case 1:
s7 = hc_byte_perm_S (src_r11, src_r12, selector);
s6 = hc_byte_perm_S (src_r10, src_r11, selector);
s5 = hc_byte_perm_S (src_r03, src_r10, selector);
s4 = hc_byte_perm_S (src_r02, src_r03, selector);
s3 = hc_byte_perm_S (src_r01, src_r02, selector);
s2 = hc_byte_perm_S (src_r00, src_r01, selector);
s1 = hc_byte_perm_S ( 0, src_r00, selector);
s0 = 0;
break;
case 2:
s7 = hc_byte_perm_S (src_r10, src_r11, selector);
s6 = hc_byte_perm_S (src_r03, src_r10, selector);
s5 = hc_byte_perm_S (src_r02, src_r03, selector);
s4 = hc_byte_perm_S (src_r01, src_r02, selector);
s3 = hc_byte_perm_S (src_r00, src_r01, selector);
s2 = hc_byte_perm_S ( 0, src_r00, selector);
s1 = 0;
s0 = 0;
break;
case 3:
s7 = hc_byte_perm_S (src_r03, src_r10, selector);
s6 = hc_byte_perm_S (src_r02, src_r03, selector);
s5 = hc_byte_perm_S (src_r01, src_r02, selector);
s4 = hc_byte_perm_S (src_r00, src_r01, selector);
s3 = hc_byte_perm_S ( 0, src_r00, selector);
s2 = 0;
s1 = 0;
s0 = 0;
break;
case 4:
s7 = hc_byte_perm_S (src_r02, src_r03, selector);
s6 = hc_byte_perm_S (src_r01, src_r02, selector);
s5 = hc_byte_perm_S (src_r00, src_r01, selector);
s4 = hc_byte_perm_S ( 0, src_r00, selector);
s3 = 0;
s2 = 0;
s1 = 0;
s0 = 0;
break;
case 5:
s7 = hc_byte_perm_S (src_r01, src_r02, selector);
s6 = hc_byte_perm_S (src_r00, src_r01, selector);
s5 = hc_byte_perm_S ( 0, src_r00, selector);
s4 = 0;
s3 = 0;
s2 = 0;
s1 = 0;
s0 = 0;
break;
case 6:
s7 = hc_byte_perm_S (src_r00, src_r01, selector);
s6 = hc_byte_perm_S ( 0, src_r00, selector);
s5 = 0;
s4 = 0;
s3 = 0;
s2 = 0;
s1 = 0;
s0 = 0;
break;
case 7:
s7 = hc_byte_perm_S ( 0, src_r00, selector);
s6 = 0;
s5 = 0;
s4 = 0;
s3 = 0;
s2 = 0;
s1 = 0;
s0 = 0;
break;
}
#endif
buf0[0] = src_l0[0] | s0;
buf0[1] = src_l0[1] | s1;

View File

@ -121,10 +121,6 @@ using namespace metal;
#define IS_GENERIC
#endif
#if defined IS_AMD && HAS_VPERM == 1
#define IS_ROCM
#endif
#define LOCAL_MEM_TYPE_LOCAL 1
#define LOCAL_MEM_TYPE_GLOBAL 2
@ -159,7 +155,7 @@ using namespace metal;
#elif defined IS_CUDA
#define DECLSPEC __device__
#elif defined IS_HIP
#define DECLSPEC __device__
#define DECLSPEC __device__ HC_INLINE
#else
#define DECLSPEC
#endif
@ -190,11 +186,6 @@ using namespace metal;
#define USE_ROTATE
#endif
#ifdef IS_ROCM
#define USE_BITSELECT
#define USE_ROTATE
#endif
#ifdef IS_INTEL_SDK
#ifdef IS_CPU
//#define USE_BITSELECT

View File

@ -32,43 +32,16 @@ DECLSPEC void memcat16 (PRIVATE_AS u32 *block0, PRIVATE_AS u32 *block1, PRIVATE_
u32 tmp3;
u32 tmp4;
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
u32 in0 = append[0];
u32 in1 = append[1];
u32 in2 = append[2];
u32 in3 = append[3];
tmp0 = hc_bytealign ( 0, in0, offset);
tmp1 = hc_bytealign (in0, in1, offset);
tmp2 = hc_bytealign (in1, in2, offset);
tmp3 = hc_bytealign (in2, in3, offset);
tmp4 = hc_bytealign (in3, 0, offset);
#endif
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
const int offset_mod_4 = offset & 3;
const int offset_minus_4 = 4 - offset_mod_4;
#if defined IS_NV
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
#endif
u32 in0 = append[0];
u32 in1 = append[1];
u32 in2 = append[2];
u32 in3 = append[3];
tmp0 = hc_byte_perm ( 0, in0, selector);
tmp1 = hc_byte_perm (in0, in1, selector);
tmp2 = hc_byte_perm (in1, in2, selector);
tmp3 = hc_byte_perm (in2, in3, selector);
tmp4 = hc_byte_perm (in3, 0, selector);
#endif
tmp0 = hc_bytealign_S ( 0, in0, offset);
tmp1 = hc_bytealign_S (in0, in1, offset);
tmp2 = hc_bytealign_S (in1, in2, offset);
tmp3 = hc_bytealign_S (in2, in3, offset);
tmp4 = hc_bytealign_S (in3, 0, offset);
const u32 div = offset / 4;
@ -145,45 +118,17 @@ DECLSPEC void memcat16_x80 (PRIVATE_AS u32 *block0, PRIVATE_AS u32 *block1, PRIV
u32 tmp3;
u32 tmp4;
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
u32 in0 = append[0];
u32 in1 = append[1];
u32 in2 = append[2];
u32 in3 = append[3];
u32 in4 = 0x80;
tmp0 = hc_bytealign ( 0, in0, offset);
tmp1 = hc_bytealign (in0, in1, offset);
tmp2 = hc_bytealign (in1, in2, offset);
tmp3 = hc_bytealign (in2, in3, offset);
tmp4 = hc_bytealign (in3, in4, offset);
#endif
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
const int offset_mod_4 = offset & 3;
const int offset_minus_4 = 4 - offset_mod_4;
#if defined IS_NV
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
#endif
u32 in0 = append[0];
u32 in1 = append[1];
u32 in2 = append[2];
u32 in3 = append[3];
u32 in4 = 0x80;
tmp0 = hc_byte_perm ( 0, in0, selector);
tmp1 = hc_byte_perm (in0, in1, selector);
tmp2 = hc_byte_perm (in1, in2, selector);
tmp3 = hc_byte_perm (in2, in3, selector);
tmp4 = hc_byte_perm (in3, in4, selector);
#endif
tmp0 = hc_bytealign_S ( 0, in0, offset);
tmp1 = hc_bytealign_S (in0, in1, offset);
tmp2 = hc_bytealign_S (in1, in2, offset);
tmp3 = hc_bytealign_S (in2, in3, offset);
tmp4 = hc_bytealign_S (in3, in4, offset);
const u32 div = offset / 4;
@ -258,35 +203,12 @@ DECLSPEC void memcat8 (PRIVATE_AS u32 *block0, PRIVATE_AS u32 *block1, PRIVATE_A
u32 tmp1;
u32 tmp2;
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
u32 in0 = append[0];
u32 in1 = append[1];
tmp0 = hc_bytealign ( 0, in0, offset);
tmp1 = hc_bytealign (in0, in1, offset);
tmp2 = hc_bytealign (in1, 0, offset);
#endif
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
const int offset_mod_4 = offset & 3;
const int offset_minus_4 = 4 - offset_mod_4;
#if defined IS_NV
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
#endif
u32 in0 = append[0];
u32 in1 = append[1];
tmp0 = hc_byte_perm ( 0, in0, selector);
tmp1 = hc_byte_perm (in0, in1, selector);
tmp2 = hc_byte_perm (in1, 0, selector);
#endif
tmp0 = hc_bytealign_S ( 0, in0, offset);
tmp1 = hc_bytealign_S (in0, in1, offset);
tmp2 = hc_bytealign_S (in1, 0, offset);
const u32 div = offset / 4;

View File

@ -31,44 +31,17 @@ DECLSPEC void memcat16 (PRIVATE_AS u32 *block0, PRIVATE_AS u32 *block1, PRIVATE_
u32 tmp3;
u32 tmp4;
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
u32 in0 = append[0];
u32 in1 = append[1];
u32 in2 = append[2];
u32 in3 = append[3];
tmp0 = hc_bytealign ( 0, in0, offset);
tmp1 = hc_bytealign (in0, in1, offset);
tmp2 = hc_bytealign (in1, in2, offset);
tmp3 = hc_bytealign (in2, in3, offset);
tmp4 = hc_bytealign (in3, 0, offset);
#endif
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
const int offset_mod_4 = offset & 3;
const int offset_minus_4 = 4 - offset_mod_4;
#if defined IS_NV
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
#endif
u32 in0 = append[0];
u32 in1 = append[1];
u32 in2 = append[2];
u32 in3 = append[3];
tmp0 = hc_byte_perm ( 0, in0, selector);
tmp1 = hc_byte_perm (in0, in1, selector);
tmp2 = hc_byte_perm (in1, in2, selector);
tmp3 = hc_byte_perm (in2, in3, selector);
tmp4 = hc_byte_perm (in3, 0, selector);
#endif
tmp0 = hc_bytealign_S ( 0, in0, offset);
tmp1 = hc_bytealign_S (in0, in1, offset);
tmp2 = hc_bytealign_S (in1, in2, offset);
tmp3 = hc_bytealign_S (in2, in3, offset);
tmp4 = hc_bytealign_S (in3, 0, offset);
const u32 div = offset / 4;
switch (div)
@ -144,45 +117,17 @@ DECLSPEC void memcat16_x80 (PRIVATE_AS u32 *block0, PRIVATE_AS u32 *block1, PRIV
u32 tmp3;
u32 tmp4;
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
u32 in0 = append[0];
u32 in1 = append[1];
u32 in2 = append[2];
u32 in3 = append[3];
u32 in4 = 0x80;
tmp0 = hc_bytealign ( 0, in0, offset);
tmp1 = hc_bytealign (in0, in1, offset);
tmp2 = hc_bytealign (in1, in2, offset);
tmp3 = hc_bytealign (in2, in3, offset);
tmp4 = hc_bytealign (in3, in4, offset);
#endif
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
const int offset_mod_4 = offset & 3;
const int offset_minus_4 = 4 - offset_mod_4;
#if defined IS_NV
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
#endif
u32 in0 = append[0];
u32 in1 = append[1];
u32 in2 = append[2];
u32 in3 = append[3];
u32 in4 = 0x80;
tmp0 = hc_byte_perm ( 0, in0, selector);
tmp1 = hc_byte_perm (in0, in1, selector);
tmp2 = hc_byte_perm (in1, in2, selector);
tmp3 = hc_byte_perm (in2, in3, selector);
tmp4 = hc_byte_perm (in3, in4, selector);
#endif
tmp0 = hc_bytealign_S ( 0, in0, offset);
tmp1 = hc_bytealign_S (in0, in1, offset);
tmp2 = hc_bytealign_S (in1, in2, offset);
tmp3 = hc_bytealign_S (in2, in3, offset);
tmp4 = hc_bytealign_S (in3, in4, offset);
const u32 div = offset / 4;
@ -257,35 +202,12 @@ DECLSPEC void memcat8 (PRIVATE_AS u32 *block0, PRIVATE_AS u32 *block1, PRIVATE_A
u32 tmp1;
u32 tmp2;
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
u32 in0 = append[0];
u32 in1 = append[1];
tmp0 = hc_bytealign ( 0, in0, offset);
tmp1 = hc_bytealign (in0, in1, offset);
tmp2 = hc_bytealign (in1, 0, offset);
#endif
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
const int offset_mod_4 = offset & 3;
const int offset_minus_4 = 4 - offset_mod_4;
#if defined IS_NV
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
#endif
u32 in0 = append[0];
u32 in1 = append[1];
tmp0 = hc_byte_perm ( 0, in0, selector);
tmp1 = hc_byte_perm (in0, in1, selector);
tmp2 = hc_byte_perm (in1, 0, selector);
#endif
tmp0 = hc_bytealign_S ( 0, in0, offset);
tmp1 = hc_bytealign_S (in0, in1, offset);
tmp2 = hc_bytealign_S (in1, 0, offset);
const u32 div = offset / 4;

View File

@ -231,47 +231,18 @@ DECLSPEC void append_salt (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u3
u32 tmp4;
u32 tmp5;
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
u32 in0 = append[0];
u32 in1 = append[1];
u32 in2 = append[2];
u32 in3 = append[3];
u32 in4 = append[4];
tmp0 = hc_bytealign ( 0, in0, offset);
tmp1 = hc_bytealign (in0, in1, offset);
tmp2 = hc_bytealign (in1, in2, offset);
tmp3 = hc_bytealign (in2, in3, offset);
tmp4 = hc_bytealign (in3, in4, offset);
tmp5 = hc_bytealign (in4, 0, offset);
#endif
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
const int offset_mod_4 = offset & 3;
const int offset_minus_4 = 4 - offset_mod_4;
#if defined IS_NV
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
#endif
u32 in0 = append[0];
u32 in1 = append[1];
u32 in2 = append[2];
u32 in3 = append[3];
u32 in4 = append[4];
tmp0 = hc_byte_perm ( 0, in0, selector);
tmp1 = hc_byte_perm (in0, in1, selector);
tmp2 = hc_byte_perm (in1, in2, selector);
tmp3 = hc_byte_perm (in2, in3, selector);
tmp4 = hc_byte_perm (in3, in4, selector);
tmp5 = hc_byte_perm (in4, 0, selector);
#endif
tmp0 = hc_bytealign_S ( 0, in0, offset);
tmp1 = hc_bytealign_S (in0, in1, offset);
tmp2 = hc_bytealign_S (in1, in2, offset);
tmp3 = hc_bytealign_S (in2, in3, offset);
tmp4 = hc_bytealign_S (in3, in4, offset);
tmp5 = hc_bytealign_S (in4, 0, offset);
const u32 div = offset / 4;

View File

@ -28,43 +28,16 @@ DECLSPEC void memcat16 (PRIVATE_AS u32 *block0, PRIVATE_AS u32 *block1, PRIVATE_
u32 tmp3;
u32 tmp4;
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
u32 in0 = append[0];
u32 in1 = append[1];
u32 in2 = append[2];
u32 in3 = append[3];
tmp0 = hc_bytealign ( 0, in0, offset);
tmp1 = hc_bytealign (in0, in1, offset);
tmp2 = hc_bytealign (in1, in2, offset);
tmp3 = hc_bytealign (in2, in3, offset);
tmp4 = hc_bytealign (in3, 0, offset);
#endif
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
const int offset_mod_4 = offset & 3;
const int offset_minus_4 = 4 - offset_mod_4;
#if defined IS_NV
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
#endif
u32 in0 = append[0];
u32 in1 = append[1];
u32 in2 = append[2];
u32 in3 = append[3];
tmp0 = hc_byte_perm ( 0, in0, selector);
tmp1 = hc_byte_perm (in0, in1, selector);
tmp2 = hc_byte_perm (in1, in2, selector);
tmp3 = hc_byte_perm (in2, in3, selector);
tmp4 = hc_byte_perm (in3, 0, selector);
#endif
tmp0 = hc_bytealign_S ( 0, in0, offset);
tmp1 = hc_bytealign_S (in0, in1, offset);
tmp2 = hc_bytealign_S (in1, in2, offset);
tmp3 = hc_bytealign_S (in2, in3, offset);
tmp4 = hc_bytealign_S (in3, 0, offset);
const u32 div = offset / 4;
@ -140,47 +113,18 @@ DECLSPEC void memcat16_x80 (PRIVATE_AS u32 *block0, PRIVATE_AS u32 *block1, PRIV
u32 tmp2;
u32 tmp3;
u32 tmp4;
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
u32 in0 = append[0];
u32 in1 = append[1];
u32 in2 = append[2];
u32 in3 = append[3];
u32 in4 = 0x80;
tmp0 = hc_bytealign ( 0, in0, offset);
tmp1 = hc_bytealign (in0, in1, offset);
tmp2 = hc_bytealign (in1, in2, offset);
tmp3 = hc_bytealign (in2, in3, offset);
tmp4 = hc_bytealign (in3, in4, offset);
#endif
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
const int offset_mod_4 = offset & 3;
const int offset_minus_4 = 4 - offset_mod_4;
#if defined IS_NV
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
#endif
u32 in0 = append[0];
u32 in1 = append[1];
u32 in2 = append[2];
u32 in3 = append[3];
u32 in4 = 0x80;
tmp0 = hc_byte_perm ( 0, in0, selector);
tmp1 = hc_byte_perm (in0, in1, selector);
tmp2 = hc_byte_perm (in1, in2, selector);
tmp3 = hc_byte_perm (in2, in3, selector);
tmp4 = hc_byte_perm (in3, in4, selector);
#endif
tmp0 = hc_bytealign_S ( 0, in0, offset);
tmp1 = hc_bytealign_S (in0, in1, offset);
tmp2 = hc_bytealign_S (in1, in2, offset);
tmp3 = hc_bytealign_S (in2, in3, offset);
tmp4 = hc_bytealign_S (in3, in4, offset);
const u32 div = offset / 4;
switch (div)
@ -254,35 +198,12 @@ DECLSPEC void memcat8 (PRIVATE_AS u32 *block0, PRIVATE_AS u32 *block1, PRIVATE_A
u32 tmp1;
u32 tmp2;
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
u32 in0 = append[0];
u32 in1 = append[1];
tmp0 = hc_bytealign ( 0, in0, offset);
tmp1 = hc_bytealign (in0, in1, offset);
tmp2 = hc_bytealign (in1, 0, offset);
#endif
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
const int offset_mod_4 = offset & 3;
const int offset_minus_4 = 4 - offset_mod_4;
#if defined IS_NV
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
#endif
u32 in0 = append[0];
u32 in1 = append[1];
tmp0 = hc_byte_perm ( 0, in0, selector);
tmp1 = hc_byte_perm (in0, in1, selector);
tmp2 = hc_byte_perm (in1, 0, selector);
#endif
tmp0 = hc_bytealign_S ( 0, in0, offset);
tmp1 = hc_bytealign_S (in0, in1, offset);
tmp2 = hc_bytealign_S (in1, 0, offset);
const u32 div = offset / 4;

View File

@ -45,30 +45,11 @@ DECLSPEC u32 memcat16 (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS const
u32 in2 = append[2];
u32 in3 = append[3];
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
const u32 tmp0 = hc_bytealign_be ( 0, in0, offset);
const u32 tmp1 = hc_bytealign_be (in0, in1, offset);
const u32 tmp2 = hc_bytealign_be (in1, in2, offset);
const u32 tmp3 = hc_bytealign_be (in2, in3, offset);
const u32 tmp4 = hc_bytealign_be (in3, 0, offset);
#endif
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
#endif
const u32 tmp0 = hc_byte_perm_S (in0, 0, selector);
const u32 tmp1 = hc_byte_perm_S (in1, in0, selector);
const u32 tmp2 = hc_byte_perm_S (in2, in1, selector);
const u32 tmp3 = hc_byte_perm_S (in3, in2, selector);
const u32 tmp4 = hc_byte_perm_S (0, in3, selector);
#endif
const u32 tmp0 = hc_bytealign_be_S ( 0, in0, offset);
const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset);
const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset);
const u32 tmp3 = hc_bytealign_be_S (in2, in3, offset);
const u32 tmp4 = hc_bytealign_be_S (in3, 0, offset);
switch (offset / 4)
{
@ -172,30 +153,11 @@ DECLSPEC u32 memcat16c (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS cons
u32 in2 = append[2];
u32 in3 = append[3];
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
const u32 tmp0 = hc_bytealign_be ( 0, in0, offset);
const u32 tmp1 = hc_bytealign_be (in0, in1, offset);
const u32 tmp2 = hc_bytealign_be (in1, in2, offset);
const u32 tmp3 = hc_bytealign_be (in2, in3, offset);
const u32 tmp4 = hc_bytealign_be (in3, 0, offset);
#endif
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
#endif
const u32 tmp0 = hc_byte_perm_S (in0, 0, selector);
const u32 tmp1 = hc_byte_perm_S (in1, in0, selector);
const u32 tmp2 = hc_byte_perm_S (in2, in1, selector);
const u32 tmp3 = hc_byte_perm_S (in3, in2, selector);
const u32 tmp4 = hc_byte_perm_S (0, in3, selector);
#endif
const u32 tmp0 = hc_bytealign_be_S ( 0, in0, offset);
const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset);
const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset);
const u32 tmp3 = hc_bytealign_be_S (in2, in3, offset);
const u32 tmp4 = hc_bytealign_be_S (in3, 0, offset);
u32 carry[4] = { 0 };
@ -336,32 +298,12 @@ DECLSPEC u32 memcat16s (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS cons
u32 in3 = append[3];
u32 in4 = append[4];
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
const u32 tmp0 = hc_bytealign_be ( 0, in0, offset);
const u32 tmp1 = hc_bytealign_be (in0, in1, offset);
const u32 tmp2 = hc_bytealign_be (in1, in2, offset);
const u32 tmp3 = hc_bytealign_be (in2, in3, offset);
const u32 tmp4 = hc_bytealign_be (in3, in4, offset);
const u32 tmp5 = hc_bytealign_be (in4, 0, offset);
#endif
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
#endif
const u32 tmp0 = hc_byte_perm_S (in0, 0, selector);
const u32 tmp1 = hc_byte_perm_S (in1, in0, selector);
const u32 tmp2 = hc_byte_perm_S (in2, in1, selector);
const u32 tmp3 = hc_byte_perm_S (in3, in2, selector);
const u32 tmp4 = hc_byte_perm_S (in4, in3, selector);
const u32 tmp5 = hc_byte_perm_S (0, in4, selector);
#endif
const u32 tmp0 = hc_bytealign_be_S ( 0, in0, offset);
const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset);
const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset);
const u32 tmp3 = hc_bytealign_be_S (in2, in3, offset);
const u32 tmp4 = hc_bytealign_be_S (in3, in4, offset);
const u32 tmp5 = hc_bytealign_be_S (in4, 0, offset);
switch (offset / 4)
{
@ -477,32 +419,12 @@ DECLSPEC u32 memcat16sc (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS con
u32 in3 = append[3];
u32 in4 = append[4];
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
const u32 tmp0 = hc_bytealign_be ( 0, in0, offset);
const u32 tmp1 = hc_bytealign_be (in0, in1, offset);
const u32 tmp2 = hc_bytealign_be (in1, in2, offset);
const u32 tmp3 = hc_bytealign_be (in2, in3, offset);
const u32 tmp4 = hc_bytealign_be (in3, in4, offset);
const u32 tmp5 = hc_bytealign_be (in4, 0, offset);
#endif
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
#endif
const u32 tmp0 = hc_byte_perm_S (in0, 0, selector);
const u32 tmp1 = hc_byte_perm_S (in1, in0, selector);
const u32 tmp2 = hc_byte_perm_S (in2, in1, selector);
const u32 tmp3 = hc_byte_perm_S (in3, in2, selector);
const u32 tmp4 = hc_byte_perm_S (in4, in3, selector);
const u32 tmp5 = hc_byte_perm_S (0, in4, selector);
#endif
const u32 tmp0 = hc_bytealign_be_S ( 0, in0, offset);
const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset);
const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset);
const u32 tmp3 = hc_bytealign_be_S (in2, in3, offset);
const u32 tmp4 = hc_bytealign_be_S (in3, in4, offset);
const u32 tmp5 = hc_bytealign_be_S (in4, 0, offset);
u32 carry[5] = { 0 };
@ -784,30 +706,11 @@ DECLSPEC u32 memcat20 (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS const
u32 in2 = append[2];
u32 in3 = append[3];
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
const u32 tmp0 = hc_bytealign_be_S ( 0, in0, offset);
const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset);
const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset);
const u32 tmp3 = hc_bytealign_be_S (in2, in3, offset);
const u32 tmp4 = hc_bytealign_be_S (in3, 0, offset);
#endif
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
#endif
const u32 tmp0 = hc_byte_perm_S (in0, 0, selector);
const u32 tmp1 = hc_byte_perm_S (in1, in0, selector);
const u32 tmp2 = hc_byte_perm_S (in2, in1, selector);
const u32 tmp3 = hc_byte_perm_S (in3, in2, selector);
const u32 tmp4 = hc_byte_perm_S (0, in3, selector);
#endif
switch (offset / 4)
{
@ -950,30 +853,11 @@ DECLSPEC u32 memcat20_x80 (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS c
u32 in3 = append[3];
u32 in4 = 0x80000000;
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
const u32 tmp0 = hc_bytealign_be_S ( 0, in0, offset);
const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset);
const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset);
const u32 tmp3 = hc_bytealign_be_S (in2, in3, offset);
const u32 tmp4 = hc_bytealign_be_S (in3, in4, offset);
#endif
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
#endif
const u32 tmp0 = hc_byte_perm_S (in0, 0, selector);
const u32 tmp1 = hc_byte_perm_S (in1, in0, selector);
const u32 tmp2 = hc_byte_perm_S (in2, in1, selector);
const u32 tmp3 = hc_byte_perm_S (in3, in2, selector);
const u32 tmp4 = hc_byte_perm_S (in4, in3, selector);
#endif
switch (offset / 4)
{
@ -1116,32 +1000,12 @@ DECLSPEC u32 memcat24 (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS const
u32 in3 = append[3];
u32 in4 = append[4];
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
const u32 tmp0 = hc_bytealign_be_S ( 0, in0, offset);
const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset);
const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset);
const u32 tmp3 = hc_bytealign_be_S (in2, in3, offset);
const u32 tmp4 = hc_bytealign_be_S (in3, in4, offset);
const u32 tmp5 = hc_bytealign_be_S (in4, 0, offset);
#endif
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
#endif
const u32 tmp0 = hc_byte_perm_S (in0, 0, selector);
const u32 tmp1 = hc_byte_perm_S (in1, in0, selector);
const u32 tmp2 = hc_byte_perm_S (in2, in1, selector);
const u32 tmp3 = hc_byte_perm_S (in3, in2, selector);
const u32 tmp4 = hc_byte_perm_S (in4, in3, selector);
const u32 tmp5 = hc_byte_perm_S (0, in4, selector);
#endif
switch (offset / 4)
{

View File

@ -234,34 +234,13 @@ DECLSPEC void make_sc (LOCAL_AS u32 *sc, PRIVATE_AS const u32 *pw, const u32 pw_
u32 i;
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
for (i = 0; i < pd; i++) sc[idx++] = pw[i];
sc[idx++] = pw[i]
| hc_bytealign_be (bl[0], 0, pm4);
for (i = 1; i < bd; i++) sc[idx++] = hc_bytealign_be (bl[i], bl[i - 1], pm4);
sc[idx++] = hc_bytealign_be (sc[0], bl[i - 1], pm4);
for (i = 1; i < 4; i++) sc[idx++] = hc_bytealign_be (sc[i], sc[i - 1], pm4);
sc[idx++] = hc_bytealign_be ( 0, sc[i - 1], pm4);
#endif
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> ((pm4 & 3) * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> ((pm4 & 3) * 8));
#endif
for (i = 0; i < pd; i++) sc[idx++] = pw[i];
sc[idx++] = pw[i]
| hc_byte_perm ( 0, bl[0], selector);
for (i = 1; i < bd; i++) sc[idx++] = hc_byte_perm (bl[i - 1], bl[i], selector);
sc[idx++] = hc_byte_perm (bl[i - 1], sc[0], selector);
for (i = 1; i < 4; i++) sc[idx++] = hc_byte_perm (sc[i - 1], sc[i], selector);
sc[idx++] = hc_byte_perm (sc[i - 1], 0, selector);
#endif
| hc_bytealign_be_S (bl[0], 0, pm4);
for (i = 1; i < bd; i++) sc[idx++] = hc_bytealign_be_S (bl[i], bl[i - 1], pm4);
sc[idx++] = hc_bytealign_be_S (sc[0], bl[i - 1], pm4);
for (i = 1; i < 4; i++) sc[idx++] = hc_bytealign_be_S (sc[i], sc[i - 1], pm4);
sc[idx++] = hc_bytealign_be_S ( 0, sc[i - 1], pm4);
}
}
@ -272,27 +251,10 @@ DECLSPEC void make_pt_with_offset (PRIVATE_AS u32 *pt, const u32 offset, LOCAL_A
const u32 om = m % 4;
const u32 od = m / 4;
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
pt[0] = hc_bytealign_be (sc[od + 1], sc[od + 0], om);
pt[1] = hc_bytealign_be (sc[od + 2], sc[od + 1], om);
pt[2] = hc_bytealign_be (sc[od + 3], sc[od + 2], om);
pt[3] = hc_bytealign_be (sc[od + 4], sc[od + 3], om);
#endif
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> ((om & 3) * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> ((om & 3) * 8));
#endif
pt[0] = hc_byte_perm (sc[od + 0], sc[od + 1], selector);
pt[1] = hc_byte_perm (sc[od + 1], sc[od + 2], selector);
pt[2] = hc_byte_perm (sc[od + 2], sc[od + 3], selector);
pt[3] = hc_byte_perm (sc[od + 3], sc[od + 4], selector);
#endif
pt[0] = hc_bytealign_be_S (sc[od + 1], sc[od + 0], om);
pt[1] = hc_bytealign_be_S (sc[od + 2], sc[od + 1], om);
pt[2] = hc_bytealign_be_S (sc[od + 3], sc[od + 2], om);
pt[3] = hc_bytealign_be_S (sc[od + 4], sc[od + 3], om);
}
DECLSPEC void make_w_with_offset (PRIVATE_AS ctx_t *ctx, const u32 W_len, const u32 offset, LOCAL_AS const u32 *sc, const u32 pwbl_len, PRIVATE_AS u32 *iv, PRIVATE_AS const u32 *ks, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4)

View File

@ -42,24 +42,8 @@ DECLSPEC void memcat8c_be (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u3
u32 tmp0;
u32 tmp1;
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
tmp0 = hc_bytealign_be (0, append, func_len);
tmp1 = hc_bytealign_be (append, 0, func_len);
#endif
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> ((func_len & 3) * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> ((func_len & 3) * 8));
#endif
tmp0 = hc_byte_perm (append, 0, selector);
tmp1 = hc_byte_perm (0, append, selector);
#endif
tmp0 = hc_bytealign_be_S (0, append, func_len);
tmp1 = hc_bytealign_be_S (append, 0, func_len);
u32 carry = 0;

View File

@ -37,24 +37,8 @@ DECLSPEC void memcat8c_be (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u3
u32 tmp0;
u32 tmp1;
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
tmp0 = hc_bytealign_be (0, append, func_len);
tmp1 = hc_bytealign_be (append, 0, func_len);
#endif
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> ((func_len & 3) * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> ((func_len & 3) * 8));
#endif
tmp0 = hc_byte_perm (append, 0, selector);
tmp1 = hc_byte_perm (0, append, selector);
#endif
tmp0 = hc_bytealign_be_S (0, append, func_len);
tmp1 = hc_bytealign_be_S (append, 0, func_len);
u32 carry = 0;

View File

@ -51,7 +51,6 @@ DECLSPEC void memcat64c_be (PRIVATE_AS u32x *block, const u32 offset, PRIVATE_AS
u32x tmp15;
u32x tmp16;
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
tmp00 = hc_bytealign_be ( 0, carry[ 0], offset);
tmp01 = hc_bytealign_be (carry[ 0], carry[ 1], offset);
tmp02 = hc_bytealign_be (carry[ 1], carry[ 2], offset);
@ -69,36 +68,6 @@ DECLSPEC void memcat64c_be (PRIVATE_AS u32x *block, const u32 offset, PRIVATE_AS
tmp14 = hc_bytealign_be (carry[13], carry[14], offset);
tmp15 = hc_bytealign_be (carry[14], carry[15], offset);
tmp16 = hc_bytealign_be (carry[15], 0, offset);
#endif
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
#endif
tmp00 = hc_byte_perm (carry[ 0], 0, selector);
tmp01 = hc_byte_perm (carry[ 1], carry[ 0], selector);
tmp02 = hc_byte_perm (carry[ 2], carry[ 1], selector);
tmp03 = hc_byte_perm (carry[ 3], carry[ 2], selector);
tmp04 = hc_byte_perm (carry[ 4], carry[ 3], selector);
tmp05 = hc_byte_perm (carry[ 5], carry[ 4], selector);
tmp06 = hc_byte_perm (carry[ 6], carry[ 5], selector);
tmp07 = hc_byte_perm (carry[ 7], carry[ 6], selector);
tmp08 = hc_byte_perm (carry[ 8], carry[ 7], selector);
tmp09 = hc_byte_perm (carry[ 9], carry[ 8], selector);
tmp10 = hc_byte_perm (carry[10], carry[ 9], selector);
tmp11 = hc_byte_perm (carry[11], carry[10], selector);
tmp12 = hc_byte_perm (carry[12], carry[11], selector);
tmp13 = hc_byte_perm (carry[13], carry[12], selector);
tmp14 = hc_byte_perm (carry[14], carry[13], selector);
tmp15 = hc_byte_perm (carry[15], carry[14], selector);
tmp16 = hc_byte_perm ( 0, carry[15], selector);
#endif
carry[ 0] = 0;
carry[ 1] = 0;

View File

@ -49,7 +49,6 @@ DECLSPEC void memcat64c_be (PRIVATE_AS u32x *block, const u32 offset, PRIVATE_AS
u32x tmp15;
u32x tmp16;
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
tmp00 = hc_bytealign_be ( 0, carry[ 0], offset);
tmp01 = hc_bytealign_be (carry[ 0], carry[ 1], offset);
tmp02 = hc_bytealign_be (carry[ 1], carry[ 2], offset);
@ -67,36 +66,6 @@ DECLSPEC void memcat64c_be (PRIVATE_AS u32x *block, const u32 offset, PRIVATE_AS
tmp14 = hc_bytealign_be (carry[13], carry[14], offset);
tmp15 = hc_bytealign_be (carry[14], carry[15], offset);
tmp16 = hc_bytealign_be (carry[15], 0, offset);
#endif
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
#endif
tmp00 = hc_byte_perm (carry[ 0], 0, selector);
tmp01 = hc_byte_perm (carry[ 1], carry[ 0], selector);
tmp02 = hc_byte_perm (carry[ 2], carry[ 1], selector);
tmp03 = hc_byte_perm (carry[ 3], carry[ 2], selector);
tmp04 = hc_byte_perm (carry[ 4], carry[ 3], selector);
tmp05 = hc_byte_perm (carry[ 5], carry[ 4], selector);
tmp06 = hc_byte_perm (carry[ 6], carry[ 5], selector);
tmp07 = hc_byte_perm (carry[ 7], carry[ 6], selector);
tmp08 = hc_byte_perm (carry[ 8], carry[ 7], selector);
tmp09 = hc_byte_perm (carry[ 9], carry[ 8], selector);
tmp10 = hc_byte_perm (carry[10], carry[ 9], selector);
tmp11 = hc_byte_perm (carry[11], carry[10], selector);
tmp12 = hc_byte_perm (carry[12], carry[11], selector);
tmp13 = hc_byte_perm (carry[13], carry[12], selector);
tmp14 = hc_byte_perm (carry[14], carry[13], selector);
tmp15 = hc_byte_perm (carry[15], carry[14], selector);
tmp16 = hc_byte_perm ( 0, carry[15], selector);
#endif
carry[ 0] = 0;
carry[ 1] = 0;

View File

@ -48,7 +48,6 @@ DECLSPEC void memcat64c_be (PRIVATE_AS u32x *block, const u32 offset, PRIVATE_AS
u32x tmp15;
u32x tmp16;
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
tmp00 = hc_bytealign_be ( 0, carry[ 0], offset);
tmp01 = hc_bytealign_be (carry[ 0], carry[ 1], offset);
tmp02 = hc_bytealign_be (carry[ 1], carry[ 2], offset);
@ -66,36 +65,6 @@ DECLSPEC void memcat64c_be (PRIVATE_AS u32x *block, const u32 offset, PRIVATE_AS
tmp14 = hc_bytealign_be (carry[13], carry[14], offset);
tmp15 = hc_bytealign_be (carry[14], carry[15], offset);
tmp16 = hc_bytealign_be (carry[15], 0, offset);
#endif
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
#endif
tmp00 = hc_byte_perm (carry[ 0], 0, selector);
tmp01 = hc_byte_perm (carry[ 1], carry[ 0], selector);
tmp02 = hc_byte_perm (carry[ 2], carry[ 1], selector);
tmp03 = hc_byte_perm (carry[ 3], carry[ 2], selector);
tmp04 = hc_byte_perm (carry[ 4], carry[ 3], selector);
tmp05 = hc_byte_perm (carry[ 5], carry[ 4], selector);
tmp06 = hc_byte_perm (carry[ 6], carry[ 5], selector);
tmp07 = hc_byte_perm (carry[ 7], carry[ 6], selector);
tmp08 = hc_byte_perm (carry[ 8], carry[ 7], selector);
tmp09 = hc_byte_perm (carry[ 9], carry[ 8], selector);
tmp10 = hc_byte_perm (carry[10], carry[ 9], selector);
tmp11 = hc_byte_perm (carry[11], carry[10], selector);
tmp12 = hc_byte_perm (carry[12], carry[11], selector);
tmp13 = hc_byte_perm (carry[13], carry[12], selector);
tmp14 = hc_byte_perm (carry[14], carry[13], selector);
tmp15 = hc_byte_perm (carry[15], carry[14], selector);
tmp16 = hc_byte_perm ( 0, carry[15], selector);
#endif
carry[ 0] = 0;
carry[ 1] = 0;

View File

@ -42,31 +42,6 @@ typedef struct gpg_tmp
} gpg_tmp_t;
DECLSPEC u32 hc_bytealign_le_S (const u32 a, const u32 b, const int c)
{
const int c_mod_4 = c & 3;
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
const u32 r = l32_from_64_S ((v64_from_v32ab_S (b, a) >> (c_mod_4 * 8)));
#endif
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> (c_mod_4 * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> (c_mod_4 * 8));
#endif
const u32 r = hc_byte_perm (b, a, selector);
#endif
return r;
}
DECLSPEC void memcat_le_S (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS const u32 *append, u32 len)
{
const u32 start_index = (offset - 1) >> 2;
@ -74,11 +49,11 @@ DECLSPEC void memcat_le_S (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS c
const int off_mod_4 = offset & 3;
const int off_minus_4 = 4 - off_mod_4;
block[start_index] |= hc_bytealign_le_S (append[0], 0, off_minus_4);
block[start_index] |= hc_bytealign_be_S (append[0], 0, off_minus_4);
for (u32 idx = 1; idx < count; idx++)
{
block[start_index + idx] = hc_bytealign_le_S (append[idx], append[idx - 1], off_minus_4);
block[start_index + idx] = hc_bytealign_be_S (append[idx], append[idx - 1], off_minus_4);
}
}
@ -188,11 +163,11 @@ DECLSPEC int check_decoded_data (PRIVATE_AS u32 *decoded_data, const u32 decoded
u32 expected_sha1[5];
expected_sha1[0] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 1], decoded_data[sha1_u32_off + 0], sha1_byte_off);
expected_sha1[1] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 2], decoded_data[sha1_u32_off + 1], sha1_byte_off);
expected_sha1[2] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 3], decoded_data[sha1_u32_off + 2], sha1_byte_off);
expected_sha1[3] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 4], decoded_data[sha1_u32_off + 3], sha1_byte_off);
expected_sha1[4] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 5], decoded_data[sha1_u32_off + 4], sha1_byte_off);
expected_sha1[0] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 1], decoded_data[sha1_u32_off + 0], sha1_byte_off);
expected_sha1[1] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 2], decoded_data[sha1_u32_off + 1], sha1_byte_off);
expected_sha1[2] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 3], decoded_data[sha1_u32_off + 2], sha1_byte_off);
expected_sha1[3] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 4], decoded_data[sha1_u32_off + 3], sha1_byte_off);
expected_sha1[4] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 5], decoded_data[sha1_u32_off + 4], sha1_byte_off);
memzero_le_S (decoded_data, sha1_byte_off, 384 * sizeof(u32));

View File

@ -47,31 +47,6 @@ typedef struct gpg_tmp
} gpg_tmp_t;
DECLSPEC u32 hc_bytealign_le_S (const u32 a, const u32 b, const int c)
{
const int c_mod_4 = c & 3;
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
const u32 r = l32_from_64_S ((v64_from_v32ab_S (b, a) >> (c_mod_4 * 8)));
#endif
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> (c_mod_4 * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> (c_mod_4 * 8));
#endif
const u32 r = hc_byte_perm (b, a, selector);
#endif
return r;
}
DECLSPEC void memcat_le_S (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS const u32 *append, u32 len)
{
const u32 start_index = (offset - 1) >> 2;
@ -79,11 +54,11 @@ DECLSPEC void memcat_le_S (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS c
const int off_mod_4 = offset & 3;
const int off_minus_4 = 4 - off_mod_4;
block[start_index] |= hc_bytealign_le_S (append[0], 0, off_minus_4);
block[start_index] |= hc_bytealign_be_S (append[0], 0, off_minus_4);
for (u32 idx = 1; idx < count; idx++)
{
block[start_index + idx] = hc_bytealign_le_S (append[idx], append[idx - 1], off_minus_4);
block[start_index + idx] = hc_bytealign_be_S (append[idx], append[idx - 1], off_minus_4);
}
}
@ -193,11 +168,11 @@ DECLSPEC int check_decoded_data (PRIVATE_AS u32 *decoded_data, const u32 decoded
u32 expected_sha1[5];
expected_sha1[0] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 1], decoded_data[sha1_u32_off + 0], sha1_byte_off);
expected_sha1[1] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 2], decoded_data[sha1_u32_off + 1], sha1_byte_off);
expected_sha1[2] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 3], decoded_data[sha1_u32_off + 2], sha1_byte_off);
expected_sha1[3] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 4], decoded_data[sha1_u32_off + 3], sha1_byte_off);
expected_sha1[4] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 5], decoded_data[sha1_u32_off + 4], sha1_byte_off);
expected_sha1[0] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 1], decoded_data[sha1_u32_off + 0], sha1_byte_off);
expected_sha1[1] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 2], decoded_data[sha1_u32_off + 1], sha1_byte_off);
expected_sha1[2] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 3], decoded_data[sha1_u32_off + 2], sha1_byte_off);
expected_sha1[3] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 4], decoded_data[sha1_u32_off + 3], sha1_byte_off);
expected_sha1[4] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 5], decoded_data[sha1_u32_off + 4], sha1_byte_off);
memzero_le_S (decoded_data, sha1_byte_off, 384 * sizeof(u32));

View File

@ -43,31 +43,6 @@ typedef struct gpg_tmp
} gpg_tmp_t;
DECLSPEC u32 hc_bytealign_le_S (const u32 a, const u32 b, const int c)
{
const int c_mod_4 = c & 3;
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
const u32 r = l32_from_64_S ((v64_from_v32ab_S (b, a) >> (c_mod_4 * 8)));
#endif
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> (c_mod_4 * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> (c_mod_4 * 8));
#endif
const u32 r = hc_byte_perm (b, a, selector);
#endif
return r;
}
DECLSPEC void memcat_le_S (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS const u32 *append, u32 len)
{
const u32 start_index = (offset - 1) >> 2;
@ -75,11 +50,11 @@ DECLSPEC void memcat_le_S (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS c
const int off_mod_4 = offset & 3;
const int off_minus_4 = 4 - off_mod_4;
block[start_index] |= hc_bytealign_le_S (append[0], 0, off_minus_4);
block[start_index] |= hc_bytealign_be_S (append[0], 0, off_minus_4);
for (u32 idx = 1; idx < count; idx++)
{
block[start_index + idx] = hc_bytealign_le_S (append[idx], append[idx - 1], off_minus_4);
block[start_index + idx] = hc_bytealign_be_S (append[idx], append[idx - 1], off_minus_4);
}
}
@ -189,11 +164,11 @@ DECLSPEC int check_decoded_data (PRIVATE_AS u32 *decoded_data, const u32 decoded
u32 expected_sha1[5];
expected_sha1[0] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 1], decoded_data[sha1_u32_off + 0], sha1_byte_off);
expected_sha1[1] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 2], decoded_data[sha1_u32_off + 1], sha1_byte_off);
expected_sha1[2] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 3], decoded_data[sha1_u32_off + 2], sha1_byte_off);
expected_sha1[3] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 4], decoded_data[sha1_u32_off + 3], sha1_byte_off);
expected_sha1[4] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 5], decoded_data[sha1_u32_off + 4], sha1_byte_off);
expected_sha1[0] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 1], decoded_data[sha1_u32_off + 0], sha1_byte_off);
expected_sha1[1] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 2], decoded_data[sha1_u32_off + 1], sha1_byte_off);
expected_sha1[2] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 3], decoded_data[sha1_u32_off + 2], sha1_byte_off);
expected_sha1[3] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 4], decoded_data[sha1_u32_off + 3], sha1_byte_off);
expected_sha1[4] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 5], decoded_data[sha1_u32_off + 4], sha1_byte_off);
memzero_le_S (decoded_data, sha1_byte_off, 384 * sizeof(u32));

View File

@ -43,31 +43,6 @@ typedef struct gpg_tmp
} gpg_tmp_t;
DECLSPEC u32 hc_bytealign_le_S (const u32 a, const u32 b, const int c)
{
const int c_mod_4 = c & 3;
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
const u32 r = l32_from_64_S ((v64_from_v32ab_S (b, a) >> (c_mod_4 * 8)));
#endif
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> (c_mod_4 * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> (c_mod_4 * 8));
#endif
const u32 r = hc_byte_perm (b, a, selector);
#endif
return r;
}
DECLSPEC void memcat_le_S (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS const u32 *append, u32 len)
{
const u32 start_index = (offset - 1) >> 2;
@ -75,11 +50,11 @@ DECLSPEC void memcat_le_S (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS c
const int off_mod_4 = offset & 3;
const int off_minus_4 = 4 - off_mod_4;
block[start_index] |= hc_bytealign_le_S (append[0], 0, off_minus_4);
block[start_index] |= hc_bytealign_be_S (append[0], 0, off_minus_4);
for (u32 idx = 1; idx < count; idx++)
{
block[start_index + idx] = hc_bytealign_le_S (append[idx], append[idx - 1], off_minus_4);
block[start_index + idx] = hc_bytealign_be_S (append[idx], append[idx - 1], off_minus_4);
}
}
@ -168,11 +143,11 @@ DECLSPEC int check_decoded_data (PRIVATE_AS u32 *decoded_data, const u32 decoded
u32 expected_sha1[5];
expected_sha1[0] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 1], decoded_data[sha1_u32_off + 0], sha1_byte_off);
expected_sha1[1] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 2], decoded_data[sha1_u32_off + 1], sha1_byte_off);
expected_sha1[2] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 3], decoded_data[sha1_u32_off + 2], sha1_byte_off);
expected_sha1[3] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 4], decoded_data[sha1_u32_off + 3], sha1_byte_off);
expected_sha1[4] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 5], decoded_data[sha1_u32_off + 4], sha1_byte_off);
expected_sha1[0] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 1], decoded_data[sha1_u32_off + 0], sha1_byte_off);
expected_sha1[1] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 2], decoded_data[sha1_u32_off + 1], sha1_byte_off);
expected_sha1[2] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 3], decoded_data[sha1_u32_off + 2], sha1_byte_off);
expected_sha1[3] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 4], decoded_data[sha1_u32_off + 3], sha1_byte_off);
expected_sha1[4] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 5], decoded_data[sha1_u32_off + 4], sha1_byte_off);

View File

@ -145,24 +145,8 @@ DECLSPEC void memcat8c_be (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u3
u32 tmp0;
u32 tmp1;
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
tmp0 = hc_bytealign_be (0, append, func_len);
tmp1 = hc_bytealign_be (append, 0, func_len);
#endif
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> ((func_len & 3) * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> ((func_len & 3) * 8));
#endif
tmp0 = hc_byte_perm (append, 0, selector);
tmp1 = hc_byte_perm (0, append, selector);
#endif
tmp0 = hc_bytealign_be_S (0, append, func_len);
tmp1 = hc_bytealign_be_S (append, 0, func_len);
u32 carry = 0;

View File

@ -58,24 +58,8 @@ DECLSPEC void memcat8c_be (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u3
u32 tmp0;
u32 tmp1;
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
tmp0 = hc_bytealign_be (0, append, func_len);
tmp1 = hc_bytealign_be (append, 0, func_len);
#endif
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> ((func_len & 3) * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> ((func_len & 3) * 8));
#endif
tmp0 = hc_byte_perm (append, 0, selector);
tmp1 = hc_byte_perm (0, append, selector);
#endif
tmp0 = hc_bytealign_be_S (0, append, func_len);
tmp1 = hc_bytealign_be_S (append, 0, func_len);
u32 carry = 0;

View File

@ -29,7 +29,6 @@ DECLSPEC void shift_buffer_by_offset (PRIVATE_AS u32 *w0, const u32 offset)
{
const int offset_switch = offset / 4;
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
switch (offset_switch)
{
case 0:
@ -67,56 +66,6 @@ DECLSPEC void shift_buffer_by_offset (PRIVATE_AS u32 *w0, const u32 offset)
w0[0] = 0;
break;
}
#endif
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S(0x0706050403020100UL >> ((offset & 3) * 8));
#endif
switch (offset_switch)
{
case 0:
w0[3] = hc_byte_perm_S (w0[3], w0[2], selector);
w0[2] = hc_byte_perm_S (w0[2], w0[1], selector);
w0[1] = hc_byte_perm_S (w0[1], w0[0], selector);
w0[0] = hc_byte_perm_S (w0[0], 0, selector);
break;
case 1:
w0[3] = hc_byte_perm_S (w0[2], w0[1], selector);
w0[2] = hc_byte_perm_S (w0[1], w0[0], selector);
w0[1] = hc_byte_perm_S (w0[0], 0, selector);
w0[0] = 0;
break;
case 2:
w0[3] = hc_byte_perm_S (w0[1], w0[0], selector);
w0[2] = hc_byte_perm_S (w0[0], 0, selector);
w0[1] = 0;
w0[0] = 0;
break;
case 3:
w0[3] = hc_byte_perm_S (w0[0], 0, selector);
w0[2] = 0;
w0[1] = 0;
w0[0] = 0;
break;
default:
w0[3] = 0;
w0[2] = 0;
w0[1] = 0;
w0[0] = 0;
break;
}
#endif
}
DECLSPEC void aes256_scrt_format (PRIVATE_AS u32 *aes_ks, PRIVATE_AS u32 *pw, const u32 pw_len, PRIVATE_AS u32 *hash, PRIVATE_AS u32 *out, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4)

View File

@ -31,7 +31,6 @@ DECLSPEC void shift_buffer_by_offset (PRIVATE_AS u32 *w0, const u32 offset)
{
const int offset_switch = offset / 4;
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
switch (offset_switch)
{
case 0:
@ -69,56 +68,6 @@ DECLSPEC void shift_buffer_by_offset (PRIVATE_AS u32 *w0, const u32 offset)
w0[0] = 0;
break;
}
#endif
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S(0x0706050403020100UL >> ((offset & 3) * 8));
#endif
switch (offset_switch)
{
case 0:
w0[3] = hc_byte_perm_S (w0[3], w0[2], selector);
w0[2] = hc_byte_perm_S (w0[2], w0[1], selector);
w0[1] = hc_byte_perm_S (w0[1], w0[0], selector);
w0[0] = hc_byte_perm_S (w0[0], 0, selector);
break;
case 1:
w0[3] = hc_byte_perm_S (w0[2], w0[1], selector);
w0[2] = hc_byte_perm_S (w0[1], w0[0], selector);
w0[1] = hc_byte_perm_S (w0[0], 0, selector);
w0[0] = 0;
break;
case 2:
w0[3] = hc_byte_perm_S (w0[1], w0[0], selector);
w0[2] = hc_byte_perm_S (w0[0], 0, selector);
w0[1] = 0;
w0[0] = 0;
break;
case 3:
w0[3] = hc_byte_perm_S (w0[0], 0, selector);
w0[2] = 0;
w0[1] = 0;
w0[0] = 0;
break;
default:
w0[3] = 0;
w0[2] = 0;
w0[1] = 0;
w0[0] = 0;
break;
}
#endif
}
DECLSPEC void aes256_scrt_format (PRIVATE_AS u32 *aes_ks, PRIVATE_AS u32 *pw, const u32 pw_len, PRIVATE_AS u32 *hash, PRIVATE_AS u32 *out, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4)

View File

@ -26,7 +26,6 @@ DECLSPEC void shift_buffer_by_offset (PRIVATE_AS u32 *w0, const u32 offset)
{
const int offset_switch = offset / 4;
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
switch (offset_switch)
{
case 0:
@ -64,56 +63,6 @@ DECLSPEC void shift_buffer_by_offset (PRIVATE_AS u32 *w0, const u32 offset)
w0[0] = 0;
break;
}
#endif
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S(0x0706050403020100UL >> ((offset & 3) * 8));
#endif
switch (offset_switch)
{
case 0:
w0[3] = hc_byte_perm_S (w0[3], w0[2], selector);
w0[2] = hc_byte_perm_S (w0[2], w0[1], selector);
w0[1] = hc_byte_perm_S (w0[1], w0[0], selector);
w0[0] = hc_byte_perm_S (w0[0], 0, selector);
break;
case 1:
w0[3] = hc_byte_perm_S (w0[2], w0[1], selector);
w0[2] = hc_byte_perm_S (w0[1], w0[0], selector);
w0[1] = hc_byte_perm_S (w0[0], 0, selector);
w0[0] = 0;
break;
case 2:
w0[3] = hc_byte_perm_S (w0[1], w0[0], selector);
w0[2] = hc_byte_perm_S (w0[0], 0, selector);
w0[1] = 0;
w0[0] = 0;
break;
case 3:
w0[3] = hc_byte_perm_S (w0[0], 0, selector);
w0[2] = 0;
w0[1] = 0;
w0[0] = 0;
break;
default:
w0[3] = 0;
w0[2] = 0;
w0[1] = 0;
w0[0] = 0;
break;
}
#endif
}
DECLSPEC void aes256_scrt_format (PRIVATE_AS u32 *aes_ks, PRIVATE_AS u32 *pw, const u32 pw_len, PRIVATE_AS u32 *hash, PRIVATE_AS u32 *out, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4)

View File

@ -31,7 +31,6 @@ DECLSPEC void shift_buffer_by_offset (PRIVATE_AS u32 *w0, const u32 offset)
{
const int offset_switch = offset / 4;
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
switch (offset_switch)
{
case 0:
@ -69,56 +68,6 @@ DECLSPEC void shift_buffer_by_offset (PRIVATE_AS u32 *w0, const u32 offset)
w0[0] = 0;
break;
}
#endif
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S(0x0706050403020100UL >> ((offset & 3) * 8));
#endif
switch (offset_switch)
{
case 0:
w0[3] = hc_byte_perm_S (w0[3], w0[2], selector);
w0[2] = hc_byte_perm_S (w0[2], w0[1], selector);
w0[1] = hc_byte_perm_S (w0[1], w0[0], selector);
w0[0] = hc_byte_perm_S (w0[0], 0, selector);
break;
case 1:
w0[3] = hc_byte_perm_S (w0[2], w0[1], selector);
w0[2] = hc_byte_perm_S (w0[1], w0[0], selector);
w0[1] = hc_byte_perm_S (w0[0], 0, selector);
w0[0] = 0;
break;
case 2:
w0[3] = hc_byte_perm_S (w0[1], w0[0], selector);
w0[2] = hc_byte_perm_S (w0[0], 0, selector);
w0[1] = 0;
w0[0] = 0;
break;
case 3:
w0[3] = hc_byte_perm_S (w0[0], 0, selector);
w0[2] = 0;
w0[1] = 0;
w0[0] = 0;
break;
default:
w0[3] = 0;
w0[2] = 0;
w0[1] = 0;
w0[0] = 0;
break;
}
#endif
}
DECLSPEC void aes256_scrt_format (PRIVATE_AS u32 *aes_ks, PRIVATE_AS u32 *pw, const u32 pw_len, PRIVATE_AS u32 *hash, PRIVATE_AS u32 *out, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4)

View File

@ -28,7 +28,6 @@ DECLSPEC void shift_buffer_by_offset (PRIVATE_AS u32 *w0, const u32 offset)
{
const int offset_switch = offset / 4;
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
switch (offset_switch)
{
case 0:
@ -66,56 +65,6 @@ DECLSPEC void shift_buffer_by_offset (PRIVATE_AS u32 *w0, const u32 offset)
w0[0] = 0;
break;
}
#endif
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S(0x0706050403020100UL >> ((offset & 3) * 8));
#endif
switch (offset_switch)
{
case 0:
w0[3] = hc_byte_perm_S (w0[3], w0[2], selector);
w0[2] = hc_byte_perm_S (w0[2], w0[1], selector);
w0[1] = hc_byte_perm_S (w0[1], w0[0], selector);
w0[0] = hc_byte_perm_S (w0[0], 0, selector);
break;
case 1:
w0[3] = hc_byte_perm_S (w0[2], w0[1], selector);
w0[2] = hc_byte_perm_S (w0[1], w0[0], selector);
w0[1] = hc_byte_perm_S (w0[0], 0, selector);
w0[0] = 0;
break;
case 2:
w0[3] = hc_byte_perm_S (w0[1], w0[0], selector);
w0[2] = hc_byte_perm_S (w0[0], 0, selector);
w0[1] = 0;
w0[0] = 0;
break;
case 3:
w0[3] = hc_byte_perm_S (w0[0], 0, selector);
w0[2] = 0;
w0[1] = 0;
w0[0] = 0;
break;
default:
w0[3] = 0;
w0[2] = 0;
w0[1] = 0;
w0[0] = 0;
break;
}
#endif
}
DECLSPEC void aes256_scrt_format (PRIVATE_AS u32 *aes_ks, PRIVATE_AS u32 *pw, const u32 pw_len, PRIVATE_AS u32 *hash, PRIVATE_AS u32 *out, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4)

View File

@ -31,7 +31,6 @@ DECLSPEC void shift_buffer_by_offset (PRIVATE_AS u32 *w0, const u32 offset)
{
const int offset_switch = offset / 4;
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
switch (offset_switch)
{
case 0:
@ -69,56 +68,6 @@ DECLSPEC void shift_buffer_by_offset (PRIVATE_AS u32 *w0, const u32 offset)
w0[0] = 0;
break;
}
#endif
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S(0x0706050403020100UL >> ((offset & 3) * 8));
#endif
switch (offset_switch)
{
case 0:
w0[3] = hc_byte_perm_S (w0[3], w0[2], selector);
w0[2] = hc_byte_perm_S (w0[2], w0[1], selector);
w0[1] = hc_byte_perm_S (w0[1], w0[0], selector);
w0[0] = hc_byte_perm_S (w0[0], 0, selector);
break;
case 1:
w0[3] = hc_byte_perm_S (w0[2], w0[1], selector);
w0[2] = hc_byte_perm_S (w0[1], w0[0], selector);
w0[1] = hc_byte_perm_S (w0[0], 0, selector);
w0[0] = 0;
break;
case 2:
w0[3] = hc_byte_perm_S (w0[1], w0[0], selector);
w0[2] = hc_byte_perm_S (w0[0], 0, selector);
w0[1] = 0;
w0[0] = 0;
break;
case 3:
w0[3] = hc_byte_perm_S (w0[0], 0, selector);
w0[2] = 0;
w0[1] = 0;
w0[0] = 0;
break;
default:
w0[3] = 0;
w0[2] = 0;
w0[1] = 0;
w0[0] = 0;
break;
}
#endif
}
DECLSPEC void aes256_scrt_format (PRIVATE_AS u32 *aes_ks, PRIVATE_AS u32 *pw, const u32 pw_len, PRIVATE_AS u32 *hash, PRIVATE_AS u32 *out, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4)