mirror of
https://github.com/hashcat/hashcat.git
synced 2025-02-17 01:52:06 +00:00
Fix rule-engine optimizations for vector datatypes
This commit is contained in:
parent
7a42c81123
commit
1ed89eb261
160
OpenCL/inc_rp.cl
160
OpenCL/inc_rp.cl
@ -774,59 +774,59 @@ inline void append_block8 (const u32 offset, u32 buf0[4], u32 buf1[4], const u32
|
|||||||
switch (offset / 4)
|
switch (offset / 4)
|
||||||
{
|
{
|
||||||
case 0:
|
case 0:
|
||||||
s8 = amd_bytealign ( 0, src_r1[3], offset_minus_4);
|
s8 = amd_bytealign_S ( 0, src_r1[3], offset_minus_4);
|
||||||
s7 = amd_bytealign (src_r1[3], src_r1[2], offset_minus_4);
|
s7 = amd_bytealign_S (src_r1[3], src_r1[2], offset_minus_4);
|
||||||
s6 = amd_bytealign (src_r1[2], src_r1[1], offset_minus_4);
|
s6 = amd_bytealign_S (src_r1[2], src_r1[1], offset_minus_4);
|
||||||
s5 = amd_bytealign (src_r1[1], src_r1[0], offset_minus_4);
|
s5 = amd_bytealign_S (src_r1[1], src_r1[0], offset_minus_4);
|
||||||
s4 = amd_bytealign (src_r1[0], src_r0[3], offset_minus_4);
|
s4 = amd_bytealign_S (src_r1[0], src_r0[3], offset_minus_4);
|
||||||
s3 = amd_bytealign (src_r0[3], src_r0[2], offset_minus_4);
|
s3 = amd_bytealign_S (src_r0[3], src_r0[2], offset_minus_4);
|
||||||
s2 = amd_bytealign (src_r0[2], src_r0[1], offset_minus_4);
|
s2 = amd_bytealign_S (src_r0[2], src_r0[1], offset_minus_4);
|
||||||
s1 = amd_bytealign (src_r0[1], src_r0[0], offset_minus_4);
|
s1 = amd_bytealign_S (src_r0[1], src_r0[0], offset_minus_4);
|
||||||
s0 = amd_bytealign (src_r0[0], 0, offset_minus_4);
|
s0 = amd_bytealign_S (src_r0[0], 0, offset_minus_4);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 1:
|
case 1:
|
||||||
s8 = amd_bytealign ( 0, src_r1[2], offset_minus_4);
|
s8 = amd_bytealign_S ( 0, src_r1[2], offset_minus_4);
|
||||||
s7 = amd_bytealign (src_r1[2], src_r1[1], offset_minus_4);
|
s7 = amd_bytealign_S (src_r1[2], src_r1[1], offset_minus_4);
|
||||||
s6 = amd_bytealign (src_r1[1], src_r1[0], offset_minus_4);
|
s6 = amd_bytealign_S (src_r1[1], src_r1[0], offset_minus_4);
|
||||||
s5 = amd_bytealign (src_r1[0], src_r0[3], offset_minus_4);
|
s5 = amd_bytealign_S (src_r1[0], src_r0[3], offset_minus_4);
|
||||||
s4 = amd_bytealign (src_r0[3], src_r0[2], offset_minus_4);
|
s4 = amd_bytealign_S (src_r0[3], src_r0[2], offset_minus_4);
|
||||||
s3 = amd_bytealign (src_r0[2], src_r0[1], offset_minus_4);
|
s3 = amd_bytealign_S (src_r0[2], src_r0[1], offset_minus_4);
|
||||||
s2 = amd_bytealign (src_r0[1], src_r0[0], offset_minus_4);
|
s2 = amd_bytealign_S (src_r0[1], src_r0[0], offset_minus_4);
|
||||||
s1 = amd_bytealign (src_r0[0], 0, offset_minus_4);
|
s1 = amd_bytealign_S (src_r0[0], 0, offset_minus_4);
|
||||||
s0 = 0;
|
s0 = 0;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 2:
|
case 2:
|
||||||
s8 = amd_bytealign ( 0, src_r1[1], offset_minus_4);
|
s8 = amd_bytealign_S ( 0, src_r1[1], offset_minus_4);
|
||||||
s7 = amd_bytealign (src_r1[1], src_r1[0], offset_minus_4);
|
s7 = amd_bytealign_S (src_r1[1], src_r1[0], offset_minus_4);
|
||||||
s6 = amd_bytealign (src_r1[0], src_r0[3], offset_minus_4);
|
s6 = amd_bytealign_S (src_r1[0], src_r0[3], offset_minus_4);
|
||||||
s5 = amd_bytealign (src_r0[3], src_r0[2], offset_minus_4);
|
s5 = amd_bytealign_S (src_r0[3], src_r0[2], offset_minus_4);
|
||||||
s4 = amd_bytealign (src_r0[2], src_r0[1], offset_minus_4);
|
s4 = amd_bytealign_S (src_r0[2], src_r0[1], offset_minus_4);
|
||||||
s3 = amd_bytealign (src_r0[1], src_r0[0], offset_minus_4);
|
s3 = amd_bytealign_S (src_r0[1], src_r0[0], offset_minus_4);
|
||||||
s2 = amd_bytealign (src_r0[0], 0, offset_minus_4);
|
s2 = amd_bytealign_S (src_r0[0], 0, offset_minus_4);
|
||||||
s1 = 0;
|
s1 = 0;
|
||||||
s0 = 0;
|
s0 = 0;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 3:
|
case 3:
|
||||||
s8 = amd_bytealign ( 0, src_r1[0], offset_minus_4);
|
s8 = amd_bytealign_S ( 0, src_r1[0], offset_minus_4);
|
||||||
s7 = amd_bytealign (src_r1[0], src_r0[3], offset_minus_4);
|
s7 = amd_bytealign_S (src_r1[0], src_r0[3], offset_minus_4);
|
||||||
s6 = amd_bytealign (src_r0[3], src_r0[2], offset_minus_4);
|
s6 = amd_bytealign_S (src_r0[3], src_r0[2], offset_minus_4);
|
||||||
s5 = amd_bytealign (src_r0[2], src_r0[1], offset_minus_4);
|
s5 = amd_bytealign_S (src_r0[2], src_r0[1], offset_minus_4);
|
||||||
s4 = amd_bytealign (src_r0[1], src_r0[0], offset_minus_4);
|
s4 = amd_bytealign_S (src_r0[1], src_r0[0], offset_minus_4);
|
||||||
s3 = amd_bytealign (src_r0[0], 0, offset_minus_4);
|
s3 = amd_bytealign_S (src_r0[0], 0, offset_minus_4);
|
||||||
s2 = 0;
|
s2 = 0;
|
||||||
s1 = 0;
|
s1 = 0;
|
||||||
s0 = 0;
|
s0 = 0;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 4:
|
case 4:
|
||||||
s8 = amd_bytealign ( 0, src_r0[3], offset_minus_4);
|
s8 = amd_bytealign_S ( 0, src_r0[3], offset_minus_4);
|
||||||
s7 = amd_bytealign (src_r0[3], src_r0[2], offset_minus_4);
|
s7 = amd_bytealign_S (src_r0[3], src_r0[2], offset_minus_4);
|
||||||
s6 = amd_bytealign (src_r0[2], src_r0[1], offset_minus_4);
|
s6 = amd_bytealign_S (src_r0[2], src_r0[1], offset_minus_4);
|
||||||
s5 = amd_bytealign (src_r0[1], src_r0[0], offset_minus_4);
|
s5 = amd_bytealign_S (src_r0[1], src_r0[0], offset_minus_4);
|
||||||
s4 = amd_bytealign (src_r0[0], 0, offset_minus_4);
|
s4 = amd_bytealign_S (src_r0[0], 0, offset_minus_4);
|
||||||
s3 = 0;
|
s3 = 0;
|
||||||
s2 = 0;
|
s2 = 0;
|
||||||
s1 = 0;
|
s1 = 0;
|
||||||
@ -834,10 +834,10 @@ inline void append_block8 (const u32 offset, u32 buf0[4], u32 buf1[4], const u32
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case 5:
|
case 5:
|
||||||
s8 = amd_bytealign ( 0, src_r0[2], offset_minus_4);
|
s8 = amd_bytealign_S ( 0, src_r0[2], offset_minus_4);
|
||||||
s7 = amd_bytealign (src_r0[2], src_r0[1], offset_minus_4);
|
s7 = amd_bytealign_S (src_r0[2], src_r0[1], offset_minus_4);
|
||||||
s6 = amd_bytealign (src_r0[1], src_r0[0], offset_minus_4);
|
s6 = amd_bytealign_S (src_r0[1], src_r0[0], offset_minus_4);
|
||||||
s5 = amd_bytealign (src_r0[0], 0, offset_minus_4);
|
s5 = amd_bytealign_S (src_r0[0], 0, offset_minus_4);
|
||||||
s4 = 0;
|
s4 = 0;
|
||||||
s3 = 0;
|
s3 = 0;
|
||||||
s2 = 0;
|
s2 = 0;
|
||||||
@ -846,9 +846,9 @@ inline void append_block8 (const u32 offset, u32 buf0[4], u32 buf1[4], const u32
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case 6:
|
case 6:
|
||||||
s8 = amd_bytealign ( 0, src_r0[1], offset_minus_4);
|
s8 = amd_bytealign_S ( 0, src_r0[1], offset_minus_4);
|
||||||
s7 = amd_bytealign (src_r0[1], src_r0[0], offset_minus_4);
|
s7 = amd_bytealign_S (src_r0[1], src_r0[0], offset_minus_4);
|
||||||
s6 = amd_bytealign (src_r0[0], 0, offset_minus_4);
|
s6 = amd_bytealign_S (src_r0[0], 0, offset_minus_4);
|
||||||
s5 = 0;
|
s5 = 0;
|
||||||
s4 = 0;
|
s4 = 0;
|
||||||
s3 = 0;
|
s3 = 0;
|
||||||
@ -858,8 +858,8 @@ inline void append_block8 (const u32 offset, u32 buf0[4], u32 buf1[4], const u32
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case 7:
|
case 7:
|
||||||
s8 = amd_bytealign ( 0, src_r0[0], offset_minus_4);
|
s8 = amd_bytealign_S ( 0, src_r0[0], offset_minus_4);
|
||||||
s7 = amd_bytealign (src_r0[0], 0, offset_minus_4);
|
s7 = amd_bytealign_S (src_r0[0], 0, offset_minus_4);
|
||||||
s6 = 0;
|
s6 = 0;
|
||||||
s5 = 0;
|
s5 = 0;
|
||||||
s4 = 0;
|
s4 = 0;
|
||||||
@ -911,44 +911,44 @@ inline void append_block8 (const u32 offset, u32 buf0[4], u32 buf1[4], const u32
|
|||||||
switch (offset / 4)
|
switch (offset / 4)
|
||||||
{
|
{
|
||||||
case 0:
|
case 0:
|
||||||
s7 = __byte_perm (src_r1[2], src_r1[3], selector);
|
s7 = __byte_perm_S (src_r1[2], src_r1[3], selector);
|
||||||
s6 = __byte_perm (src_r1[1], src_r1[2], selector);
|
s6 = __byte_perm_S (src_r1[1], src_r1[2], selector);
|
||||||
s5 = __byte_perm (src_r1[0], src_r1[1], selector);
|
s5 = __byte_perm_S (src_r1[0], src_r1[1], selector);
|
||||||
s4 = __byte_perm (src_r0[3], src_r1[0], selector);
|
s4 = __byte_perm_S (src_r0[3], src_r1[0], selector);
|
||||||
s3 = __byte_perm (src_r0[2], src_r0[3], selector);
|
s3 = __byte_perm_S (src_r0[2], src_r0[3], selector);
|
||||||
s2 = __byte_perm (src_r0[1], src_r0[2], selector);
|
s2 = __byte_perm_S (src_r0[1], src_r0[2], selector);
|
||||||
s1 = __byte_perm (src_r0[0], src_r0[1], selector);
|
s1 = __byte_perm_S (src_r0[0], src_r0[1], selector);
|
||||||
s0 = __byte_perm ( 0, src_r0[0], selector);
|
s0 = __byte_perm_S ( 0, src_r0[0], selector);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 1:
|
case 1:
|
||||||
s7 = __byte_perm (src_r1[1], src_r1[2], selector);
|
s7 = __byte_perm_S (src_r1[1], src_r1[2], selector);
|
||||||
s6 = __byte_perm (src_r1[0], src_r1[1], selector);
|
s6 = __byte_perm_S (src_r1[0], src_r1[1], selector);
|
||||||
s5 = __byte_perm (src_r0[3], src_r1[0], selector);
|
s5 = __byte_perm_S (src_r0[3], src_r1[0], selector);
|
||||||
s4 = __byte_perm (src_r0[2], src_r0[3], selector);
|
s4 = __byte_perm_S (src_r0[2], src_r0[3], selector);
|
||||||
s3 = __byte_perm (src_r0[1], src_r0[2], selector);
|
s3 = __byte_perm_S (src_r0[1], src_r0[2], selector);
|
||||||
s2 = __byte_perm (src_r0[0], src_r0[1], selector);
|
s2 = __byte_perm_S (src_r0[0], src_r0[1], selector);
|
||||||
s1 = __byte_perm ( 0, src_r0[0], selector);
|
s1 = __byte_perm_S ( 0, src_r0[0], selector);
|
||||||
s0 = 0;
|
s0 = 0;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 2:
|
case 2:
|
||||||
s7 = __byte_perm (src_r1[0], src_r1[1], selector);
|
s7 = __byte_perm_S (src_r1[0], src_r1[1], selector);
|
||||||
s6 = __byte_perm (src_r0[3], src_r1[0], selector);
|
s6 = __byte_perm_S (src_r0[3], src_r1[0], selector);
|
||||||
s5 = __byte_perm (src_r0[2], src_r0[3], selector);
|
s5 = __byte_perm_S (src_r0[2], src_r0[3], selector);
|
||||||
s4 = __byte_perm (src_r0[1], src_r0[2], selector);
|
s4 = __byte_perm_S (src_r0[1], src_r0[2], selector);
|
||||||
s3 = __byte_perm (src_r0[0], src_r0[1], selector);
|
s3 = __byte_perm_S (src_r0[0], src_r0[1], selector);
|
||||||
s2 = __byte_perm ( 0, src_r0[0], selector);
|
s2 = __byte_perm_S ( 0, src_r0[0], selector);
|
||||||
s1 = 0;
|
s1 = 0;
|
||||||
s0 = 0;
|
s0 = 0;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 3:
|
case 3:
|
||||||
s7 = __byte_perm (src_r0[3], src_r1[0], selector);
|
s7 = __byte_perm_S (src_r0[3], src_r1[0], selector);
|
||||||
s6 = __byte_perm (src_r0[2], src_r0[3], selector);
|
s6 = __byte_perm_S (src_r0[2], src_r0[3], selector);
|
||||||
s5 = __byte_perm (src_r0[1], src_r0[2], selector);
|
s5 = __byte_perm_S (src_r0[1], src_r0[2], selector);
|
||||||
s4 = __byte_perm (src_r0[0], src_r0[1], selector);
|
s4 = __byte_perm_S (src_r0[0], src_r0[1], selector);
|
||||||
s3 = __byte_perm ( 0, src_r0[0], selector);
|
s3 = __byte_perm_S ( 0, src_r0[0], selector);
|
||||||
s2 = 0;
|
s2 = 0;
|
||||||
s1 = 0;
|
s1 = 0;
|
||||||
s0 = 0;
|
s0 = 0;
|
||||||
@ -956,10 +956,10 @@ inline void append_block8 (const u32 offset, u32 buf0[4], u32 buf1[4], const u32
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case 4:
|
case 4:
|
||||||
s7 = __byte_perm (src_r0[2], src_r0[3], selector);
|
s7 = __byte_perm_S (src_r0[2], src_r0[3], selector);
|
||||||
s6 = __byte_perm (src_r0[1], src_r0[2], selector);
|
s6 = __byte_perm_S (src_r0[1], src_r0[2], selector);
|
||||||
s5 = __byte_perm (src_r0[0], src_r0[1], selector);
|
s5 = __byte_perm_S (src_r0[0], src_r0[1], selector);
|
||||||
s4 = __byte_perm ( 0, src_r0[0], selector);
|
s4 = __byte_perm_S ( 0, src_r0[0], selector);
|
||||||
s3 = 0;
|
s3 = 0;
|
||||||
s2 = 0;
|
s2 = 0;
|
||||||
s1 = 0;
|
s1 = 0;
|
||||||
@ -967,9 +967,9 @@ inline void append_block8 (const u32 offset, u32 buf0[4], u32 buf1[4], const u32
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case 5:
|
case 5:
|
||||||
s7 = __byte_perm (src_r0[1], src_r0[2], selector);
|
s7 = __byte_perm_S (src_r0[1], src_r0[2], selector);
|
||||||
s6 = __byte_perm (src_r0[0], src_r0[1], selector);
|
s6 = __byte_perm_S (src_r0[0], src_r0[1], selector);
|
||||||
s5 = __byte_perm ( 0, src_r0[0], selector);
|
s5 = __byte_perm_S ( 0, src_r0[0], selector);
|
||||||
s4 = 0;
|
s4 = 0;
|
||||||
s3 = 0;
|
s3 = 0;
|
||||||
s2 = 0;
|
s2 = 0;
|
||||||
@ -978,8 +978,8 @@ inline void append_block8 (const u32 offset, u32 buf0[4], u32 buf1[4], const u32
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case 6:
|
case 6:
|
||||||
s7 = __byte_perm (src_r0[0], src_r0[1], selector);
|
s7 = __byte_perm_S (src_r0[0], src_r0[1], selector);
|
||||||
s6 = __byte_perm ( 0, src_r0[0], selector);
|
s6 = __byte_perm_S ( 0, src_r0[0], selector);
|
||||||
s5 = 0;
|
s5 = 0;
|
||||||
s4 = 0;
|
s4 = 0;
|
||||||
s3 = 0;
|
s3 = 0;
|
||||||
@ -989,7 +989,7 @@ inline void append_block8 (const u32 offset, u32 buf0[4], u32 buf1[4], const u32
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case 7:
|
case 7:
|
||||||
s7 = __byte_perm ( 0, src_r0[0], selector);
|
s7 = __byte_perm_S ( 0, src_r0[0], selector);
|
||||||
s6 = 0;
|
s6 = 0;
|
||||||
s5 = 0;
|
s5 = 0;
|
||||||
s4 = 0;
|
s4 = 0;
|
||||||
|
Loading…
Reference in New Issue
Block a user