mirror of
https://github.com/hashcat/hashcat.git
synced 2025-01-08 14:51:10 +00:00
Reduce some register allocation in SCRYPT-based kernels to improve speed on GPUs with low resources
This commit is contained in:
parent
c444b42290
commit
2b069a94fd
@ -132,11 +132,11 @@ DECLSPEC void salsa_r (uint4 *TI)
|
||||
uint4 R2 = TI[STATE_CNT4 - 2];
|
||||
uint4 R3 = TI[STATE_CNT4 - 1];
|
||||
|
||||
uint4 TO[STATE_CNT4];
|
||||
uint4 TT[STATE_CNT4 / 2];
|
||||
|
||||
int idx_y = 0;
|
||||
int idx_r1 = 0;
|
||||
int idx_r2 = SCRYPT_R * 4;
|
||||
int idx_r2 = 0;
|
||||
|
||||
for (int i = 0; i < SCRYPT_R; i++)
|
||||
{
|
||||
@ -152,10 +152,10 @@ DECLSPEC void salsa_r (uint4 *TI)
|
||||
|
||||
SALSA20_8_XOR ();
|
||||
|
||||
TO[idx_r1++] = R0;
|
||||
TO[idx_r1++] = R1;
|
||||
TO[idx_r1++] = R2;
|
||||
TO[idx_r1++] = R3;
|
||||
TI[idx_r1++] = R0;
|
||||
TI[idx_r1++] = R1;
|
||||
TI[idx_r1++] = R2;
|
||||
TI[idx_r1++] = R3;
|
||||
|
||||
Y0 = TI[idx_y++];
|
||||
Y1 = TI[idx_y++];
|
||||
@ -164,18 +164,24 @@ DECLSPEC void salsa_r (uint4 *TI)
|
||||
|
||||
SALSA20_8_XOR ();
|
||||
|
||||
TO[idx_r2++] = R0;
|
||||
TO[idx_r2++] = R1;
|
||||
TO[idx_r2++] = R2;
|
||||
TO[idx_r2++] = R3;
|
||||
TT[idx_r2++] = R0;
|
||||
TT[idx_r2++] = R1;
|
||||
TT[idx_r2++] = R2;
|
||||
TT[idx_r2++] = R3;
|
||||
}
|
||||
|
||||
idx_r1 = 0;
|
||||
idx_r2 = SCRYPT_R * 4;
|
||||
|
||||
#ifdef _unroll
|
||||
#pragma unroll
|
||||
#endif
|
||||
for (int i = 0; i < STATE_CNT4; i++)
|
||||
for (int i = 0; i < SCRYPT_R; i++)
|
||||
{
|
||||
TI[i] = TO[i];
|
||||
TI[idx_r2++] = TT[idx_r1++];
|
||||
TI[idx_r2++] = TT[idx_r1++];
|
||||
TI[idx_r2++] = TT[idx_r1++];
|
||||
TI[idx_r2++] = TT[idx_r1++];
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -139,11 +139,11 @@ DECLSPEC void salsa_r (uint4 *TI)
|
||||
uint4 R2 = TI[STATE_CNT4 - 2];
|
||||
uint4 R3 = TI[STATE_CNT4 - 1];
|
||||
|
||||
uint4 TO[STATE_CNT4];
|
||||
uint4 TT[STATE_CNT4 / 2];
|
||||
|
||||
int idx_y = 0;
|
||||
int idx_r1 = 0;
|
||||
int idx_r2 = SCRYPT_R * 4;
|
||||
int idx_r2 = 0;
|
||||
|
||||
for (int i = 0; i < SCRYPT_R; i++)
|
||||
{
|
||||
@ -159,10 +159,10 @@ DECLSPEC void salsa_r (uint4 *TI)
|
||||
|
||||
SALSA20_8_XOR ();
|
||||
|
||||
TO[idx_r1++] = R0;
|
||||
TO[idx_r1++] = R1;
|
||||
TO[idx_r1++] = R2;
|
||||
TO[idx_r1++] = R3;
|
||||
TI[idx_r1++] = R0;
|
||||
TI[idx_r1++] = R1;
|
||||
TI[idx_r1++] = R2;
|
||||
TI[idx_r1++] = R3;
|
||||
|
||||
Y0 = TI[idx_y++];
|
||||
Y1 = TI[idx_y++];
|
||||
@ -171,16 +171,24 @@ DECLSPEC void salsa_r (uint4 *TI)
|
||||
|
||||
SALSA20_8_XOR ();
|
||||
|
||||
TO[idx_r2++] = R0;
|
||||
TO[idx_r2++] = R1;
|
||||
TO[idx_r2++] = R2;
|
||||
TO[idx_r2++] = R3;
|
||||
TT[idx_r2++] = R0;
|
||||
TT[idx_r2++] = R1;
|
||||
TT[idx_r2++] = R2;
|
||||
TT[idx_r2++] = R3;
|
||||
}
|
||||
|
||||
idx_r1 = 0;
|
||||
idx_r2 = SCRYPT_R * 4;
|
||||
|
||||
#ifdef _unroll
|
||||
#pragma unroll
|
||||
for (int i = 0; i < STATE_CNT4; i++)
|
||||
#endif
|
||||
for (int i = 0; i < SCRYPT_R; i++)
|
||||
{
|
||||
TI[i] = TO[i];
|
||||
TI[idx_r2++] = TT[idx_r1++];
|
||||
TI[idx_r2++] = TT[idx_r1++];
|
||||
TI[idx_r2++] = TT[idx_r1++];
|
||||
TI[idx_r2++] = TT[idx_r1++];
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -180,11 +180,11 @@ DECLSPEC void salsa_r (uint4 *TI)
|
||||
uint4 R2 = TI[STATE_CNT4 - 2];
|
||||
uint4 R3 = TI[STATE_CNT4 - 1];
|
||||
|
||||
uint4 TO[STATE_CNT4];
|
||||
uint4 TT[STATE_CNT4 / 2];
|
||||
|
||||
int idx_y = 0;
|
||||
int idx_r1 = 0;
|
||||
int idx_r2 = SCRYPT_R * 4;
|
||||
int idx_r2 = 0;
|
||||
|
||||
for (int i = 0; i < SCRYPT_R; i++)
|
||||
{
|
||||
@ -200,10 +200,10 @@ DECLSPEC void salsa_r (uint4 *TI)
|
||||
|
||||
SALSA20_8_XOR ();
|
||||
|
||||
TO[idx_r1++] = R0;
|
||||
TO[idx_r1++] = R1;
|
||||
TO[idx_r1++] = R2;
|
||||
TO[idx_r1++] = R3;
|
||||
TI[idx_r1++] = R0;
|
||||
TI[idx_r1++] = R1;
|
||||
TI[idx_r1++] = R2;
|
||||
TI[idx_r1++] = R3;
|
||||
|
||||
Y0 = TI[idx_y++];
|
||||
Y1 = TI[idx_y++];
|
||||
@ -212,16 +212,24 @@ DECLSPEC void salsa_r (uint4 *TI)
|
||||
|
||||
SALSA20_8_XOR ();
|
||||
|
||||
TO[idx_r2++] = R0;
|
||||
TO[idx_r2++] = R1;
|
||||
TO[idx_r2++] = R2;
|
||||
TO[idx_r2++] = R3;
|
||||
TT[idx_r2++] = R0;
|
||||
TT[idx_r2++] = R1;
|
||||
TT[idx_r2++] = R2;
|
||||
TT[idx_r2++] = R3;
|
||||
}
|
||||
|
||||
idx_r1 = 0;
|
||||
idx_r2 = SCRYPT_R * 4;
|
||||
|
||||
#ifdef _unroll
|
||||
#pragma unroll
|
||||
for (int i = 0; i < STATE_CNT4; i++)
|
||||
#endif
|
||||
for (int i = 0; i < SCRYPT_R; i++)
|
||||
{
|
||||
TI[i] = TO[i];
|
||||
TI[idx_r2++] = TT[idx_r1++];
|
||||
TI[idx_r2++] = TT[idx_r1++];
|
||||
TI[idx_r2++] = TT[idx_r1++];
|
||||
TI[idx_r2++] = TT[idx_r1++];
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user