|
|
|
@ -138,6 +138,16 @@ void scrypt_smix (uint4 *X, uint4 *T, __global uint4 *V0, __global uint4 *V1, __
|
|
|
|
|
const u32 xd4 = x / 4;
|
|
|
|
|
const u32 xm4 = x & 3;
|
|
|
|
|
|
|
|
|
|
__global uint4 *V;
|
|
|
|
|
|
|
|
|
|
switch (xm4)
|
|
|
|
|
{
|
|
|
|
|
case 0: V = V0; break;
|
|
|
|
|
case 1: V = V1; break;
|
|
|
|
|
case 2: V = V2; break;
|
|
|
|
|
case 3: V = V3; break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#ifdef _unroll
|
|
|
|
|
#pragma unroll
|
|
|
|
|
#endif
|
|
|
|
@ -156,13 +166,7 @@ void scrypt_smix (uint4 *X, uint4 *T, __global uint4 *V0, __global uint4 *V1, __
|
|
|
|
|
|
|
|
|
|
for (u32 y = 0; y < ySIZE; y++)
|
|
|
|
|
{
|
|
|
|
|
switch (xm4)
|
|
|
|
|
{
|
|
|
|
|
case 0: for (u32 z = 0; z < zSIZE; z++) V0[CO] = X[z]; break;
|
|
|
|
|
case 1: for (u32 z = 0; z < zSIZE; z++) V1[CO] = X[z]; break;
|
|
|
|
|
case 2: for (u32 z = 0; z < zSIZE; z++) V2[CO] = X[z]; break;
|
|
|
|
|
case 3: for (u32 z = 0; z < zSIZE; z++) V3[CO] = X[z]; break;
|
|
|
|
|
}
|
|
|
|
|
for (u32 z = 0; z < zSIZE; z++) V[CO] = X[z];
|
|
|
|
|
|
|
|
|
|
for (u32 i = 0; i < SCRYPT_TMTO; i++) salsa_r (X);
|
|
|
|
|
}
|
|
|
|
@ -175,13 +179,7 @@ void scrypt_smix (uint4 *X, uint4 *T, __global uint4 *V0, __global uint4 *V1, __
|
|
|
|
|
|
|
|
|
|
const u32 km = k - (y * SCRYPT_TMTO);
|
|
|
|
|
|
|
|
|
|
switch (xm4)
|
|
|
|
|
{
|
|
|
|
|
case 0: for (u32 z = 0; z < zSIZE; z++) T[z] = V0[CO]; break;
|
|
|
|
|
case 1: for (u32 z = 0; z < zSIZE; z++) T[z] = V1[CO]; break;
|
|
|
|
|
case 2: for (u32 z = 0; z < zSIZE; z++) T[z] = V2[CO]; break;
|
|
|
|
|
case 3: for (u32 z = 0; z < zSIZE; z++) T[z] = V3[CO]; break;
|
|
|
|
|
}
|
|
|
|
|
for (u32 z = 0; z < zSIZE; z++) T[z] = V[CO];
|
|
|
|
|
|
|
|
|
|
for (u32 i = 0; i < km; i++) salsa_r (T);
|
|
|
|
|
|
|
|
|
|