From 332396a0036ef1e3407462acb815e8a21fc3dc52 Mon Sep 17 00:00:00 2001 From: jsteube <jens.steube@gmail.com> Date: Fri, 28 Jul 2017 02:28:52 +0200 Subject: [PATCH] Fix SCRYPT on ROCm --- OpenCL/m08900.cl | 26 ++++++++++++-------------- OpenCL/m15700.cl | 26 ++++++++++++-------------- src/shared.c | 6 ------ 3 files changed, 24 insertions(+), 34 deletions(-) diff --git a/OpenCL/m08900.cl b/OpenCL/m08900.cl index 402c2e0c0..9d65e1a71 100644 --- a/OpenCL/m08900.cl +++ b/OpenCL/m08900.cl @@ -138,6 +138,16 @@ void scrypt_smix (uint4 *X, uint4 *T, __global uint4 *V0, __global uint4 *V1, __ const u32 xd4 = x / 4; const u32 xm4 = x & 3; + __global uint4 *V; + + switch (xm4) + { + case 0: V = V0; break; + case 1: V = V1; break; + case 2: V = V2; break; + case 3: V = V3; break; + } + #ifdef _unroll #pragma unroll #endif @@ -156,13 +166,7 @@ void scrypt_smix (uint4 *X, uint4 *T, __global uint4 *V0, __global uint4 *V1, __ for (u32 y = 0; y < ySIZE; y++) { - switch (xm4) - { - case 0: for (u32 z = 0; z < zSIZE; z++) V0[CO] = X[z]; break; - case 1: for (u32 z = 0; z < zSIZE; z++) V1[CO] = X[z]; break; - case 2: for (u32 z = 0; z < zSIZE; z++) V2[CO] = X[z]; break; - case 3: for (u32 z = 0; z < zSIZE; z++) V3[CO] = X[z]; break; - } + for (u32 z = 0; z < zSIZE; z++) V[CO] = X[z]; for (u32 i = 0; i < SCRYPT_TMTO; i++) salsa_r (X); } @@ -175,13 +179,7 @@ void scrypt_smix (uint4 *X, uint4 *T, __global uint4 *V0, __global uint4 *V1, __ const u32 km = k - (y * SCRYPT_TMTO); - switch (xm4) - { - case 0: for (u32 z = 0; z < zSIZE; z++) T[z] = V0[CO]; break; - case 1: for (u32 z = 0; z < zSIZE; z++) T[z] = V1[CO]; break; - case 2: for (u32 z = 0; z < zSIZE; z++) T[z] = V2[CO]; break; - case 3: for (u32 z = 0; z < zSIZE; z++) T[z] = V3[CO]; break; - } + for (u32 z = 0; z < zSIZE; z++) T[z] = V[CO]; for (u32 i = 0; i < km; i++) salsa_r (T); diff --git a/OpenCL/m15700.cl b/OpenCL/m15700.cl index 57a33dc1b..a1971093f 100644 --- a/OpenCL/m15700.cl +++ b/OpenCL/m15700.cl @@ -138,6 +138,16 @@ void scrypt_smix (uint4 *X, uint4 *T, __global uint4 *V0, __global uint4 *V1, __ const u32 xd4 = x / 4; const u32 xm4 = x & 3; + __global uint4 *V; + + switch (xm4) + { + case 0: V = V0; break; + case 1: V = V1; break; + case 2: V = V2; break; + case 3: V = V3; break; + } + #ifdef _unroll #pragma unroll #endif @@ -156,13 +166,7 @@ void scrypt_smix (uint4 *X, uint4 *T, __global uint4 *V0, __global uint4 *V1, __ for (u32 y = 0; y < ySIZE; y++) { - switch (xm4) - { - case 0: for (u32 z = 0; z < zSIZE; z++) V0[CO] = X[z]; break; - case 1: for (u32 z = 0; z < zSIZE; z++) V1[CO] = X[z]; break; - case 2: for (u32 z = 0; z < zSIZE; z++) V2[CO] = X[z]; break; - case 3: for (u32 z = 0; z < zSIZE; z++) V3[CO] = X[z]; break; - } + for (u32 z = 0; z < zSIZE; z++) V[CO] = X[z]; for (u32 i = 0; i < SCRYPT_TMTO; i++) salsa_r (X); } @@ -175,13 +179,7 @@ void scrypt_smix (uint4 *X, uint4 *T, __global uint4 *V0, __global uint4 *V1, __ const u32 km = k - (y * SCRYPT_TMTO); - switch (xm4) - { - case 0: for (u32 z = 0; z < zSIZE; z++) T[z] = V0[CO]; break; - case 1: for (u32 z = 0; z < zSIZE; z++) T[z] = V1[CO]; break; - case 2: for (u32 z = 0; z < zSIZE; z++) T[z] = V2[CO]; break; - case 3: for (u32 z = 0; z < zSIZE; z++) T[z] = V3[CO]; break; - } + for (u32 z = 0; z < zSIZE; z++) T[z] = V[CO]; for (u32 i = 0; i < km; i++) salsa_r (T); diff --git a/src/shared.c b/src/shared.c index 83442586d..9ea3d575b 100644 --- a/src/shared.c +++ b/src/shared.c @@ -334,12 +334,6 @@ void setup_environment_variables () putenv ((char *) "DISPLAY=:0"); } - if (getenv ("GPU_FORCE_64BIT_PTR") == NULL) - putenv ((char *) "GPU_FORCE_64BIT_PTR=0"); - - if (getenv ("GPU_USE_SYNC_OBJECTS") == NULL) - putenv ((char *) "GPU_USE_SYNC_OBJECTS=1"); - if (getenv ("OCL_CODE_CACHE_ENABLE") == NULL) putenv ((char *) "OCL_CODE_CACHE_ENABLE=0");