From 303cfcae154262b57b0860f89817e2780182c7a2 Mon Sep 17 00:00:00 2001 From: jsteube Date: Sun, 8 May 2016 16:07:40 +0200 Subject: [PATCH] Enable unrolling of SHA512 with HMAC on NV --- OpenCL/m01750_a0.cl | 7 ++++++- OpenCL/m01750_a1.cl | 7 ++++++- OpenCL/m01750_a3.cl | 7 ++++++- OpenCL/m01760_a0.cl | 7 ++++++- OpenCL/m01760_a1.cl | 7 ++++++- OpenCL/m01760_a3.cl | 7 ++++++- OpenCL/m06221.cl | 7 ++++++- OpenCL/m06222.cl | 7 ++++++- OpenCL/m06223.cl | 7 ++++++- OpenCL/m06500.cl | 7 ++++++- OpenCL/m07100.cl | 7 ++++++- OpenCL/m07900.cl | 2 +- OpenCL/m08200.cl | 7 ++++++- OpenCL/m09600.cl | 7 ++++++- OpenCL/m11300.cl | 2 +- OpenCL/m12200.cl | 7 ++++++- OpenCL/m12300.cl | 7 ++++++- 17 files changed, 92 insertions(+), 17 deletions(-) diff --git a/OpenCL/m01750_a0.cl b/OpenCL/m01750_a0.cl index 0ad47907f..509538520 100644 --- a/OpenCL/m01750_a0.cl +++ b/OpenCL/m01750_a0.cl @@ -116,7 +116,12 @@ void sha512_transform (const u64x w0[4], const u64x w1[4], const u64x w2[4], con ROUND_STEP (0); - //#pragma unroll + #ifdef IS_AMD + // #pragma unroll + // breaks compiler + #else + #pragma unroll + #endif for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m01750_a1.cl b/OpenCL/m01750_a1.cl index f2697fc38..d920d1470 100644 --- a/OpenCL/m01750_a1.cl +++ b/OpenCL/m01750_a1.cl @@ -114,7 +114,12 @@ void sha512_transform (const u64x w0[4], const u64x w1[4], const u64x w2[4], con ROUND_STEP (0); - //#pragma unroll + #ifdef IS_AMD + // #pragma unroll + // breaks compiler + #else + #pragma unroll + #endif for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m01750_a3.cl b/OpenCL/m01750_a3.cl index e2138504d..2f08963c3 100644 --- a/OpenCL/m01750_a3.cl +++ b/OpenCL/m01750_a3.cl @@ -114,7 +114,12 @@ void sha512_transform (const u64x w0[4], const u64x w1[4], const u64x w2[4], con ROUND_STEP (0); - //#pragma unroll + #ifdef IS_AMD + // #pragma unroll + // breaks compiler + #else + #pragma unroll + #endif for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m01760_a0.cl b/OpenCL/m01760_a0.cl index f48718041..8e2338fc0 100644 --- a/OpenCL/m01760_a0.cl +++ b/OpenCL/m01760_a0.cl @@ -116,7 +116,12 @@ void sha512_transform (const u64x w0[4], const u64x w1[4], const u64x w2[4], con ROUND_STEP (0); - //#pragma unroll + #ifdef IS_AMD + // #pragma unroll + // breaks compiler + #else + #pragma unroll + #endif for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m01760_a1.cl b/OpenCL/m01760_a1.cl index d32944697..a0ba4be32 100644 --- a/OpenCL/m01760_a1.cl +++ b/OpenCL/m01760_a1.cl @@ -114,7 +114,12 @@ void sha512_transform (const u64x w0[4], const u64x w1[4], const u64x w2[4], con ROUND_STEP (0); - //#pragma unroll + #ifdef IS_AMD + // #pragma unroll + // breaks compiler + #else + #pragma unroll + #endif for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m01760_a3.cl b/OpenCL/m01760_a3.cl index b45cfab0d..cb8ff6b1c 100644 --- a/OpenCL/m01760_a3.cl +++ b/OpenCL/m01760_a3.cl @@ -114,7 +114,12 @@ void sha512_transform (const u64x w0[4], const u64x w1[4], const u64x w2[4], con ROUND_STEP (0); - //#pragma unroll + #ifdef IS_AMD + // #pragma unroll + // breaks compiler + #else + #pragma unroll + #endif for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m06221.cl b/OpenCL/m06221.cl index 294908034..60bdf5409 100644 --- a/OpenCL/m06221.cl +++ b/OpenCL/m06221.cl @@ -115,7 +115,12 @@ void sha512_transform (const u64 w[16], u64 dgst[8]) ROUND_STEP (0); - //#pragma unroll + #ifdef IS_AMD + // #pragma unroll + // breaks compiler + #else + #pragma unroll + #endif for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m06222.cl b/OpenCL/m06222.cl index 2620c7201..102a890e3 100644 --- a/OpenCL/m06222.cl +++ b/OpenCL/m06222.cl @@ -115,7 +115,12 @@ void sha512_transform (const u64 w[16], u64 dgst[8]) ROUND_STEP (0); - //#pragma unroll + #ifdef IS_AMD + // #pragma unroll + // breaks compiler + #else + #pragma unroll + #endif for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m06223.cl b/OpenCL/m06223.cl index a0eb7c229..86afa82c6 100644 --- a/OpenCL/m06223.cl +++ b/OpenCL/m06223.cl @@ -115,7 +115,12 @@ void sha512_transform (const u64 w[16], u64 dgst[8]) ROUND_STEP (0); - //#pragma unroll + #ifdef IS_AMD + // #pragma unroll + // breaks compiler + #else + #pragma unroll + #endif for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m06500.cl b/OpenCL/m06500.cl index 44e4d7fc5..5fa3977cd 100644 --- a/OpenCL/m06500.cl +++ b/OpenCL/m06500.cl @@ -114,7 +114,12 @@ void sha512_transform (const u64 w0[4], const u64 w1[4], const u64 w2[4], const ROUND_STEP (0); - //#pragma unroll + #ifdef IS_AMD + // #pragma unroll + // breaks compiler + #else + #pragma unroll + #endif for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m07100.cl b/OpenCL/m07100.cl index a9f09000e..15252e71b 100644 --- a/OpenCL/m07100.cl +++ b/OpenCL/m07100.cl @@ -114,7 +114,12 @@ void sha512_transform (const u64 w[16], u64 dgst[8]) ROUND_STEP (0); - //#pragma unroll + #ifdef IS_AMD + // #pragma unroll + // breaks compiler + #else + #pragma unroll + #endif for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m07900.cl b/OpenCL/m07900.cl index e2af12720..40b13a749 100644 --- a/OpenCL/m07900.cl +++ b/OpenCL/m07900.cl @@ -114,7 +114,7 @@ void sha512_transform (const u64 w[16], u64 dgst[8]) ROUND_STEP (0); - //#pragma unroll + #pragma unroll for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m08200.cl b/OpenCL/m08200.cl index 73e076d07..da4405ee2 100644 --- a/OpenCL/m08200.cl +++ b/OpenCL/m08200.cl @@ -321,7 +321,12 @@ void sha512_transform (const u64 w[16], u64 dgst[8]) ROUND512_STEP (0); - //#pragma unroll + #ifdef IS_AMD + // #pragma unroll + // breaks compiler + #else + #pragma unroll + #endif for (int i = 16; i < 80; i += 16) { ROUND512_EXPAND (); ROUND512_STEP (i); diff --git a/OpenCL/m09600.cl b/OpenCL/m09600.cl index 03cb525e0..f6d4ba6fe 100644 --- a/OpenCL/m09600.cl +++ b/OpenCL/m09600.cl @@ -1073,7 +1073,12 @@ void sha512_transform (const u64 w0[4], const u64 w1[4], const u64 w2[4], const ROUND_STEP (0); - //#pragma unroll + #ifdef IS_AMD + // #pragma unroll + // breaks compiler + #else + #pragma unroll + #endif for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m11300.cl b/OpenCL/m11300.cl index db71c2fa9..0547e57de 100644 --- a/OpenCL/m11300.cl +++ b/OpenCL/m11300.cl @@ -986,7 +986,7 @@ void sha512_transform (const u64 w[16], u64 dgst[8]) ROUND_STEP (0); - //#pragma unroll + #pragma unroll for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m12200.cl b/OpenCL/m12200.cl index cbb96eb03..001a5a2f2 100644 --- a/OpenCL/m12200.cl +++ b/OpenCL/m12200.cl @@ -114,7 +114,12 @@ void sha512_transform (const u64 w[16], u64 dgst[8]) ROUND_STEP (0); - //#pragma unroll + #ifdef IS_AMD + // #pragma unroll + // breaks compiler + #else + #pragma unroll + #endif for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m12300.cl b/OpenCL/m12300.cl index 5ab6a05a1..33b410a4d 100644 --- a/OpenCL/m12300.cl +++ b/OpenCL/m12300.cl @@ -114,7 +114,12 @@ void sha512_transform (const u64 w[16], u64 dgst[8]) ROUND_STEP (0); - //#pragma unroll + #ifdef IS_AMD + // #pragma unroll + // breaks compiler + #else + #pragma unroll + #endif for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i);