From c3191ae6948836a88af6f4ca26cf749a6f918a58 Mon Sep 17 00:00:00 2001 From: jsteube Date: Sun, 20 Dec 2015 16:09:00 +0100 Subject: [PATCH] Fix keccak speed on NV OpenCL runtime --- OpenCL/m05000_a0.cl | 44 ++++++++++++++++++++++++++++++++------------ OpenCL/m05000_a1.cl | 44 ++++++++++++++++++++++++++++++++------------ OpenCL/m05000_a3.cl | 44 ++++++++++++++++++++++++++++++++------------ 3 files changed, 96 insertions(+), 36 deletions(-) diff --git a/OpenCL/m05000_a0.cl b/OpenCL/m05000_a0.cl index a6aa03ada..0a8c6f70f 100644 --- a/OpenCL/m05000_a0.cl +++ b/OpenCL/m05000_a0.cl @@ -34,18 +34,6 @@ __constant u64 keccakf_rndc[24] = 0x8000000000008080, 0x0000000080000001, 0x8000000080008008 }; -__constant u32 keccakf_rotc[24] = -{ - 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14, - 27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44 -}; - -__constant u32 keccakf_piln[24] = -{ - 10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4, - 15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1 -}; - #ifndef KECCAK_ROUNDS #define KECCAK_ROUNDS 24 #endif @@ -92,6 +80,22 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05000_m04 (__glo const u32 lid = get_local_id (0); + /** + * const + */ + + const u8 keccakf_rotc[24] = + { + 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14, + 27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44 + }; + + const u8 keccakf_piln[24] = + { + 10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4, + 15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1 + }; + /** * base */ @@ -281,6 +285,22 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05000_s04 (__glo const u32 lid = get_local_id (0); + /** + * const + */ + + const u8 keccakf_rotc[24] = + { + 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14, + 27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44 + }; + + const u8 keccakf_piln[24] = + { + 10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4, + 15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1 + }; + /** * base */ diff --git a/OpenCL/m05000_a1.cl b/OpenCL/m05000_a1.cl index b986d1542..a9d6d97c8 100644 --- a/OpenCL/m05000_a1.cl +++ b/OpenCL/m05000_a1.cl @@ -32,18 +32,6 @@ __constant u64 keccakf_rndc[24] = 0x8000000000008080, 0x0000000080000001, 0x8000000080008008 }; -__constant u32 keccakf_rotc[24] = -{ - 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14, - 27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44 -}; - -__constant u32 keccakf_piln[24] = -{ - 10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4, - 15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1 -}; - #ifndef KECCAK_ROUNDS #define KECCAK_ROUNDS 24 #endif @@ -90,6 +78,22 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05000_m04 (__glo const u32 lid = get_local_id (0); + /** + * const + */ + + const u8 keccakf_rotc[24] = + { + 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14, + 27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44 + }; + + const u8 keccakf_piln[24] = + { + 10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4, + 15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1 + }; + /** * base */ @@ -335,6 +339,22 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05000_s04 (__glo const u32 lid = get_local_id (0); + /** + * const + */ + + const u8 keccakf_rotc[24] = + { + 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14, + 27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44 + }; + + const u8 keccakf_piln[24] = + { + 10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4, + 15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1 + }; + /** * base */ diff --git a/OpenCL/m05000_a3.cl b/OpenCL/m05000_a3.cl index e273678ec..0d29349e6 100644 --- a/OpenCL/m05000_a3.cl +++ b/OpenCL/m05000_a3.cl @@ -32,18 +32,6 @@ __constant u64 keccakf_rndc[24] = 0x8000000000008080, 0x0000000080000001, 0x8000000080008008 }; -__constant u32 keccakf_rotc[24] = -{ - 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14, - 27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44 -}; - -__constant u32 keccakf_piln[24] = -{ - 10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4, - 15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1 -}; - #ifndef KECCAK_ROUNDS #define KECCAK_ROUNDS 24 #endif @@ -91,6 +79,22 @@ static void m05000m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le const u32 gid = get_global_id (0); const u32 lid = get_local_id (0); + /** + * const + */ + + const u8 keccakf_rotc[24] = + { + 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14, + 27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44 + }; + + const u8 keccakf_piln[24] = + { + 10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4, + 15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1 + }; + /** * 0x80 keccak, very special */ @@ -223,6 +227,22 @@ static void m05000s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le const u32 gid = get_global_id (0); const u32 lid = get_local_id (0); + /** + * const + */ + + const u8 keccakf_rotc[24] = + { + 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14, + 27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44 + }; + + const u8 keccakf_piln[24] = + { + 10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4, + 15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1 + }; + /** * digest */