From e999ae873795a53d969f0c7945438e4b0f2a3960 Mon Sep 17 00:00:00 2001 From: Jens Steube Date: Tue, 4 Jun 2019 12:15:34 +0200 Subject: [PATCH] Speed up -m 11500 in general --- OpenCL/m11500_a0-optimized.cl | 12 ++++++++++-- OpenCL/m11500_a1-optimized.cl | 12 ++++++++++-- OpenCL/m11500_a3-optimized.cl | 12 ++++++++++-- 3 files changed, 30 insertions(+), 6 deletions(-) diff --git a/OpenCL/m11500_a0-optimized.cl b/OpenCL/m11500_a0-optimized.cl index 1ea2ec7e4..6992b41ce 100644 --- a/OpenCL/m11500_a0-optimized.cl +++ b/OpenCL/m11500_a0-optimized.cl @@ -115,8 +115,16 @@ DECLSPEC u32x crc32 (const u32x *w, const u32 pw_len, const u32 iv) if (pw_len >= 2) a = round_crc32 (a, w[0] >> 8); if (pw_len >= 3) a = round_crc32 (a, w[0] >> 16); if (pw_len >= 4) a = round_crc32 (a, w[0] >> 24); - - for (u32 i = 4, j = 1; i < pw_len; i += 4, j += 1) + if (pw_len >= 5) a = round_crc32 (a, w[1] >> 0); + if (pw_len >= 6) a = round_crc32 (a, w[1] >> 8); + if (pw_len >= 7) a = round_crc32 (a, w[1] >> 16); + if (pw_len >= 8) a = round_crc32 (a, w[1] >> 24); + if (pw_len >= 9) a = round_crc32 (a, w[2] >> 0); + if (pw_len >= 10) a = round_crc32 (a, w[2] >> 8); + if (pw_len >= 11) a = round_crc32 (a, w[2] >> 16); + if (pw_len >= 12) a = round_crc32 (a, w[2] >> 24); + + for (u32 i = 12, j = 3; i < pw_len; i += 4, j += 1) { if (pw_len >= (i + 1)) a = round_crc32 (a, w[j] >> 0); if (pw_len >= (i + 2)) a = round_crc32 (a, w[j] >> 8); diff --git a/OpenCL/m11500_a1-optimized.cl b/OpenCL/m11500_a1-optimized.cl index b3390f961..c46bbad0e 100644 --- a/OpenCL/m11500_a1-optimized.cl +++ b/OpenCL/m11500_a1-optimized.cl @@ -113,8 +113,16 @@ DECLSPEC u32x crc32 (const u32x *w, const u32 pw_len, const u32 iv) if (pw_len >= 2) a = round_crc32 (a, w[0] >> 8); if (pw_len >= 3) a = round_crc32 (a, w[0] >> 16); if (pw_len >= 4) a = round_crc32 (a, w[0] >> 24); - - for (u32 i = 4, j = 1; i < pw_len; i += 4, j += 1) + if (pw_len >= 5) a = round_crc32 (a, w[1] >> 0); + if (pw_len >= 6) a = round_crc32 (a, w[1] >> 8); + if (pw_len >= 7) a = round_crc32 (a, w[1] >> 16); + if (pw_len >= 8) a = round_crc32 (a, w[1] >> 24); + if (pw_len >= 9) a = round_crc32 (a, w[2] >> 0); + if (pw_len >= 10) a = round_crc32 (a, w[2] >> 8); + if (pw_len >= 11) a = round_crc32 (a, w[2] >> 16); + if (pw_len >= 12) a = round_crc32 (a, w[2] >> 24); + + for (u32 i = 12, j = 3; i < pw_len; i += 4, j += 1) { if (pw_len >= (i + 1)) a = round_crc32 (a, w[j] >> 0); if (pw_len >= (i + 2)) a = round_crc32 (a, w[j] >> 8); diff --git a/OpenCL/m11500_a3-optimized.cl b/OpenCL/m11500_a3-optimized.cl index d10b09401..c83433d30 100644 --- a/OpenCL/m11500_a3-optimized.cl +++ b/OpenCL/m11500_a3-optimized.cl @@ -113,8 +113,16 @@ DECLSPEC u32x crc32 (const u32x *w, const u32 pw_len, const u32 iv) if (pw_len >= 2) a = round_crc32 (a, w[0] >> 8); if (pw_len >= 3) a = round_crc32 (a, w[0] >> 16); if (pw_len >= 4) a = round_crc32 (a, w[0] >> 24); - - for (u32 i = 4, j = 1; i < pw_len; i += 4, j += 1) + if (pw_len >= 5) a = round_crc32 (a, w[1] >> 0); + if (pw_len >= 6) a = round_crc32 (a, w[1] >> 8); + if (pw_len >= 7) a = round_crc32 (a, w[1] >> 16); + if (pw_len >= 8) a = round_crc32 (a, w[1] >> 24); + if (pw_len >= 9) a = round_crc32 (a, w[2] >> 0); + if (pw_len >= 10) a = round_crc32 (a, w[2] >> 8); + if (pw_len >= 11) a = round_crc32 (a, w[2] >> 16); + if (pw_len >= 12) a = round_crc32 (a, w[2] >> 24); + + for (u32 i = 12, j = 3; i < pw_len; i += 4, j += 1) { if (pw_len >= (i + 1)) a = round_crc32 (a, w[j] >> 0); if (pw_len >= (i + 2)) a = round_crc32 (a, w[j] >> 8);