From f8ea1d5e78c974a672583daa35397cbd32dc32c8 Mon Sep 17 00:00:00 2001
From: Jens Steube
Date: Fri, 30 Apr 2021 17:22:31 +0200
Subject: [PATCH] Improve performance of test_any_8th_bit() by manually unrolling a few first steps

---
 OpenCL/inc_common.cl | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/OpenCL/inc_common.cl b/OpenCL/inc_common.cl
index 5bb5596b5..833ccaa1a 100644
--- a/OpenCL/inc_common.cl
+++ b/OpenCL/inc_common.cl
@@ -1985,11 +1985,19 @@ DECLSPEC int find_hash (const u32 *digest, const u32 digests_cnt, GLOBAL_AS cons
 
 DECLSPEC int test_any_8th_bit (const u32 *buf, const int len)
 {
-  for (int i = 0, j = 0; i < len; i += 4, j += 1)
-  {
-    const u32 v = buf[j];
+  // we simply ignore buffer length for the first 24 bytes for some extra speed boost :)
+  // number of unrolls found by simply testing what gave best results
 
-    if (v & 0x80808080) return 1;
+  if (buf[0] & 0x80808080) return 1;
+  if (buf[1] & 0x80808080) return 1;
+  if (buf[2] & 0x80808080) return 1;
+  if (buf[3] & 0x80808080) return 1;
+  if (buf[4] & 0x80808080) return 1;
+  if (buf[5] & 0x80808080) return 1;
+
+  for (int i = 24, j = 6; i < len; i += 4, j += 1)
+  {
+    if (buf[j] & 0x80808080) return 1;
   }
 
   return 0;