From b29b7b8188ed97c2a0b5d49fb04bf1818dfa39d7 Mon Sep 17 00:00:00 2001 From: unix-ninja Date: Wed, 17 Oct 2018 08:54:52 -0400 Subject: [PATCH] Convert arithmetic ops to logical ops in byte alignment --- OpenCL/m18100_a0-pure.cl | 18 ++++++++++-------- OpenCL/m18100_a3-pure.cl | 18 ++++++++++-------- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/OpenCL/m18100_a0-pure.cl b/OpenCL/m18100_a0-pure.cl index 4de835280..76a7f2705 100644 --- a/OpenCL/m18100_a0-pure.cl +++ b/OpenCL/m18100_a0-pure.cl @@ -66,16 +66,17 @@ __kernel void m18100_mxx (__global pw_t *pws, __constant const kernel_rule_t *ru unsigned int otp_code = 0; // grab 4 consecutive bytes of the hash, starting at offset - switch(otp_offset%4) + // otp_offset&3 equals otp_offset%4 for non-negative offsets and is faster on some systems; each mask below keeps exactly the bytes that survive its shift + switch(otp_offset&3) { case 1: - otp_code = ((ctx.opad.h[otp_offset/4] & 0xffffff) << 8) | ((ctx.opad.h[otp_offset/4+1] % 0xffffff00) >> 24); + otp_code = ((ctx.opad.h[otp_offset/4] & 0x00ffffff) << 8) | ((ctx.opad.h[otp_offset/4+1] & 0xff000000) >> 24); break; case 2: - otp_code = ((ctx.opad.h[otp_offset/4] & 0xffff) << 16) | ((ctx.opad.h[otp_offset/4+1] % 0xffff0000) >> 16); + otp_code = ((ctx.opad.h[otp_offset/4] & 0x0000ffff) << 16) | ((ctx.opad.h[otp_offset/4+1] & 0xffff0000) >> 16); break; case 3: - otp_code = ((ctx.opad.h[otp_offset/4] & 0xff) << 24) | ((ctx.opad.h[otp_offset/4+1] % 0xffffff00) >> 8); + otp_code = ((ctx.opad.h[otp_offset/4] & 0x000000ff) << 24) | ((ctx.opad.h[otp_offset/4+1] & 0xffffff00) >> 8); break; default: otp_code = ctx.opad.h[otp_offset/4]; @@ -158,16 +159,17 @@ __kernel void m18100_sxx (__global pw_t *pws, __constant const kernel_rule_t *ru unsigned int otp_code = 0; // grab 4 consecutive bytes of the hash, starting at offset - switch(otp_offset%4) + // otp_offset&3 equals otp_offset%4 for non-negative offsets and is faster on some systems; each mask below keeps exactly the bytes that survive its shift + switch(otp_offset&3) { case 1: - otp_code = ((ctx.opad.h[otp_offset/4] & 0xffffff) << 8) | ((ctx.opad.h[otp_offset/4+1] 
% 0xffffff00) >> 24); + otp_code = ((ctx.opad.h[otp_offset/4] & 0x00ffffff) << 8) | ((ctx.opad.h[otp_offset/4+1] & 0xff000000) >> 24); break; case 2: - otp_code = ((ctx.opad.h[otp_offset/4] & 0xffff) << 16) | ((ctx.opad.h[otp_offset/4+1] % 0xffff0000) >> 16); + otp_code = ((ctx.opad.h[otp_offset/4] & 0x0000ffff) << 16) | ((ctx.opad.h[otp_offset/4+1] & 0xffff0000) >> 16); break; case 3: - otp_code = ((ctx.opad.h[otp_offset/4] & 0xff) << 24) | ((ctx.opad.h[otp_offset/4+1] % 0xffffff00) >> 8); + otp_code = ((ctx.opad.h[otp_offset/4] & 0x000000ff) << 24) | ((ctx.opad.h[otp_offset/4+1] & 0xffffff00) >> 8); break; default: otp_code = ctx.opad.h[otp_offset/4]; diff --git a/OpenCL/m18100_a3-pure.cl b/OpenCL/m18100_a3-pure.cl index a106acb99..8f90361e9 100644 --- a/OpenCL/m18100_a3-pure.cl +++ b/OpenCL/m18100_a3-pure.cl @@ -75,16 +75,17 @@ __kernel void m18100_mxx (__global pw_t *pws, __global const kernel_rule_t *rule unsigned int otp_code = 0; // grab 4 consecutive bytes of the hash, starting at offset - switch(otp_offset%4) + // otp_offset&3 equals otp_offset%4 for non-negative offsets and is faster on some systems; each mask below keeps exactly the bytes that survive its shift + switch(otp_offset&3) { case 1: - otp_code = ((ctx.opad.h[otp_offset/4] & 0xffffff) << 8) | ((ctx.opad.h[otp_offset/4+1] % 0xffffff00) >> 24); + otp_code = ((ctx.opad.h[otp_offset/4] & 0x00ffffff) << 8) | ((ctx.opad.h[otp_offset/4+1] & 0xff000000) >> 24); break; case 2: - otp_code = ((ctx.opad.h[otp_offset/4] & 0xffff) << 16) | ((ctx.opad.h[otp_offset/4+1] % 0xffff0000) >> 16); + otp_code = ((ctx.opad.h[otp_offset/4] & 0x0000ffff) << 16) | ((ctx.opad.h[otp_offset/4+1] & 0xffff0000) >> 16); break; case 3: - otp_code = ((ctx.opad.h[otp_offset/4] & 0xff) << 24) | ((ctx.opad.h[otp_offset/4+1] % 0xffffff00) >> 8); + otp_code = ((ctx.opad.h[otp_offset/4] & 0x000000ff) << 24) | ((ctx.opad.h[otp_offset/4+1] & 0xffffff00) >> 8); break; default: otp_code = ctx.opad.h[otp_offset/4]; @@ -178,16 +179,17 @@ __kernel void m18100_sxx (__global pw_t *pws, __global const kernel_rule_t *rule 
unsigned int otp_code = 0; // grab 4 consecutive bytes of the hash, starting at offset - switch(otp_offset%4) + // otp_offset&3 equals otp_offset%4 for non-negative offsets and is faster on some systems; each mask below keeps exactly the bytes that survive its shift + switch(otp_offset&3) { case 1: - otp_code = ((ctx.opad.h[otp_offset/4] & 0xffffff) << 8) | ((ctx.opad.h[otp_offset/4+1] % 0xffffff00) >> 24); + otp_code = ((ctx.opad.h[otp_offset/4] & 0x00ffffff) << 8) | ((ctx.opad.h[otp_offset/4+1] & 0xff000000) >> 24); break; case 2: - otp_code = ((ctx.opad.h[otp_offset/4] & 0xffff) << 16) | ((ctx.opad.h[otp_offset/4+1] % 0xffff0000) >> 16); + otp_code = ((ctx.opad.h[otp_offset/4] & 0x0000ffff) << 16) | ((ctx.opad.h[otp_offset/4+1] & 0xffff0000) >> 16); break; case 3: - otp_code = ((ctx.opad.h[otp_offset/4] & 0xff) << 24) | ((ctx.opad.h[otp_offset/4+1] % 0xffffff00) >> 8); + otp_code = ((ctx.opad.h[otp_offset/4] & 0x000000ff) << 24) | ((ctx.opad.h[otp_offset/4+1] & 0xffffff00) >> 8); break; default: otp_code = ctx.opad.h[otp_offset/4];