From 75b4164498484877b6c45b08dc7ec3819454eecd Mon Sep 17 00:00:00 2001 From: Jens Steube Date: Sat, 7 Dec 2019 11:29:39 +0100 Subject: [PATCH] Use a different code for mod_512() to help some NV GPU to not hang --- OpenCL/inc_ecc_secp256k1.cl | 228 +++++++++++++++++------------------- 1 file changed, 110 insertions(+), 118 deletions(-) diff --git a/OpenCL/inc_ecc_secp256k1.cl b/OpenCL/inc_ecc_secp256k1.cl index 9d0541739..3318298ff 100644 --- a/OpenCL/inc_ecc_secp256k1.cl +++ b/OpenCL/inc_ecc_secp256k1.cl @@ -369,59 +369,55 @@ DECLSPEC void mod_512 (u32 n[16]) while (a[0] >= b[0]) { - const u32 l1 = (a[ 0] < b[ 0]) << 0 - | (a[ 1] < b[ 1]) << 1 - | (a[ 2] < b[ 2]) << 2 - | (a[ 3] < b[ 3]) << 3 - | (a[ 4] < b[ 4]) << 4 - | (a[ 5] < b[ 5]) << 5 - | (a[ 6] < b[ 6]) << 6 - | (a[ 7] < b[ 7]) << 7 - | (a[ 8] < b[ 8]) << 8 - | (a[ 9] < b[ 9]) << 9 - | (a[10] < b[10]) << 10 - | (a[11] < b[11]) << 11 - | (a[12] < b[12]) << 12 - | (a[13] < b[13]) << 13 - | (a[14] < b[14]) << 14 - | (a[15] < b[15]) << 15; + u32 l00 = a[ 0] < b[ 0]; + u32 l01 = a[ 1] < b[ 1]; + u32 l02 = a[ 2] < b[ 2]; + u32 l03 = a[ 3] < b[ 3]; + u32 l04 = a[ 4] < b[ 4]; + u32 l05 = a[ 5] < b[ 5]; + u32 l06 = a[ 6] < b[ 6]; + u32 l07 = a[ 7] < b[ 7]; + u32 l08 = a[ 8] < b[ 8]; + u32 l09 = a[ 9] < b[ 9]; + u32 l10 = a[10] < b[10]; + u32 l11 = a[11] < b[11]; + u32 l12 = a[12] < b[12]; + u32 l13 = a[13] < b[13]; + u32 l14 = a[14] < b[14]; + u32 l15 = a[15] < b[15]; - const u32 e1 = (a[ 0] == b[ 0]) << 0 - | (a[ 1] == b[ 1]) << 1 - | (a[ 2] == b[ 2]) << 2 - | (a[ 3] == b[ 3]) << 3 - | (a[ 4] == b[ 4]) << 4 - | (a[ 5] == b[ 5]) << 5 - | (a[ 6] == b[ 6]) << 6 - | (a[ 7] == b[ 7]) << 7 - | (a[ 8] == b[ 8]) << 8 - | (a[ 9] == b[ 9]) << 9 - | (a[10] == b[10]) << 10 - | (a[11] == b[11]) << 11 - | (a[12] == b[12]) << 12 - | (a[13] == b[13]) << 13 - | (a[14] == b[14]) << 14 - | (a[15] == b[15]) << 15; + u32 e00 = a[ 0] == b[ 0]; + u32 e01 = a[ 1] == b[ 1]; + u32 e02 = a[ 2] == b[ 2]; + u32 e03 = a[ 3] == b[ 3]; + u32 e04 
= a[ 4] == b[ 4]; + u32 e05 = a[ 5] == b[ 5]; + u32 e06 = a[ 6] == b[ 6]; + u32 e07 = a[ 7] == b[ 7]; + u32 e08 = a[ 8] == b[ 8]; + u32 e09 = a[ 9] == b[ 9]; + u32 e10 = a[10] == b[10]; + u32 e11 = a[11] == b[11]; + u32 e12 = a[12] == b[12]; + u32 e13 = a[13] == b[13]; + u32 e14 = a[14] == b[14]; - if (l1) - { - if (l1 & 0x0001) break; - if (l1 & 0x0002) if ((e1 & 0x0001) == 0x0001) break; - if (l1 & 0x0004) if ((e1 & 0x0003) == 0x0003) break; - if (l1 & 0x0008) if ((e1 & 0x0007) == 0x0007) break; - if (l1 & 0x0010) if ((e1 & 0x000f) == 0x000f) break; - if (l1 & 0x0020) if ((e1 & 0x001f) == 0x001f) break; - if (l1 & 0x0040) if ((e1 & 0x003f) == 0x003f) break; - if (l1 & 0x0080) if ((e1 & 0x007f) == 0x007f) break; - if (l1 & 0x0100) if ((e1 & 0x00ff) == 0x00ff) break; - if (l1 & 0x0200) if ((e1 & 0x01ff) == 0x01ff) break; - if (l1 & 0x0400) if ((e1 & 0x03ff) == 0x03ff) break; - if (l1 & 0x0800) if ((e1 & 0x07ff) == 0x07ff) break; - if (l1 & 0x1000) if ((e1 & 0x0fff) == 0x0fff) break; - if (l1 & 0x2000) if ((e1 & 0x1fff) == 0x1fff) break; - if (l1 & 0x4000) if ((e1 & 0x3fff) == 0x3fff) break; - if (l1 & 0x8000) if ((e1 & 0x7fff) == 0x7fff) break; - } + if (l00) break; + if (l01 && e00) break; + if (l02 && e00 && e01) break; + if (l03 && e00 && e01 && e02) break; + if (l04 && e00 && e01 && e02 && e03) break; + if (l05 && e00 && e01 && e02 && e03 && e04) break; + if (l06 && e00 && e01 && e02 && e03 && e04 && e05) break; + if (l07 && e00 && e01 && e02 && e03 && e04 && e05 && e06) break; + if (l08 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07) break; + if (l09 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07 && e08) break; + if (l10 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07 && e08 && e09) break; + if (l11 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07 && e08 && e09 && e10) break; + if (l12 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07 && e08 && e09 && e10 && e11) break; + if (l13 && e00 && e01 && e02 && e03 && e04 && e05 
&& e06 && e07 && e08 && e09 && e10 && e11 && e12) break; + if (l14 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07 && e08 && e09 && e10 && e11 && e12 && e13) break; + if (l15 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07 && e08 && e09 && e10 && e11 && e12 && e13 && e14) break; // r = x (copy it to have the original values for the subtraction) @@ -444,80 +440,76 @@ DECLSPEC void mod_512 (u32 n[16]) r[14] = x[14]; r[15] = x[15]; - // x >>= 1 + // x >>= 1 - x[15] = x[15] >> 1 | (x[14] & 1) << 31; - x[14] = x[14] >> 1 | (x[13] & 1) << 31; - x[13] = x[13] >> 1 | (x[12] & 1) << 31; - x[12] = x[12] >> 1 | (x[11] & 1) << 31; - x[11] = x[11] >> 1 | (x[10] & 1) << 31; - x[10] = x[10] >> 1 | (x[ 9] & 1) << 31; - x[ 9] = x[ 9] >> 1 | (x[ 8] & 1) << 31; - x[ 8] = x[ 8] >> 1 | (x[ 7] & 1) << 31; - x[ 7] = x[ 7] >> 1 | (x[ 6] & 1) << 31; - x[ 6] = x[ 6] >> 1 | (x[ 5] & 1) << 31; - x[ 5] = x[ 5] >> 1 | (x[ 4] & 1) << 31; - x[ 4] = x[ 4] >> 1 | (x[ 3] & 1) << 31; - x[ 3] = x[ 3] >> 1 | (x[ 2] & 1) << 31; - x[ 2] = x[ 2] >> 1 | (x[ 1] & 1) << 31; - x[ 1] = x[ 1] >> 1 | (x[ 0] & 1) << 31; + x[15] = x[15] >> 1 | x[14] << 31; + x[14] = x[14] >> 1 | x[13] << 31; + x[13] = x[13] >> 1 | x[12] << 31; + x[12] = x[12] >> 1 | x[11] << 31; + x[11] = x[11] >> 1 | x[10] << 31; + x[10] = x[10] >> 1 | x[ 9] << 31; + x[ 9] = x[ 9] >> 1 | x[ 8] << 31; + x[ 8] = x[ 8] >> 1 | x[ 7] << 31; + x[ 7] = x[ 7] >> 1 | x[ 6] << 31; + x[ 6] = x[ 6] >> 1 | x[ 5] << 31; + x[ 5] = x[ 5] >> 1 | x[ 4] << 31; + x[ 4] = x[ 4] >> 1 | x[ 3] << 31; + x[ 3] = x[ 3] >> 1 | x[ 2] << 31; + x[ 2] = x[ 2] >> 1 | x[ 1] << 31; + x[ 1] = x[ 1] >> 1 | x[ 0] << 31; x[ 0] = x[ 0] >> 1; // if (a >= r) a -= r; - const u32 l2 = (a[ 0] < r[ 0]) << 0 - | (a[ 1] < r[ 1]) << 1 - | (a[ 2] < r[ 2]) << 2 - | (a[ 3] < r[ 3]) << 3 - | (a[ 4] < r[ 4]) << 4 - | (a[ 5] < r[ 5]) << 5 - | (a[ 6] < r[ 6]) << 6 - | (a[ 7] < r[ 7]) << 7 - | (a[ 8] < r[ 8]) << 8 - | (a[ 9] < r[ 9]) << 9 - | (a[10] < r[10]) << 10 - | (a[11] < r[11]) <<
11 - | (a[12] < r[12]) << 12 - | (a[13] < r[13]) << 13 - | (a[14] < r[14]) << 14 - | (a[15] < r[15]) << 15; + l00 = a[ 0] < r[ 0]; + l01 = a[ 1] < r[ 1]; + l02 = a[ 2] < r[ 2]; + l03 = a[ 3] < r[ 3]; + l04 = a[ 4] < r[ 4]; + l05 = a[ 5] < r[ 5]; + l06 = a[ 6] < r[ 6]; + l07 = a[ 7] < r[ 7]; + l08 = a[ 8] < r[ 8]; + l09 = a[ 9] < r[ 9]; + l10 = a[10] < r[10]; + l11 = a[11] < r[11]; + l12 = a[12] < r[12]; + l13 = a[13] < r[13]; + l14 = a[14] < r[14]; + l15 = a[15] < r[15]; - const u32 e2 = (a[ 0] == r[ 0]) << 0 - | (a[ 1] == r[ 1]) << 1 - | (a[ 2] == r[ 2]) << 2 - | (a[ 3] == r[ 3]) << 3 - | (a[ 4] == r[ 4]) << 4 - | (a[ 5] == r[ 5]) << 5 - | (a[ 6] == r[ 6]) << 6 - | (a[ 7] == r[ 7]) << 7 - | (a[ 8] == r[ 8]) << 8 - | (a[ 9] == r[ 9]) << 9 - | (a[10] == r[10]) << 10 - | (a[11] == r[11]) << 11 - | (a[12] == r[12]) << 12 - | (a[13] == r[13]) << 13 - | (a[14] == r[14]) << 14 - | (a[15] == r[15]) << 15; + e00 = a[ 0] == r[ 0]; + e01 = a[ 1] == r[ 1]; + e02 = a[ 2] == r[ 2]; + e03 = a[ 3] == r[ 3]; + e04 = a[ 4] == r[ 4]; + e05 = a[ 5] == r[ 5]; + e06 = a[ 6] == r[ 6]; + e07 = a[ 7] == r[ 7]; + e08 = a[ 8] == r[ 8]; + e09 = a[ 9] == r[ 9]; + e10 = a[10] == r[10]; + e11 = a[11] == r[11]; + e12 = a[12] == r[12]; + e13 = a[13] == r[13]; + e14 = a[14] == r[14]; - if (l2) - { - if (l2 & 0x0001) continue; - if (l2 & 0x0002) if ((e2 & 0x0001) == 0x0001) continue; - if (l2 & 0x0004) if ((e2 & 0x0003) == 0x0003) continue; - if (l2 & 0x0008) if ((e2 & 0x0007) == 0x0007) continue; - if (l2 & 0x0010) if ((e2 & 0x000f) == 0x000f) continue; - if (l2 & 0x0020) if ((e2 & 0x001f) == 0x001f) continue; - if (l2 & 0x0040) if ((e2 & 0x003f) == 0x003f) continue; - if (l2 & 0x0080) if ((e2 & 0x007f) == 0x007f) continue; - if (l2 & 0x0100) if ((e2 & 0x00ff) == 0x00ff) continue; - if (l2 & 0x0200) if ((e2 & 0x01ff) == 0x01ff) continue; - if (l2 & 0x0400) if ((e2 & 0x03ff) == 0x03ff) continue; - if (l2 & 0x0800) if ((e2 & 0x07ff) == 0x07ff) continue; - if (l2 & 0x1000) if ((e2 & 0x0fff) == 
0x0fff) continue; - if (l2 & 0x2000) if ((e2 & 0x1fff) == 0x1fff) continue; - if (l2 & 0x4000) if ((e2 & 0x3fff) == 0x3fff) continue; - if (l2 & 0x8000) if ((e2 & 0x7fff) == 0x7fff) continue; - } + if (l00) continue; + if (l01 && e00) continue; + if (l02 && e00 && e01) continue; + if (l03 && e00 && e01 && e02) continue; + if (l04 && e00 && e01 && e02 && e03) continue; + if (l05 && e00 && e01 && e02 && e03 && e04) continue; + if (l06 && e00 && e01 && e02 && e03 && e04 && e05) continue; + if (l07 && e00 && e01 && e02 && e03 && e04 && e05 && e06) continue; + if (l08 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07) continue; + if (l09 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07 && e08) continue; + if (l10 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07 && e08 && e09) continue; + if (l11 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07 && e08 && e09 && e10) continue; + if (l12 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07 && e08 && e09 && e10 && e11) continue; + if (l13 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07 && e08 && e09 && e10 && e11 && e12) continue; + if (l14 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07 && e08 && e09 && e10 && e11 && e12 && e13) continue; + if (l15 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07 && e08 && e09 && e10 && e11 && e12 && e13 && e14) continue; // substract (a -= r):