Use different code for mod_512() to keep some NV GPUs from hanging

pull/2246/head
Jens Steube 5 years ago
parent 53254b45aa
commit 75b4164498

@ -369,59 +369,55 @@ DECLSPEC void mod_512 (u32 n[16])
while (a[0] >= b[0]) while (a[0] >= b[0])
{ {
const u32 l1 = (a[ 0] < b[ 0]) << 0 u32 l00 = a[ 0] < b[ 0];
| (a[ 1] < b[ 1]) << 1 u32 l01 = a[ 1] < b[ 1];
| (a[ 2] < b[ 2]) << 2 u32 l02 = a[ 2] < b[ 2];
| (a[ 3] < b[ 3]) << 3 u32 l03 = a[ 3] < b[ 3];
| (a[ 4] < b[ 4]) << 4 u32 l04 = a[ 4] < b[ 4];
| (a[ 5] < b[ 5]) << 5 u32 l05 = a[ 5] < b[ 5];
| (a[ 6] < b[ 6]) << 6 u32 l06 = a[ 6] < b[ 6];
| (a[ 7] < b[ 7]) << 7 u32 l07 = a[ 7] < b[ 7];
| (a[ 8] < b[ 8]) << 8 u32 l08 = a[ 8] < b[ 8];
| (a[ 9] < b[ 9]) << 9 u32 l09 = a[ 9] < b[ 9];
| (a[10] < b[10]) << 10 u32 l10 = a[10] < b[10];
| (a[11] < b[11]) << 11 u32 l11 = a[11] < b[11];
| (a[12] < b[12]) << 12 u32 l12 = a[12] < b[12];
| (a[13] < b[13]) << 13 u32 l13 = a[13] < b[13];
| (a[14] < b[14]) << 14 u32 l14 = a[14] < b[14];
| (a[15] < b[15]) << 15; u32 l15 = a[15] < b[15];
const u32 e1 = (a[ 0] == b[ 0]) << 0 u32 e00 = a[ 0] == b[ 0];
| (a[ 1] == b[ 1]) << 1 u32 e01 = a[ 1] == b[ 1];
| (a[ 2] == b[ 2]) << 2 u32 e02 = a[ 2] == b[ 2];
| (a[ 3] == b[ 3]) << 3 u32 e03 = a[ 3] == b[ 3];
| (a[ 4] == b[ 4]) << 4 u32 e04 = a[ 4] == b[ 4];
| (a[ 5] == b[ 5]) << 5 u32 e05 = a[ 5] == b[ 5];
| (a[ 6] == b[ 6]) << 6 u32 e06 = a[ 6] == b[ 6];
| (a[ 7] == b[ 7]) << 7 u32 e07 = a[ 7] == b[ 7];
| (a[ 8] == b[ 8]) << 8 u32 e08 = a[ 8] == b[ 8];
| (a[ 9] == b[ 9]) << 9 u32 e09 = a[ 9] == b[ 9];
| (a[10] == b[10]) << 10 u32 e10 = a[10] == b[10];
| (a[11] == b[11]) << 11 u32 e11 = a[11] == b[11];
| (a[12] == b[12]) << 12 u32 e12 = a[12] == b[12];
| (a[13] == b[13]) << 13 u32 e13 = a[13] == b[13];
| (a[14] == b[14]) << 14 u32 e14 = a[14] == b[14];
| (a[15] == b[15]) << 15;
if (l00) break;
if (l1) if (l01 && e00) break;
{ if (l02 && e00 && e01) break;
if (l1 & 0x0001) break; if (l03 && e00 && e01 && e02) break;
if (l1 & 0x0002) if ((e1 & 0x0001) == 0x0001) break; if (l04 && e00 && e01 && e02 && e03) break;
if (l1 & 0x0004) if ((e1 & 0x0003) == 0x0003) break; if (l05 && e00 && e01 && e02 && e03 && e04) break;
if (l1 & 0x0008) if ((e1 & 0x0007) == 0x0007) break; if (l06 && e00 && e01 && e02 && e03 && e04 && e05) break;
if (l1 & 0x0010) if ((e1 & 0x000f) == 0x000f) break; if (l07 && e00 && e01 && e02 && e03 && e04 && e05 && e06) break;
if (l1 & 0x0020) if ((e1 & 0x001f) == 0x001f) break; if (l08 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07) break;
if (l1 & 0x0040) if ((e1 & 0x003f) == 0x003f) break; if (l09 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07 && e08) break;
if (l1 & 0x0080) if ((e1 & 0x007f) == 0x007f) break; if (l10 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07 && e08 && e09) break;
if (l1 & 0x0100) if ((e1 & 0x00ff) == 0x00ff) break; if (l11 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07 && e08 && e09 && e10) break;
if (l1 & 0x0200) if ((e1 & 0x01ff) == 0x01ff) break; if (l12 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07 && e08 && e09 && e10 && e11) break;
if (l1 & 0x0400) if ((e1 & 0x03ff) == 0x03ff) break; if (l13 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07 && e08 && e09 && e10 && e11 && e12) break;
if (l1 & 0x0800) if ((e1 & 0x07ff) == 0x07ff) break; if (l14 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07 && e08 && e09 && e10 && e11 && e12 && e13) break;
if (l1 & 0x1000) if ((e1 & 0x0fff) == 0x0fff) break; if (l15 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07 && e08 && e09 && e10 && e11 && e12 && e13 && e14) break;
if (l1 & 0x2000) if ((e1 & 0x1fff) == 0x1fff) break;
if (l1 & 0x4000) if ((e1 & 0x3fff) == 0x3fff) break;
if (l1 & 0x8000) if ((e1 & 0x7fff) == 0x7fff) break;
}
// r = x (copy it to have the original values for the subtraction) // r = x (copy it to have the original values for the subtraction)
@ -444,80 +440,76 @@ DECLSPEC void mod_512 (u32 n[16])
r[14] = x[14]; r[14] = x[14];
r[15] = x[15]; r[15] = x[15];
// x >>= 1 // x >>= 1
x[15] = x[15] >> 1 | (x[14] & 1) << 31; x[15] = x[15] >> 1 | x[14] << 31;
x[14] = x[14] >> 1 | (x[13] & 1) << 31; x[14] = x[14] >> 1 | x[13] << 31;
x[13] = x[13] >> 1 | (x[12] & 1) << 31; x[13] = x[13] >> 1 | x[12] << 31;
x[12] = x[12] >> 1 | (x[11] & 1) << 31; x[12] = x[12] >> 1 | x[11] << 31;
x[11] = x[11] >> 1 | (x[10] & 1) << 31; x[11] = x[11] >> 1 | x[10] << 31;
x[10] = x[10] >> 1 | (x[ 9] & 1) << 31; x[10] = x[10] >> 1 | x[ 9] << 31;
x[ 9] = x[ 9] >> 1 | (x[ 8] & 1) << 31; x[ 9] = x[ 9] >> 1 | x[ 8] << 31;
x[ 8] = x[ 8] >> 1 | (x[ 7] & 1) << 31; x[ 8] = x[ 8] >> 1 | x[ 7] << 31;
x[ 7] = x[ 7] >> 1 | (x[ 6] & 1) << 31; x[ 7] = x[ 7] >> 1 | x[ 6] << 31;
x[ 6] = x[ 6] >> 1 | (x[ 5] & 1) << 31; x[ 6] = x[ 6] >> 1 | x[ 5] << 31;
x[ 5] = x[ 5] >> 1 | (x[ 4] & 1) << 31; x[ 5] = x[ 5] >> 1 | x[ 4] << 31;
x[ 4] = x[ 4] >> 1 | (x[ 3] & 1) << 31; x[ 4] = x[ 4] >> 1 | x[ 3] << 31;
x[ 3] = x[ 3] >> 1 | (x[ 2] & 1) << 31; x[ 3] = x[ 3] >> 1 | x[ 2] << 31;
x[ 2] = x[ 2] >> 1 | (x[ 1] & 1) << 31; x[ 2] = x[ 2] >> 1 | x[ 1] << 31;
x[ 1] = x[ 1] >> 1 | (x[ 0] & 1) << 31; x[ 1] = x[ 1] >> 1 | x[ 0] << 31;
x[ 0] = x[ 0] >> 1; x[ 0] = x[ 0] >> 1;
// if (a >= r) a -= r; // if (a >= r) a -= r;
const u32 l2 = (a[ 0] < r[ 0]) << 0 l00 = a[ 0] < r[ 0];
| (a[ 1] < r[ 1]) << 1 l01 = a[ 1] < r[ 1];
| (a[ 2] < r[ 2]) << 2 l02 = a[ 2] < r[ 2];
| (a[ 3] < r[ 3]) << 3 l03 = a[ 3] < r[ 3];
| (a[ 4] < r[ 4]) << 4 l04 = a[ 4] < r[ 4];
| (a[ 5] < r[ 5]) << 5 l05 = a[ 5] < r[ 5];
| (a[ 6] < r[ 6]) << 6 l06 = a[ 6] < r[ 6];
| (a[ 7] < r[ 7]) << 7 l07 = a[ 7] < r[ 7];
| (a[ 8] < r[ 8]) << 8 l08 = a[ 8] < r[ 8];
| (a[ 9] < r[ 9]) << 9 l09 = a[ 9] < r[ 9];
| (a[10] < r[10]) << 10 l10 = a[10] < r[10];
| (a[11] < r[11]) << 11 l11 = a[11] < r[11];
| (a[12] < r[12]) << 12 l12 = a[12] < r[12];
| (a[13] < r[13]) << 13 l13 = a[13] < r[13];
| (a[14] < r[14]) << 14 l14 = a[14] < r[14];
| (a[15] < r[15]) << 15; l15 = a[15] < r[15];
const u32 e2 = (a[ 0] == r[ 0]) << 0 e00 = a[ 0] == r[ 0];
| (a[ 1] == r[ 1]) << 1 e01 = a[ 1] == r[ 1];
| (a[ 2] == r[ 2]) << 2 e02 = a[ 2] == r[ 2];
| (a[ 3] == r[ 3]) << 3 e03 = a[ 3] == r[ 3];
| (a[ 4] == r[ 4]) << 4 e04 = a[ 4] == r[ 4];
| (a[ 5] == r[ 5]) << 5 e05 = a[ 5] == r[ 5];
| (a[ 6] == r[ 6]) << 6 e06 = a[ 6] == r[ 6];
| (a[ 7] == r[ 7]) << 7 e07 = a[ 7] == r[ 7];
| (a[ 8] == r[ 8]) << 8 e08 = a[ 8] == r[ 8];
| (a[ 9] == r[ 9]) << 9 e09 = a[ 9] == r[ 9];
| (a[10] == r[10]) << 10 e10 = a[10] == r[10];
| (a[11] == r[11]) << 11 e11 = a[11] == r[11];
| (a[12] == r[12]) << 12 e12 = a[12] == r[12];
| (a[13] == r[13]) << 13 e13 = a[13] == r[13];
| (a[14] == r[14]) << 14 e14 = a[14] == r[14];
| (a[15] == r[15]) << 15;
if (l00) continue;
if (l2) if (l01 && e00) continue;
{ if (l02 && e00 && e01) continue;
if (l2 & 0x0001) continue; if (l03 && e00 && e01 && e02) continue;
if (l2 & 0x0002) if ((e2 & 0x0001) == 0x0001) continue; if (l04 && e00 && e01 && e02 && e03) continue;
if (l2 & 0x0004) if ((e2 & 0x0003) == 0x0003) continue; if (l05 && e00 && e01 && e02 && e03 && e04) continue;
if (l2 & 0x0008) if ((e2 & 0x0007) == 0x0007) continue; if (l06 && e00 && e01 && e02 && e03 && e04 && e05) continue;
if (l2 & 0x0010) if ((e2 & 0x000f) == 0x000f) continue; if (l07 && e00 && e01 && e02 && e03 && e04 && e05 && e06) continue;
if (l2 & 0x0020) if ((e2 & 0x001f) == 0x001f) continue; if (l08 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07) continue;
if (l2 & 0x0040) if ((e2 & 0x003f) == 0x003f) continue; if (l09 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07 && e08) continue;
if (l2 & 0x0080) if ((e2 & 0x007f) == 0x007f) continue; if (l10 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07 && e08 && e09) continue;
if (l2 & 0x0100) if ((e2 & 0x00ff) == 0x00ff) continue; if (l11 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07 && e08 && e09 && e10) continue;
if (l2 & 0x0200) if ((e2 & 0x01ff) == 0x01ff) continue; if (l12 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07 && e08 && e09 && e10 && e11) continue;
if (l2 & 0x0400) if ((e2 & 0x03ff) == 0x03ff) continue; if (l13 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07 && e08 && e09 && e10 && e11 && e12) continue;
if (l2 & 0x0800) if ((e2 & 0x07ff) == 0x07ff) continue; if (l14 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07 && e08 && e09 && e10 && e11 && e12 && e13) continue;
if (l2 & 0x1000) if ((e2 & 0x0fff) == 0x0fff) continue; if (l15 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07 && e08 && e09 && e10 && e11 && e12 && e13 && e14) continue;
if (l2 & 0x2000) if ((e2 & 0x1fff) == 0x1fff) continue;
if (l2 & 0x4000) if ((e2 & 0x3fff) == 0x3fff) continue;
if (l2 & 0x8000) if ((e2 & 0x7fff) == 0x7fff) continue;
}
// subtract (a -= r): // subtract (a -= r):

Loading…
Cancel
Save