Inline assembly optimization for 256 bit ADD and SUB in inc_ecc_secp256k1.cl

pull/2246/head
Jens Steube 5 years ago
parent 5c97e6797a
commit cb24236067

@ -108,6 +108,21 @@ DECLSPEC u32 sub (u32 r[8], const u32 a[8], const u32 b[8])
{
u32 c = 0; // carry/borrow
#ifdef IS_NV
asm("sub.cc.u32 %0, %9, %17;"
"subc.cc.u32 %1, %10, %18;"
"subc.cc.u32 %2, %11, %19;"
"subc.cc.u32 %3, %12, %20;"
"subc.cc.u32 %4, %13, %21;"
"subc.cc.u32 %5, %14, %22;"
"subc.cc.u32 %6, %15, %23;"
"subc.cc.u32 %7, %16, %24;"
"subc.u32 %8, 0, 0;"
: "=r"(r[0]), "=r"(r[1]), "=r"(r[2]), "=r"(r[3]), "=r"(r[4]), "=r"(r[5]), "=r"(r[6]), "=r"(r[7]),
"=r"(c)
: "r"(a[0]), "r"(a[1]), "r"(a[2]), "r"(a[3]), "r"(a[4]), "r"(a[5]), "r"(a[6]), "r"(a[7]),
"r"(b[0]), "r"(b[1]), "r"(b[2]), "r"(b[3]), "r"(b[4]), "r"(b[5]), "r"(b[6]), "r"(b[7]));
#else
for (u32 i = 0; i < 8; i++)
{
const u32 diff = a[i] - b[i] - c;
@ -115,15 +130,31 @@ DECLSPEC u32 sub (u32 r[8], const u32 a[8], const u32 b[8])
if (diff != a[i]) c = (diff > a[i]);
r[i] = diff;
}
}
#endif
return c;
return c;
}
DECLSPEC u32 add (u32 r[8], const u32 a[8], const u32 b[8])
{
u32 c = 0; // carry/borrow
#ifdef IS_NV
asm("add.cc.u32 %0, %9, %17;"
"addc.cc.u32 %1, %10, %18;"
"addc.cc.u32 %2, %11, %19;"
"addc.cc.u32 %3, %12, %20;"
"addc.cc.u32 %4, %13, %21;"
"addc.cc.u32 %5, %14, %22;"
"addc.cc.u32 %6, %15, %23;"
"addc.cc.u32 %7, %16, %24;"
"addc.u32 %8, 0, 0;"
: "=r"(r[0]), "=r"(r[1]), "=r"(r[2]), "=r"(r[3]), "=r"(r[4]), "=r"(r[5]), "=r"(r[6]), "=r"(r[7]),
"=r"(c)
: "r"(a[0]), "r"(a[1]), "r"(a[2]), "r"(a[3]), "r"(a[4]), "r"(a[5]), "r"(a[6]), "r"(a[7]),
"r"(b[0]), "r"(b[1]), "r"(b[2]), "r"(b[3]), "r"(b[4]), "r"(b[5]), "r"(b[6]), "r"(b[7]));
#else
for (u32 i = 0; i < 8; i++)
{
const u32 t = a[i] + b[i] + c;
@ -132,6 +163,7 @@ DECLSPEC u32 add (u32 r[8], const u32 a[8], const u32 b[8])
r[i] = t;
}
#endif
return c;
}

Loading…
Cancel
Save