diff --git a/OpenCL/inc_ecc_secp256k1.cl b/OpenCL/inc_ecc_secp256k1.cl index 55a4469e5..3318298ff 100644 --- a/OpenCL/inc_ecc_secp256k1.cl +++ b/OpenCL/inc_ecc_secp256k1.cl @@ -108,6 +108,41 @@ DECLSPEC u32 sub (u32 r[8], const u32 a[8], const u32 b[8]) { u32 c = 0; // carry/borrow + #if defined IS_NV && HAS_SUB == 1 && HAS_SUBC == 1 + asm volatile + ( + "sub.cc.u32 %0, %9, %17;" + "subc.cc.u32 %1, %10, %18;" + "subc.cc.u32 %2, %11, %19;" + "subc.cc.u32 %3, %12, %20;" + "subc.cc.u32 %4, %13, %21;" + "subc.cc.u32 %5, %14, %22;" + "subc.cc.u32 %6, %15, %23;" + "subc.cc.u32 %7, %16, %24;" + "subc.u32 %8, 0, 0;" + : "=r"(r[0]), "=r"(r[1]), "=r"(r[2]), "=r"(r[3]), "=r"(r[4]), "=r"(r[5]), "=r"(r[6]), "=r"(r[7]), + "=r"(c) + : "r"(a[0]), "r"(a[1]), "r"(a[2]), "r"(a[3]), "r"(a[4]), "r"(a[5]), "r"(a[6]), "r"(a[7]), + "r"(b[0]), "r"(b[1]), "r"(b[2]), "r"(b[3]), "r"(b[4]), "r"(b[5]), "r"(b[6]), "r"(b[7]) + ); + #elif defined IS_AMD && HAS_VSUB == 1 && HAS_VSUBB == 1 + __asm__ __volatile__ + ( + "V_SUB_U32 %0, %9, %17;" + "V_SUBB_U32 %1, %10, %18;" + "V_SUBB_U32 %2, %11, %19;" + "V_SUBB_U32 %3, %12, %20;" + "V_SUBB_U32 %4, %13, %21;" + "V_SUBB_U32 %5, %14, %22;" + "V_SUBB_U32 %6, %15, %23;" + "V_SUBB_U32 %7, %16, %24;" + "V_SUBB_U32 %8, 0, 0;" + : "=v"(r[0]), "=v"(r[1]), "=v"(r[2]), "=v"(r[3]), "=v"(r[4]), "=v"(r[5]), "=v"(r[6]), "=v"(r[7]), + "=v"(c) + : "v"(a[0]), "v"(a[1]), "v"(a[2]), "v"(a[3]), "v"(a[4]), "v"(a[5]), "v"(a[6]), "v"(a[7]), + "v"(b[0]), "v"(b[1]), "v"(b[2]), "v"(b[3]), "v"(b[4]), "v"(b[5]), "v"(b[6]), "v"(b[7]) + ); + #else for (u32 i = 0; i < 8; i++) { const u32 diff = a[i] - b[i] - c; @@ -115,15 +150,51 @@ DECLSPEC u32 sub (u32 r[8], const u32 a[8], const u32 b[8]) if (diff != a[i]) c = (diff > a[i]); r[i] = diff; - } + } + #endif - return c; + return c; } DECLSPEC u32 add (u32 r[8], const u32 a[8], const u32 b[8]) { u32 c = 0; // carry/borrow + #if defined IS_NV && HAS_ADD == 1 && HAS_ADDC == 1 + asm volatile + ( + "add.cc.u32 %0, %9, %17;" + "addc.cc.u32 %1, %10, %18;" + "addc.cc.u32 %2, %11, %19;" + "addc.cc.u32 %3, %12, %20;" + "addc.cc.u32 %4, %13, %21;" + "addc.cc.u32 %5, %14, %22;" + "addc.cc.u32 %6, %15, %23;" + "addc.cc.u32 %7, %16, %24;" + "addc.u32 %8, 0, 0;" + : "=r"(r[0]), "=r"(r[1]), "=r"(r[2]), "=r"(r[3]), "=r"(r[4]), "=r"(r[5]), "=r"(r[6]), "=r"(r[7]), + "=r"(c) + : "r"(a[0]), "r"(a[1]), "r"(a[2]), "r"(a[3]), "r"(a[4]), "r"(a[5]), "r"(a[6]), "r"(a[7]), + "r"(b[0]), "r"(b[1]), "r"(b[2]), "r"(b[3]), "r"(b[4]), "r"(b[5]), "r"(b[6]), "r"(b[7]) + ); + #elif defined IS_AMD && HAS_VADD == 1 && HAS_VADDC == 1 + __asm__ __volatile__ + ( + "V_ADD_U32 %0, %9, %17;" + "V_ADDC_U32 %1, %10, %18;" + "V_ADDC_U32 %2, %11, %19;" + "V_ADDC_U32 %3, %12, %20;" + "V_ADDC_U32 %4, %13, %21;" + "V_ADDC_U32 %5, %14, %22;" + "V_ADDC_U32 %6, %15, %23;" + "V_ADDC_U32 %7, %16, %24;" + "V_ADDC_U32 %8, 0, 0;" + : "=v"(r[0]), "=v"(r[1]), "=v"(r[2]), "=v"(r[3]), "=v"(r[4]), "=v"(r[5]), "=v"(r[6]), "=v"(r[7]), + "=v"(c) + : "v"(a[0]), "v"(a[1]), "v"(a[2]), "v"(a[3]), "v"(a[4]), "v"(a[5]), "v"(a[6]), "v"(a[7]), + "v"(b[0]), "v"(b[1]), "v"(b[2]), "v"(b[3]), "v"(b[4]), "v"(b[5]), "v"(b[6]), "v"(b[7]) + ); + #else for (u32 i = 0; i < 8; i++) { const u32 t = a[i] + b[i] + c; @@ -132,6 +203,7 @@ DECLSPEC u32 add (u32 r[8], const u32 a[8], const u32 b[8]) r[i] = t; } + #endif return c; } @@ -297,59 +369,55 @@ DECLSPEC void mod_512 (u32 n[16]) while (a[0] >= b[0]) { - const u32 l1 = (a[ 0] < b[ 0]) << 0 - | (a[ 1] < b[ 1]) << 1 - | (a[ 2] < b[ 2]) << 2 - | (a[ 3] < b[ 3]) << 3 - | (a[ 4] < b[ 4]) << 4 - | (a[ 5] < b[ 5]) << 5 - | (a[ 6] < b[ 6]) << 6 - | (a[ 7] < b[ 7]) << 7 - | (a[ 8] < b[ 8]) << 8 - | (a[ 9] < b[ 9]) << 9 - | (a[10] < b[10]) << 10 - | (a[11] < b[11]) << 11 - | (a[12] < b[12]) << 12 - | (a[13] < b[13]) << 13 - | (a[14] < b[14]) << 14 - | (a[15] < b[15]) << 15; + u32 l00 = a[ 0] < b[ 0]; + u32 l01 = a[ 1] < b[ 1]; + u32 l02 = a[ 2] < b[ 2]; + u32 l03 = a[ 3] < b[ 3]; + u32 l04 = a[ 4] < b[ 4]; + u32 l05 = a[ 5] < b[ 5]; + u32 l06 = a[ 6] < b[ 6]; + u32 l07 = a[ 7] < b[ 7]; + u32 l08 = a[ 8] < b[ 8]; + u32 l09 = a[ 9] < b[ 9]; + u32 l10 = a[10] < b[10]; + u32 l11 = a[11] < b[11]; + u32 l12 = a[12] < b[12]; + u32 l13 = a[13] < b[13]; + u32 l14 = a[14] < b[14]; + u32 l15 = a[15] < b[15]; - const u32 e1 = (a[ 0] == b[ 0]) << 0 - | (a[ 1] == b[ 1]) << 1 - | (a[ 2] == b[ 2]) << 2 - | (a[ 3] == b[ 3]) << 3 - | (a[ 4] == b[ 4]) << 4 - | (a[ 5] == b[ 5]) << 5 - | (a[ 6] == b[ 6]) << 6 - | (a[ 7] == b[ 7]) << 7 - | (a[ 8] == b[ 8]) << 8 - | (a[ 9] == b[ 9]) << 9 - | (a[10] == b[10]) << 10 - | (a[11] == b[11]) << 11 - | (a[12] == b[12]) << 12 - | (a[13] == b[13]) << 13 - | (a[14] == b[14]) << 14 - | (a[15] == b[15]) << 15; + u32 e00 = a[ 0] == b[ 0]; + u32 e01 = a[ 1] == b[ 1]; + u32 e02 = a[ 2] == b[ 2]; + u32 e03 = a[ 3] == b[ 3]; + u32 e04 = a[ 4] == b[ 4]; + u32 e05 = a[ 5] == b[ 5]; + u32 e06 = a[ 6] == b[ 6]; + u32 e07 = a[ 7] == b[ 7]; + u32 e08 = a[ 8] == b[ 8]; + u32 e09 = a[ 9] == b[ 9]; + u32 e10 = a[10] == b[10]; + u32 e11 = a[11] == b[11]; + u32 e12 = a[12] == b[12]; + u32 e13 = a[13] == b[13]; + u32 e14 = a[14] == b[14]; - if (l1) - { - if (l1 & 0x0001) break; - if (l1 & 0x0002) if ((e1 & 0x0001) == 0x0001) break; - if (l1 & 0x0004) if ((e1 & 0x0003) == 0x0003) break; - if (l1 & 0x0008) if ((e1 & 0x0007) == 0x0007) break; - if (l1 & 0x0010) if ((e1 & 0x000f) == 0x000f) break; - if (l1 & 0x0020) if ((e1 & 0x001f) == 0x001f) break; - if (l1 & 0x0040) if ((e1 & 0x003f) == 0x003f) break; - if (l1 & 0x0080) if ((e1 & 0x007f) == 0x007f) break; - if (l1 & 0x0100) if ((e1 & 0x00ff) == 0x00ff) break; - if (l1 & 0x0200) if ((e1 & 0x01ff) == 0x01ff) break; - if (l1 & 0x0400) if ((e1 & 0x03ff) == 0x03ff) break; - if (l1 & 0x0800) if ((e1 & 0x07ff) == 0x07ff) break; - if (l1 & 0x1000) if ((e1 & 0x0fff) == 0x0fff) break; - if (l1 & 0x2000) if ((e1 & 0x1fff) == 0x1fff) break; - if (l1 & 0x4000) if ((e1 & 0x3fff) == 0x3fff) break; - if (l1 & 0x8000) if ((e1 & 0x7fff) == 0x7fff) break; - } + if (l00) break; + if (l01 && e00) break; + if (l02 && e00 && e01) break; + if (l03 && e00 && e01 && e02) break; + if (l04 && e00 && e01 && e02 && e03) break; + if (l05 && e00 && e01 && e02 && e03 && e04) break; + if (l06 && e00 && e01 && e02 && e03 && e04 && e05) break; + if (l07 && e00 && e01 && e02 && e03 && e04 && e05 && e06) break; + if (l08 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07) break; + if (l09 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07 && e08) break; + if (l10 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07 && e08 && e09) break; + if (l11 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07 && e08 && e09 && e10) break; + if (l12 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07 && e08 && e09 && e10 && e11) break; + if (l13 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07 && e08 && e09 && e10 && e11 && e12) break; + if (l14 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07 && e08 && e09 && e10 && e11 && e12 && e13) break; + if (l15 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07 && e08 && e09 && e10 && e11 && e12 && e13 && e14) break; // r = x (copy it to have the original values for the subtraction) @@ -372,80 +440,76 @@ DECLSPEC void mod_512 (u32 n[16]) r[14] = x[14]; r[15] = x[15]; - // x >>= 1 + // x <<= 1 - x[15] = x[15] >> 1 | (x[14] & 1) << 31; - x[14] = x[14] >> 1 | (x[13] & 1) << 31; - x[13] = x[13] >> 1 | (x[12] & 1) << 31; - x[12] = x[12] >> 1 | (x[11] & 1) << 31; - x[11] = x[11] >> 1 | (x[10] & 1) << 31; - x[10] = x[10] >> 1 | (x[ 9] & 1) << 31; - x[ 9] = x[ 9] >> 1 | (x[ 8] & 1) << 31; - x[ 8] = x[ 8] >> 1 | (x[ 7] & 1) << 31; - x[ 7] = x[ 7] >> 1 | (x[ 6] & 1) << 31; - x[ 6] = x[ 6] >> 1 | (x[ 5] & 1) << 31; - x[ 5] = x[ 5] >> 1 | (x[ 4] & 1) << 31; - x[ 4] = x[ 4] >> 1 | (x[ 3] & 1) << 31; - x[ 3] = x[ 3] >> 1 | (x[ 2] & 1) << 31; - x[ 2] = x[ 2] >> 1 | (x[ 1] & 1) << 31; - x[ 1] = x[ 1] >> 1 | (x[ 0] & 1) << 31; + x[15] = x[15] >> 1 | x[14] << 31; + x[14] = x[14] >> 1 | x[13] << 31; + x[13] = x[13] >> 1 | x[12] << 31; + x[12] = x[12] >> 1 | x[11] << 31; + x[11] = x[11] >> 1 | x[10] << 31; + x[10] = x[10] >> 1 | x[ 9] << 31; + x[ 9] = x[ 9] >> 1 | x[ 8] << 31; + x[ 8] = x[ 8] >> 1 | x[ 7] << 31; + x[ 7] = x[ 7] >> 1 | x[ 6] << 31; + x[ 6] = x[ 6] >> 1 | x[ 5] << 31; + x[ 5] = x[ 5] >> 1 | x[ 4] << 31; + x[ 4] = x[ 4] >> 1 | x[ 3] << 31; + x[ 3] = x[ 3] >> 1 | x[ 2] << 31; + x[ 2] = x[ 2] >> 1 | x[ 1] << 31; + x[ 1] = x[ 1] >> 1 | x[ 0] << 31; x[ 0] = x[ 0] >> 1; // if (a >= r) a -= r; - const u32 l2 = (a[ 0] < r[ 0]) << 0 - | (a[ 1] < r[ 1]) << 1 - | (a[ 2] < r[ 2]) << 2 - | (a[ 3] < r[ 3]) << 3 - | (a[ 4] < r[ 4]) << 4 - | (a[ 5] < r[ 5]) << 5 - | (a[ 6] < r[ 6]) << 6 - | (a[ 7] < r[ 7]) << 7 - | (a[ 8] < r[ 8]) << 8 - | (a[ 9] < r[ 9]) << 9 - | (a[10] < r[10]) << 10 - | (a[11] < r[11]) << 11 - | (a[12] < r[12]) << 12 - | (a[13] < r[13]) << 13 - | (a[14] < r[14]) << 14 - | (a[15] < r[15]) << 15; + l00 = a[ 0] < r[ 0]; + l01 = a[ 1] < r[ 1]; + l02 = a[ 2] < r[ 2]; + l03 = a[ 3] < r[ 3]; + l04 = a[ 4] < r[ 4]; + l05 = a[ 5] < r[ 5]; + l06 = a[ 6] < r[ 6]; + l07 = a[ 7] < r[ 7]; + l08 = a[ 8] < r[ 8]; + l09 = a[ 9] < r[ 9]; + l10 = a[10] < r[10]; + l11 = a[11] < r[11]; + l12 = a[12] < r[12]; + l13 = a[13] < r[13]; + l14 = a[14] < r[14]; + l15 = a[15] < r[15]; - const u32 e2 = (a[ 0] == r[ 0]) << 0 - | (a[ 1] == r[ 1]) << 1 - | (a[ 2] == r[ 2]) << 2 - | (a[ 3] == r[ 3]) << 3 - | (a[ 4] == r[ 4]) << 4 - | (a[ 5] == r[ 5]) << 5 - | (a[ 6] == r[ 6]) << 6 - | (a[ 7] == r[ 7]) << 7 - | (a[ 8] == r[ 8]) << 8 - | (a[ 9] == r[ 9]) << 9 - | (a[10] == r[10]) << 10 - | (a[11] == r[11]) << 11 - | (a[12] == r[12]) << 12 - | (a[13] == r[13]) << 13 - | (a[14] == r[14]) << 14 - | (a[15] == r[15]) << 15; + e00 = a[ 0] == r[ 0]; + e01 = a[ 1] == r[ 1]; + e02 = a[ 2] == r[ 2]; + e03 = a[ 3] == r[ 3]; + e04 = a[ 4] == r[ 4]; + e05 = a[ 5] == r[ 5]; + e06 = a[ 6] == r[ 6]; + e07 = a[ 7] == r[ 7]; + e08 = a[ 8] == r[ 8]; + e09 = a[ 9] == r[ 9]; + e10 = a[10] == r[10]; + e11 = a[11] == r[11]; + e12 = a[12] == r[12]; + e13 = a[13] == r[13]; + e14 = a[14] == r[14]; - if (l2) - { - if (l2 & 0x0001) continue; - if (l2 & 0x0002) if ((e2 & 0x0001) == 0x0001) continue; - if (l2 & 0x0004) if ((e2 & 0x0003) == 0x0003) continue; - if (l2 & 0x0008) if ((e2 & 0x0007) == 0x0007) continue; - if (l2 & 0x0010) if ((e2 & 0x000f) == 0x000f) continue; - if (l2 & 0x0020) if ((e2 & 0x001f) == 0x001f) continue; - if (l2 & 0x0040) if ((e2 & 0x003f) == 0x003f) continue; - if (l2 & 0x0080) if ((e2 & 0x007f) == 0x007f) continue; - if (l2 & 0x0100) if ((e2 & 0x00ff) == 0x00ff) continue; - if (l2 & 0x0200) if ((e2 & 0x01ff) == 0x01ff) continue; - if (l2 & 0x0400) if ((e2 & 0x03ff) == 0x03ff) continue; - if (l2 & 0x0800) if ((e2 & 0x07ff) == 0x07ff) continue; - if (l2 & 0x1000) if ((e2 & 0x0fff) == 0x0fff) continue; - if (l2 & 0x2000) if ((e2 & 0x1fff) == 0x1fff) continue; - if (l2 & 0x4000) if ((e2 & 0x3fff) == 0x3fff) continue; - if (l2 & 0x8000) if ((e2 & 0x7fff) == 0x7fff) continue; - } + if (l00) continue; + if (l01 && e00) continue; + if (l02 && e00 && e01) continue; + if (l03 && e00 && e01 && e02) continue; + if (l04 && e00 && e01 && e02 && e03) continue; + if (l05 && e00 && e01 && e02 && e03 && e04) continue; + if (l06 && e00 && e01 && e02 && e03 && e04 && e05) continue; + if (l07 && e00 && e01 && e02 && e03 && e04 && e05 && e06) continue; + if (l08 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07) continue; + if (l09 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07 && e08) continue; + if (l10 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07 && e08 && e09) continue; + if (l11 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07 && e08 && e09 && e10) continue; + if (l12 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07 && e08 && e09 && e10 && e11) continue; + if (l13 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07 && e08 && e09 && e10 && e11 && e12) continue; + if (l14 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07 && e08 && e09 && e10 && e11 && e12 && e13) continue; + if (l15 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07 && e08 && e09 && e10 && e11 && e12 && e13 && e14) continue; // substract (a -= r): diff --git a/include/types.h b/include/types.h index 8b4901b64..0b38e27aa 100644 --- a/include/types.h +++ b/include/types.h @@ -1238,11 +1238,19 @@ typedef struct hc_device_param hc_timer_t timer_speed; // AMD + bool has_vadd; + bool has_vaddc; + bool has_vsub; + bool has_vsubb; bool has_vadd3; bool has_vbfe; bool has_vperm; // NV + bool has_add; + bool has_addc; + bool has_sub; + bool has_subc; bool has_bfe; bool has_lop3; bool has_mov64; diff --git a/src/backend.c b/src/backend.c index c205ae7d0..c70d47095 100644 --- a/src/backend.c +++ b/src/backend.c @@ -5432,6 +5432,10 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) // //if (rc_cuCtxSetCacheConfig == -1) return -1; + device_param->has_add = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"add.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); + device_param->has_addc = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"addc.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); + device_param->has_sub = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"sub.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); + device_param->has_subc = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"subc.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); device_param->has_bfe = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"bfe.u32 %0, 0, 0, 0;\" : \"=r\"(r)); }"); device_param->has_lop3 = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"lop3.b32 %0, 0, 0, 0, 0;\" : \"=r\"(r)); }"); device_param->has_mov64 = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned long long r; unsigned int a; unsigned int b; asm volatile (\"mov.b64 %0, {%1, %2};\" : \"=l\"(r) : \"r\"(a), \"r\"(b)); }"); @@ -6132,6 +6136,10 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && (device_param->opencl_platform_vendor_id == VENDOR_ID_AMD)) { + device_param->has_vadd = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; __asm__ __volatile__ (\"V_ADD_U32 %0, 0, 0;\" : \"=v\"(r)); }"); + device_param->has_vaddc = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; __asm__ __volatile__ (\"V_ADDC_U32 %0, 0, 0;\" : \"=v\"(r)); }"); + device_param->has_vsub = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; __asm__ __volatile__ (\"V_SUB_U32 %0, 0, 0;\" : \"=v\"(r)); }"); + device_param->has_vsubb = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; __asm__ __volatile__ (\"V_SUBB_U32 %0, 0, 0;\" : \"=v\"(r)); }"); device_param->has_vadd3 = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; __asm__ __volatile__ (\"V_ADD3_U32 %0, 0, 0, 0;\" : \"=v\"(r)); }"); device_param->has_vbfe = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; __asm__ __volatile__ (\"V_BFE_U32 %0, 0, 0, 0;\" : \"=v\"(r)); }"); device_param->has_vperm = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; __asm__ __volatile__ (\"V_PERM_B32 %0, 0, 0, 0;\" : \"=v\"(r)); }"); @@ -6139,6 +6147,10 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && (device_param->opencl_platform_vendor_id == VENDOR_ID_NV)) { + device_param->has_add = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"add.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); + device_param->has_addc = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"addc.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); + device_param->has_sub = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"sub.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); + device_param->has_subc = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"subc.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); device_param->has_bfe = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"bfe.u32 %0, 0, 0, 0;\" : \"=r\"(r)); }"); device_param->has_lop3 = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"lop3.b32 %0, 0, 0, 0, 0;\" : \"=r\"(r)); }"); device_param->has_mov64 = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { ulong r; uint a; uint b; asm volatile (\"mov.b64 %0, {%1, %2};\" : \"=l\"(r) : \"r\"(a), \"r\"(b)); }"); @@ -7007,9 +7019,9 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) // we don't have sm_* on vendors not NV but it doesn't matter #if defined (DEBUG) - build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D LOCAL_MEM_TYPE=%d -D VENDOR_ID=%u -D CUDA_ARCH=%u -D HAS_VPERM=%u -D HAS_VADD3=%u -D HAS_VBFE=%u -D HAS_BFE=%u -D HAS_LOP3=%u -D HAS_MOV64=%u -D HAS_PRMT=%u -D VECT_SIZE=%d -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D ATTACK_EXEC=%u -D ATTACK_KERN=%u -D _unroll ", device_param->device_local_mem_type, device_param->opencl_platform_vendor_id, (device_param->sm_major * 100) + (device_param->sm_minor * 10), device_param->has_vperm, device_param->has_vadd3, device_param->has_vbfe, device_param->has_bfe, device_param->has_lop3, device_param->has_mov64, device_param->has_prmt, device_param->vector_width, (u32) device_param->opencl_device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type, hashconfig->attack_exec, user_options_extra->attack_kern); + build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D LOCAL_MEM_TYPE=%d -D VENDOR_ID=%u -D CUDA_ARCH=%u -D HAS_ADD=%u -D HAS_ADDC=%u -D HAS_SUB=%u -D HAS_SUBC=%u -D HAS_VADD=%u -D HAS_VADDC=%u -D HAS_VSUB=%u -D HAS_VSUBB=%u -D HAS_VPERM=%u -D HAS_VADD3=%u -D HAS_VBFE=%u -D HAS_BFE=%u -D HAS_LOP3=%u -D HAS_MOV64=%u -D HAS_PRMT=%u -D VECT_SIZE=%d -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D ATTACK_EXEC=%u -D ATTACK_KERN=%u -D _unroll ", device_param->device_local_mem_type, device_param->opencl_platform_vendor_id, (device_param->sm_major * 100) + (device_param->sm_minor * 10), device_param->has_add, device_param->has_addc, device_param->has_sub, device_param->has_subc, device_param->has_vadd, device_param->has_vaddc, device_param->has_vsub, device_param->has_vsubb, device_param->has_vperm, device_param->has_vadd3, device_param->has_vbfe, device_param->has_bfe, device_param->has_lop3, device_param->has_mov64, device_param->has_prmt, device_param->vector_width, (u32) device_param->opencl_device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type, hashconfig->attack_exec, user_options_extra->attack_kern); #else - build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D LOCAL_MEM_TYPE=%d -D VENDOR_ID=%u -D CUDA_ARCH=%u -D HAS_VPERM=%u -D HAS_VADD3=%u -D HAS_VBFE=%u -D HAS_BFE=%u -D HAS_LOP3=%u -D HAS_MOV64=%u -D HAS_PRMT=%u -D VECT_SIZE=%d -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D ATTACK_EXEC=%u -D ATTACK_KERN=%u -D _unroll -w ", device_param->device_local_mem_type, device_param->opencl_platform_vendor_id, (device_param->sm_major * 100) + (device_param->sm_minor * 10), device_param->has_vperm, device_param->has_vadd3, device_param->has_vbfe, device_param->has_bfe, device_param->has_lop3, device_param->has_mov64, device_param->has_prmt, device_param->vector_width, (u32) device_param->opencl_device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type, hashconfig->attack_exec, user_options_extra->attack_kern); + build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D LOCAL_MEM_TYPE=%d -D VENDOR_ID=%u -D CUDA_ARCH=%u -D HAS_ADD=%u -D HAS_ADDC=%u -D HAS_SUB=%u -D HAS_SUBC=%u -D HAS_VADD=%u -D HAS_VADDC=%u -D HAS_VSUB=%u -D HAS_VSUBB=%u -D HAS_VPERM=%u -D HAS_VADD3=%u -D HAS_VBFE=%u -D HAS_BFE=%u -D HAS_LOP3=%u -D HAS_MOV64=%u -D HAS_PRMT=%u -D VECT_SIZE=%d -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D ATTACK_EXEC=%u -D ATTACK_KERN=%u -D _unroll -w ", device_param->device_local_mem_type, device_param->opencl_platform_vendor_id, (device_param->sm_major * 100) + (device_param->sm_minor * 10), device_param->has_add, device_param->has_addc, device_param->has_sub, device_param->has_subc, device_param->has_vadd, device_param->has_vaddc, device_param->has_vsub, device_param->has_vsubb, device_param->has_vperm, device_param->has_vadd3, device_param->has_vbfe, device_param->has_bfe, device_param->has_lop3, device_param->has_mov64, device_param->has_prmt, device_param->vector_width, (u32) device_param->opencl_device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type, hashconfig->attack_exec, user_options_extra->attack_kern); #endif build_options_buf[build_options_len] = 0; diff --git a/src/modules/module_17200.c b/src/modules/module_17200.c index a94a30ec7..015c30865 100644 --- a/src/modules/module_17200.c +++ b/src/modules/module_17200.c @@ -163,6 +163,17 @@ u32 module_salt_type (MAYBE_UNUSED const hashconfig_t *hashconfig, const char *module_st_hash (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_HASH; } const char *module_st_pass (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_PASS; } +bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hc_device_param_t *device_param) +{ + // hangs somewhere in zlib inflate + if (device_param->opencl_platform_vendor_id == VENDOR_ID_AMD) + { + return true; + } + + return false; +} + u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { const u64 esalt_size = (const u64) sizeof (pkzip_t); @@ -419,6 +430,6 @@ void module_init (module_ctx_t *module_ctx) module_ctx->module_st_hash = module_st_hash; module_ctx->module_st_pass = module_st_pass; module_ctx->module_tmp_size = MODULE_DEFAULT; - module_ctx->module_unstable_warning = MODULE_DEFAULT; + module_ctx->module_unstable_warning = module_unstable_warning; module_ctx->module_warmup_disable = MODULE_DEFAULT; } diff --git a/src/modules/module_17220.c b/src/modules/module_17220.c index 403640d94..8b17b39d9 100644 --- a/src/modules/module_17220.c +++ b/src/modules/module_17220.c @@ -163,6 +163,17 @@ u32 module_salt_type (MAYBE_UNUSED const hashconfig_t *hashconfig, const char *module_st_hash (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_HASH; } const char *module_st_pass (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_PASS; } +bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hc_device_param_t *device_param) +{ + // hangs somewhere in zlib inflate + if (device_param->opencl_platform_vendor_id == VENDOR_ID_AMD) + { + return true; + } + + return false; +} + u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { const u64 esalt_size = (const u64) sizeof (pkzip_t); @@ -422,6 +433,6 @@ void module_init (module_ctx_t *module_ctx) module_ctx->module_st_hash = module_st_hash; module_ctx->module_st_pass = module_st_pass; module_ctx->module_tmp_size = MODULE_DEFAULT; - module_ctx->module_unstable_warning = MODULE_DEFAULT; + module_ctx->module_unstable_warning = module_unstable_warning; module_ctx->module_warmup_disable = MODULE_DEFAULT; } diff --git a/src/modules/module_17225.c b/src/modules/module_17225.c index 66907159f..4fecbf47a 100644 --- a/src/modules/module_17225.c +++ b/src/modules/module_17225.c @@ -163,6 +163,17 @@ u32 module_salt_type (MAYBE_UNUSED const hashconfig_t *hashconfig, const char *module_st_hash (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_HASH; } const char *module_st_pass (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_PASS; } +bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hc_device_param_t *device_param) +{ + // hangs somewhere in zlib inflate + if (device_param->opencl_platform_vendor_id == VENDOR_ID_AMD) + { + return true; + } + + return false; +} + u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { const u64 esalt_size = (const u64) sizeof (pkzip_t); @@ -422,6 +433,6 @@ void module_init (module_ctx_t *module_ctx) module_ctx->module_st_hash = module_st_hash; module_ctx->module_st_pass = module_st_pass; module_ctx->module_tmp_size = MODULE_DEFAULT; - module_ctx->module_unstable_warning = MODULE_DEFAULT; + module_ctx->module_unstable_warning = module_unstable_warning; module_ctx->module_warmup_disable = MODULE_DEFAULT; } diff --git a/src/modules/module_21800.c b/src/modules/module_21800.c index 12ffbd834..3b0eb3ba1 100644 --- a/src/modules/module_21800.c +++ b/src/modules/module_21800.c @@ -94,6 +94,17 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY return jit_build_options; } +bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hc_device_param_t *device_param) +{ + // hangs somewhere in zlib inflate + if (device_param->opencl_platform_vendor_id == VENDOR_ID_AMD) + { + return true; + } + + return false; +} + int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len) { u32 *digest = (u32 *) digest_buf; @@ -315,6 +326,6 @@ void module_init (module_ctx_t *module_ctx) module_ctx->module_st_hash = module_st_hash; module_ctx->module_st_pass = module_st_pass; module_ctx->module_tmp_size = module_tmp_size; - module_ctx->module_unstable_warning = MODULE_DEFAULT; + module_ctx->module_unstable_warning = module_unstable_warning; module_ctx->module_warmup_disable = MODULE_DEFAULT; }