Replace IS_ROCM with HAS_VPERM and HAS_VADD3

pull/1935/head
jsteube 5 years ago
parent 2c0c82c8af
commit 540b405e3a

@ -417,7 +417,7 @@ DECLSPEC void make_utf16be (const u32x *in, u32x *out1, u32x *out2)
out1[1] = hc_byte_perm (in[0], 0, 0x3727);
out1[0] = hc_byte_perm (in[0], 0, 0x1707);
#elif defined IS_AMD && AMD_GCN >= 3
#elif defined IS_AMD && HAS_VPERM
out2[3] = hc_byte_perm (in[3], 0, 0x03070207);
out2[2] = hc_byte_perm (in[3], 0, 0x01070007);
@ -455,7 +455,7 @@ DECLSPEC void make_utf16beN (const u32x *in, u32x *out1, u32x *out2)
out1[1] = hc_byte_perm (in[0], 0, 0x1707);
out1[0] = hc_byte_perm (in[0], 0, 0x3727);
#elif defined IS_AMD && AMD_GCN >= 3
#elif defined IS_AMD && HAS_VPERM
out2[3] = hc_byte_perm (in[3], 0, 0x01070007);
out2[2] = hc_byte_perm (in[3], 0, 0x03070207);
@ -493,7 +493,7 @@ DECLSPEC void make_utf16le (const u32x *in, u32x *out1, u32x *out2)
out1[1] = hc_byte_perm (in[0], 0, 0x7372);
out1[0] = hc_byte_perm (in[0], 0, 0x7170);
#elif defined IS_AMD && AMD_GCN >= 3
#elif defined IS_AMD && HAS_VPERM
out2[3] = hc_byte_perm (in[3], 0, 0x07030702);
out2[2] = hc_byte_perm (in[3], 0, 0x07010700);
@ -531,7 +531,7 @@ DECLSPEC void make_utf16leN (const u32x *in, u32x *out1, u32x *out2)
out1[1] = hc_byte_perm (in[0], 0, 0x7170);
out1[0] = hc_byte_perm (in[0], 0, 0x7372);
#elif defined IS_AMD && AMD_GCN >= 3
#elif defined IS_AMD && HAS_VPERM
out2[3] = hc_byte_perm (in[3], 0, 0x07010700);
out2[2] = hc_byte_perm (in[3], 0, 0x07030702);
@ -565,7 +565,7 @@ DECLSPEC void undo_utf16be (const u32x *in1, const u32x *in2, u32x *out)
out[2] = hc_byte_perm (in2[0], in2[1], 0x4602);
out[3] = hc_byte_perm (in2[2], in2[3], 0x4602);
#elif defined IS_AMD && AMD_GCN >= 3
#elif defined IS_AMD && HAS_VPERM
out[0] = hc_byte_perm (in1[0], in1[1], 0x04060002);
out[1] = hc_byte_perm (in1[2], in1[3], 0x04060002);
@ -595,7 +595,7 @@ DECLSPEC void undo_utf16le (const u32x *in1, const u32x *in2, u32x *out)
out[2] = hc_byte_perm (in2[0], in2[1], 0x6420);
out[3] = hc_byte_perm (in2[2], in2[3], 0x6420);
#elif defined IS_AMD && AMD_GCN >= 3
#elif defined IS_AMD && HAS_VPERM
out[0] = hc_byte_perm (in1[0], in1[1], 0x06040200);
out[1] = hc_byte_perm (in1[2], in1[3], 0x06040200);
@ -723,7 +723,7 @@ DECLSPEC void switch_buffer_by_offset_le (u32x *w0, u32x *w1, u32x *w2, u32x *w3
const int offset_switch = offset / 4;
#if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC
#if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
w0[0] = swap32 (w0[0]);
w0[1] = swap32 (w0[1]);
w0[2] = swap32 (w0[2]);
@ -1082,7 +1082,7 @@ DECLSPEC void switch_buffer_by_offset_le (u32x *w0, u32x *w1, u32x *w2, u32x *w3
w3[3] = swap32 (w3[3]);
#endif
#if (defined IS_AMD && AMD_GCN >= 3) || defined IS_NV
#if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
@ -2740,7 +2740,7 @@ DECLSPEC void switch_buffer_by_offset_be (u32x *w0, u32x *w1, u32x *w2, u32x *w3
{
const int offset_switch = offset / 4;
#if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC
#if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
switch (offset_switch)
{
@ -3067,7 +3067,7 @@ DECLSPEC void switch_buffer_by_offset_be (u32x *w0, u32x *w1, u32x *w2, u32x *w3
#endif
#if (defined IS_AMD && AMD_GCN >= 3) || defined IS_NV
#if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
@ -3407,7 +3407,7 @@ DECLSPEC void switch_buffer_by_offset_carry_be (u32x *w0, u32x *w1, u32x *w2, u3
{
const int offset_switch = offset / 4;
#if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC
#if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
switch (offset_switch)
{
case 0:
@ -3868,7 +3868,7 @@ DECLSPEC void switch_buffer_by_offset_carry_be (u32x *w0, u32x *w1, u32x *w2, u3
}
#endif
#if (defined IS_AMD && AMD_GCN >= 3) || defined IS_NV
#if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
@ -4347,7 +4347,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_le (u32x *w0, u32x *w1, u32x *w2, u32x
const int offset_switch = offset / 4;
#if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC
#if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
w0[0] = swap32 (w0[0]);
w0[1] = swap32 (w0[1]);
w0[2] = swap32 (w0[2]);
@ -5570,7 +5570,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_le (u32x *w0, u32x *w1, u32x *w2, u32x
w7[3] = swap32 (w7[3]);
#endif
#if (defined IS_AMD && AMD_GCN >= 3) || defined IS_NV
#if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
@ -6149,7 +6149,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_be (u32x *w0, u32x *w1, u32x *w2, u32x
{
const int offset_switch = offset / 4;
#if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC
#if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
switch (offset_switch)
{
case 0:
@ -7306,7 +7306,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_be (u32x *w0, u32x *w1, u32x *w2, u32x
}
#endif
#if (defined IS_AMD && AMD_GCN >= 3) || defined IS_NV
#if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
@ -8477,7 +8477,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_be (u32x *w0, u32x *w1, u32x *w2
{
const int offset_switch = offset / 4;
#if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC
#if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
switch (offset_switch)
{
case 0:
@ -10162,7 +10162,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_be (u32x *w0, u32x *w1, u32x *w2
}
#endif
#if (defined IS_AMD && AMD_GCN >= 3) || defined IS_NV
#if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
@ -11865,7 +11865,7 @@ DECLSPEC void switch_buffer_by_offset_1x64_le (u32x *w, const u32 offset)
const int offset_switch = offset / 4;
#if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC
#if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
#pragma unroll
for (int i = 0; i < 64; i++) w[i] = swap32 (w[i]);
@ -16230,7 +16230,7 @@ DECLSPEC void switch_buffer_by_offset_1x64_le (u32x *w, const u32 offset)
#endif
#if (defined IS_AMD && AMD_GCN >= 3) || defined IS_NV
#if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
@ -20601,7 +20601,7 @@ DECLSPEC void switch_buffer_by_offset_1x64_be (u32x *w, const u32 offset)
{
const int offset_switch = offset / 4;
#if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC
#if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
switch (offset_switch)
{
case 0:
@ -24958,7 +24958,7 @@ DECLSPEC void switch_buffer_by_offset_1x64_be (u32x *w, const u32 offset)
}
#endif
#if (defined IS_AMD && AMD_GCN >= 3) || defined IS_NV
#if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
@ -31297,7 +31297,7 @@ DECLSPEC void make_utf16be_S (const u32 *in, u32 *out1, u32 *out2)
out1[1] = hc_byte_perm_S (in[0], 0, 0x3727);
out1[0] = hc_byte_perm_S (in[0], 0, 0x1707);
#elif defined IS_AMD && AMD_GCN >= 3
#elif defined IS_AMD && HAS_VPERM
out2[3] = hc_byte_perm_S (in[3], 0, 0x03070207);
out2[2] = hc_byte_perm_S (in[3], 0, 0x01070007);
@ -31335,7 +31335,7 @@ DECLSPEC void make_utf16le_S (const u32 *in, u32 *out1, u32 *out2)
out1[1] = hc_byte_perm_S (in[0], 0, 0x7372);
out1[0] = hc_byte_perm_S (in[0], 0, 0x7170);
#elif defined IS_AMD && AMD_GCN >= 3
#elif defined IS_AMD && HAS_VPERM
out2[3] = hc_byte_perm_S (in[3], 0, 0x07030702);
out2[2] = hc_byte_perm_S (in[3], 0, 0x07010700);
@ -31369,7 +31369,7 @@ DECLSPEC void undo_utf16be_S (const u32 *in1, const u32 *in2, u32 *out)
out[2] = hc_byte_perm_S (in2[0], in2[1], 0x4602);
out[3] = hc_byte_perm_S (in2[2], in2[3], 0x4602);
#elif defined IS_AMD && AMD_GCN >= 3
#elif defined IS_AMD && HAS_VPERM
out[0] = hc_byte_perm_S (in1[0], in1[1], 0x04060002);
out[1] = hc_byte_perm_S (in1[2], in1[3], 0x04060002);
@ -31399,7 +31399,7 @@ DECLSPEC void undo_utf16le_S (const u32 *in1, const u32 *in2, u32 *out)
out[2] = hc_byte_perm_S (in2[0], in2[1], 0x6420);
out[3] = hc_byte_perm_S (in2[2], in2[3], 0x6420);
#elif defined IS_AMD && AMD_GCN >= 3
#elif defined IS_AMD && HAS_VPERM
out[0] = hc_byte_perm_S (in1[0], in1[1], 0x06040200);
out[1] = hc_byte_perm_S (in1[2], in1[3], 0x06040200);
@ -31428,7 +31428,7 @@ DECLSPEC void switch_buffer_by_offset_le_S (u32 *w0, u32 *w1, u32 *w2, u32 *w3,
const int offset_switch = offset / 4;
#if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC
#if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
w0[0] = swap32_S (w0[0]);
w0[1] = swap32_S (w0[1]);
w0[2] = swap32_S (w0[2]);
@ -31787,7 +31787,7 @@ DECLSPEC void switch_buffer_by_offset_le_S (u32 *w0, u32 *w1, u32 *w2, u32 *w3,
w3[3] = swap32_S (w3[3]);
#endif
#if (defined IS_AMD && AMD_GCN >= 3) || defined IS_NV
#if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
@ -33444,7 +33444,7 @@ DECLSPEC void switch_buffer_by_offset_be_S (u32 *w0, u32 *w1, u32 *w2, u32 *w3,
{
const int offset_switch = offset / 4;
#if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC
#if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
switch (offset_switch)
{
case 0:
@ -33769,7 +33769,7 @@ DECLSPEC void switch_buffer_by_offset_be_S (u32 *w0, u32 *w1, u32 *w2, u32 *w3,
}
#endif
#if (defined IS_AMD && AMD_GCN >= 3) || defined IS_NV
#if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
@ -34108,7 +34108,7 @@ DECLSPEC void switch_buffer_by_offset_carry_be_S (u32 *w0, u32 *w1, u32 *w2, u32
{
const int offset_switch = offset / 4;
#if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC
#if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
switch (offset_switch)
{
case 0:
@ -34569,7 +34569,7 @@ DECLSPEC void switch_buffer_by_offset_carry_be_S (u32 *w0, u32 *w1, u32 *w2, u32
}
#endif
#if (defined IS_AMD && AMD_GCN >= 3) || defined IS_NV
#if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
@ -35048,7 +35048,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_le_S (u32 *w0, u32 *w1, u32 *w2, u32 *
const int offset_switch = offset / 4;
#if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC
#if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
w0[0] = swap32_S (w0[0]);
w0[1] = swap32_S (w0[1]);
w0[2] = swap32_S (w0[2]);
@ -36271,7 +36271,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_le_S (u32 *w0, u32 *w1, u32 *w2, u32 *
w7[3] = swap32_S (w7[3]);
#endif
#if (defined IS_AMD && AMD_GCN >= 3) || defined IS_NV
#if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
@ -36850,7 +36850,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_be_S (u32 *w0, u32 *w1, u32 *w2, u32 *
{
const int offset_switch = offset / 4;
#if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC
#if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
switch (offset_switch)
{
case 0:
@ -38007,7 +38007,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_be_S (u32 *w0, u32 *w1, u32 *w2, u32 *
}
#endif
#if (defined IS_AMD && AMD_GCN >= 3) || defined IS_NV
#if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
@ -39178,7 +39178,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_be_S (u32 *w0, u32 *w1, u32 *w2,
{
const int offset_switch = offset / 4;
#if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC
#if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
switch (offset_switch)
{
case 0:
@ -40863,7 +40863,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_be_S (u32 *w0, u32 *w1, u32 *w2,
}
#endif
#if (defined IS_AMD && AMD_GCN >= 3) || defined IS_NV
#if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
@ -42566,7 +42566,7 @@ DECLSPEC void switch_buffer_by_offset_1x64_le_S (u32 *w, const u32 offset)
const int offset_switch = offset / 4;
#if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC
#if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
#pragma unroll
for (int i = 0; i < 64; i++) w[i] = swap32_S (w[i]);
@ -46931,7 +46931,7 @@ DECLSPEC void switch_buffer_by_offset_1x64_le_S (u32 *w, const u32 offset)
#endif
#if (defined IS_AMD && AMD_GCN >= 3) || defined IS_NV
#if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
@ -51302,7 +51302,7 @@ DECLSPEC void switch_buffer_by_offset_1x64_be_S (u32 *w, const u32 offset)
{
const int offset_switch = offset / 4;
#if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC
#if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
switch (offset_switch)
{
case 0:
@ -55659,7 +55659,7 @@ DECLSPEC void switch_buffer_by_offset_1x64_be_S (u32 *w, const u32 offset)
}
#endif
#if (defined IS_AMD && AMD_GCN >= 3) || defined IS_NV
#if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;

@ -786,7 +786,7 @@ void append_block8 (const u32 offset, u32 *buf0, u32 *buf1, const u32 *src_l0, c
const int offset_switch = offset / 4;
#endif
#if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC
#if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
const u32 src_r00 = swap32_S (src_r0[0]);
const u32 src_r01 = swap32_S (src_r0[1]);
const u32 src_r02 = swap32_S (src_r0[2]);
@ -898,7 +898,7 @@ void append_block8 (const u32 offset, u32 *buf0, u32 *buf1, const u32 *src_l0, c
s7 = swap32_S (s7);
#endif
#if (defined IS_AMD && AMD_GCN >= 3) || defined IS_NV
#if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
const int offset_mod_4 = offset & 3;

@ -175,7 +175,7 @@ DECLSPEC u64x hl32_to_64 (const u32x a, const u32x b)
#ifdef IS_AMD
#if AMD_GCN >= 3
#if HAS_VPERM
DECLSPEC u32 swap32_S (const u32 v)
{
u32 r;
@ -240,7 +240,7 @@ DECLSPEC u64 rotl64_S (const u64 a, const u32 n)
return rotr64_S (a, 64 - n);
}
#if AMD_GCN >= 3
#if HAS_VPERM
DECLSPEC u32x swap32 (const u32x v)
{
return bitselect (rotate (v, 24u), rotate (v, 8u), 0x00ff00ffu);
@ -371,7 +371,7 @@ DECLSPEC u32 hc_bytealign_S (const u32 a, const u32 b, const u32 c)
return amd_bytealign (a, b, c);
}
#if AMD_GCN >= 3
#if HAS_VPERM
DECLSPEC u32x hc_byte_perm (const u32x a, const u32x b, const u32x c)
{
u32x r;
@ -435,7 +435,7 @@ DECLSPEC u32 hc_byte_perm_S (const u32 a, const u32 b, const u32 c)
}
#endif
#if AMD_GCN >= 5
#if HAS_VADD3
DECLSPEC u32x hc_add3 (const u32x a, const u32x b, const u32x c)
{
u32x r;

@ -25,26 +25,6 @@
#if VENDOR_ID == (1 << 0)
#define IS_AMD
#define AMD_GCN 0
#if AMD_ROCM == 1
#if defined __gfx600__ || defined __gfx601__
#undef AMD_GCN
#define AMD_GCN 1
#endif
#if defined __gfx700__ || defined __gfx701__ || defined __gfx702__ || defined __gfx703__
#undef AMD_GCN
#define AMD_GCN 2
#endif
#if defined __gfx800__ || defined __gfx801__ || defined __gfx802__ || defined __gfx803__ || defined __gfx804__ || defined __gfx810__
#undef AMD_GCN
#define AMD_GCN 3
// According to AMD docs, GCN 3 and 4 are the same
#endif
#if defined __gfx900__ || defined __gfx901__ || defined __gfx902__ || defined __gfx903__
#undef AMD_GCN
#define AMD_GCN 5
#endif
#endif
#elif VENDOR_ID == (1 << 1)
#define IS_APPLE
#define IS_GENERIC
@ -95,7 +75,9 @@
#define DECLSPEC
#endif
#if (defined IS_AMD && AMD_GCN < 3)
// HAS_VPERM indicated ROCM
#if (defined IS_AMD && HAS_VPERM == 0)
#define MAYBE_VOLATILE volatile
#else
#define MAYBE_VOLATILE

@ -30,7 +30,7 @@ int hc_clCreateCommandQueue (hashcat_ctx_t *hashcat_ctx, cl_context context
int hc_clCreateContext (hashcat_ctx_t *hashcat_ctx, cl_context_properties *properties, cl_uint num_devices, const cl_device_id *devices, void (CL_CALLBACK *pfn_notify) (const char *, const void *, size_t, void *), void *user_data, cl_context *context);
int hc_clCreateKernel (hashcat_ctx_t *hashcat_ctx, cl_program program, const char *kernel_name, cl_kernel *kernel);
int hc_clCreateProgramWithBinary (hashcat_ctx_t *hashcat_ctx, cl_context context, cl_uint num_devices, const cl_device_id *device_list, const size_t *lengths, unsigned char **binaries, cl_int *binary_status, cl_program *program);
int hc_clCreateProgramWithSource (hashcat_ctx_t *hashcat_ctx, cl_context context, cl_uint count, char **strings, const size_t *lengths, cl_program *program);
int hc_clCreateProgramWithSource (hashcat_ctx_t *hashcat_ctx, cl_context context, cl_uint count, const char **strings, const size_t *lengths, cl_program *program);
int hc_clEnqueueCopyBuffer (hashcat_ctx_t *hashcat_ctx, cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_buffer, size_t src_offset, size_t dst_offset, size_t cb, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event);
int hc_clEnqueueMapBuffer (hashcat_ctx_t *hashcat_ctx, cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_map, cl_map_flags map_flags, size_t offset, size_t cb, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event, void **buf);
int hc_clEnqueueNDRangeKernel (hashcat_ctx_t *hashcat_ctx, cl_command_queue command_queue, cl_kernel kernel, cl_uint work_dim, const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event);

@ -1258,7 +1258,8 @@ typedef struct hc_device_param
char *driver_version;
char *device_opencl_version;
bool is_rocm;
bool has_vperm;
bool has_vadd3;
double spin_damp;

@ -333,6 +333,31 @@ static bool write_kernel_binary (hashcat_ctx_t *hashcat_ctx, char *kernel_file,
return true;
}
static bool test_instruction (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const char *kernel_buf)
{
int CL_rc;
cl_program program;
CL_rc = hc_clCreateProgramWithSource (hashcat_ctx, device_param->context, 1, &kernel_buf, NULL, &program);
if (CL_rc == -1) return false;
opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
OCL_PTR *ocl = opencl_ctx->ocl;
CL_rc = ocl->clBuildProgram (program, 1, &device_param->device, NULL, NULL, NULL); // do not use the wrapper to avoid the error message
const bool r = (CL_rc == CL_SUCCESS) ? true : false;
CL_rc = hc_clReleaseProgram (hashcat_ctx, program);
if (CL_rc == -1) return false;
return r;
}
void generate_source_kernel_filename (const bool slow_candidates, const u32 attack_exec, const u32 attack_kern, const u32 kern_type, const u32 opti_type, char *shared_dir, char *source_file)
{
if (opti_type & OPTI_TYPE_OPTIMIZED_KERNEL)
@ -859,7 +884,7 @@ int hc_clCreateBuffer (hashcat_ctx_t *hashcat_ctx, cl_context context, cl_mem_fl
return 0;
}
int hc_clCreateProgramWithSource (hashcat_ctx_t *hashcat_ctx, cl_context context, cl_uint count, char **strings, const size_t *lengths, cl_program *program)
int hc_clCreateProgramWithSource (hashcat_ctx_t *hashcat_ctx, cl_context context, cl_uint count, const char **strings, const size_t *lengths, cl_program *program)
{
opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
@ -867,7 +892,7 @@ int hc_clCreateProgramWithSource (hashcat_ctx_t *hashcat_ctx, cl_context context
cl_int CL_err;
*program = ocl->clCreateProgramWithSource (context, count, (const char **) strings, lengths, &CL_err);
*program = ocl->clCreateProgramWithSource (context, count, strings, lengths, &CL_err);
if (CL_err != CL_SUCCESS)
{
@ -3811,25 +3836,15 @@ int opencl_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
bool amd_warn = true;
#if defined (__linux__)
if (device_param->is_rocm == false)
{
// ROCm is so much better, we should give the user some hint and remove this block
// AMDGPU-PRO Driver 16.40 and higher
if (strtoul (device_param->driver_version, NULL, 10) >= 2117) amd_warn = false;
// AMDGPU-PRO Driver 16.50 is known to be broken
if (strtoul (device_param->driver_version, NULL, 10) == 2236) amd_warn = true;
// AMDGPU-PRO Driver 16.60 is known to be broken
if (strtoul (device_param->driver_version, NULL, 10) == 2264) amd_warn = true;
// AMDGPU-PRO Driver 17.10 is known to be broken
if (strtoul (device_param->driver_version, NULL, 10) == 2348) amd_warn = true;
// AMDGPU-PRO Driver 17.20 (2416) is fine, doesn't need check will match >= 2117
}
else
{
// Support for ROCm platform
if (strtof (device_param->driver_version, NULL) >= 1.1f) amd_warn = false;
}
// AMDGPU-PRO Driver 16.40 and higher
if (strtoul (device_param->driver_version, NULL, 10) >= 2117) amd_warn = false;
// AMDGPU-PRO Driver 16.50 is known to be broken
if (strtoul (device_param->driver_version, NULL, 10) == 2236) amd_warn = true;
// AMDGPU-PRO Driver 16.60 is known to be broken
if (strtoul (device_param->driver_version, NULL, 10) == 2264) amd_warn = true;
// AMDGPU-PRO Driver 17.10 is known to be broken
if (strtoul (device_param->driver_version, NULL, 10) == 2348) amd_warn = true;
// AMDGPU-PRO Driver 17.20 (2416) is fine, doesn't need check will match >= 2117
#elif defined (_WIN)
// AMD Radeon Software 14.9 and higher, should be updated to 15.12
if (strtoul (device_param->driver_version, NULL, 10) >= 1573) amd_warn = false;
@ -4592,34 +4607,13 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx)
if ((device_param->device_type & CL_DEVICE_TYPE_GPU) && (device_param->platform_vendor_id == VENDOR_ID_AMD))
{
char *kernel_buf = "__kernel void test (__global int *p) { __asm__ (\"DS_NOP\"); }";
const size_t kernel_len = strlen (kernel_buf);
cl_program program;
CL_rc = hc_clCreateProgramWithSource (hashcat_ctx, device_param->context, 1, &kernel_buf, &kernel_len, &program);
if (CL_rc == -1) return -1;
opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
const bool has_vperm = test_instruction (hashcat_ctx, device_param, "__kernel void test () { uint r; __asm__ (\"V_PERM_B32 %0, 0, 0, 0;\" : \"=v\"(r)); }");
OCL_PTR *ocl = opencl_ctx->ocl;
device_param->has_vperm = has_vperm;
CL_rc = ocl->clBuildProgram (program, 1, &device_param->device, NULL, NULL, NULL);
const bool has_vadd3 = test_instruction (hashcat_ctx, device_param, "__kernel void test () { uint r; __asm__ (\"V_ADD3_U32 %0, 0, 0, 0;\" : \"=v\"(r)); }");
if (CL_rc == CL_SUCCESS)
{
device_param->is_rocm = true;
}
else
{
device_param->is_rocm = false;
}
CL_rc = hc_clReleaseProgram (hashcat_ctx, program);
if (CL_rc == -1) return -1;
device_param->has_vadd3 = has_vadd3;
}
// device_available_mem
@ -4629,7 +4623,7 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx)
device_param->device_available_mem = device_param->device_global_mem - MAX_ALLOC_CHECKS_SIZE;
if ((device_param->device_type & CL_DEVICE_TYPE_GPU) && ((device_param->platform_vendor_id == VENDOR_ID_NV) || ((device_param->platform_vendor_id == VENDOR_ID_AMD) && (device_param->is_rocm == false))))
if ((device_param->device_type & CL_DEVICE_TYPE_GPU) && ((device_param->platform_vendor_id == VENDOR_ID_NV) || (device_param->platform_vendor_id == VENDOR_ID_AMD)))
{
// OK, so the problem here is the following:
// There's just CL_DEVICE_GLOBAL_MEM_SIZE to ask OpenCL about the total memory on the device,
@ -4875,9 +4869,9 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx)
char build_opts[2048] = { 0 };
#if defined (DEBUG)
snprintf (build_opts, sizeof (build_opts), "%s -D LOCAL_MEM_TYPE=%u -D VENDOR_ID=%u -D CUDA_ARCH=%u -D AMD_ROCM=%u -D VECT_SIZE=%u -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D _unroll", build_opts_base, device_param->device_local_mem_type, device_param->platform_vendor_id, (device_param->sm_major * 100) + device_param->sm_minor, device_param->is_rocm, device_param->vector_width, (u32) device_param->device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type);
snprintf (build_opts, sizeof (build_opts), "%s -D LOCAL_MEM_TYPE=%u -D VENDOR_ID=%u -D CUDA_ARCH=%u -D HAS_VPERM=%u -D HAS_VADD3=%u -D VECT_SIZE=%u -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D _unroll", build_opts_base, device_param->device_local_mem_type, device_param->platform_vendor_id, (device_param->sm_major * 100) + device_param->sm_minor, device_param->has_vperm, device_param->has_vadd3, device_param->vector_width, (u32) device_param->device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type);
#else
snprintf (build_opts, sizeof (build_opts), "%s -D LOCAL_MEM_TYPE=%u -D VENDOR_ID=%u -D CUDA_ARCH=%u -D AMD_ROCM=%u -D VECT_SIZE=%u -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D _unroll -w", build_opts_base, device_param->device_local_mem_type, device_param->platform_vendor_id, (device_param->sm_major * 100) + device_param->sm_minor, device_param->is_rocm, device_param->vector_width, (u32) device_param->device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type);
snprintf (build_opts, sizeof (build_opts), "%s -D LOCAL_MEM_TYPE=%u -D VENDOR_ID=%u -D CUDA_ARCH=%u -D HAS_VPERM=%u -D HAS_VADD3=%u -D VECT_SIZE=%u -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D _unroll -w", build_opts_base, device_param->device_local_mem_type, device_param->platform_vendor_id, (device_param->sm_major * 100) + device_param->sm_minor, device_param->has_vperm, device_param->has_vadd3, device_param->vector_width, (u32) device_param->device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type);
#endif
/*
@ -4986,7 +4980,7 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx)
if (rc_read_kernel == false) return -1;
CL_rc = hc_clCreateProgramWithSource (hashcat_ctx, device_param->context, 1, kernel_sources, NULL, &device_param->program);
CL_rc = hc_clCreateProgramWithSource (hashcat_ctx, device_param->context, 1, (const char **) kernel_sources, NULL, &device_param->program);
if (CL_rc == -1) return -1;
@ -5065,7 +5059,7 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx)
if (rc_read_kernel == false) return -1;
CL_rc = hc_clCreateProgramWithSource (hashcat_ctx, device_param->context, 1, kernel_sources, NULL, &device_param->program);
CL_rc = hc_clCreateProgramWithSource (hashcat_ctx, device_param->context, 1, (const char **) kernel_sources, NULL, &device_param->program);
if (CL_rc == -1) return -1;
@ -5194,7 +5188,7 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx)
if (rc_read_kernel == false) return -1;
CL_rc = hc_clCreateProgramWithSource (hashcat_ctx, device_param->context, 1, kernel_sources, NULL, &device_param->program_mp);
CL_rc = hc_clCreateProgramWithSource (hashcat_ctx, device_param->context, 1, (const char **) kernel_sources, NULL, &device_param->program_mp);
if (CL_rc == -1) return -1;
@ -5341,7 +5335,7 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx)
if (rc_read_kernel == false) return -1;
CL_rc = hc_clCreateProgramWithSource (hashcat_ctx, device_param->context, 1, kernel_sources, NULL, &device_param->program_amp);
CL_rc = hc_clCreateProgramWithSource (hashcat_ctx, device_param->context, 1, (const char **) kernel_sources, NULL, &device_param->program_amp);
if (CL_rc == -1) return -1;

Loading…
Cancel
Save