Fixed race condition resulting in out of memory error on startup if multiple hashcat instances are started at the same time

pull/2519/head
Jens Steube 4 years ago
parent 6d5e1d3e5d
commit e21463da4b

@ -12,6 +12,13 @@
- Fixed too early execution of some module functions which could make use of non-final values opts_type and opti_type
- Fixed internal access on module option attribute OPTS_TYPE_SUGGEST_KG with the result that it was unused
- Fixed race condition resulting in out of memory error on startup if multiple hashcat instances are started at the same time
##
## Improvements
##
- Startup time: Improved the startup time by avoiding some time intensive operations for skipped devices
* changes v6.1.0 -> v6.1.1

@ -5540,7 +5540,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
device_param->skipped = true;
}
// some attributes have to be hardcoded because they are used for instance in the build options
// some attributes have to be hardcoded values because they are used for instance in the build options
device_param->device_local_mem_type = CL_LOCAL;
device_param->opencl_device_type = CL_DEVICE_TYPE_GPU;
@ -5616,11 +5616,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
cuda_devices_active++;
}
CUcontext cuda_context;
if (hc_cuCtxCreate (hashcat_ctx, &cuda_context, CU_CTX_SCHED_BLOCKING_SYNC, device_param->cuda_device) == -1) return -1;
if (hc_cuCtxSetCurrent (hashcat_ctx, cuda_context) == -1) return -1;
// instruction set
// bcrypt optimization?
//const int rc_cuCtxSetCacheConfig = hc_cuCtxSetCacheConfig (hashcat_ctx, CU_FUNC_CACHE_PREFER_SHARED);
@ -5638,46 +5634,13 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
device_param->has_mov64 = (sm >= 10) ? true : false;
device_param->has_prmt = (sm >= 20) ? true : false;
/*
#define RUN_INSTRUCTION_CHECKS() \
device_param->has_add = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"add.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \
device_param->has_addc = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"addc.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \
device_param->has_sub = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"sub.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \
device_param->has_subc = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"subc.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \
device_param->has_bfe = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"bfe.u32 %0, 0, 0, 0;\" : \"=r\"(r)); }"); \
device_param->has_lop3 = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"lop3.b32 %0, 0, 0, 0, 0;\" : \"=r\"(r)); }"); \
device_param->has_mov64 = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned long long r; unsigned int a; unsigned int b; asm volatile (\"mov.b64 %0, {%1, %2};\" : \"=l\"(r) : \"r\"(a), \"r\"(b)); }"); \
device_param->has_prmt = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"prmt.b32 %0, 0, 0, 0;\" : \"=r\"(r)); }"); \
if (backend_devices_idx > 0)
{
hc_device_param_t *device_param_prev = &devices_param[backend_devices_idx - 1];
// device_available_mem
if (is_same_device_type (device_param, device_param_prev) == true)
{
device_param->has_add = device_param_prev->has_add;
device_param->has_addc = device_param_prev->has_addc;
device_param->has_sub = device_param_prev->has_sub;
device_param->has_subc = device_param_prev->has_subc;
device_param->has_bfe = device_param_prev->has_bfe;
device_param->has_lop3 = device_param_prev->has_lop3;
device_param->has_mov64 = device_param_prev->has_mov64;
device_param->has_prmt = device_param_prev->has_prmt;
}
else
{
RUN_INSTRUCTION_CHECKS();
}
}
else
{
RUN_INSTRUCTION_CHECKS();
}
CUcontext cuda_context;
#undef RUN_INSTRUCTION_CHECKS
*/
if (hc_cuCtxCreate (hashcat_ctx, &cuda_context, CU_CTX_SCHED_BLOCKING_SYNC, device_param->cuda_device) == -1) return -1;
// device_available_mem
if (hc_cuCtxSetCurrent (hashcat_ctx, cuda_context) == -1) return -1;
size_t free = 0;
size_t total = 0;
@ -6269,6 +6232,25 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
}
}
// instruction set
// fixed values works only for nvidia devices
// dynamical values for amd see time intensive section below
if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && (device_param->opencl_platform_vendor_id == VENDOR_ID_NV))
{
const int sm = (device_param->sm_major * 10) + device_param->sm_minor;
device_param->has_add = (sm >= 12) ? true : false;
device_param->has_addc = (sm >= 12) ? true : false;
device_param->has_sub = (sm >= 12) ? true : false;
device_param->has_subc = (sm >= 12) ? true : false;
device_param->has_bfe = (sm >= 20) ? true : false;
device_param->has_lop3 = (sm >= 50) ? true : false;
device_param->has_mov64 = (sm >= 10) ? true : false;
device_param->has_prmt = (sm >= 20) ? true : false;
}
// common driver check
if (device_param->skipped == false)
@ -6432,6 +6414,130 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
opencl_devices_active++;
}
}
}
}
backend_ctx->opencl_devices_cnt = opencl_devices_cnt;
backend_ctx->opencl_devices_active = opencl_devices_active;
// all devices combined go into backend_* variables
backend_ctx->backend_devices_cnt = cuda_devices_cnt + opencl_devices_cnt;
backend_ctx->backend_devices_active = cuda_devices_active + opencl_devices_active;
// find duplicate devices
//if ((cuda_devices_cnt > 0) && (opencl_devices_cnt > 0))
//{
// using force here enables both devices, which is the worst possible outcome
// many users force by default, so this is not a good idea
//if (user_options->force == false)
//{
backend_ctx_find_alias_devices (hashcat_ctx);
//{
//}
if (backend_ctx->backend_devices_active == 0)
{
event_log_error (hashcat_ctx, "No devices found/left.");
return -1;
}
// now we can calculate the number of parallel running hook threads based on
// the number cpu cores and the number of active compute devices
// unless overwritten by the user
if (user_options->hook_threads == HOOK_THREADS)
{
const u32 processor_count = hc_get_processor_count ();
const u32 processor_count_cu = CEILDIV (processor_count, backend_ctx->backend_devices_active); // should never reach 0
user_options->hook_threads = processor_count_cu;
}
// additional check to see if the user has chosen a device that is not within the range of available devices (i.e. larger than devices_cnt)
if (backend_ctx->backend_devices_filter != (u64) -1)
{
const u64 backend_devices_cnt_mask = ~(((u64) -1 >> backend_ctx->backend_devices_cnt) << backend_ctx->backend_devices_cnt);
if (backend_ctx->backend_devices_filter > backend_devices_cnt_mask)
{
event_log_error (hashcat_ctx, "An invalid device was specified using the --backend-devices parameter.");
event_log_error (hashcat_ctx, "The specified device was higher than the number of available devices (%u).", backend_ctx->backend_devices_cnt);
return -1;
}
}
// time or resource intensive operations which we do not run if the corresponding device was skipped by the user
if (backend_ctx->cuda)
{
// instruction test for cuda devices was replaced with fixed values (see above)
/*
CUcontext cuda_context;
if (hc_cuCtxCreate (hashcat_ctx, &cuda_context, CU_CTX_SCHED_BLOCKING_SYNC, device_param->cuda_device) == -1) return -1;
if (hc_cuCtxSetCurrent (hashcat_ctx, cuda_context) == -1) return -1;
#define RUN_INSTRUCTION_CHECKS() \
device_param->has_add = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"add.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \
device_param->has_addc = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"addc.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \
device_param->has_sub = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"sub.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \
device_param->has_subc = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"subc.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \
device_param->has_bfe = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"bfe.u32 %0, 0, 0, 0;\" : \"=r\"(r)); }"); \
device_param->has_lop3 = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"lop3.b32 %0, 0, 0, 0, 0;\" : \"=r\"(r)); }"); \
device_param->has_mov64 = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned long long r; unsigned int a; unsigned int b; asm volatile (\"mov.b64 %0, {%1, %2};\" : \"=l\"(r) : \"r\"(a), \"r\"(b)); }"); \
device_param->has_prmt = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"prmt.b32 %0, 0, 0, 0;\" : \"=r\"(r)); }"); \
if (backend_devices_idx > 0)
{
hc_device_param_t *device_param_prev = &devices_param[backend_devices_idx - 1];
if (is_same_device_type (device_param, device_param_prev) == true)
{
device_param->has_add = device_param_prev->has_add;
device_param->has_addc = device_param_prev->has_addc;
device_param->has_sub = device_param_prev->has_sub;
device_param->has_subc = device_param_prev->has_subc;
device_param->has_bfe = device_param_prev->has_bfe;
device_param->has_lop3 = device_param_prev->has_lop3;
device_param->has_mov64 = device_param_prev->has_mov64;
device_param->has_prmt = device_param_prev->has_prmt;
}
else
{
RUN_INSTRUCTION_CHECKS();
}
}
else
{
RUN_INSTRUCTION_CHECKS();
}
#undef RUN_INSTRUCTION_CHECKS
if (hc_cuCtxDestroy (hashcat_ctx, cuda_context) == -1) return -1;
*/
}
if (backend_ctx->ocl)
{
for (int backend_devices_cnt = 0; backend_devices_cnt < backend_ctx->backend_devices_cnt; backend_devices_cnt++)
{
hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_cnt];
if (device_param->is_opencl == false) continue;
if (device_param->skipped == true) continue;
/**
* create context for each device
@ -6459,6 +6565,8 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
if (hc_clCreateCommandQueue (hashcat_ctx, context, device_param->opencl_device, 0, &command_queue) == -1) return -1;
// instruction set
if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && (device_param->opencl_platform_vendor_id == VENDOR_ID_AMD))
{
#define RUN_INSTRUCTION_CHECKS()
@ -6507,16 +6615,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && (device_param->opencl_platform_vendor_id == VENDOR_ID_NV))
{
const int sm = (device_param->sm_major * 10) + device_param->sm_minor;
device_param->has_add = (sm >= 12) ? true : false;
device_param->has_addc = (sm >= 12) ? true : false;
device_param->has_sub = (sm >= 12) ? true : false;
device_param->has_subc = (sm >= 12) ? true : false;
device_param->has_bfe = (sm >= 20) ? true : false;
device_param->has_lop3 = (sm >= 50) ? true : false;
device_param->has_mov64 = (sm >= 10) ? true : false;
device_param->has_prmt = (sm >= 20) ? true : false;
// replaced with fixed values see non time intensive section above
/*
#define RUN_INSTRUCTION_CHECKS() \
@ -6558,7 +6657,10 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
*/
}
// device_available_mem
// available device memory
// This test causes an GPU memory usage spike.
// In case there are multiple hashcat instances starting at the same time this will cause GPU out of memory errors which otherwise would not exist.
// We will simply not run it if that device was skipped by the user.
#define MAX_ALLOC_CHECKS_CNT 8192
#define MAX_ALLOC_CHECKS_SIZE (64 * 1024 * 1024)
@ -6618,6 +6720,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
}
device_param->device_available_mem = MAX_ALLOC_CHECKS_SIZE;
if (c > 0)
{
device_param->device_available_mem *= c;
@ -6643,63 +6746,6 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
hc_clReleaseContext (hashcat_ctx, context);
}
}
}
backend_ctx->opencl_devices_cnt = opencl_devices_cnt;
backend_ctx->opencl_devices_active = opencl_devices_active;
// all devices combined go into backend_* variables
backend_ctx->backend_devices_cnt = cuda_devices_cnt + opencl_devices_cnt;
backend_ctx->backend_devices_active = cuda_devices_active + opencl_devices_active;
// find duplicate devices
//if ((cuda_devices_cnt > 0) && (opencl_devices_cnt > 0))
//{
// using force here enables both devices, which is the worst possible outcome
// many users force by default, so this is not a good idea
//if (user_options->force == false)
//{
backend_ctx_find_alias_devices (hashcat_ctx);
//{
//}
if (backend_ctx->backend_devices_active == 0)
{
event_log_error (hashcat_ctx, "No devices found/left.");
return -1;
}
// now we can calculate the number of parallel running hook threads based on
// the number cpu cores and the number of active compute devices
// unless overwritten by the user
if (user_options->hook_threads == HOOK_THREADS)
{
const u32 processor_count = hc_get_processor_count ();
const u32 processor_count_cu = CEILDIV (processor_count, backend_ctx->backend_devices_active); // should never reach 0
user_options->hook_threads = processor_count_cu;
}
// additional check to see if the user has chosen a device that is not within the range of available devices (i.e. larger than devices_cnt)
if (backend_ctx->backend_devices_filter != (u64) -1)
{
const u64 backend_devices_cnt_mask = ~(((u64) -1 >> backend_ctx->backend_devices_cnt) << backend_ctx->backend_devices_cnt);
if (backend_ctx->backend_devices_filter > backend_devices_cnt_mask)
{
event_log_error (hashcat_ctx, "An invalid device was specified using the --backend-devices parameter.");
event_log_error (hashcat_ctx, "The specified device was higher than the number of available devices (%u).", backend_ctx->backend_devices_cnt);
return -1;
}
}
backend_ctx->target_msec = TARGET_MSEC_PROFILE[user_options->workload_profile - 1];

Loading…
Cancel
Save