mirror of
https://github.com/hashcat/hashcat.git
synced 2025-02-23 04:52:03 +00:00
Fixed race condition resulting in out of memory error on startup if multiple hashcat instances are started at the same time
This commit is contained in:
parent
6d5e1d3e5d
commit
e21463da4b
@ -12,6 +12,13 @@
|
||||
|
||||
- Fixed too early execution of some module functions which could make use of non-final values opts_type and opti_type
|
||||
- Fixed internal access on module option attribute OPTS_TYPE_SUGGEST_KG with the result that it was unused
|
||||
- Fixed race condition resulting in out of memory error on startup if multiple hashcat instances are started at the same time
|
||||
|
||||
##
|
||||
## Improvements
|
||||
##
|
||||
|
||||
- Startup time: Improved the startup time by avoiding some time intensive operations for skipped devices
|
||||
|
||||
* changes v6.1.0 -> v6.1.1
|
||||
|
||||
|
272
src/backend.c
272
src/backend.c
@ -5540,7 +5540,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
|
||||
device_param->skipped = true;
|
||||
}
|
||||
|
||||
// some attributes have to be hardcoded because they are used for instance in the build options
|
||||
// some attributes have to be hardcoded values because they are used for instance in the build options
|
||||
|
||||
device_param->device_local_mem_type = CL_LOCAL;
|
||||
device_param->opencl_device_type = CL_DEVICE_TYPE_GPU;
|
||||
@ -5616,11 +5616,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
|
||||
cuda_devices_active++;
|
||||
}
|
||||
|
||||
CUcontext cuda_context;
|
||||
|
||||
if (hc_cuCtxCreate (hashcat_ctx, &cuda_context, CU_CTX_SCHED_BLOCKING_SYNC, device_param->cuda_device) == -1) return -1;
|
||||
|
||||
if (hc_cuCtxSetCurrent (hashcat_ctx, cuda_context) == -1) return -1;
|
||||
// instruction set
|
||||
|
||||
// bcrypt optimization?
|
||||
//const int rc_cuCtxSetCacheConfig = hc_cuCtxSetCacheConfig (hashcat_ctx, CU_FUNC_CACHE_PREFER_SHARED);
|
||||
@ -5638,47 +5634,14 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
|
||||
device_param->has_mov64 = (sm >= 10) ? true : false;
|
||||
device_param->has_prmt = (sm >= 20) ? true : false;
|
||||
|
||||
/*
|
||||
#define RUN_INSTRUCTION_CHECKS() \
|
||||
device_param->has_add = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"add.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \
|
||||
device_param->has_addc = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"addc.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \
|
||||
device_param->has_sub = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"sub.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \
|
||||
device_param->has_subc = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"subc.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \
|
||||
device_param->has_bfe = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"bfe.u32 %0, 0, 0, 0;\" : \"=r\"(r)); }"); \
|
||||
device_param->has_lop3 = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"lop3.b32 %0, 0, 0, 0, 0;\" : \"=r\"(r)); }"); \
|
||||
device_param->has_mov64 = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned long long r; unsigned int a; unsigned int b; asm volatile (\"mov.b64 %0, {%1, %2};\" : \"=l\"(r) : \"r\"(a), \"r\"(b)); }"); \
|
||||
device_param->has_prmt = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"prmt.b32 %0, 0, 0, 0;\" : \"=r\"(r)); }"); \
|
||||
|
||||
if (backend_devices_idx > 0)
|
||||
{
|
||||
hc_device_param_t *device_param_prev = &devices_param[backend_devices_idx - 1];
|
||||
|
||||
if (is_same_device_type (device_param, device_param_prev) == true)
|
||||
{
|
||||
device_param->has_add = device_param_prev->has_add;
|
||||
device_param->has_addc = device_param_prev->has_addc;
|
||||
device_param->has_sub = device_param_prev->has_sub;
|
||||
device_param->has_subc = device_param_prev->has_subc;
|
||||
device_param->has_bfe = device_param_prev->has_bfe;
|
||||
device_param->has_lop3 = device_param_prev->has_lop3;
|
||||
device_param->has_mov64 = device_param_prev->has_mov64;
|
||||
device_param->has_prmt = device_param_prev->has_prmt;
|
||||
}
|
||||
else
|
||||
{
|
||||
RUN_INSTRUCTION_CHECKS();
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
RUN_INSTRUCTION_CHECKS();
|
||||
}
|
||||
|
||||
#undef RUN_INSTRUCTION_CHECKS
|
||||
*/
|
||||
|
||||
// device_available_mem
|
||||
|
||||
CUcontext cuda_context;
|
||||
|
||||
if (hc_cuCtxCreate (hashcat_ctx, &cuda_context, CU_CTX_SCHED_BLOCKING_SYNC, device_param->cuda_device) == -1) return -1;
|
||||
|
||||
if (hc_cuCtxSetCurrent (hashcat_ctx, cuda_context) == -1) return -1;
|
||||
|
||||
size_t free = 0;
|
||||
size_t total = 0;
|
||||
|
||||
@ -6269,6 +6232,25 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
|
||||
}
|
||||
}
|
||||
|
||||
// instruction set
|
||||
|
||||
// fixed values work only for NVIDIA devices
|
||||
// dynamic values for AMD: see the time-intensive section below
|
||||
|
||||
if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && (device_param->opencl_platform_vendor_id == VENDOR_ID_NV))
|
||||
{
|
||||
const int sm = (device_param->sm_major * 10) + device_param->sm_minor;
|
||||
|
||||
device_param->has_add = (sm >= 12) ? true : false;
|
||||
device_param->has_addc = (sm >= 12) ? true : false;
|
||||
device_param->has_sub = (sm >= 12) ? true : false;
|
||||
device_param->has_subc = (sm >= 12) ? true : false;
|
||||
device_param->has_bfe = (sm >= 20) ? true : false;
|
||||
device_param->has_lop3 = (sm >= 50) ? true : false;
|
||||
device_param->has_mov64 = (sm >= 10) ? true : false;
|
||||
device_param->has_prmt = (sm >= 20) ? true : false;
|
||||
}
|
||||
|
||||
// common driver check
|
||||
|
||||
if (device_param->skipped == false)
|
||||
@ -6432,6 +6414,130 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
|
||||
|
||||
opencl_devices_active++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
backend_ctx->opencl_devices_cnt = opencl_devices_cnt;
|
||||
backend_ctx->opencl_devices_active = opencl_devices_active;
|
||||
|
||||
// all devices combined go into backend_* variables
|
||||
|
||||
backend_ctx->backend_devices_cnt = cuda_devices_cnt + opencl_devices_cnt;
|
||||
backend_ctx->backend_devices_active = cuda_devices_active + opencl_devices_active;
|
||||
|
||||
// find duplicate devices
|
||||
|
||||
//if ((cuda_devices_cnt > 0) && (opencl_devices_cnt > 0))
|
||||
//{
|
||||
// using force here enables both devices, which is the worst possible outcome
|
||||
// many users force by default, so this is not a good idea
|
||||
|
||||
//if (user_options->force == false)
|
||||
//{
|
||||
backend_ctx_find_alias_devices (hashcat_ctx);
|
||||
//{
|
||||
//}
|
||||
|
||||
if (backend_ctx->backend_devices_active == 0)
|
||||
{
|
||||
event_log_error (hashcat_ctx, "No devices found/left.");
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
// now we can calculate the number of parallel running hook threads based on
|
||||
// the number of CPU cores and the number of active compute devices
|
||||
// unless overwritten by the user
|
||||
|
||||
if (user_options->hook_threads == HOOK_THREADS)
|
||||
{
|
||||
const u32 processor_count = hc_get_processor_count ();
|
||||
|
||||
const u32 processor_count_cu = CEILDIV (processor_count, backend_ctx->backend_devices_active); // should never reach 0
|
||||
|
||||
user_options->hook_threads = processor_count_cu;
|
||||
}
|
||||
|
||||
// additional check to see if the user has chosen a device that is not within the range of available devices (i.e. larger than devices_cnt)
|
||||
|
||||
if (backend_ctx->backend_devices_filter != (u64) -1)
|
||||
{
|
||||
const u64 backend_devices_cnt_mask = ~(((u64) -1 >> backend_ctx->backend_devices_cnt) << backend_ctx->backend_devices_cnt);
|
||||
|
||||
if (backend_ctx->backend_devices_filter > backend_devices_cnt_mask)
|
||||
{
|
||||
event_log_error (hashcat_ctx, "An invalid device was specified using the --backend-devices parameter.");
|
||||
event_log_error (hashcat_ctx, "The specified device was higher than the number of available devices (%u).", backend_ctx->backend_devices_cnt);
|
||||
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
// time or resource intensive operations which we do not run if the corresponding device was skipped by the user
|
||||
|
||||
if (backend_ctx->cuda)
|
||||
{
|
||||
// instruction test for cuda devices was replaced with fixed values (see above)
|
||||
|
||||
/*
|
||||
CUcontext cuda_context;
|
||||
|
||||
if (hc_cuCtxCreate (hashcat_ctx, &cuda_context, CU_CTX_SCHED_BLOCKING_SYNC, device_param->cuda_device) == -1) return -1;
|
||||
|
||||
if (hc_cuCtxSetCurrent (hashcat_ctx, cuda_context) == -1) return -1;
|
||||
|
||||
#define RUN_INSTRUCTION_CHECKS() \
|
||||
device_param->has_add = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"add.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \
|
||||
device_param->has_addc = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"addc.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \
|
||||
device_param->has_sub = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"sub.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \
|
||||
device_param->has_subc = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"subc.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \
|
||||
device_param->has_bfe = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"bfe.u32 %0, 0, 0, 0;\" : \"=r\"(r)); }"); \
|
||||
device_param->has_lop3 = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"lop3.b32 %0, 0, 0, 0, 0;\" : \"=r\"(r)); }"); \
|
||||
device_param->has_mov64 = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned long long r; unsigned int a; unsigned int b; asm volatile (\"mov.b64 %0, {%1, %2};\" : \"=l\"(r) : \"r\"(a), \"r\"(b)); }"); \
|
||||
device_param->has_prmt = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"prmt.b32 %0, 0, 0, 0;\" : \"=r\"(r)); }"); \
|
||||
|
||||
if (backend_devices_idx > 0)
|
||||
{
|
||||
hc_device_param_t *device_param_prev = &devices_param[backend_devices_idx - 1];
|
||||
|
||||
if (is_same_device_type (device_param, device_param_prev) == true)
|
||||
{
|
||||
device_param->has_add = device_param_prev->has_add;
|
||||
device_param->has_addc = device_param_prev->has_addc;
|
||||
device_param->has_sub = device_param_prev->has_sub;
|
||||
device_param->has_subc = device_param_prev->has_subc;
|
||||
device_param->has_bfe = device_param_prev->has_bfe;
|
||||
device_param->has_lop3 = device_param_prev->has_lop3;
|
||||
device_param->has_mov64 = device_param_prev->has_mov64;
|
||||
device_param->has_prmt = device_param_prev->has_prmt;
|
||||
}
|
||||
else
|
||||
{
|
||||
RUN_INSTRUCTION_CHECKS();
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
RUN_INSTRUCTION_CHECKS();
|
||||
}
|
||||
|
||||
#undef RUN_INSTRUCTION_CHECKS
|
||||
|
||||
if (hc_cuCtxDestroy (hashcat_ctx, cuda_context) == -1) return -1;
|
||||
|
||||
*/
|
||||
}
|
||||
|
||||
if (backend_ctx->ocl)
|
||||
{
|
||||
for (int backend_devices_cnt = 0; backend_devices_cnt < backend_ctx->backend_devices_cnt; backend_devices_cnt++)
|
||||
{
|
||||
hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_cnt];
|
||||
|
||||
if (device_param->is_opencl == false) continue;
|
||||
|
||||
if (device_param->skipped == true) continue;
|
||||
|
||||
/**
|
||||
* create context for each device
|
||||
@ -6459,6 +6565,8 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
|
||||
|
||||
if (hc_clCreateCommandQueue (hashcat_ctx, context, device_param->opencl_device, 0, &command_queue) == -1) return -1;
|
||||
|
||||
// instruction set
|
||||
|
||||
if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && (device_param->opencl_platform_vendor_id == VENDOR_ID_AMD))
|
||||
{
|
||||
#define RUN_INSTRUCTION_CHECKS()
|
||||
@ -6507,16 +6615,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
|
||||
|
||||
if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && (device_param->opencl_platform_vendor_id == VENDOR_ID_NV))
|
||||
{
|
||||
const int sm = (device_param->sm_major * 10) + device_param->sm_minor;
|
||||
|
||||
device_param->has_add = (sm >= 12) ? true : false;
|
||||
device_param->has_addc = (sm >= 12) ? true : false;
|
||||
device_param->has_sub = (sm >= 12) ? true : false;
|
||||
device_param->has_subc = (sm >= 12) ? true : false;
|
||||
device_param->has_bfe = (sm >= 20) ? true : false;
|
||||
device_param->has_lop3 = (sm >= 50) ? true : false;
|
||||
device_param->has_mov64 = (sm >= 10) ? true : false;
|
||||
device_param->has_prmt = (sm >= 20) ? true : false;
|
||||
// replaced with fixed values; see the non-time-intensive section above
|
||||
|
||||
/*
|
||||
#define RUN_INSTRUCTION_CHECKS() \
|
||||
@ -6558,7 +6657,10 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
|
||||
*/
|
||||
}
|
||||
|
||||
// device_available_mem
|
||||
// available device memory
|
||||
// This test causes a GPU memory usage spike.
|
||||
// In case there are multiple hashcat instances starting at the same time this will cause GPU out of memory errors which otherwise would not exist.
|
||||
// We will simply not run it if that device was skipped by the user.
|
||||
|
||||
#define MAX_ALLOC_CHECKS_CNT 8192
|
||||
#define MAX_ALLOC_CHECKS_SIZE (64 * 1024 * 1024)
|
||||
@ -6618,6 +6720,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
|
||||
}
|
||||
|
||||
device_param->device_available_mem = MAX_ALLOC_CHECKS_SIZE;
|
||||
|
||||
if (c > 0)
|
||||
{
|
||||
device_param->device_available_mem *= c;
|
||||
@ -6643,63 +6746,6 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
|
||||
hc_clReleaseContext (hashcat_ctx, context);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
backend_ctx->opencl_devices_cnt = opencl_devices_cnt;
|
||||
backend_ctx->opencl_devices_active = opencl_devices_active;
|
||||
|
||||
// all devices combined go into backend_* variables
|
||||
|
||||
backend_ctx->backend_devices_cnt = cuda_devices_cnt + opencl_devices_cnt;
|
||||
backend_ctx->backend_devices_active = cuda_devices_active + opencl_devices_active;
|
||||
|
||||
// find duplicate devices
|
||||
|
||||
//if ((cuda_devices_cnt > 0) && (opencl_devices_cnt > 0))
|
||||
//{
|
||||
// using force here enables both devices, which is the worst possible outcome
|
||||
// many users force by default, so this is not a good idea
|
||||
|
||||
//if (user_options->force == false)
|
||||
//{
|
||||
backend_ctx_find_alias_devices (hashcat_ctx);
|
||||
//{
|
||||
//}
|
||||
|
||||
if (backend_ctx->backend_devices_active == 0)
|
||||
{
|
||||
event_log_error (hashcat_ctx, "No devices found/left.");
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
// now we can calculate the number of parallel running hook threads based on
|
||||
// the number of CPU cores and the number of active compute devices
|
||||
// unless overwritten by the user
|
||||
|
||||
if (user_options->hook_threads == HOOK_THREADS)
|
||||
{
|
||||
const u32 processor_count = hc_get_processor_count ();
|
||||
|
||||
const u32 processor_count_cu = CEILDIV (processor_count, backend_ctx->backend_devices_active); // should never reach 0
|
||||
|
||||
user_options->hook_threads = processor_count_cu;
|
||||
}
|
||||
|
||||
// additional check to see if the user has chosen a device that is not within the range of available devices (i.e. larger than devices_cnt)
|
||||
|
||||
if (backend_ctx->backend_devices_filter != (u64) -1)
|
||||
{
|
||||
const u64 backend_devices_cnt_mask = ~(((u64) -1 >> backend_ctx->backend_devices_cnt) << backend_ctx->backend_devices_cnt);
|
||||
|
||||
if (backend_ctx->backend_devices_filter > backend_devices_cnt_mask)
|
||||
{
|
||||
event_log_error (hashcat_ctx, "An invalid device was specified using the --backend-devices parameter.");
|
||||
event_log_error (hashcat_ctx, "The specified device was higher than the number of available devices (%u).", backend_ctx->backend_devices_cnt);
|
||||
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
backend_ctx->target_msec = TARGET_MSEC_PROFILE[user_options->workload_profile - 1];
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user