Kernels: Add standalone true UTF8 to UTF16 converter kernel that runs after amplifier. Use OPTS_TYPE_POST_AMP_UTF16LE from plugin

pull/2795/head
Jens Steube 3 years ago
parent 1afbcb0827
commit 5c6501444a

@ -212,3 +212,24 @@ KERNEL_FQ void gpu_atinit (GLOBAL_AS pw_t *buf, const u64 gid_max)
buf[gid] = pw;
}
KERNEL_FQ void gpu_utf8_to_utf16 (GLOBAL_AS pw_t *pws_buf, const u64 gid_max)
{
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
pw_t pw_in = pws_buf[gid];
pw_t pw_out;
for (int i = 0; i < 64; i++) pw_out.i[i] = 0;
hc_enc_t hc_enc;
hc_enc_init (&hc_enc);
pw_out.pw_len = hc_enc_next (&hc_enc, pw_in.i, pw_in.pw_len, 64, pw_out.i, 256);
pws_buf[gid] = pw_out;
}

@ -23,6 +23,7 @@
- Dependencies: Updated LZMA SDK from 19.00 to 21.02 alpha
- Dependencies: Updated xxHash from 0.1.0 to v0.8.0 - Stable XXH3
- Kernels: Add standalone true UTF8 to UTF16 converter kernel that runs after amplifier. Use OPTS_TYPE_POST_AMP_UTF16LE from plugin
* changes v6.2.0 -> v6.2.1

@ -122,10 +122,12 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
void rebuild_pws_compressed_append (hc_device_param_t *device_param, const u64 pws_cnt, const u8 chr);
int run_cuda_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 num);
int run_cuda_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 num);
int run_cuda_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u32 value, const u64 size);
int run_cuda_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 size);
int run_opencl_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 num);
int run_opencl_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 num);
int run_opencl_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u32 value, const u64 size);
int run_opencl_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 size);

@ -442,6 +442,7 @@ typedef enum opts_type
OPTS_TYPE_SELF_TEST_DISABLE = (1ULL << 51), // some algos use JiT in combinations with a salt or create too much startup time
OPTS_TYPE_MP_MULTI_DISABLE = (1ULL << 52), // do not multiply the kernel-accel with the multiprocessor count per device to allow more fine-tuned workload settings
OPTS_TYPE_NATIVE_THREADS = (1ULL << 53), // forces "native" thread count: CPU=1, GPU-Intel=8, GPU-AMD=64 (wavefront), GPU-NV=32 (warps)
OPTS_TYPE_POST_AMP_UTF16LE = (1ULL << 54), // run the utf8 to utf16le conversion kernel after they have been processed from amplifiers
} opts_type_t;
@ -1116,6 +1117,7 @@ typedef struct hc_device_param
u32 kernel_wgs_tm;
u32 kernel_wgs_memset;
u32 kernel_wgs_atinit;
u32 kernel_wgs_utf8toutf16le;
u32 kernel_wgs_decompress;
u32 kernel_wgs_aux1;
u32 kernel_wgs_aux2;
@ -1140,6 +1142,7 @@ typedef struct hc_device_param
u32 kernel_preferred_wgs_multiple_tm;
u32 kernel_preferred_wgs_multiple_memset;
u32 kernel_preferred_wgs_multiple_atinit;
u32 kernel_preferred_wgs_multiple_utf8toutf16le;
u32 kernel_preferred_wgs_multiple_decompress;
u32 kernel_preferred_wgs_multiple_aux1;
u32 kernel_preferred_wgs_multiple_aux2;
@ -1164,6 +1167,7 @@ typedef struct hc_device_param
u64 kernel_local_mem_size_tm;
u64 kernel_local_mem_size_memset;
u64 kernel_local_mem_size_atinit;
u64 kernel_local_mem_size_utf8toutf16le;
u64 kernel_local_mem_size_decompress;
u64 kernel_local_mem_size_aux1;
u64 kernel_local_mem_size_aux2;
@ -1188,6 +1192,7 @@ typedef struct hc_device_param
u64 kernel_dynamic_local_mem_size_tm;
u64 kernel_dynamic_local_mem_size_memset;
u64 kernel_dynamic_local_mem_size_atinit;
u64 kernel_dynamic_local_mem_size_utf8toutf16le;
u64 kernel_dynamic_local_mem_size_decompress;
u64 kernel_dynamic_local_mem_size_aux1;
u64 kernel_dynamic_local_mem_size_aux2;
@ -1348,6 +1353,7 @@ typedef struct hc_device_param
void *kernel_params_tm[PARAMCNT];
void *kernel_params_memset[PARAMCNT];
void *kernel_params_atinit[PARAMCNT];
void *kernel_params_utf8toutf16le[PARAMCNT];
void *kernel_params_decompress[PARAMCNT];
u32 kernel_params_buf32[PARAMCNT];
@ -1371,6 +1377,9 @@ typedef struct hc_device_param
u32 kernel_params_atinit_buf32[PARAMCNT];
u64 kernel_params_atinit_buf64[PARAMCNT];
u32 kernel_params_utf8toutf16le_buf32[PARAMCNT];
u64 kernel_params_utf8toutf16le_buf64[PARAMCNT];
u32 kernel_params_decompress_buf32[PARAMCNT];
u64 kernel_params_decompress_buf64[PARAMCNT];
@ -1410,6 +1419,7 @@ typedef struct hc_device_param
CUfunction cuda_function_tm;
CUfunction cuda_function_memset;
CUfunction cuda_function_atinit;
CUfunction cuda_function_utf8toutf16le;
CUfunction cuda_function_decompress;
CUfunction cuda_function_aux1;
CUfunction cuda_function_aux2;
@ -1494,6 +1504,7 @@ typedef struct hc_device_param
cl_kernel opencl_kernel_tm;
cl_kernel opencl_kernel_memset;
cl_kernel opencl_kernel_atinit;
cl_kernel opencl_kernel_utf8toutf16le;
cl_kernel opencl_kernel_decompress;
cl_kernel opencl_kernel_aux1;
cl_kernel opencl_kernel_aux2;

@ -3037,6 +3037,19 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
if (run_kernel_amp (hashcat_ctx, device_param, pws_cnt) == -1) return -1;
}
if (hashconfig->opts_type & OPTS_TYPE_POST_AMP_UTF16LE)
{
if (device_param->is_cuda == true)
{
if (run_cuda_kernel_utf8toutf16le (hashcat_ctx, device_param, device_param->cuda_d_pws_buf, pws_cnt) == -1) return -1;
}
if (device_param->is_opencl == true)
{
if (run_opencl_kernel_utf8toutf16le (hashcat_ctx, device_param, device_param->opencl_d_pws_buf, pws_cnt) == -1) return -1;
}
}
if (run_kernel (hashcat_ctx, device_param, KERN_RUN_1, pws_pos, pws_cnt, false, 0) == -1) return -1;
if (hashconfig->opts_type & OPTS_TYPE_HOOK12)
@ -3414,6 +3427,26 @@ int run_cuda_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *devic
return 0;
}
int run_cuda_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 num)
{
u64 num_elements = num;
device_param->kernel_params_utf8toutf16le[0] = (void *) &buf;
device_param->kernel_params_utf8toutf16le_buf64[1] = num_elements;
const u64 kernel_threads = device_param->kernel_wgs_utf8toutf16le;
num_elements = CEILDIV (num_elements, kernel_threads);
CUfunction function = device_param->cuda_function_utf8toutf16le;
if (hc_cuLaunchKernel (hashcat_ctx, function, num_elements, 1, 1, kernel_threads, 1, 1, 0, device_param->cuda_stream, device_param->kernel_params_utf8toutf16le, NULL) == -1) return -1;
if (hc_cuStreamSynchronize (hashcat_ctx, device_param->cuda_stream) == -1) return -1;
return 0;
}
int run_cuda_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u32 value, const u64 size)
{
const u64 num16d = size / 16;
@ -3495,6 +3528,34 @@ int run_opencl_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *dev
return 0;
}
int run_opencl_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 num)
{
u64 num_elements = num;
device_param->kernel_params_utf8toutf16le_buf64[1] = num_elements;
const u64 kernel_threads = device_param->kernel_wgs_utf8toutf16le;
num_elements = round_up_multiple_64 (num_elements, kernel_threads);
cl_kernel kernel = device_param->opencl_kernel_utf8toutf16le;
const size_t global_work_size[3] = { num_elements, 1, 1 };
const size_t local_work_size[3] = { kernel_threads, 1, 1 };
if (hc_clSetKernelArg (hashcat_ctx, kernel, 0, sizeof (cl_mem), (void *) &buf) == -1) return -1;
if (hc_clSetKernelArg (hashcat_ctx, kernel, 1, sizeof (cl_ulong), device_param->kernel_params_utf8toutf16le[1]) == -1) return -1;
if (hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->opencl_command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL) == -1) return -1;
if (hc_clFlush (hashcat_ctx, device_param->opencl_command_queue) == -1) return -1;
if (hc_clFinish (hashcat_ctx, device_param->opencl_command_queue) == -1) return -1;
return 0;
}
int run_opencl_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u32 value, const u64 size)
{
const u64 num16d = size / 16;
@ -8681,6 +8742,18 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
if (get_cuda_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->cuda_function_decompress, &device_param->kernel_dynamic_local_mem_size_decompress) == -1) return -1;
device_param->kernel_preferred_wgs_multiple_decompress = device_param->cuda_warp_size;
// GPU utf8 to utf16le conversion
if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_utf8toutf16le, device_param->cuda_module_shared, "gpu_utf8_to_utf16") == -1) return -1;
if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_utf8toutf16le, &device_param->kernel_wgs_utf8toutf16le) == -1) return -1;
if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_utf8toutf16le, &device_param->kernel_local_mem_size_utf8toutf16le) == -1) return -1;
if (get_cuda_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->cuda_function_utf8toutf16le, &device_param->kernel_dynamic_local_mem_size_utf8toutf16le) == -1) return -1;
device_param->kernel_preferred_wgs_multiple_utf8toutf16le = device_param->cuda_warp_size;
}
if (device_param->is_opencl == true)
@ -8720,6 +8793,18 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
if (get_opencl_kernel_dynamic_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_decompress, &device_param->kernel_dynamic_local_mem_size_decompress) == -1) return -1;
if (get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_decompress, &device_param->kernel_preferred_wgs_multiple_decompress) == -1) return -1;
// GPU utf8 to utf16le conversion
if (hc_clCreateKernel (hashcat_ctx, device_param->opencl_program_shared, "gpu_utf8_to_utf16", &device_param->opencl_kernel_utf8toutf16le) == -1) return -1;
if (get_opencl_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_utf8toutf16le, &device_param->kernel_wgs_utf8toutf16le) == -1) return -1;
if (get_opencl_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_utf8toutf16le, &device_param->kernel_local_mem_size_utf8toutf16le) == -1) return -1;
if (get_opencl_kernel_dynamic_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_utf8toutf16le, &device_param->kernel_dynamic_local_mem_size_utf8toutf16le) == -1) return -1;
if (get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_utf8toutf16le, &device_param->kernel_preferred_wgs_multiple_utf8toutf16le) == -1) return -1;
}
}
@ -9417,6 +9502,11 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
device_param->kernel_params_atinit[0] = NULL;
device_param->kernel_params_atinit[1] = &device_param->kernel_params_atinit_buf64[1];
device_param->kernel_params_utf8toutf16le_buf64[1] = 0; // gid_max
device_param->kernel_params_utf8toutf16le[0] = NULL;
device_param->kernel_params_utf8toutf16le[1] = &device_param->kernel_params_utf8toutf16le_buf64[1];
device_param->kernel_params_decompress_buf64[3] = 0; // gid_max
if (device_param->is_cuda == true)
@ -10058,6 +10148,11 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
if (hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_atinit, 0, sizeof (cl_mem), device_param->kernel_params_atinit[0]) == -1) return -1;
if (hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_atinit, 1, sizeof (cl_ulong), device_param->kernel_params_atinit[1]) == -1) return -1;
// GPU utf8 to utf16le init
if (hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_utf8toutf16le, 0, sizeof (cl_mem), device_param->kernel_params_utf8toutf16le[0]) == -1) return -1;
if (hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_utf8toutf16le, 1, sizeof (cl_ulong), device_param->kernel_params_utf8toutf16le[1]) == -1) return -1;
// GPU decompress
if (hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 0, sizeof (cl_mem), device_param->kernel_params_decompress[0]) == -1) return -1;
@ -11242,6 +11337,7 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx)
device_param->cuda_function_amp = NULL;
device_param->cuda_function_memset = NULL;
device_param->cuda_function_atinit = NULL;
device_param->cuda_function_utf8toutf16le = NULL;
device_param->cuda_function_decompress = NULL;
device_param->cuda_function_aux1 = NULL;
device_param->cuda_function_aux2 = NULL;
@ -11313,6 +11409,7 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx)
if (device_param->opencl_kernel_amp) hc_clReleaseKernel (hashcat_ctx, device_param->opencl_kernel_amp);
if (device_param->opencl_kernel_memset) hc_clReleaseKernel (hashcat_ctx, device_param->opencl_kernel_memset);
if (device_param->opencl_kernel_atinit) hc_clReleaseKernel (hashcat_ctx, device_param->opencl_kernel_atinit);
if (device_param->opencl_kernel_utf8toutf16le) hc_clReleaseKernel (hashcat_ctx, device_param->opencl_kernel_utf8toutf16le);
if (device_param->opencl_kernel_decompress)hc_clReleaseKernel (hashcat_ctx, device_param->opencl_kernel_decompress);
if (device_param->opencl_kernel_aux1) hc_clReleaseKernel (hashcat_ctx, device_param->opencl_kernel_aux1);
if (device_param->opencl_kernel_aux2) hc_clReleaseKernel (hashcat_ctx, device_param->opencl_kernel_aux2);
@ -11382,6 +11479,7 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx)
device_param->opencl_kernel_amp = NULL;
device_param->opencl_kernel_memset = NULL;
device_param->opencl_kernel_atinit = NULL;
device_param->opencl_kernel_utf8toutf16le = NULL;
device_param->opencl_kernel_decompress = NULL;
device_param->opencl_kernel_aux1 = NULL;
device_param->opencl_kernel_aux2 = NULL;

@ -439,6 +439,19 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
{
// missing handling hooks
if (hashconfig->opts_type & OPTS_TYPE_POST_AMP_UTF16LE)
{
if (device_param->is_cuda == true)
{
if (run_cuda_kernel_utf8toutf16le (hashcat_ctx, device_param, device_param->cuda_d_pws_buf, 1) == -1) return -1;
}
if (device_param->is_opencl == true)
{
if (run_opencl_kernel_utf8toutf16le (hashcat_ctx, device_param, device_param->opencl_d_pws_buf, 1) == -1) return -1;
}
}
if (run_kernel (hashcat_ctx, device_param, KERN_RUN_1, 0, 1, false, 0) == -1) return -1;
if (hashconfig->opts_type & OPTS_TYPE_HOOK12)

Loading…
Cancel
Save