From 58d1dedd1e27aa3be5e0c6148b21099601253cf6 Mon Sep 17 00:00:00 2001 From: jsteube Date: Sat, 19 Aug 2017 19:25:37 +0200 Subject: [PATCH 01/12] OpenCL Buffers: Do not allocate memory for amplifiers for fast hashes, it's simply not needed --- docs/changes.txt | 1 + include/types.h | 1 + src/opencl.c | 7 +++++-- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/docs/changes.txt b/docs/changes.txt index aafc642a2..edee965e8 100644 --- a/docs/changes.txt +++ b/docs/changes.txt @@ -38,6 +38,7 @@ ## - Charset: Add additional DES charsets with corrected parity +- OpenCL Buffers: Do not allocate memory for amplifiers for fast hashes, it's simply not needed - OpenCL Kernels: Improved performance of SHA-3 Kernel (keccak) by hardcoding the 0x80 stopbit - OpenCL Kernels: Move from ld.global.v4.u32 to ld.const.v4.u32 in _a3 kernels - OpenCL Kernels: Replace bitwise swaps with rotate() versions for AMD diff --git a/include/types.h b/include/types.h index 0191095ec..f66ca7f36 100644 --- a/include/types.h +++ b/include/types.h @@ -962,6 +962,7 @@ typedef struct hc_device_param u32 hardware_power; size_t size_pws; + size_t size_pws_amp; size_t size_tmps; size_t size_hooks; size_t size_bfs; diff --git a/src/opencl.c b/src/opencl.c index 052485133..ef7354570 100644 --- a/src/opencl.c +++ b/src/opencl.c @@ -4158,11 +4158,14 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx) } */ + const size_t size_pws_amp = (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) ? size_pws : size_pws; + device_param->size_bfs = size_bfs; device_param->size_combs = size_combs; device_param->size_rules = size_rules; device_param->size_rules_c = size_rules_c; device_param->size_pws = size_pws; + device_param->size_pws_amp = size_pws_amp; device_param->size_tmps = size_tmps; device_param->size_hooks = size_hooks; @@ -4785,7 +4788,7 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx) */ CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->context, CL_MEM_READ_ONLY, size_pws, NULL, &device_param->d_pws_buf); if (CL_rc == -1) return -1; - CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->context, CL_MEM_READ_ONLY, size_pws, NULL, &device_param->d_pws_amp_buf); if (CL_rc == -1) return -1; + CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->context, CL_MEM_READ_ONLY, size_pws_amp, NULL, &device_param->d_pws_amp_buf); if (CL_rc == -1) return -1; CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->context, CL_MEM_READ_WRITE, size_tmps, NULL, &device_param->d_tmps); if (CL_rc == -1) return -1; CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->context, CL_MEM_READ_WRITE, size_hooks, NULL, &device_param->d_hooks); if (CL_rc == -1) return -1; CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->context, CL_MEM_READ_ONLY, bitmap_ctx->bitmap_size, NULL, &device_param->d_bitmap_s1_a); if (CL_rc == -1) return -1; @@ -5374,7 +5377,7 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx) // zero some data buffers CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_pws_buf, device_param->size_pws); if (CL_rc == -1) return -1; - CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_pws_amp_buf, device_param->size_pws); if (CL_rc == -1) return -1; + CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_pws_amp_buf, device_param->size_pws_amp); if (CL_rc == -1) return -1; CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_tmps, device_param->size_tmps); if (CL_rc == -1) return -1; CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_hooks, device_param->size_hooks); if (CL_rc == -1) return -1; CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_plain_bufs, device_param->size_plains); if (CL_rc == -1) return -1; From 14324467340f3aa20d4789617967cae2015daf9f Mon Sep 17 00:00:00 2001 From: jsteube Date: Sun, 20 Aug 2017 12:04:56 +0200 Subject: [PATCH 02/12] Fix initialization of size_pws_amp --- src/autotune.c | 2 +- src/opencl.c | 17 ++++++++++------- src/selftest.c | 2 +- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/src/autotune.c b/src/autotune.c index 652319f59..892f80dba 100644 --- a/src/autotune.c +++ b/src/autotune.c @@ -272,7 +272,7 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param if (hashconfig->attack_exec == ATTACK_EXEC_OUTSIDE_KERNEL) { - CL_rc = run_kernel_memset (hashcat_ctx, device_param, device_param->d_pws_amp_buf, 0, device_param->size_pws); + CL_rc = run_kernel_memset (hashcat_ctx, device_param, device_param->d_pws_amp_buf, 0, device_param->size_pws_amp); if (CL_rc == -1) return -1; } diff --git a/src/opencl.c b/src/opencl.c index ef7354570..b000d8c4e 100644 --- a/src/opencl.c +++ b/src/opencl.c @@ -3955,6 +3955,8 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx) const size_t size_pws = kernel_power_max * sizeof (pw_t); + const size_t size_pws_amp = size_pws; + // size_tmps const size_t size_tmps = kernel_power_max * hashconfig->tmp_size; @@ -3980,7 +3982,7 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx) + size_markov_css + size_plains + size_pws - + size_pws // not a bug + + size_pws_amp + size_results + size_root_css + size_rules @@ -4064,9 +4066,10 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx) // find out if we would request too much memory on memory blocks which are based on kernel_accel - size_t size_pws = 4; - size_t size_tmps = 4; - size_t size_hooks = 4; + size_t size_pws = 4; + size_t size_pws_amp = 4; + size_t size_tmps = 4; + size_t size_hooks = 4; while (kernel_accel_max >= kernel_accel_min) { @@ -4076,6 +4079,8 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx) size_pws = kernel_power_max * sizeof (pw_t); + size_pws_amp = (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) ? 1 : size_pws; + // size_tmps size_tmps = kernel_power_max * hashconfig->tmp_size; @@ -4110,7 +4115,7 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx) + size_markov_css + size_plains + size_pws - + size_pws // not a bug + + size_pws_amp + size_results + size_root_css + size_rules @@ -4158,8 +4163,6 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx) } */ - const size_t size_pws_amp = (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) ? size_pws : size_pws; - device_param->size_bfs = size_bfs; device_param->size_combs = size_combs; device_param->size_rules = size_rules; diff --git a/src/selftest.c b/src/selftest.c index 4609d2c31..1314069ec 100644 --- a/src/selftest.c +++ b/src/selftest.c @@ -455,7 +455,7 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param device_param->kernel_params[18] = &device_param->d_esalt_bufs; CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_pws_buf, device_param->size_pws); if (CL_rc == -1) return -1; - CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_pws_amp_buf, device_param->size_pws); if (CL_rc == -1) return -1; + CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_pws_amp_buf, device_param->size_pws_amp); if (CL_rc == -1) return -1; CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_tmps, device_param->size_tmps); if (CL_rc == -1) return -1; CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_hooks, device_param->size_hooks); if (CL_rc == -1) return -1; CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_plain_bufs, device_param->size_plains); if (CL_rc == -1) return -1; From 508f1562f2eec79476bebf80a860837fcb8b5952 Mon Sep 17 00:00:00 2001 From: jsteube Date: Sun, 20 Aug 2017 12:13:34 +0200 Subject: [PATCH 03/12] Fix --stdout kernels, gid_max was still set to u32 --- OpenCL/m02000_a0.cl | 9 ++++++--- OpenCL/m02000_a1.cl | 9 ++++++--- OpenCL/m02000_a3.cl | 9 ++++++--- 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/OpenCL/m02000_a0.cl b/OpenCL/m02000_a0.cl index d6921cbe4..793e13ac3 100644 --- a/OpenCL/m02000_a0.cl +++ b/OpenCL/m02000_a0.cl @@ -3,7 +3,10 @@ * License.....: MIT */ -__kernel void gpu_memset (__global uint4 *buf, const uint value, const uint gid_max) +typedef uint u32; +typedef ulong u64; + +__kernel void gpu_memset (__global uint4 *buf, const u32 value, const u64 gid_max) { const u64 gid = get_global_id (0); @@ -12,10 +15,10 @@ __kernel void gpu_memset (__global uint4 *buf, const uint value, const uint gid_ buf[gid] = (uint4) (value); } -__kernel void m02000_mxx (__global void *pws, __global void *rules_buf, __global void *combs_buf, __global void * words_buf_r, __global void *tmps, __global void *hooks, __global void *bitmaps_buf_s1_a, __global void *bitmaps_buf_s1_b, __global void *bitmaps_buf_s1_c, __global void *bitmaps_buf_s1_d, __global void *bitmaps_buf_s2_a, __global void *bitmaps_buf_s2_b, __global void *bitmaps_buf_s2_c, __global void *bitmaps_buf_s2_d, __global void *plains_buf, __global void *digests_buf, __global void *hashes_shown, __global void *salt_bufs, __global const void *esalt_bufs, __global void *d_return_buf, __global void *d_scryptV0_buf, __global void *d_scryptV1_buf, __global void *d_scryptV2_buf, __global void *d_scryptV3_buf, const uint bitmap_mask, const uint bitmap_shift1, const uint bitmap_shift2, const uint salt_pos, const uint loop_pos, const uint loop_cnt, const uint il_cnt, const uint digests_cnt, const uint digests_offset, const uint combs_mode, const uint gid_max) +__kernel void m02000_mxx (__global void *pws, __global void *rules_buf, __global void *combs_buf, __global void * words_buf_r, __global void *tmps, __global void *hooks, __global void *bitmaps_buf_s1_a, __global void *bitmaps_buf_s1_b, __global void *bitmaps_buf_s1_c, __global void *bitmaps_buf_s1_d, __global void *bitmaps_buf_s2_a, __global void *bitmaps_buf_s2_b, __global void *bitmaps_buf_s2_c, __global void *bitmaps_buf_s2_d, __global void *plains_buf, __global void *digests_buf, __global void *hashes_shown, __global void *salt_bufs, __global const void *esalt_bufs, __global void *d_return_buf, __global void *d_scryptV0_buf, __global void *d_scryptV1_buf, __global void *d_scryptV2_buf, __global void *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) { } -__kernel void m02000_sxx (__global void *pws, __global void *rules_buf, __global void *combs_buf, __global void * words_buf_r, __global void *tmps, __global void *hooks, __global void *bitmaps_buf_s1_a, __global void *bitmaps_buf_s1_b, __global void *bitmaps_buf_s1_c, __global void *bitmaps_buf_s1_d, __global void *bitmaps_buf_s2_a, __global void *bitmaps_buf_s2_b, __global void *bitmaps_buf_s2_c, __global void *bitmaps_buf_s2_d, __global void *plains_buf, __global void *digests_buf, __global void *hashes_shown, __global void *salt_bufs, __global const void *esalt_bufs, __global void *d_return_buf, __global void *d_scryptV0_buf, __global void *d_scryptV1_buf, __global void *d_scryptV2_buf, __global void *d_scryptV3_buf, const uint bitmap_mask, const uint bitmap_shift1, const uint bitmap_shift2, const uint salt_pos, const uint loop_pos, const uint loop_cnt, const uint il_cnt, const uint digests_cnt, const uint digests_offset, const uint combs_mode, const uint gid_max) +__kernel void m02000_sxx (__global void *pws, __global void *rules_buf, __global void *combs_buf, __global void * words_buf_r, __global void *tmps, __global void *hooks, __global void *bitmaps_buf_s1_a, __global void *bitmaps_buf_s1_b, __global void *bitmaps_buf_s1_c, __global void *bitmaps_buf_s1_d, __global void *bitmaps_buf_s2_a, __global void *bitmaps_buf_s2_b, __global void *bitmaps_buf_s2_c, __global void *bitmaps_buf_s2_d, __global void *plains_buf, __global void *digests_buf, __global void *hashes_shown, __global void *salt_bufs, __global const void *esalt_bufs, __global void *d_return_buf, __global void *d_scryptV0_buf, __global void *d_scryptV1_buf, __global void *d_scryptV2_buf, __global void *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) { } diff --git a/OpenCL/m02000_a1.cl b/OpenCL/m02000_a1.cl index d6921cbe4..793e13ac3 100644 --- a/OpenCL/m02000_a1.cl +++ b/OpenCL/m02000_a1.cl @@ -3,7 +3,10 @@ * License.....: MIT */ -__kernel void gpu_memset (__global uint4 *buf, const uint value, const uint gid_max) +typedef uint u32; +typedef ulong u64; + +__kernel void gpu_memset (__global uint4 *buf, const u32 value, const u64 gid_max) { const u64 gid = get_global_id (0); @@ -12,10 +15,10 @@ __kernel void gpu_memset (__global uint4 *buf, const uint value, const uint gid_ buf[gid] = (uint4) (value); } -__kernel void m02000_mxx (__global void *pws, __global void *rules_buf, __global void *combs_buf, __global void * words_buf_r, __global void *tmps, __global void *hooks, __global void *bitmaps_buf_s1_a, __global void *bitmaps_buf_s1_b, __global void *bitmaps_buf_s1_c, __global void *bitmaps_buf_s1_d, __global void *bitmaps_buf_s2_a, __global void *bitmaps_buf_s2_b, __global void *bitmaps_buf_s2_c, __global void *bitmaps_buf_s2_d, __global void *plains_buf, __global void *digests_buf, __global void *hashes_shown, __global void *salt_bufs, __global const void *esalt_bufs, __global void *d_return_buf, __global void *d_scryptV0_buf, __global void *d_scryptV1_buf, __global void *d_scryptV2_buf, __global void *d_scryptV3_buf, const uint bitmap_mask, const uint bitmap_shift1, const uint bitmap_shift2, const uint salt_pos, const uint loop_pos, const uint loop_cnt, const uint il_cnt, const uint digests_cnt, const uint digests_offset, const uint combs_mode, const uint gid_max) +__kernel void m02000_mxx (__global void *pws, __global void *rules_buf, __global void *combs_buf, __global void * words_buf_r, __global void *tmps, __global void *hooks, __global void *bitmaps_buf_s1_a, __global void *bitmaps_buf_s1_b, __global void *bitmaps_buf_s1_c, __global void *bitmaps_buf_s1_d, __global void *bitmaps_buf_s2_a, __global void *bitmaps_buf_s2_b, __global void *bitmaps_buf_s2_c, __global void *bitmaps_buf_s2_d, __global void *plains_buf, __global void *digests_buf, __global void *hashes_shown, __global void *salt_bufs, __global const void *esalt_bufs, __global void *d_return_buf, __global void *d_scryptV0_buf, __global void *d_scryptV1_buf, __global void *d_scryptV2_buf, __global void *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) { } -__kernel void m02000_sxx (__global void *pws, __global void *rules_buf, __global void *combs_buf, __global void * words_buf_r, __global void *tmps, __global void *hooks, __global void *bitmaps_buf_s1_a, __global void *bitmaps_buf_s1_b, __global void *bitmaps_buf_s1_c, __global void *bitmaps_buf_s1_d, __global void *bitmaps_buf_s2_a, __global void *bitmaps_buf_s2_b, __global void *bitmaps_buf_s2_c, __global void *bitmaps_buf_s2_d, __global void *plains_buf, __global void *digests_buf, __global void *hashes_shown, __global void *salt_bufs, __global const void *esalt_bufs, __global void *d_return_buf, __global void *d_scryptV0_buf, __global void *d_scryptV1_buf, __global void *d_scryptV2_buf, __global void *d_scryptV3_buf, const uint bitmap_mask, const uint bitmap_shift1, const uint bitmap_shift2, const uint salt_pos, const uint loop_pos, const uint loop_cnt, const uint il_cnt, const uint digests_cnt, const uint digests_offset, const uint combs_mode, const uint gid_max) +__kernel void m02000_sxx (__global void *pws, __global void *rules_buf, __global void *combs_buf, __global void * words_buf_r, __global void *tmps, __global void *hooks, __global void *bitmaps_buf_s1_a, __global void *bitmaps_buf_s1_b, __global void *bitmaps_buf_s1_c, __global void *bitmaps_buf_s1_d, __global void *bitmaps_buf_s2_a, __global void *bitmaps_buf_s2_b, __global void *bitmaps_buf_s2_c, __global void *bitmaps_buf_s2_d, __global void *plains_buf, __global void *digests_buf, __global void *hashes_shown, __global void *salt_bufs, __global const void *esalt_bufs, __global void *d_return_buf, __global void *d_scryptV0_buf, __global void *d_scryptV1_buf, __global void *d_scryptV2_buf, __global void *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) { } diff --git a/OpenCL/m02000_a3.cl b/OpenCL/m02000_a3.cl index d6921cbe4..793e13ac3 100644 --- a/OpenCL/m02000_a3.cl +++ b/OpenCL/m02000_a3.cl @@ -3,7 +3,10 @@ * License.....: MIT */ -__kernel void gpu_memset (__global uint4 *buf, const uint value, const uint gid_max) +typedef uint u32; +typedef ulong u64; + +__kernel void gpu_memset (__global uint4 *buf, const u32 value, const u64 gid_max) { const u64 gid = get_global_id (0); @@ -12,10 +15,10 @@ __kernel void gpu_memset (__global uint4 *buf, const uint value, const uint gid_ buf[gid] = (uint4) (value); } -__kernel void m02000_mxx (__global void *pws, __global void *rules_buf, __global void *combs_buf, __global void * words_buf_r, __global void *tmps, __global void *hooks, __global void *bitmaps_buf_s1_a, __global void *bitmaps_buf_s1_b, __global void *bitmaps_buf_s1_c, __global void *bitmaps_buf_s1_d, __global void *bitmaps_buf_s2_a, __global void *bitmaps_buf_s2_b, __global void *bitmaps_buf_s2_c, __global void *bitmaps_buf_s2_d, __global void *plains_buf, __global void *digests_buf, __global void *hashes_shown, __global void *salt_bufs, __global const void *esalt_bufs, __global void *d_return_buf, __global void *d_scryptV0_buf, __global void *d_scryptV1_buf, __global void *d_scryptV2_buf, __global void *d_scryptV3_buf, const uint bitmap_mask, const uint bitmap_shift1, const uint bitmap_shift2, const uint salt_pos, const uint loop_pos, const uint loop_cnt, const uint il_cnt, const uint digests_cnt, const uint digests_offset, const uint combs_mode, const uint gid_max) +__kernel void m02000_mxx (__global void *pws, __global void *rules_buf, __global void *combs_buf, __global void * words_buf_r, __global void *tmps, __global void *hooks, __global void *bitmaps_buf_s1_a, __global void *bitmaps_buf_s1_b, __global void *bitmaps_buf_s1_c, __global void *bitmaps_buf_s1_d, __global void *bitmaps_buf_s2_a, __global void *bitmaps_buf_s2_b, __global void *bitmaps_buf_s2_c, __global void *bitmaps_buf_s2_d, __global void *plains_buf, __global void *digests_buf, __global void *hashes_shown, __global void *salt_bufs, __global const void *esalt_bufs, __global void *d_return_buf, __global void *d_scryptV0_buf, __global void *d_scryptV1_buf, __global void *d_scryptV2_buf, __global void *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) { } -__kernel void m02000_sxx (__global void *pws, __global void *rules_buf, __global void *combs_buf, __global void * words_buf_r, __global void *tmps, __global void *hooks, __global void *bitmaps_buf_s1_a, __global void *bitmaps_buf_s1_b, __global void *bitmaps_buf_s1_c, __global void *bitmaps_buf_s1_d, __global void *bitmaps_buf_s2_a, __global void *bitmaps_buf_s2_b, __global void *bitmaps_buf_s2_c, __global void *bitmaps_buf_s2_d, __global void *plains_buf, __global void *digests_buf, __global void *hashes_shown, __global void *salt_bufs, __global const void *esalt_bufs, __global void *d_return_buf, __global void *d_scryptV0_buf, __global void *d_scryptV1_buf, __global void *d_scryptV2_buf, __global void *d_scryptV3_buf, const uint bitmap_mask, const uint bitmap_shift1, const uint bitmap_shift2, const uint salt_pos, const uint loop_pos, const uint loop_cnt, const uint il_cnt, const uint digests_cnt, const uint digests_offset, const uint combs_mode, const uint gid_max) +__kernel void m02000_sxx (__global void *pws, __global void *rules_buf, __global void *combs_buf, __global void * words_buf_r, __global void *tmps, __global void *hooks, __global void *bitmaps_buf_s1_a, __global void *bitmaps_buf_s1_b, __global void *bitmaps_buf_s1_c, __global void *bitmaps_buf_s1_d, __global void *bitmaps_buf_s2_a, __global void *bitmaps_buf_s2_b, __global void *bitmaps_buf_s2_c, __global void *bitmaps_buf_s2_d, __global void *plains_buf, __global void *digests_buf, __global void *hashes_shown, __global void *salt_bufs, __global const void *esalt_bufs, __global void *d_return_buf, __global void *d_scryptV0_buf, __global void *d_scryptV1_buf, __global void *d_scryptV2_buf, __global void *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) { } From f63df45fca5f42072587ff1db80c6f4f4dd26dc6 Mon Sep 17 00:00:00 2001 From: jsteube Date: Sun, 20 Aug 2017 12:27:19 +0200 Subject: [PATCH 04/12] Backport current state of pure kernel rule engine to CPU --- include/rp_kernel_on_cpu.h | 2 +- src/rp_kernel_on_cpu.c | 434 ++++++++++++++++++++++++++----------- 2 files changed, 310 insertions(+), 126 deletions(-) diff --git a/include/rp_kernel_on_cpu.h b/include/rp_kernel_on_cpu.h index d949c5cf3..d4e2b1133 100644 --- a/include/rp_kernel_on_cpu.h +++ b/include/rp_kernel_on_cpu.h @@ -6,6 +6,6 @@ #ifndef _RP_KERNEL_ON_CPU_H #define _RP_KERNEL_ON_CPU_H -int apply_rules (const u32 *cmds, u32 buf[64], const int in_len); +int apply_rules (const u32 *cmds, u32 *buf, const int in_len); #endif // _RP_KERNEL_ON_CPU_H diff --git a/src/rp_kernel_on_cpu.c b/src/rp_kernel_on_cpu.c index dbc42d2f5..2d6f30037 100644 --- a/src/rp_kernel_on_cpu.c +++ b/src/rp_kernel_on_cpu.c @@ -9,104 +9,286 @@ #include "rp.h" #include "rp_kernel_on_cpu.h" -static void upper_at (u8 *buf, const int pos) +static u64 hl32_to_64 (const u32 a, const u32 b) { - const u8 c = buf[pos]; - - if ((c >= 'a') && (c <= 'z')) buf[pos] ^= 0x20; + return (((u64) a) << 32) | b; } -static void lower_at (u8 *buf, const int pos) +static u32 l32_from_64_S (u64 a) { - const u8 c = buf[pos]; + const u32 r = (u32) (a); - if ((c >= 'A') && (c <= 'Z')) buf[pos] ^= 0x20; + return r; } -static void toggle_at (u8 *buf, const int pos) +static u32 h32_from_64_S (u64 a) { - const u8 c = buf[pos]; + a >>= 32; - if ((c >= 'a') && (c <= 'z')) buf[pos] ^= 0x20; - if ((c >= 'A') && (c <= 'Z')) buf[pos] ^= 0x20; + const u32 r = (u32) (a); + + return r; } -static void mangle_switch (u8 *buf, const int l, const int r) +static u32 generate_cmask (const u32 value) { - const u8 c = buf[r]; - buf[r] = buf[l]; - buf[l] = c; + const u32 rmask = ((value & 0x40404040u) >> 1u) + & ~((value & 0x80808080u) >> 2u); + + const u32 hmask = (value & 0x1f1f1f1fu) + 0x05050505u; + const u32 lmask = (value & 0x1f1f1f1fu) + 0x1f1f1f1fu; + + return rmask & ~hmask & lmask; } -static int mangle_lrest (MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const u8 p1, u8 *buf, const int len) +static void append_four_byte (const u32 *buf_src, const int off_src, u32 *buf_dst, const int off_dst) { - for (int pos = 0; pos < len; pos++) lower_at (buf, pos); + const int sd = off_src / 4; + const int sm = off_src & 3; + const int sm8 = sm * 8; - return (len); + const int dd = off_dst / 4; + const int dm = off_dst & 3; + const int dm8 = dm * 8; + + u64 t64 = hl32_to_64 (buf_src[sd + 1], buf_src[sd + 0]); + + t64 >>= sm8; + t64 <<= dm8; + + const u32 t0 = l32_from_64_S (t64); + const u32 t1 = h32_from_64_S (t64); + + buf_dst[dd + 0] |= t0; + buf_dst[dd + 1] |= t1; } -static int mangle_lrest_ufirst (MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const u8 p1, u8 *buf, const int len) +static void append_three_byte (const u32 *buf_src, const int off_src, u32 *buf_dst, const int off_dst) { - for (int pos = 0; pos < len; pos++) lower_at (buf, pos); + const int sd = off_src / 4; + const int sm = off_src & 3; + const int sm8 = sm * 8; - upper_at (buf, 0); + const int dd = off_dst / 4; + const int dm = off_dst & 3; + const int dm8 = dm * 8; - return (len); + u64 t64 = hl32_to_64 (buf_src[sd + 1], buf_src[sd + 0]); + + t64 >>= sm8; + t64 &= 0x00ffffff; + t64 <<= dm8; + + const u32 t0 = l32_from_64_S (t64); + const u32 t1 = h32_from_64_S (t64); + + buf_dst[dd + 0] |= t0; + buf_dst[dd + 1] |= t1; } -static int mangle_urest (MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const u8 p1, u8 *buf, const int len) +static void append_two_byte (const u32 *buf_src, const int off_src, u32 *buf_dst, const int off_dst) { - for (int pos = 0; pos < len; pos++) upper_at (buf, pos); + const int sd = off_src / 4; + const int sm = off_src & 3; + const int sm8 = sm * 8; - return (len); + const int dd = off_dst / 4; + const int dm = off_dst & 3; + const int dm8 = dm * 8; + + u64 t64 = hl32_to_64 (buf_src[sd + 1], buf_src[sd + 0]); + + t64 >>= sm8; + t64 &= 0x0000ffff; + t64 <<= dm8; + + const u32 t0 = l32_from_64_S (t64); + const u32 t1 = h32_from_64_S (t64); + + buf_dst[dd + 0] |= t0; + buf_dst[dd + 1] |= t1; } -static int mangle_urest_lfirst (MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const u8 p1, u8 *buf, const int len) +static void append_one_byte (const u32 *buf_src, const int off_src, u32 *buf_dst, const int off_dst) { - for (int pos = 0; pos < len; pos++) upper_at (buf, pos); + const int sd = off_src / 4; + const int sm = off_src & 3; + const int sm8 = sm * 8; - lower_at (buf, 0); + const int dd = off_dst / 4; + const int dm = off_dst & 3; + const int dm8 = dm * 8; - return (len); + u32 t = buf_src[sd]; + + t >>= sm8; + t &= 0xff; + t <<= dm8; + + buf_dst[dd] |= t; } -static int mangle_trest (MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const u8 p1, u8 *buf, const int len) +static void append_block (const u32 *buf_src, const int off_src, u32 *buf_dst, const int off_dst, const int len) { - for (int pos = 0; pos < len; pos++) toggle_at (buf, pos); + int i; - return (len); -} - -static int mangle_toggle_at (MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const u8 p1, u8 *buf, const int len) -{ - if (p0 >= len) return (len); - - toggle_at (buf, p0); - - return (len); -} - -static int mangle_reverse (MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const u8 p1, u8 *buf, const int len) -{ - for (int l = 0; l < len / 2; l++) + for (i = 0; i < len - 4; i += 4) { - const int r = len - 1 - l; + append_four_byte (buf_src, off_src + i, buf_dst, off_dst + i); + } - mangle_switch (buf, l, r); + const int left = len - i; + + switch (left) + { + case 3: append_three_byte (buf_src, off_src + i, buf_dst, off_dst + i); break; + case 2: append_two_byte (buf_src, off_src + i, buf_dst, off_dst + i); break; + case 1: append_one_byte (buf_src, off_src + i, buf_dst, off_dst + i); break; + } +} + +static void exchange_byte (u32 *buf, const int off_src, const int off_dst) +{ + u8 *ptr = (u8 *) buf; + + const u8 tmp = ptr[off_src]; + + ptr[off_src] = ptr[off_dst]; + ptr[off_dst] = tmp; + + /* + something tells me we do this faster + + const int sd = off_src / 4; + const int sm = off_src & 3; + const int sm8 = sm * 8; + + const int dd = off_dst / 4; + const int dm = off_dst & 3; + const int dm8 = dm * 8; + + u32 ts = buf[sd]; + u32 td = buf[dd]; + + ts >>= sm8; + td >>= dm8; + + ts &= 0xff; + td &= 0xff; + + const u32 x = ts ^ td; + + const u32 xs = x << sm8; + const u32 xd = x << dm8; + + buf[sd] ^= xs; + buf[dd] ^= xd; + */ +} + +static int mangle_lrest (MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const u8 p1, u32 *buf, const int len) +{ + for (int i = 0, idx = 0; i < len; i += 4, idx += 1) + { + const u32 t = buf[idx]; + + buf[idx] = t | generate_cmask (t); } return (len); } -static int mangle_dupeword (MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const u8 p1, u8 *buf, const int len) +static int mangle_lrest_ufirst (MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const u8 p1, u32 *buf, const int len) +{ + for (int i = 0, idx = 0; i < len; i += 4, idx += 1) + { + const u32 t = buf[idx]; + + buf[idx] = t | generate_cmask (t); + } + + const u32 t = buf[0]; + + buf[0] = t & ~(0x00000020 & generate_cmask (t)); + + return (len); +} + +static int mangle_urest (MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const u8 p1, u32 *buf, const int len) +{ + for (int i = 0, idx = 0; i < len; i += 4, idx += 1) + { + const u32 t = buf[idx]; + + buf[idx] = t & ~(generate_cmask (t)); + } + + return (len); +} + +static int mangle_urest_lfirst (MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const u8 p1, u32 *buf, const int len) +{ + for (int i = 0, idx = 0; i < len; i += 4, idx += 1) + { + const u32 t = buf[idx]; + + buf[idx] = t & ~(generate_cmask (t)); + } + + const u32 t = buf[0]; + + buf[0] = t | (0x00000020 & generate_cmask (t)); + + return (len); +} + +static int mangle_trest (MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const u8 p1, u32 *buf, const int len) +{ + for (int i = 0, idx = 0; i < len; i += 4, idx += 1) + { + const u32 t = buf[idx]; + + buf[idx] = t ^ generate_cmask (t); + } + + return (len); +} + +static int mangle_toggle_at (MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const u8 p1, u32 *buf, const int len) +{ + if (p0 >= len) return (len); + + const u8 p0d = p0 / 4; + const u8 p0m = p0 & 3; + + const u32 tmp = 0x20u << (p0m * 8); + + const u32 t = buf[p0d]; + + buf[p0d] = t ^ (generate_cmask (t) & tmp); + + return (len); +} + +static int mangle_reverse (MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const u8 p1, u32 *buf, const int len) +{ + for (int l = 0; l < len / 2; l++) + { + const int r = len - 1 - l; + + exchange_byte (buf, l, r); + } + + return (len); +} + +static int mangle_dupeword (MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const u8 p1, u32 *buf, const int len) { const int out_len = len * 2; if (out_len >= RP_PASSWORD_SIZE) return (len); - u8 *out = buf + len; - - for (int i = 0; i < len; i++) *out++ = *buf++; + append_block (buf, 0, buf, len, len); return (out_len); } @@ -124,15 +306,20 @@ static int mangle_dupeword_times (MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const u return (out_len); } -static int mangle_reflect (MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const u8 p1, u8 *buf, const int len) +static int mangle_reflect (MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const u8 p1, u32 *buf, const int len) { const int out_len = len * 2; if (out_len >= RP_PASSWORD_SIZE) return (len); - mangle_dupeword (p0, p1, buf, len); + append_block (buf, 0, buf, len, len); - mangle_reverse (p0, p1, buf + len, len); + for (int l = 0; l < len / 2; l++) + { + const int r = len - 1 - l; + + exchange_byte (buf, len + l, len + r); + } return out_len; } @@ -164,21 +351,21 @@ static int mangle_prepend (MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const u8 p1, u return (out_len); } -static int mangle_rotate_left (MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const u8 p1, u8 *buf, const int len) +static int mangle_rotate_left (MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const u8 p1, u32 *buf, const int len) { for (int l = 0, r = len - 1; r > l; r--) { - mangle_switch (buf, l, r); + exchange_byte (buf, l, r); } return (len); } -static int mangle_rotate_right (MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const u8 p1, u8 *buf, const int len) +static int mangle_rotate_right (MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const u8 p1, u32 *buf, const int len) { for (int l = 0, r = len - 1; l < r; l++) { - mangle_switch (buf, l, r); + exchange_byte (buf, l, r); } return (len); @@ -370,30 +557,30 @@ static int mangle_dupechar_all (MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const u8 return (out_len); } -static int mangle_switch_first (MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const u8 p1, u8 *buf, const int len) +static int mangle_switch_first (MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const u8 p1, u32 *buf, const int len) { if (len < 2) return (len); - mangle_switch (buf, 0, 1); + exchange_byte (buf, 0, 1); return (len); } -static int mangle_switch_last (MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const u8 p1, u8 *buf, const int len) +static int mangle_switch_last (MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const u8 p1, u32 *buf, const int len) { if (len < 2) return (len); - mangle_switch (buf, len - 2, len - 1); + exchange_byte (buf, len - 2, len - 1); return (len); } -static int mangle_switch_at (MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const u8 p1, u8 *buf, const int len) +static int mangle_switch_at (MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const u8 p1, u32 *buf, const int len) { if (p0 >= len) return (len); if (p1 >= len) return (len); - mangle_switch (buf, p0, p1); + exchange_byte (buf, p0, p1); return (len); } @@ -490,86 +677,83 @@ static int mangle_dupeblock_last (MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const u return (out_len); } -static int mangle_title_sep (MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const u8 p1, u8 *buf, const int len) +static int mangle_title_sep (MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const u8 p1, u32 *buf, const int len) { - int upper_next = 1; + if ((len + 4) >= RP_PASSWORD_SIZE) return (len); // cheap way to not need to check for overflow of i + 1 - for (int pos = 0; pos < len; pos++) + mangle_lrest_ufirst (0, 0, buf, len); + + for (int i = 0, idx = 0; i < len; i += 4, idx += 1) { - if (buf[pos] == p0) - { - upper_next = 1; + const u32 v = buf[idx]; - continue; - } + u32 out0 = 0; + u32 out1 = 0; - if (upper_next) - { - upper_next = 0; + if (((v >> 0) & 0xff) == p0) out0 |= 0x0000ff00; + if (((v >> 8) & 0xff) == p0) out0 |= 0x00ff0000; + if (((v >> 16) & 0xff) == p0) out0 |= 0xff000000; + if (((v >> 24) & 0xff) == p0) out1 |= 0x000000ff; - upper_at (buf, pos); - } - else - { - lower_at (buf, pos); - } + buf[idx + 0] &= ~(generate_cmask (buf[idx + 0]) & out0); + buf[idx + 1] &= ~(generate_cmask (buf[idx + 1]) & out1); } return (len); } -static int apply_rule (const u32 name, MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const u8 p1, u8 *buf, const int in_len) +static int apply_rule (const u32 name, MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const u8 p1, u32 *buf, const int in_len) { int out_len = in_len; switch (name) { - case RULE_OP_MANGLE_LREST: out_len = mangle_lrest (p0, p1, buf, out_len); break; - case RULE_OP_MANGLE_LREST_UFIRST: out_len = mangle_lrest_ufirst (p0, p1, buf, out_len); break; - case RULE_OP_MANGLE_UREST: out_len = mangle_urest (p0, p1, buf, out_len); break; - case RULE_OP_MANGLE_UREST_LFIRST: out_len = mangle_urest_lfirst (p0, p1, buf, out_len); break; - case RULE_OP_MANGLE_TREST: out_len = mangle_trest (p0, p1, buf, out_len); break; - case RULE_OP_MANGLE_TOGGLE_AT: out_len = mangle_toggle_at (p0, p1, buf, out_len); break; - case RULE_OP_MANGLE_REVERSE: out_len = mangle_reverse (p0, p1, buf, out_len); break; - case RULE_OP_MANGLE_DUPEWORD: out_len = mangle_dupeword (p0, p1, buf, out_len); break; - case RULE_OP_MANGLE_DUPEWORD_TIMES: out_len = mangle_dupeword_times (p0, p1, buf, out_len); break; - case RULE_OP_MANGLE_REFLECT: out_len = mangle_reflect (p0, p1, buf, out_len); break; - case RULE_OP_MANGLE_APPEND: out_len = mangle_append (p0, p1, buf, out_len); break; - case RULE_OP_MANGLE_PREPEND: out_len = mangle_prepend (p0, p1, buf, out_len); break; - case RULE_OP_MANGLE_ROTATE_LEFT: out_len = mangle_rotate_left (p0, p1, buf, out_len); break; - case RULE_OP_MANGLE_ROTATE_RIGHT: out_len = mangle_rotate_right (p0, p1, buf, out_len); break; - case RULE_OP_MANGLE_DELETE_FIRST: out_len = mangle_delete_first (p0, p1, buf, out_len); break; - case RULE_OP_MANGLE_DELETE_LAST: out_len = mangle_delete_last (p0, p1, buf, out_len); break; - case RULE_OP_MANGLE_DELETE_AT: out_len = mangle_delete_at (p0, p1, buf, out_len); break; - case RULE_OP_MANGLE_EXTRACT: out_len = mangle_extract (p0, p1, buf, out_len); break; - case RULE_OP_MANGLE_OMIT: out_len = mangle_omit (p0, p1, buf, out_len); break; - case RULE_OP_MANGLE_INSERT: out_len = mangle_insert (p0, p1, buf, out_len); break; - case RULE_OP_MANGLE_OVERSTRIKE: out_len = mangle_overstrike (p0, p1, buf, out_len); break; - case RULE_OP_MANGLE_TRUNCATE_AT: out_len = mangle_truncate_at (p0, p1, buf, out_len); break; - case RULE_OP_MANGLE_REPLACE: out_len = mangle_replace (p0, p1, buf, out_len); break; - case RULE_OP_MANGLE_PURGECHAR: out_len = mangle_purgechar (p0, p1, buf, out_len); break; - case RULE_OP_MANGLE_DUPECHAR_FIRST: out_len = mangle_dupechar_first (p0, p1, buf, out_len); break; - case RULE_OP_MANGLE_DUPECHAR_LAST: out_len = mangle_dupechar_last (p0, p1, buf, out_len); break; - case RULE_OP_MANGLE_DUPECHAR_ALL: out_len = mangle_dupechar_all (p0, p1, buf, out_len); break; - case RULE_OP_MANGLE_SWITCH_FIRST: out_len = mangle_switch_first (p0, p1, buf, out_len); break; - case RULE_OP_MANGLE_SWITCH_LAST: out_len = mangle_switch_last (p0, p1, buf, out_len); break; - case RULE_OP_MANGLE_SWITCH_AT: out_len = mangle_switch_at (p0, p1, buf, out_len); break; - case RULE_OP_MANGLE_CHR_SHIFTL: out_len = mangle_chr_shiftl (p0, p1, buf, out_len); break; - case RULE_OP_MANGLE_CHR_SHIFTR: out_len = mangle_chr_shiftr (p0, p1, buf, out_len); break; - case RULE_OP_MANGLE_CHR_INCR: out_len = mangle_chr_incr (p0, p1, buf, out_len); break; - case RULE_OP_MANGLE_CHR_DECR: out_len = mangle_chr_decr (p0, p1, buf, out_len); break; - case RULE_OP_MANGLE_REPLACE_NP1: out_len = mangle_replace_np1 (p0, p1, buf, out_len); break; - case RULE_OP_MANGLE_REPLACE_NM1: out_len = mangle_replace_nm1 (p0, p1, buf, out_len); break; - case RULE_OP_MANGLE_DUPEBLOCK_FIRST: out_len = mangle_dupeblock_first (p0, p1, buf, out_len); break; - case RULE_OP_MANGLE_DUPEBLOCK_LAST: out_len = mangle_dupeblock_last (p0, p1, buf, out_len); break; - case RULE_OP_MANGLE_TITLE_SEP: out_len = mangle_title_sep (p0, p1, buf, out_len); break; - case RULE_OP_MANGLE_TITLE: out_len = mangle_title_sep (' ', p1, buf, out_len); break; + case RULE_OP_MANGLE_LREST: out_len = mangle_lrest (p0, p1, buf, out_len); break; + case RULE_OP_MANGLE_LREST_UFIRST: out_len = mangle_lrest_ufirst (p0, p1, buf, out_len); break; + case RULE_OP_MANGLE_UREST: out_len = mangle_urest (p0, p1, buf, out_len); break; + case RULE_OP_MANGLE_UREST_LFIRST: out_len = mangle_urest_lfirst (p0, p1, buf, out_len); break; + case RULE_OP_MANGLE_TREST: out_len = mangle_trest (p0, p1, buf, out_len); break; + case RULE_OP_MANGLE_TOGGLE_AT: out_len = mangle_toggle_at (p0, p1, buf, out_len); break; + case RULE_OP_MANGLE_REVERSE: out_len = mangle_reverse (p0, p1, buf, out_len); break; + case RULE_OP_MANGLE_DUPEWORD: out_len = mangle_dupeword (p0, p1, buf, out_len); break; + case RULE_OP_MANGLE_DUPEWORD_TIMES: out_len = mangle_dupeword_times (p0, p1, (u8 *) buf, out_len); break; + case RULE_OP_MANGLE_REFLECT: out_len = mangle_reflect (p0, p1, buf, out_len); break; + case RULE_OP_MANGLE_APPEND: out_len = mangle_append (p0, p1, (u8 *) buf, out_len); break; + case RULE_OP_MANGLE_PREPEND: out_len = mangle_prepend (p0, p1, (u8 *) buf, out_len); break; + case RULE_OP_MANGLE_ROTATE_LEFT: out_len = mangle_rotate_left (p0, p1, buf, out_len); break; + case RULE_OP_MANGLE_ROTATE_RIGHT: out_len = mangle_rotate_right (p0, p1, buf, out_len); break; + case RULE_OP_MANGLE_DELETE_FIRST: out_len = mangle_delete_first (p0, p1, (u8 *) buf, out_len); break; + case RULE_OP_MANGLE_DELETE_LAST: out_len = mangle_delete_last (p0, p1, (u8 *) buf, out_len); break; + case RULE_OP_MANGLE_DELETE_AT: out_len = mangle_delete_at (p0, p1, (u8 *) buf, out_len); break; + case RULE_OP_MANGLE_EXTRACT: out_len = mangle_extract (p0, p1, (u8 *) buf, out_len); break; + case RULE_OP_MANGLE_OMIT: out_len = mangle_omit (p0, p1, (u8 *) buf, out_len); break; + case RULE_OP_MANGLE_INSERT: out_len = mangle_insert (p0, p1, (u8 *) buf, out_len); break; + case RULE_OP_MANGLE_OVERSTRIKE: out_len = mangle_overstrike (p0, p1, (u8 *) buf, out_len); break; + case RULE_OP_MANGLE_TRUNCATE_AT: out_len = mangle_truncate_at (p0, p1, (u8 *) buf, out_len); break; + case RULE_OP_MANGLE_REPLACE: out_len = mangle_replace (p0, p1, (u8 *) buf, out_len); break; + case RULE_OP_MANGLE_PURGECHAR: out_len = mangle_purgechar (p0, p1, (u8 *) buf, out_len); break; + case RULE_OP_MANGLE_DUPECHAR_FIRST: out_len = mangle_dupechar_first (p0, p1, (u8 *) buf, out_len); break; + case RULE_OP_MANGLE_DUPECHAR_LAST: out_len = mangle_dupechar_last (p0, p1, (u8 *) buf, out_len); break; + case RULE_OP_MANGLE_DUPECHAR_ALL: out_len = mangle_dupechar_all (p0, p1, (u8 *) buf, out_len); break; + case RULE_OP_MANGLE_SWITCH_FIRST: out_len = mangle_switch_first (p0, p1, buf, out_len); break; + case RULE_OP_MANGLE_SWITCH_LAST: out_len = mangle_switch_last (p0, p1, buf, out_len); break; + case RULE_OP_MANGLE_SWITCH_AT: out_len = mangle_switch_at (p0, p1, buf, out_len); break; + case RULE_OP_MANGLE_CHR_SHIFTL: out_len = mangle_chr_shiftl (p0, p1, (u8 *) buf, out_len); break; + case RULE_OP_MANGLE_CHR_SHIFTR: out_len = mangle_chr_shiftr (p0, p1, (u8 *) buf, out_len); break; + case RULE_OP_MANGLE_CHR_INCR: out_len = mangle_chr_incr (p0, p1, (u8 *) buf, out_len); break; + case RULE_OP_MANGLE_CHR_DECR: out_len = mangle_chr_decr (p0, p1, (u8 *) buf, out_len); break; + case RULE_OP_MANGLE_REPLACE_NP1: out_len = mangle_replace_np1 (p0, p1, (u8 *) buf, out_len); break; + case RULE_OP_MANGLE_REPLACE_NM1: out_len = mangle_replace_nm1 (p0, p1, (u8 *) buf, out_len); break; + case RULE_OP_MANGLE_DUPEBLOCK_FIRST: out_len = mangle_dupeblock_first (p0, p1, (u8 *) buf, out_len); break; + case RULE_OP_MANGLE_DUPEBLOCK_LAST: out_len = mangle_dupeblock_last (p0, p1, (u8 *) buf, out_len); break; + case RULE_OP_MANGLE_TITLE_SEP: out_len = mangle_title_sep (p0, p1, buf, out_len); break; + case RULE_OP_MANGLE_TITLE: out_len = mangle_title_sep (' ', p1, buf, out_len); break; } return out_len; } -int apply_rules (const u32 *cmds, u32 buf[64], const int in_len) +int apply_rules (const u32 *cmds, u32 *buf, const int in_len) { int out_len = in_len; @@ -581,7 +765,7 @@ int apply_rules (const u32 *cmds, u32 buf[64], const int in_len) const u8 p0 = (cmd >> 8) & 0xff; const u8 p1 = (cmd >> 16) & 0xff; - out_len = apply_rule (name, p0, p1, (u8 *) buf, out_len); + out_len = apply_rule (name, p0, p1, buf, out_len); } return out_len; From 6907981f089dd64257fd9045cb3dd6ae1e5dd108 Mon Sep 17 00:00:00 2001 From: jsteube Date: Sun, 20 Aug 2017 12:50:24 +0200 Subject: [PATCH 05/12] Backport current state of optimized kernel rule engine to CPU --- OpenCL/inc_rp_optimized.cl | 94 ++- include/rp_kernel_on_cpu_optimized.h | 3 +- src/rp_kernel_on_cpu_optimized.c | 1009 +++++++++++++------------- 3 files changed, 537 insertions(+), 569 deletions(-) diff --git a/OpenCL/inc_rp_optimized.cl b/OpenCL/inc_rp_optimized.cl index 38158c534..f081a2937 100644 --- a/OpenCL/inc_rp_optimized.cl +++ b/OpenCL/inc_rp_optimized.cl @@ -3,6 +3,8 @@ * License.....: MIT */ +#define MAYBE_UNUSED + static u32 generate_cmask (const u32 value) { const u32 rmask = ((value & 0x40404040u) >> 1u) @@ -756,10 +758,6 @@ static void append_block1 (const u32 offset, u32 buf0[4], u32 buf1[4], const u32 static void append_block8 (const u32 offset, u32 buf0[4], u32 buf1[4], const u32 src_l0[4], const u32 src_l1[4], const u32 src_r0[4], const u32 src_r1[4]) { - const int offset_mod_4 = offset & 3; - - const int offset_minus_4 = 4 - offset_mod_4; - u32 s0 = 0; u32 s1 = 0; u32 s2 = 0; @@ -882,6 +880,10 @@ static void append_block8 (const u32 offset, u32 buf0[4], u32 buf1[4], const u32 #endif #ifdef IS_NV + const int offset_mod_4 = offset & 3; + + const int offset_minus_4 = 4 - offset_mod_4; + const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; const u32 src_r00 = src_r0[0]; @@ -1022,7 +1024,7 @@ static void reverse_block (u32 in0[4], u32 in1[4], u32 out0[4], u32 out1[4], con out1[3] = swap32_S (tib41[3]); } -static u32 rule_op_mangle_lrest (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) +static u32 rule_op_mangle_lrest (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 buf0[4], MAYBE_UNUSED u32 buf1[4], const u32 in_len) { buf0[0] |= (generate_cmask (buf0[0])); buf0[1] |= (generate_cmask (buf0[1])); @@ -1036,7 +1038,7 @@ static u32 rule_op_mangle_lrest (const u32 p0, const u32 p1, u32 buf0[4], u32 bu return in_len; } -static u32 rule_op_mangle_urest (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) +static u32 rule_op_mangle_urest (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 buf0[4], MAYBE_UNUSED u32 buf1[4], const u32 in_len) { buf0[0] &= ~(generate_cmask (buf0[0])); buf0[1] &= ~(generate_cmask (buf0[1])); @@ -1050,7 +1052,7 @@ static u32 rule_op_mangle_urest (const u32 p0, const u32 p1, u32 buf0[4], u32 bu return in_len; } -static u32 rule_op_mangle_lrest_ufirst (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) +static u32 rule_op_mangle_lrest_ufirst (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 buf0[4], MAYBE_UNUSED u32 buf1[4], const u32 in_len) { rule_op_mangle_lrest (p0, p1, buf0, buf1, in_len); @@ -1059,7 +1061,7 @@ static u32 rule_op_mangle_lrest_ufirst (const u32 p0, const u32 p1, u32 buf0[4], return in_len; } -static u32 rule_op_mangle_urest_lfirst (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) +static u32 rule_op_mangle_urest_lfirst (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 buf0[4], MAYBE_UNUSED u32 buf1[4], const u32 in_len) { rule_op_mangle_urest (p0, p1, buf0, buf1, in_len); @@ -1068,7 +1070,7 @@ static u32 rule_op_mangle_urest_lfirst (const u32 p0, const u32 p1, u32 buf0[4], return in_len; } -static u32 rule_op_mangle_trest (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) +static u32 rule_op_mangle_trest (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 buf0[4], MAYBE_UNUSED u32 buf1[4], const u32 in_len) { buf0[0] ^= (generate_cmask (buf0[0])); buf0[1] ^= (generate_cmask (buf0[1])); @@ -1082,7 +1084,7 @@ static u32 rule_op_mangle_trest (const u32 p0, const u32 p1, u32 buf0[4], u32 bu return in_len; } -static u32 rule_op_mangle_toggle_at (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) +static u32 rule_op_mangle_toggle_at (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 buf0[4], MAYBE_UNUSED u32 buf1[4], const u32 in_len) { if (p0 >= in_len) return (in_len); @@ -1103,14 +1105,14 @@ static u32 rule_op_mangle_toggle_at (const u32 p0, const u32 p1, u32 buf0[4], u3 return in_len; } -static u32 rule_op_mangle_reverse (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) +static u32 rule_op_mangle_reverse (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 buf0[4], MAYBE_UNUSED u32 buf1[4], const u32 in_len) { reverse_block (buf0, buf1, buf0, buf1, in_len); return in_len; } -static u32 rule_op_mangle_dupeword (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) +static u32 rule_op_mangle_dupeword (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 buf0[4], MAYBE_UNUSED u32 buf1[4], const u32 in_len) { if ((in_len + in_len) >= 32) return (in_len); @@ -1123,7 +1125,7 @@ static u32 rule_op_mangle_dupeword (const u32 p0, const u32 p1, u32 buf0[4], u32 return out_len; } -static u32 rule_op_mangle_dupeword_times (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) +static u32 rule_op_mangle_dupeword_times (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 buf0[4], MAYBE_UNUSED u32 buf1[4], const u32 in_len) { if (((in_len * p0) + in_len) >= 32) return (in_len); @@ -1151,7 +1153,7 @@ static u32 rule_op_mangle_dupeword_times (const u32 p0, const u32 p1, u32 buf0[4 return out_len; } -static u32 rule_op_mangle_reflect (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) +static u32 rule_op_mangle_reflect (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 buf0[4], MAYBE_UNUSED u32 buf1[4], const u32 in_len) { if ((in_len + in_len) >= 32) return (in_len); @@ -1169,7 +1171,7 @@ static u32 rule_op_mangle_reflect (const u32 p0, const u32 p1, u32 buf0[4], u32 return out_len; } -static u32 rule_op_mangle_append (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) +static u32 rule_op_mangle_append (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 buf0[4], MAYBE_UNUSED u32 buf1[4], const u32 in_len) { if ((in_len + 1) >= 32) return (in_len); @@ -1182,7 +1184,7 @@ static u32 rule_op_mangle_append (const u32 p0, const u32 p1, u32 buf0[4], u32 b return out_len; } -static u32 rule_op_mangle_prepend (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) +static u32 rule_op_mangle_prepend (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 buf0[4], MAYBE_UNUSED u32 buf1[4], const u32 in_len) { if ((in_len + 1) >= 32) return (in_len); @@ -1197,7 +1199,7 @@ static u32 rule_op_mangle_prepend (const u32 p0, const u32 p1, u32 buf0[4], u32 return out_len; } -static u32 rule_op_mangle_rotate_left (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) +static u32 rule_op_mangle_rotate_left (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 buf0[4], MAYBE_UNUSED u32 buf1[4], const u32 in_len) { if (in_len == 0) return (in_len); @@ -1221,7 +1223,7 @@ static u32 rule_op_mangle_rotate_left (const u32 p0, const u32 p1, u32 buf0[4], return in_len; } -static u32 rule_op_mangle_rotate_right (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) +static u32 rule_op_mangle_rotate_right (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 buf0[4], MAYBE_UNUSED u32 buf1[4], const u32 in_len) { if (in_len == 0) return (in_len); @@ -1251,7 +1253,7 @@ static u32 rule_op_mangle_rotate_right (const u32 p0, const u32 p1, u32 buf0[4], return in_len; } -static u32 rule_op_mangle_delete_first (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) +static u32 rule_op_mangle_delete_first (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 buf0[4], MAYBE_UNUSED u32 buf1[4], const u32 in_len) { if (in_len == 0) return (in_len); @@ -1262,7 +1264,7 @@ static u32 rule_op_mangle_delete_first (const u32 p0, const u32 p1, u32 buf0[4], return in_len1; } -static u32 rule_op_mangle_delete_last (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) +static u32 rule_op_mangle_delete_last (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 buf0[4], MAYBE_UNUSED u32 buf1[4], const u32 in_len) { if (in_len == 0) return (in_len); @@ -1282,7 +1284,7 @@ static u32 rule_op_mangle_delete_last (const u32 p0, const u32 p1, u32 buf0[4], return in_len1; } -static u32 rule_op_mangle_delete_at (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) +static u32 rule_op_mangle_delete_at (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 buf0[4], MAYBE_UNUSED u32 buf1[4], const u32 in_len) { if (p0 >= in_len) return (in_len); @@ -1357,7 +1359,7 @@ static u32 rule_op_mangle_delete_at (const u32 p0, const u32 p1, u32 buf0[4], u3 return out_len; } -static u32 rule_op_mangle_extract (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) +static u32 rule_op_mangle_extract (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 buf0[4], MAYBE_UNUSED u32 buf1[4], const u32 in_len) { if (p0 >= in_len) return (in_len); @@ -1372,7 +1374,7 @@ static u32 rule_op_mangle_extract (const u32 p0, const u32 p1, u32 buf0[4], u32 return out_len; } -static u32 rule_op_mangle_omit (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) +static u32 rule_op_mangle_omit (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 buf0[4], MAYBE_UNUSED u32 buf1[4], const u32 in_len) { if (p0 >= in_len) return (in_len); @@ -1458,7 +1460,7 @@ static u32 rule_op_mangle_omit (const u32 p0, const u32 p1, u32 buf0[4], u32 buf return out_len; } -static u32 rule_op_mangle_insert (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) +static u32 rule_op_mangle_insert (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 buf0[4], MAYBE_UNUSED u32 buf1[4], const u32 in_len) { if (p0 > in_len) return (in_len); @@ -1530,7 +1532,7 @@ static u32 rule_op_mangle_insert (const u32 p0, const u32 p1, u32 buf0[4], u32 b return out_len; } -static u32 rule_op_mangle_overstrike (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) +static u32 rule_op_mangle_overstrike (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 buf0[4], MAYBE_UNUSED u32 buf1[4], const u32 in_len) { if (p0 >= in_len) return (in_len); @@ -1553,7 +1555,7 @@ static u32 rule_op_mangle_overstrike (const u32 p0, const u32 p1, u32 buf0[4], u return in_len; } -static u32 rule_op_mangle_truncate_at (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) +static u32 rule_op_mangle_truncate_at (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 buf0[4], MAYBE_UNUSED u32 buf1[4], const u32 in_len) { if (p0 >= in_len) return (in_len); @@ -1562,7 +1564,7 @@ static u32 rule_op_mangle_truncate_at (const u32 p0, const u32 p1, u32 buf0[4], return p0; } -static u32 rule_op_mangle_replace (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) +static u32 rule_op_mangle_replace (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 buf0[4], MAYBE_UNUSED u32 buf1[4], const u32 in_len) { const uchar4 tmp0 = (uchar4) (p0); const uchar4 tmp1 = (uchar4) (p1); @@ -1581,7 +1583,7 @@ static u32 rule_op_mangle_replace (const u32 p0, const u32 p1, u32 buf0[4], u32 return in_len; } -static u32 rule_op_mangle_purgechar (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) +static u32 rule_op_mangle_purgechar (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 buf0[4], MAYBE_UNUSED u32 buf1[4], const u32 in_len) { u32 out_len = 0; @@ -1622,13 +1624,7 @@ static u32 rule_op_mangle_purgechar (const u32 p0, const u32 p1, u32 buf0[4], u3 return out_len; } -static u32 rule_op_mangle_togglecase_rec (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) -{ - // TODO - return in_len; -} - -static u32 rule_op_mangle_dupechar_first (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) +static u32 rule_op_mangle_dupechar_first (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 buf0[4], MAYBE_UNUSED u32 buf1[4], const u32 in_len) { if ( in_len == 0) return (in_len); if ((in_len + p0) >= 32) return (in_len); @@ -1815,7 +1811,7 @@ static u32 rule_op_mangle_dupechar_first (const u32 p0, const u32 p1, u32 buf0[4 return out_len; } -static u32 rule_op_mangle_dupechar_last (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) +static u32 rule_op_mangle_dupechar_last (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 buf0[4], MAYBE_UNUSED u32 buf1[4], const u32 in_len) { if ( in_len == 0) return (in_len); if ((in_len + p0) >= 32) return (in_len); @@ -1849,7 +1845,7 @@ static u32 rule_op_mangle_dupechar_last (const u32 p0, const u32 p1, u32 buf0[4] return out_len; } -static u32 rule_op_mangle_dupechar_all (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) +static u32 rule_op_mangle_dupechar_all (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 buf0[4], MAYBE_UNUSED u32 buf1[4], const u32 in_len) { if ( in_len == 0) return (in_len); if ((in_len + in_len) >= 32) return (in_len); @@ -1882,7 +1878,7 @@ static u32 rule_op_mangle_dupechar_all (const u32 p0, const u32 p1, u32 buf0[4], return out_len; } -static u32 rule_op_mangle_switch_first (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) +static u32 rule_op_mangle_switch_first (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 buf0[4], MAYBE_UNUSED u32 buf1[4], const u32 in_len) { if (in_len < 2) return (in_len); @@ -1891,7 +1887,7 @@ static u32 rule_op_mangle_switch_first (const u32 p0, const u32 p1, u32 buf0[4], return in_len; } -static u32 rule_op_mangle_switch_last (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) +static u32 rule_op_mangle_switch_last (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 buf0[4], MAYBE_UNUSED u32 buf1[4], const u32 in_len) { if (in_len < 2) return (in_len); @@ -1976,7 +1972,7 @@ static u32 rule_op_mangle_switch_last (const u32 p0, const u32 p1, u32 buf0[4], return in_len; } -static u32 rule_op_mangle_switch_at (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) +static u32 rule_op_mangle_switch_at (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 buf0[4], MAYBE_UNUSED u32 buf1[4], const u32 in_len) { if (p0 >= in_len) return (in_len); if (p1 >= in_len) return (in_len); @@ -2223,7 +2219,7 @@ static u32 rule_op_mangle_switch_at (const u32 p0, const u32 p1, u32 buf0[4], u3 return in_len; } -static u32 rule_op_mangle_chr_shiftl (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) +static u32 rule_op_mangle_chr_shiftl (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 buf0[4], MAYBE_UNUSED u32 buf1[4], const u32 in_len) { if (p0 >= in_len) return (in_len); @@ -2245,7 +2241,7 @@ static u32 rule_op_mangle_chr_shiftl (const u32 p0, const u32 p1, u32 buf0[4], u return in_len; } -static u32 rule_op_mangle_chr_shiftr (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) +static u32 rule_op_mangle_chr_shiftr (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 buf0[4], MAYBE_UNUSED u32 buf1[4], const u32 in_len) { if (p0 >= in_len) return (in_len); @@ -2267,7 +2263,7 @@ static u32 rule_op_mangle_chr_shiftr (const u32 p0, const u32 p1, u32 buf0[4], u return in_len; } -static u32 rule_op_mangle_chr_incr (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) +static u32 rule_op_mangle_chr_incr (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 buf0[4], MAYBE_UNUSED u32 buf1[4], const u32 in_len) { if (p0 >= in_len) return (in_len); @@ -2291,7 +2287,7 @@ static u32 rule_op_mangle_chr_incr (const u32 p0, const u32 p1, u32 buf0[4], u32 return in_len; } -static u32 rule_op_mangle_chr_decr (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) +static u32 rule_op_mangle_chr_decr (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 buf0[4], MAYBE_UNUSED u32 buf1[4], const u32 in_len) { if (p0 >= in_len) return (in_len); @@ -2315,7 +2311,7 @@ static u32 rule_op_mangle_chr_decr (const u32 p0, const u32 p1, u32 buf0[4], u32 return in_len; } -static u32 rule_op_mangle_replace_np1 (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) +static u32 rule_op_mangle_replace_np1 (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 buf0[4], MAYBE_UNUSED u32 buf1[4], const u32 in_len) { if ((p0 + 1) >= in_len) return (in_len); @@ -2342,7 +2338,7 @@ static u32 rule_op_mangle_replace_np1 (const u32 p0, const u32 p1, u32 buf0[4], return in_len; } -static u32 rule_op_mangle_replace_nm1 (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) +static u32 rule_op_mangle_replace_nm1 (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 buf0[4], MAYBE_UNUSED u32 buf1[4], const u32 in_len) { if (p0 == 0) return (in_len); @@ -2371,7 +2367,7 @@ static u32 rule_op_mangle_replace_nm1 (const u32 p0, const u32 p1, u32 buf0[4], return in_len; } -static u32 rule_op_mangle_dupeblock_first (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) +static u32 rule_op_mangle_dupeblock_first (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 buf0[4], MAYBE_UNUSED u32 buf1[4], const u32 in_len) { if (p0 > in_len) return (in_len); @@ -2409,7 +2405,7 @@ static u32 rule_op_mangle_dupeblock_first (const u32 p0, const u32 p1, u32 buf0[ return out_len; } -static u32 rule_op_mangle_dupeblock_last (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) +static u32 rule_op_mangle_dupeblock_last (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 buf0[4], MAYBE_UNUSED u32 buf1[4], const u32 in_len) { if (p0 > in_len) return (in_len); @@ -2438,7 +2434,7 @@ static u32 rule_op_mangle_dupeblock_last (const u32 p0, const u32 p1, u32 buf0[4 return out_len; } -static u32 rule_op_mangle_title_sep (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) +static u32 rule_op_mangle_title_sep (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 buf0[4], MAYBE_UNUSED u32 buf1[4], const u32 in_len) { buf0[0] |= (generate_cmask (buf0[0])); buf0[1] |= (generate_cmask (buf0[1])); diff --git a/include/rp_kernel_on_cpu_optimized.h b/include/rp_kernel_on_cpu_optimized.h index e3b503c0a..f8b81bed4 100644 --- a/include/rp_kernel_on_cpu_optimized.h +++ b/include/rp_kernel_on_cpu_optimized.h @@ -7,6 +7,7 @@ #define _RP_KERNEL_ON_CPU_OPTIMIZED_H u32 apply_rule_optimized (const u32 name, const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len); -u32 apply_rules_optimized (u32 *cmds, u32 buf0[4], u32 buf1[4], const u32 len); + +u32 apply_rules_optimized (const u32 *cmds, u32 buf0[4], u32 buf1[4], const u32 len); #endif // _RP_KERNEL_ON_CPU_OPTIMIZED_H diff --git a/src/rp_kernel_on_cpu_optimized.c b/src/rp_kernel_on_cpu_optimized.c index 46b43bf0b..3567dca1f 100644 --- a/src/rp_kernel_on_cpu_optimized.c +++ b/src/rp_kernel_on_cpu_optimized.c @@ -9,7 +9,7 @@ #include "rp.h" #include "rp_kernel_on_cpu_optimized.h" -static u32 amd_bytealign (const u32 a, const u32 b, const u32 c) +static u32 amd_bytealign_S (const u32 a, const u32 b, const u32 c) { const u64 tmp = ((((u64) (a)) << 32) | ((u64) (b))) >> ((c & 3) * 8); @@ -140,321 +140,26 @@ static void truncate_left (u32 buf0[4], u32 buf1[4], const u32 offset) static void lshift_block (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[4]) { - out0[0] = in0[0] >> 8 | in0[1] << 24; - out0[1] = in0[1] >> 8 | in0[2] << 24; - out0[2] = in0[2] >> 8 | in0[3] << 24; - out0[3] = in0[3] >> 8 | in1[0] << 24; - out1[0] = in1[0] >> 8 | in1[1] << 24; - out1[1] = in1[1] >> 8 | in1[2] << 24; - out1[2] = in1[2] >> 8 | in1[3] << 24; - out1[3] = in1[3] >> 8; + out0[0] = amd_bytealign_S (in0[1], in0[0], 1); + out0[1] = amd_bytealign_S (in0[2], in0[1], 1); + out0[2] = amd_bytealign_S (in0[3], in0[2], 1); + out0[3] = amd_bytealign_S (in1[0], in0[3], 1); + out1[0] = amd_bytealign_S (in1[1], in1[0], 1); + out1[1] = amd_bytealign_S (in1[2], in1[1], 1); + out1[2] = amd_bytealign_S (in1[3], in1[2], 1); + out1[3] = amd_bytealign_S ( 0, in1[3], 1); } static void rshift_block (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[4]) { - out1[3] = in1[3] << 8 | in1[2] >> 24; - out1[2] = in1[2] << 8 | in1[1] >> 24; - out1[1] = in1[1] << 8 | in1[0] >> 24; - out1[0] = in1[0] << 8 | in0[3] >> 24; - out0[3] = in0[3] << 8 | in0[2] >> 24; - out0[2] = in0[2] << 8 | in0[1] >> 24; - out0[1] = in0[1] << 8 | in0[0] >> 24; - out0[0] = in0[0] << 8; -} - -static void rshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[4], const u32 num) -{ - switch (num) - { - case 0: out1[3] = in1[3]; - out1[2] = in1[2]; - out1[1] = in1[1]; - out1[0] = in1[0]; - out0[3] = in0[3]; - out0[2] = in0[2]; - out0[1] = in0[1]; - out0[0] = in0[0]; - break; - case 1: out1[3] = in1[3] << 8 | in1[2] >> 24; - out1[2] = in1[2] << 8 | in1[1] >> 24; - out1[1] = in1[1] << 8 | in1[0] >> 24; - out1[0] = in1[0] << 8 | in0[3] >> 24; - out0[3] = in0[3] << 8 | in0[2] >> 24; - out0[2] = in0[2] << 8 | in0[1] >> 24; - out0[1] = in0[1] << 8 | in0[0] >> 24; - out0[0] = in0[0] << 8; - break; - case 2: out1[3] = in1[3] << 16 | in1[2] >> 16; - out1[2] = in1[2] << 16 | in1[1] >> 16; - out1[1] = in1[1] << 16 | in1[0] >> 16; - out1[0] = in1[0] << 16 | in0[3] >> 16; - out0[3] = in0[3] << 16 | in0[2] >> 16; - out0[2] = in0[2] << 16 | in0[1] >> 16; - out0[1] = in0[1] << 16 | in0[0] >> 16; - out0[0] = in0[0] << 16; - break; - case 3: out1[3] = in1[3] << 24 | in1[2] >> 8; - out1[2] = in1[2] << 24 | in1[1] >> 8; - out1[1] = in1[1] << 24 | in1[0] >> 8; - out1[0] = in1[0] << 24 | in0[3] >> 8; - out0[3] = in0[3] << 24 | in0[2] >> 8; - out0[2] = in0[2] << 24 | in0[1] >> 8; - out0[1] = in0[1] << 24 | in0[0] >> 8; - out0[0] = in0[0] << 24; - break; - case 4: out1[3] = in1[2]; - out1[2] = in1[1]; - out1[1] = in1[0]; - out1[0] = in0[3]; - out0[3] = in0[2]; - out0[2] = in0[1]; - out0[1] = in0[0]; - out0[0] = 0; - break; - case 5: out1[3] = in1[2] << 8 | in1[1] >> 24; - out1[2] = in1[1] << 8 | in1[0] >> 24; - out1[1] = in1[0] << 8 | in0[3] >> 24; - out1[0] = in0[3] << 8 | in0[2] >> 24; - out0[3] = in0[2] << 8 | in0[1] >> 24; - out0[2] = in0[1] << 8 | in0[0] >> 24; - out0[1] = in0[0] << 8; - out0[0] = 0; - break; - case 6: out1[3] = in1[2] << 16 | in1[1] >> 16; - out1[2] = in1[1] << 16 | in1[0] >> 16; - out1[1] = in1[0] << 16 | in0[3] >> 16; - out1[0] = in0[3] << 16 | in0[2] >> 16; - out0[3] = in0[2] << 16 | in0[1] >> 16; - out0[2] = in0[1] << 16 | in0[0] >> 16; - out0[1] = in0[0] << 16; - out0[0] = 0; - break; - case 7: out1[3] = in1[2] << 24 | in1[1] >> 8; - out1[2] = in1[1] << 24 | in1[0] >> 8; - out1[1] = in1[0] << 24 | in0[3] >> 8; - out1[0] = in0[3] << 24 | in0[2] >> 8; - out0[3] = in0[2] << 24 | in0[1] >> 8; - out0[2] = in0[1] << 24 | in0[0] >> 8; - out0[1] = in0[0] << 24; - out0[0] = 0; - break; - case 8: out1[3] = in1[1]; - out1[2] = in1[0]; - out1[1] = in0[3]; - out1[0] = in0[2]; - out0[3] = in0[1]; - out0[2] = in0[0]; - out0[1] = 0; - out0[0] = 0; - break; - case 9: out1[3] = in1[1] << 8 | in1[0] >> 24; - out1[2] = in1[0] << 8 | in0[3] >> 24; - out1[1] = in0[3] << 8 | in0[2] >> 24; - out1[0] = in0[2] << 8 | in0[1] >> 24; - out0[3] = in0[1] << 8 | in0[0] >> 24; - out0[2] = in0[0] << 8; - out0[1] = 0; - out0[0] = 0; - break; - case 10: out1[3] = in1[1] << 16 | in1[0] >> 16; - out1[2] = in1[0] << 16 | in0[3] >> 16; - out1[1] = in0[3] << 16 | in0[2] >> 16; - out1[0] = in0[2] << 16 | in0[1] >> 16; - out0[3] = in0[1] << 16 | in0[0] >> 16; - out0[2] = in0[0] << 16; - out0[1] = 0; - out0[0] = 0; - break; - case 11: out1[3] = in1[1] << 24 | in1[0] >> 8; - out1[2] = in1[0] << 24 | in0[3] >> 8; - out1[1] = in0[3] << 24 | in0[2] >> 8; - out1[0] = in0[2] << 24 | in0[1] >> 8; - out0[3] = in0[1] << 24 | in0[0] >> 8; - out0[2] = in0[0] << 24; - out0[1] = 0; - out0[0] = 0; - break; - case 12: out1[3] = in1[0]; - out1[2] = in0[3]; - out1[1] = in0[2]; - out1[0] = in0[1]; - out0[3] = in0[0]; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 13: out1[3] = in1[0] << 8 | in0[3] >> 24; - out1[2] = in0[3] << 8 | in0[2] >> 24; - out1[1] = in0[2] << 8 | in0[1] >> 24; - out1[0] = in0[1] << 8 | in0[0] >> 24; - out0[3] = in0[0] << 8; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 14: out1[3] = in1[0] << 16 | in0[3] >> 16; - out1[2] = in0[3] << 16 | in0[2] >> 16; - out1[1] = in0[2] << 16 | in0[1] >> 16; - out1[0] = in0[1] << 16 | in0[0] >> 16; - out0[3] = in0[0] << 16; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 15: out1[3] = in1[0] << 24 | in0[3] >> 8; - out1[2] = in0[3] << 24 | in0[2] >> 8; - out1[1] = in0[2] << 24 | in0[1] >> 8; - out1[0] = in0[1] << 24 | in0[0] >> 8; - out0[3] = in0[0] << 24; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 16: out1[3] = in0[3]; - out1[2] = in0[2]; - out1[1] = in0[1]; - out1[0] = in0[0]; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 17: out1[3] = in0[3] << 8 | in0[2] >> 24; - out1[2] = in0[2] << 8 | in0[1] >> 24; - out1[1] = in0[1] << 8 | in0[0] >> 24; - out1[0] = in0[0] << 8; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 18: out1[3] = in0[3] << 16 | in0[2] >> 16; - out1[2] = in0[2] << 16 | in0[1] >> 16; - out1[1] = in0[1] << 16 | in0[0] >> 16; - out1[0] = in0[0] << 16; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 19: out1[3] = in0[3] << 24 | in0[2] >> 8; - out1[2] = in0[2] << 24 | in0[1] >> 8; - out1[1] = in0[1] << 24 | in0[0] >> 8; - out1[0] = in0[0] << 24; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 20: out1[3] = in0[2]; - out1[2] = in0[1]; - out1[1] = in0[0]; - out1[0] = 0; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 21: out1[3] = in0[2] << 8 | in0[1] >> 24; - out1[2] = in0[1] << 8 | in0[0] >> 24; - out1[1] = in0[0] << 8; - out1[0] = 0; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 22: out1[3] = in0[2] << 16 | in0[1] >> 16; - out1[2] = in0[1] << 16 | in0[0] >> 16; - out1[1] = in0[0] << 16; - out1[0] = 0; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 23: out1[3] = in0[2] << 24 | in0[1] >> 8; - out1[2] = in0[1] << 24 | in0[0] >> 8; - out1[1] = in0[0] << 24; - out1[0] = 0; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 24: out1[3] = in0[1]; - out1[2] = in0[0]; - out1[1] = 0; - out1[0] = 0; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 25: out1[3] = in0[1] << 8 | in0[0] >> 24; - out1[2] = in0[0] << 8; - out1[1] = 0; - out1[0] = 0; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 26: out1[3] = in0[1] << 16 | in0[0] >> 16; - out1[2] = in0[0] << 16; - out1[1] = 0; - out1[0] = 0; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 27: out1[3] = in0[1] << 24 | in0[0] >> 8; - out1[2] = in0[0] << 24; - out1[1] = 0; - out1[0] = 0; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 28: out1[3] = in0[0]; - out1[2] = 0; - out1[1] = 0; - out1[0] = 0; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 29: out1[3] = in0[0] << 8; - out1[2] = 0; - out1[1] = 0; - out1[0] = 0; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 30: out1[3] = in0[0] << 16; - out1[2] = 0; - out1[1] = 0; - out1[0] = 0; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 31: out1[3] = in0[0] << 24; - out1[2] = 0; - out1[1] = 0; - out1[0] = 0; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - } + out1[3] = amd_bytealign_S (in1[3], in1[2], 3); + out1[2] = amd_bytealign_S (in1[2], in1[1], 3); + out1[1] = amd_bytealign_S (in1[1], in1[0], 3); + out1[0] = amd_bytealign_S (in1[0], in0[3], 3); + out0[3] = amd_bytealign_S (in0[3], in0[2], 3); + out0[2] = amd_bytealign_S (in0[2], in0[1], 3); + out0[1] = amd_bytealign_S (in0[1], in0[0], 3); + out0[0] = amd_bytealign_S (in0[0], 0, 3); } static void lshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[4], const u32 num) @@ -470,32 +175,32 @@ static void lshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[2] = in1[2]; out1[3] = in1[3]; break; - case 1: out0[0] = in0[0] >> 8 | in0[1] << 24; - out0[1] = in0[1] >> 8 | in0[2] << 24; - out0[2] = in0[2] >> 8 | in0[3] << 24; - out0[3] = in0[3] >> 8 | in1[0] << 24; - out1[0] = in1[0] >> 8 | in1[1] << 24; - out1[1] = in1[1] >> 8 | in1[2] << 24; - out1[2] = in1[2] >> 8 | in1[3] << 24; - out1[3] = in1[3] >> 8; + case 1: out0[0] = amd_bytealign_S (in0[1], in0[0], 1); + out0[1] = amd_bytealign_S (in0[2], in0[1], 1); + out0[2] = amd_bytealign_S (in0[3], in0[2], 1); + out0[3] = amd_bytealign_S (in1[0], in0[3], 1); + out1[0] = amd_bytealign_S (in1[1], in1[0], 1); + out1[1] = amd_bytealign_S (in1[2], in1[1], 1); + out1[2] = amd_bytealign_S (in1[3], in1[2], 1); + out1[3] = amd_bytealign_S ( 0, in1[3], 1); break; - case 2: out0[0] = in0[0] >> 16 | in0[1] << 16; - out0[1] = in0[1] >> 16 | in0[2] << 16; - out0[2] = in0[2] >> 16 | in0[3] << 16; - out0[3] = in0[3] >> 16 | in1[0] << 16; - out1[0] = in1[0] >> 16 | in1[1] << 16; - out1[1] = in1[1] >> 16 | in1[2] << 16; - out1[2] = in1[2] >> 16 | in1[3] << 16; - out1[3] = in1[3] >> 16; + case 2: out0[0] = amd_bytealign_S (in0[1], in0[0], 2); + out0[1] = amd_bytealign_S (in0[2], in0[1], 2); + out0[2] = amd_bytealign_S (in0[3], in0[2], 2); + out0[3] = amd_bytealign_S (in1[0], in0[3], 2); + out1[0] = amd_bytealign_S (in1[1], in1[0], 2); + out1[1] = amd_bytealign_S (in1[2], in1[1], 2); + out1[2] = amd_bytealign_S (in1[3], in1[2], 2); + out1[3] = amd_bytealign_S ( 0, in1[3], 2); break; - case 3: out0[0] = in0[0] >> 24 | in0[1] << 8; - out0[1] = in0[1] >> 24 | in0[2] << 8; - out0[2] = in0[2] >> 24 | in0[3] << 8; - out0[3] = in0[3] >> 24 | in1[0] << 8; - out1[0] = in1[0] >> 24 | in1[1] << 8; - out1[1] = in1[1] >> 24 | in1[2] << 8; - out1[2] = in1[2] >> 24 | in1[3] << 8; - out1[3] = in1[3] >> 24; + case 3: out0[0] = amd_bytealign_S (in0[1], in0[0], 3); + out0[1] = amd_bytealign_S (in0[2], in0[1], 3); + out0[2] = amd_bytealign_S (in0[3], in0[2], 3); + out0[3] = amd_bytealign_S (in1[0], in0[3], 3); + out1[0] = amd_bytealign_S (in1[1], in1[0], 3); + out1[1] = amd_bytealign_S (in1[2], in1[1], 3); + out1[2] = amd_bytealign_S (in1[3], in1[2], 3); + out1[3] = amd_bytealign_S ( 0, in1[3], 3); break; case 4: out0[0] = in0[1]; out0[1] = in0[2]; @@ -506,31 +211,31 @@ static void lshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[2] = in1[3]; out1[3] = 0; break; - case 5: out0[0] = in0[1] >> 8 | in0[2] << 24; - out0[1] = in0[2] >> 8 | in0[3] << 24; - out0[2] = in0[3] >> 8 | in1[0] << 24; - out0[3] = in1[0] >> 8 | in1[1] << 24; - out1[0] = in1[1] >> 8 | in1[2] << 24; - out1[1] = in1[2] >> 8 | in1[3] << 24; - out1[2] = in1[3] >> 8; + case 5: out0[0] = amd_bytealign_S (in0[2], in0[1], 1); + out0[1] = amd_bytealign_S (in0[3], in0[2], 1); + out0[2] = amd_bytealign_S (in1[0], in0[3], 1); + out0[3] = amd_bytealign_S (in1[1], in1[0], 1); + out1[0] = amd_bytealign_S (in1[2], in1[1], 1); + out1[1] = amd_bytealign_S (in1[3], in1[2], 1); + out1[2] = amd_bytealign_S ( 0, in1[3], 1); out1[3] = 0; break; - case 6: out0[0] = in0[1] >> 16 | in0[2] << 16; - out0[1] = in0[2] >> 16 | in0[3] << 16; - out0[2] = in0[3] >> 16 | in1[0] << 16; - out0[3] = in1[0] >> 16 | in1[1] << 16; - out1[0] = in1[1] >> 16 | in1[2] << 16; - out1[1] = in1[2] >> 16 | in1[3] << 16; - out1[2] = in1[3] >> 16; + case 6: out0[0] = amd_bytealign_S (in0[2], in0[1], 2); + out0[1] = amd_bytealign_S (in0[3], in0[2], 2); + out0[2] = amd_bytealign_S (in1[0], in0[3], 2); + out0[3] = amd_bytealign_S (in1[1], in1[0], 2); + out1[0] = amd_bytealign_S (in1[2], in1[1], 2); + out1[1] = amd_bytealign_S (in1[3], in1[2], 2); + out1[2] = amd_bytealign_S ( 0, in1[3], 2); out1[3] = 0; break; - case 7: out0[0] = in0[1] >> 24 | in0[2] << 8; - out0[1] = in0[2] >> 24 | in0[3] << 8; - out0[2] = in0[3] >> 24 | in1[0] << 8; - out0[3] = in1[0] >> 24 | in1[1] << 8; - out1[0] = in1[1] >> 24 | in1[2] << 8; - out1[1] = in1[2] >> 24 | in1[3] << 8; - out1[2] = in1[3] >> 24; + case 7: out0[0] = amd_bytealign_S (in0[2], in0[1], 3); + out0[1] = amd_bytealign_S (in0[3], in0[2], 3); + out0[2] = amd_bytealign_S (in1[0], in0[3], 3); + out0[3] = amd_bytealign_S (in1[1], in1[0], 3); + out1[0] = amd_bytealign_S (in1[2], in1[1], 3); + out1[1] = amd_bytealign_S (in1[3], in1[2], 3); + out1[2] = amd_bytealign_S ( 0, in1[3], 3); out1[3] = 0; break; case 8: out0[0] = in0[2]; @@ -542,30 +247,30 @@ static void lshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[2] = 0; out1[3] = 0; break; - case 9: out0[0] = in0[2] >> 8 | in0[3] << 24; - out0[1] = in0[3] >> 8 | in1[0] << 24; - out0[2] = in1[0] >> 8 | in1[1] << 24; - out0[3] = in1[1] >> 8 | in1[2] << 24; - out1[0] = in1[2] >> 8 | in1[3] << 24; - out1[1] = in1[3] >> 8; + case 9: out0[0] = amd_bytealign_S (in0[3], in0[2], 1); + out0[1] = amd_bytealign_S (in1[0], in0[3], 1); + out0[2] = amd_bytealign_S (in1[1], in1[0], 1); + out0[3] = amd_bytealign_S (in1[2], in1[1], 1); + out1[0] = amd_bytealign_S (in1[3], in1[2], 1); + out1[1] = amd_bytealign_S ( 0, in1[3], 1); out1[2] = 0; out1[3] = 0; break; - case 10: out0[0] = in0[2] >> 16 | in0[3] << 16; - out0[1] = in0[3] >> 16 | in1[0] << 16; - out0[2] = in1[0] >> 16 | in1[1] << 16; - out0[3] = in1[1] >> 16 | in1[2] << 16; - out1[0] = in1[2] >> 16 | in1[3] << 16; - out1[1] = in1[3] >> 16; + case 10: out0[0] = amd_bytealign_S (in0[3], in0[2], 2); + out0[1] = amd_bytealign_S (in1[0], in0[3], 2); + out0[2] = amd_bytealign_S (in1[1], in1[0], 2); + out0[3] = amd_bytealign_S (in1[2], in1[1], 2); + out1[0] = amd_bytealign_S (in1[3], in1[2], 2); + out1[1] = amd_bytealign_S ( 0, in1[3], 2); out1[2] = 0; out1[3] = 0; break; - case 11: out0[0] = in0[2] >> 24 | in0[3] << 8; - out0[1] = in0[3] >> 24 | in1[0] << 8; - out0[2] = in1[0] >> 24 | in1[1] << 8; - out0[3] = in1[1] >> 24 | in1[2] << 8; - out1[0] = in1[2] >> 24 | in1[3] << 8; - out1[1] = in1[3] >> 24; + case 11: out0[0] = amd_bytealign_S (in0[3], in0[2], 3); + out0[1] = amd_bytealign_S (in1[0], in0[3], 3); + out0[2] = amd_bytealign_S (in1[1], in1[0], 3); + out0[3] = amd_bytealign_S (in1[2], in1[1], 3); + out1[0] = amd_bytealign_S (in1[3], in1[2], 3); + out1[1] = amd_bytealign_S ( 0, in1[3], 3); out1[2] = 0; out1[3] = 0; break; @@ -578,30 +283,29 @@ static void lshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[2] = 0; out1[3] = 0; break; - case 13: - out0[0] = in0[3] >> 8 | in1[0] << 24; - out0[1] = in1[0] >> 8 | in1[1] << 24; - out0[2] = in1[1] >> 8 | in1[2] << 24; - out0[3] = in1[2] >> 8 | in1[3] << 24; - out1[0] = in1[3] >> 8; + case 13: out0[0] = amd_bytealign_S (in1[0], in0[3], 1); + out0[1] = amd_bytealign_S (in1[1], in1[0], 1); + out0[2] = amd_bytealign_S (in1[2], in1[1], 1); + out0[3] = amd_bytealign_S (in1[3], in1[2], 1); + out1[0] = amd_bytealign_S ( 0, in1[3], 1); out1[1] = 0; out1[2] = 0; out1[3] = 0; break; - case 14: out0[0] = in0[3] >> 16 | in1[0] << 16; - out0[1] = in1[0] >> 16 | in1[1] << 16; - out0[2] = in1[1] >> 16 | in1[2] << 16; - out0[3] = in1[2] >> 16 | in1[3] << 16; - out1[0] = in1[3] >> 16; + case 14: out0[0] = amd_bytealign_S (in1[0], in0[3], 2); + out0[1] = amd_bytealign_S (in1[1], in1[0], 2); + out0[2] = amd_bytealign_S (in1[2], in1[1], 2); + out0[3] = amd_bytealign_S (in1[3], in1[2], 2); + out1[0] = amd_bytealign_S ( 0, in1[3], 2); out1[1] = 0; out1[2] = 0; out1[3] = 0; break; - case 15: out0[0] = in0[3] >> 24 | in1[0] << 8; - out0[1] = in1[0] >> 24 | in1[1] << 8; - out0[2] = in1[1] >> 24 | in1[2] << 8; - out0[3] = in1[2] >> 24 | in1[3] << 8; - out1[0] = in1[3] >> 24; + case 15: out0[0] = amd_bytealign_S (in1[0], in0[3], 3); + out0[1] = amd_bytealign_S (in1[1], in1[0], 3); + out0[2] = amd_bytealign_S (in1[2], in1[1], 3); + out0[3] = amd_bytealign_S (in1[3], in1[2], 3); + out1[0] = amd_bytealign_S ( 0, in1[3], 3); out1[1] = 0; out1[2] = 0; out1[3] = 0; @@ -615,28 +319,28 @@ static void lshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[2] = 0; out1[3] = 0; break; - case 17: out0[0] = in1[0] >> 8 | in1[1] << 24; - out0[1] = in1[1] >> 8 | in1[2] << 24; - out0[2] = in1[2] >> 8 | in1[3] << 24; - out0[3] = in1[3] >> 8; + case 17: out0[0] = amd_bytealign_S (in1[1], in1[0], 1); + out0[1] = amd_bytealign_S (in1[2], in1[1], 1); + out0[2] = amd_bytealign_S (in1[3], in1[2], 1); + out0[3] = amd_bytealign_S ( 0, in1[3], 1); out1[0] = 0; out1[1] = 0; out1[2] = 0; out1[3] = 0; break; - case 18: out0[0] = in1[0] >> 16 | in1[1] << 16; - out0[1] = in1[1] >> 16 | in1[2] << 16; - out0[2] = in1[2] >> 16 | in1[3] << 16; - out0[3] = in1[3] >> 16; + case 18: out0[0] = amd_bytealign_S (in1[1], in1[0], 2); + out0[1] = amd_bytealign_S (in1[2], in1[1], 2); + out0[2] = amd_bytealign_S (in1[3], in1[2], 2); + out0[3] = amd_bytealign_S ( 0, in1[3], 2); out1[0] = 0; out1[1] = 0; out1[2] = 0; out1[3] = 0; break; - case 19: out0[0] = in1[0] >> 24 | in1[1] << 8; - out0[1] = in1[1] >> 24 | in1[2] << 8; - out0[2] = in1[2] >> 24 | in1[3] << 8; - out0[3] = in1[3] >> 24; + case 19: out0[0] = amd_bytealign_S (in1[1], in1[0], 3); + out0[1] = amd_bytealign_S (in1[2], in1[1], 3); + out0[2] = amd_bytealign_S (in1[3], in1[2], 3); + out0[3] = amd_bytealign_S ( 0, in1[3], 3); out1[0] = 0; out1[1] = 0; out1[2] = 0; @@ -651,27 +355,27 @@ static void lshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[2] = 0; out1[3] = 0; break; - case 21: out0[0] = in1[1] >> 8 | in1[2] << 24; - out0[1] = in1[2] >> 8 | in1[3] << 24; - out0[2] = in1[3] >> 8; + case 21: out0[0] = amd_bytealign_S (in1[2], in1[1], 1); + out0[1] = amd_bytealign_S (in1[3], in1[2], 1); + out0[2] = amd_bytealign_S ( 0, in1[3], 1); out0[3] = 0; out1[0] = 0; out1[1] = 0; out1[2] = 0; out1[3] = 0; break; - case 22: out0[0] = in1[1] >> 16 | in1[2] << 16; - out0[1] = in1[2] >> 16 | in1[3] << 16; - out0[2] = in1[3] >> 16; + case 22: out0[0] = amd_bytealign_S (in1[2], in1[1], 2); + out0[1] = amd_bytealign_S (in1[3], in1[2], 2); + out0[2] = amd_bytealign_S ( 0, in1[3], 2); out0[3] = 0; out1[0] = 0; out1[1] = 0; out1[2] = 0; out1[3] = 0; break; - case 23: out0[0] = in1[1] >> 24 | in1[2] << 8; - out0[1] = in1[2] >> 24 | in1[3] << 8; - out0[2] = in1[3] >> 24; + case 23: out0[0] = amd_bytealign_S (in1[2], in1[1], 3); + out0[1] = amd_bytealign_S (in1[3], in1[2], 3); + out0[2] = amd_bytealign_S ( 0, in1[3], 3); out0[3] = 0; out1[0] = 0; out1[1] = 0; @@ -687,8 +391,8 @@ static void lshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[2] = 0; out1[3] = 0; break; - case 25: out0[0] = in1[2] >> 8 | in1[3] << 24; - out0[1] = in1[3] >> 8; + case 25: out0[0] = amd_bytealign_S (in1[3], in1[2], 1); + out0[1] = amd_bytealign_S ( 0, in1[3], 1); out0[2] = 0; out0[3] = 0; out1[0] = 0; @@ -696,8 +400,8 @@ static void lshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[2] = 0; out1[3] = 0; break; - case 26: out0[0] = in1[2] >> 16 | in1[3] << 16; - out0[1] = in1[3] >> 16; + case 26: out0[0] = amd_bytealign_S (in1[3], in1[2], 2); + out0[1] = amd_bytealign_S ( 0, in1[3], 2); out0[2] = 0; out0[3] = 0; out1[0] = 0; @@ -705,8 +409,8 @@ static void lshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[2] = 0; out1[3] = 0; break; - case 27: out0[0] = in1[2] >> 24 | in1[3] << 8; - out0[1] = in1[3] >> 24; + case 27: out0[0] = amd_bytealign_S (in1[3], in1[2], 3); + out0[1] = amd_bytealign_S ( 0, in1[3], 3); out0[2] = 0; out0[3] = 0; out1[0] = 0; @@ -723,7 +427,7 @@ static void lshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[2] = 0; out1[3] = 0; break; - case 29: out0[0] = in1[3] >> 8; + case 29: out0[0] = amd_bytealign_S ( 0, in1[3], 1); out0[1] = 0; out0[2] = 0; out0[3] = 0; @@ -732,7 +436,7 @@ static void lshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[2] = 0; out1[3] = 0; break; - case 30: out0[0] = in1[3] >> 16; + case 30: out0[0] = amd_bytealign_S ( 0, in1[3], 2); out0[1] = 0; out0[2] = 0; out0[3] = 0; @@ -741,7 +445,7 @@ static void lshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[2] = 0; out1[3] = 0; break; - case 31: out0[0] = in1[3] >> 24; + case 31: out0[0] = amd_bytealign_S ( 0, in1[3], 3); out0[1] = 0; out0[2] = 0; out0[3] = 0; @@ -753,6 +457,301 @@ static void lshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 } } +static void rshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[4], const u32 num) +{ + switch (num) + { + case 0: out1[3] = in1[3]; + out1[2] = in1[2]; + out1[1] = in1[1]; + out1[0] = in1[0]; + out0[3] = in0[3]; + out0[2] = in0[2]; + out0[1] = in0[1]; + out0[0] = in0[0]; + break; + case 1: out1[3] = amd_bytealign_S (in1[3], in1[2], 3); + out1[2] = amd_bytealign_S (in1[2], in1[1], 3); + out1[1] = amd_bytealign_S (in1[1], in1[0], 3); + out1[0] = amd_bytealign_S (in1[0], in0[3], 3); + out0[3] = amd_bytealign_S (in0[3], in0[2], 3); + out0[2] = amd_bytealign_S (in0[2], in0[1], 3); + out0[1] = amd_bytealign_S (in0[1], in0[0], 3); + out0[0] = amd_bytealign_S (in0[0], 0, 3); + break; + case 2: out1[3] = amd_bytealign_S (in1[3], in1[2], 2); + out1[2] = amd_bytealign_S (in1[2], in1[1], 2); + out1[1] = amd_bytealign_S (in1[1], in1[0], 2); + out1[0] = amd_bytealign_S (in1[0], in0[3], 2); + out0[3] = amd_bytealign_S (in0[3], in0[2], 2); + out0[2] = amd_bytealign_S (in0[2], in0[1], 2); + out0[1] = amd_bytealign_S (in0[1], in0[0], 2); + out0[0] = amd_bytealign_S (in0[0], 0, 2); + break; + case 3: out1[3] = amd_bytealign_S (in1[3], in1[2], 1); + out1[2] = amd_bytealign_S (in1[2], in1[1], 1); + out1[1] = amd_bytealign_S (in1[1], in1[0], 1); + out1[0] = amd_bytealign_S (in1[0], in0[3], 1); + out0[3] = amd_bytealign_S (in0[3], in0[2], 1); + out0[2] = amd_bytealign_S (in0[2], in0[1], 1); + out0[1] = amd_bytealign_S (in0[1], in0[0], 1); + out0[0] = amd_bytealign_S (in0[0], 0, 1); + break; + case 4: out1[3] = in1[2]; + out1[2] = in1[1]; + out1[1] = in1[0]; + out1[0] = in0[3]; + out0[3] = in0[2]; + out0[2] = in0[1]; + out0[1] = in0[0]; + out0[0] = 0; + break; + case 5: out1[3] = amd_bytealign_S (in1[2], in1[1], 3); + out1[2] = amd_bytealign_S (in1[1], in1[0], 3); + out1[1] = amd_bytealign_S (in1[0], in0[3], 3); + out1[0] = amd_bytealign_S (in0[3], in0[2], 3); + out0[3] = amd_bytealign_S (in0[2], in0[1], 3); + out0[2] = amd_bytealign_S (in0[1], in0[0], 3); + out0[1] = amd_bytealign_S (in0[0], 0, 3); + out0[0] = 0; + break; + case 6: out1[3] = amd_bytealign_S (in1[2], in1[1], 2); + out1[2] = amd_bytealign_S (in1[1], in1[0], 2); + out1[1] = amd_bytealign_S (in1[0], in0[3], 2); + out1[0] = amd_bytealign_S (in0[3], in0[2], 2); + out0[3] = amd_bytealign_S (in0[2], in0[1], 2); + out0[2] = amd_bytealign_S (in0[1], in0[0], 2); + out0[1] = amd_bytealign_S (in0[0], 0, 2); + out0[0] = 0; + break; + case 7: out1[3] = amd_bytealign_S (in1[2], in1[1], 1); + out1[2] = amd_bytealign_S (in1[1], in1[0], 1); + out1[1] = amd_bytealign_S (in1[0], in0[3], 1); + out1[0] = amd_bytealign_S (in0[3], in0[2], 1); + out0[3] = amd_bytealign_S (in0[2], in0[1], 1); + out0[2] = amd_bytealign_S (in0[1], in0[0], 1); + out0[1] = amd_bytealign_S (in0[0], 0, 1); + out0[0] = 0; + break; + case 8: out1[3] = in1[1]; + out1[2] = in1[0]; + out1[1] = in0[3]; + out1[0] = in0[2]; + out0[3] = in0[1]; + out0[2] = in0[0]; + out0[1] = 0; + out0[0] = 0; + break; + case 9: out1[3] = amd_bytealign_S (in1[1], in1[0], 3); + out1[2] = amd_bytealign_S (in1[0], in0[3], 3); + out1[1] = amd_bytealign_S (in0[3], in0[2], 3); + out1[0] = amd_bytealign_S (in0[2], in0[1], 3); + out0[3] = amd_bytealign_S (in0[1], in0[0], 3); + out0[2] = amd_bytealign_S (in0[0], 0, 3); + out0[1] = 0; + out0[0] = 0; + break; + case 10: out1[3] = amd_bytealign_S (in1[1], in1[0], 2); + out1[2] = amd_bytealign_S (in1[0], in0[3], 2); + out1[1] = amd_bytealign_S (in0[3], in0[2], 2); + out1[0] = amd_bytealign_S (in0[2], in0[1], 2); + out0[3] = amd_bytealign_S (in0[1], in0[0], 2); + out0[2] = amd_bytealign_S (in0[0], 0, 2); + out0[1] = 0; + out0[0] = 0; + break; + case 11: out1[3] = amd_bytealign_S (in1[1], in1[0], 1); + out1[2] = amd_bytealign_S (in1[0], in0[3], 1); + out1[1] = amd_bytealign_S (in0[3], in0[2], 1); + out1[0] = amd_bytealign_S (in0[2], in0[1], 1); + out0[3] = amd_bytealign_S (in0[1], in0[0], 1); + out0[2] = amd_bytealign_S (in0[0], 0, 1); + out0[1] = 0; + out0[0] = 0; + break; + case 12: out1[3] = in1[0]; + out1[2] = in0[3]; + out1[1] = in0[2]; + out1[0] = in0[1]; + out0[3] = in0[0]; + out0[2] = 0; + out0[1] = 0; + out0[0] = 0; + break; + case 13: out1[3] = amd_bytealign_S (in1[0], in0[3], 3); + out1[2] = amd_bytealign_S (in0[3], in0[2], 3); + out1[1] = amd_bytealign_S (in0[2], in0[1], 3); + out1[0] = amd_bytealign_S (in0[1], in0[0], 3); + out0[3] = amd_bytealign_S (in0[0], 0, 3); + out0[2] = 0; + out0[1] = 0; + out0[0] = 0; + break; + case 14: out1[3] = amd_bytealign_S (in1[0], in0[3], 2); + out1[2] = amd_bytealign_S (in0[3], in0[2], 2); + out1[1] = amd_bytealign_S (in0[2], in0[1], 2); + out1[0] = amd_bytealign_S (in0[1], in0[0], 2); + out0[3] = amd_bytealign_S (in0[0], 0, 2); + out0[2] = 0; + out0[1] = 0; + out0[0] = 0; + break; + case 15: out1[3] = amd_bytealign_S (in1[0], in0[3], 1); + out1[2] = amd_bytealign_S (in0[3], in0[2], 1); + out1[1] = amd_bytealign_S (in0[2], in0[1], 1); + out1[0] = amd_bytealign_S (in0[1], in0[0], 1); + out0[3] = amd_bytealign_S (in0[0], 0, 1); + out0[2] = 0; + out0[1] = 0; + out0[0] = 0; + break; + case 16: out1[3] = in0[3]; + out1[2] = in0[2]; + out1[1] = in0[1]; + out1[0] = in0[0]; + out0[3] = 0; + out0[2] = 0; + out0[1] = 0; + out0[0] = 0; + break; + case 17: out1[3] = amd_bytealign_S (in0[3], in0[2], 3); + out1[2] = amd_bytealign_S (in0[2], in0[1], 3); + out1[1] = amd_bytealign_S (in0[1], in0[0], 3); + out1[0] = amd_bytealign_S (in0[0], 0, 3); + out0[3] = 0; + out0[2] = 0; + out0[1] = 0; + out0[0] = 0; + break; + case 18: out1[3] = amd_bytealign_S (in0[3], in0[2], 2); + out1[2] = amd_bytealign_S (in0[2], in0[1], 2); + out1[1] = amd_bytealign_S (in0[1], in0[0], 2); + out1[0] = amd_bytealign_S (in0[0], 0, 2); + out0[3] = 0; + out0[2] = 0; + out0[1] = 0; + out0[0] = 0; + break; + case 19: out1[3] = amd_bytealign_S (in0[3], in0[2], 1); + out1[2] = amd_bytealign_S (in0[2], in0[1], 1); + out1[1] = amd_bytealign_S (in0[1], in0[0], 1); + out1[0] = amd_bytealign_S (in0[0], 0, 1); + out0[3] = 0; + out0[2] = 0; + out0[1] = 0; + out0[0] = 0; + break; + case 20: out1[3] = in0[2]; + out1[2] = in0[1]; + out1[1] = in0[0]; + out1[0] = 0; + out0[3] = 0; + out0[2] = 0; + out0[1] = 0; + out0[0] = 0; + break; + case 21: out1[3] = amd_bytealign_S (in0[2], in0[1], 3); + out1[2] = amd_bytealign_S (in0[1], in0[0], 3); + out1[1] = amd_bytealign_S (in0[0], 0, 3); + out1[0] = 0; + out0[3] = 0; + out0[2] = 0; + out0[1] = 0; + out0[0] = 0; + break; + case 22: out1[3] = amd_bytealign_S (in0[2], in0[1], 2); + out1[2] = amd_bytealign_S (in0[1], in0[0], 2); + out1[1] = amd_bytealign_S (in0[0], 0, 2); + out1[0] = 0; + out0[3] = 0; + out0[2] = 0; + out0[1] = 0; + out0[0] = 0; + break; + case 23: out1[3] = amd_bytealign_S (in0[2], in0[1], 1); + out1[2] = amd_bytealign_S (in0[1], in0[0], 1); + out1[1] = amd_bytealign_S (in0[0], 0, 1); + out1[0] = 0; + out0[3] = 0; + out0[2] = 0; + out0[1] = 0; + out0[0] = 0; + break; + case 24: out1[3] = in0[1]; + out1[2] = in0[0]; + out1[1] = 0; + out1[0] = 0; + out0[3] = 0; + out0[2] = 0; + out0[1] = 0; + out0[0] = 0; + break; + case 25: out1[3] = amd_bytealign_S (in0[1], in0[0], 3); + out1[2] = amd_bytealign_S (in0[0], 0, 3); + out1[1] = 0; + out1[0] = 0; + out0[3] = 0; + out0[2] = 0; + out0[1] = 0; + out0[0] = 0; + break; + case 26: out1[3] = amd_bytealign_S (in0[1], in0[0], 2); + out1[2] = amd_bytealign_S (in0[0], 0, 2); + out1[1] = 0; + out1[0] = 0; + out0[3] = 0; + out0[2] = 0; + out0[1] = 0; + out0[0] = 0; + break; + case 27: out1[3] = amd_bytealign_S (in0[1], in0[0], 1); + out1[2] = amd_bytealign_S (in0[0], 0, 1); + out1[1] = 0; + out1[0] = 0; + out0[3] = 0; + out0[2] = 0; + out0[1] = 0; + out0[0] = 0; + break; + case 28: out1[3] = in0[0]; + out1[2] = 0; + out1[1] = 0; + out1[0] = 0; + out0[3] = 0; + out0[2] = 0; + out0[1] = 0; + out0[0] = 0; + break; + case 29: out1[3] = amd_bytealign_S (in0[0], 0, 3); + out1[2] = 0; + out1[1] = 0; + out1[0] = 0; + out0[3] = 0; + out0[2] = 0; + out0[1] = 0; + out0[0] = 0; + break; + case 30: out1[3] = amd_bytealign_S (in0[0], 0, 2); + out1[2] = 0; + out1[1] = 0; + out1[0] = 0; + out0[3] = 0; + out0[2] = 0; + out0[1] = 0; + out0[0] = 0; + break; + case 31: out1[3] = amd_bytealign_S (in0[0], 0, 1); + out1[2] = 0; + out1[1] = 0; + out1[0] = 0; + out0[3] = 0; + out0[2] = 0; + out0[1] = 0; + out0[0] = 0; + break; + } +} + static void append_block1 (const u32 offset, u32 buf0[4], u32 buf1[4], const u32 src_r0) { // this version works with 1 byte append only @@ -775,10 +774,6 @@ static void append_block1 (const u32 offset, u32 buf0[4], u32 buf1[4], const u32 static void append_block8 (const u32 offset, u32 buf0[4], u32 buf1[4], const u32 src_l0[4], const u32 src_l1[4], const u32 src_r0[4], const u32 src_r1[4]) { - const int offset_mod_4 = offset & 3; - - const int offset_minus_4 = 4 - offset; - u32 s0 = 0; u32 s1 = 0; u32 s2 = 0; @@ -787,64 +782,68 @@ static void append_block8 (const u32 offset, u32 buf0[4], u32 buf1[4], const u32 u32 s5 = 0; u32 s6 = 0; u32 s7 = 0; - u32 s8 = 0; + + const u32 src_r00 = swap32_S (src_r0[0]); + const u32 src_r01 = swap32_S (src_r0[1]); + const u32 src_r02 = swap32_S (src_r0[2]); + const u32 src_r03 = swap32_S (src_r0[3]); + const u32 src_r10 = swap32_S (src_r1[0]); + const u32 src_r11 = swap32_S (src_r1[1]); + const u32 src_r12 = swap32_S (src_r1[2]); + const u32 src_r13 = swap32_S (src_r1[3]); switch (offset / 4) { case 0: - s8 = amd_bytealign ( 0, src_r1[3], offset_minus_4); - s7 = amd_bytealign (src_r1[3], src_r1[2], offset_minus_4); - s6 = amd_bytealign (src_r1[2], src_r1[1], offset_minus_4); - s5 = amd_bytealign (src_r1[1], src_r1[0], offset_minus_4); - s4 = amd_bytealign (src_r1[0], src_r0[3], offset_minus_4); - s3 = amd_bytealign (src_r0[3], src_r0[2], offset_minus_4); - s2 = amd_bytealign (src_r0[2], src_r0[1], offset_minus_4); - s1 = amd_bytealign (src_r0[1], src_r0[0], offset_minus_4); - s0 = amd_bytealign (src_r0[0], 0, offset_minus_4); + s7 = amd_bytealign_S (src_r12, src_r13, offset); + s6 = amd_bytealign_S (src_r11, src_r12, offset); + s5 = amd_bytealign_S (src_r10, src_r11, offset); + s4 = amd_bytealign_S (src_r03, src_r10, offset); + s3 = amd_bytealign_S (src_r02, src_r03, offset); + s2 = amd_bytealign_S (src_r01, src_r02, offset); + s1 = amd_bytealign_S (src_r00, src_r01, offset); + s0 = amd_bytealign_S ( 0, src_r00, offset); break; case 1: - s8 = amd_bytealign ( 0, src_r1[2], offset_minus_4); - s7 = amd_bytealign (src_r1[2], src_r1[1], offset_minus_4); - s6 = amd_bytealign (src_r1[1], src_r1[0], offset_minus_4); - s5 = amd_bytealign (src_r1[0], src_r0[3], offset_minus_4); - s4 = amd_bytealign (src_r0[3], src_r0[2], offset_minus_4); - s3 = amd_bytealign (src_r0[2], src_r0[1], offset_minus_4); - s2 = amd_bytealign (src_r0[1], src_r0[0], offset_minus_4); - s1 = amd_bytealign (src_r0[0], 0, offset_minus_4); + s7 = amd_bytealign_S (src_r11, src_r12, offset); + s6 = amd_bytealign_S (src_r10, src_r11, offset); + s5 = amd_bytealign_S (src_r03, src_r10, offset); + s4 = amd_bytealign_S (src_r02, src_r03, offset); + s3 = amd_bytealign_S (src_r01, src_r02, offset); + s2 = amd_bytealign_S (src_r00, src_r01, offset); + s1 = amd_bytealign_S ( 0, src_r00, offset); s0 = 0; break; case 2: - s8 = amd_bytealign ( 0, src_r1[1], offset_minus_4); - s7 = amd_bytealign (src_r1[1], src_r1[0], offset_minus_4); - s6 = amd_bytealign (src_r1[0], src_r0[3], offset_minus_4); - s5 = amd_bytealign (src_r0[3], src_r0[2], offset_minus_4); - s4 = amd_bytealign (src_r0[2], src_r0[1], offset_minus_4); - s3 = amd_bytealign (src_r0[1], src_r0[0], offset_minus_4); - s2 = amd_bytealign (src_r0[0], 0, offset_minus_4); + s7 = amd_bytealign_S (src_r10, src_r11, offset); + s6 = amd_bytealign_S (src_r03, src_r10, offset); + s5 = amd_bytealign_S (src_r02, src_r03, offset); + s4 = amd_bytealign_S (src_r01, src_r02, offset); + s3 = amd_bytealign_S (src_r00, src_r01, offset); + s2 = amd_bytealign_S ( 0, src_r00, offset); s1 = 0; s0 = 0; break; case 3: - s8 = amd_bytealign ( 0, src_r1[0], offset_minus_4); - s7 = amd_bytealign (src_r1[0], src_r0[3], offset_minus_4); - s6 = amd_bytealign (src_r0[3], src_r0[2], offset_minus_4); - s5 = amd_bytealign (src_r0[2], src_r0[1], offset_minus_4); - s4 = amd_bytealign (src_r0[1], src_r0[0], offset_minus_4); - s3 = amd_bytealign (src_r0[0], 0, offset_minus_4); + s7 = amd_bytealign_S (src_r03, src_r10, offset); + s6 = amd_bytealign_S (src_r02, src_r03, offset); + s5 = amd_bytealign_S (src_r01, src_r02, offset); + s4 = amd_bytealign_S (src_r00, src_r01, offset); + s3 = amd_bytealign_S ( 0, src_r00, offset); s2 = 0; s1 = 0; s0 = 0; + break; case 4: - s8 = amd_bytealign ( 0, src_r0[3], offset_minus_4); - s7 = amd_bytealign (src_r0[3], src_r0[2], offset_minus_4); - s6 = amd_bytealign (src_r0[2], src_r0[1], offset_minus_4); - s5 = amd_bytealign (src_r0[1], src_r0[0], offset_minus_4); - s4 = amd_bytealign (src_r0[0], 0, offset_minus_4); + s7 = amd_bytealign_S (src_r02, src_r03, offset); + s6 = amd_bytealign_S (src_r01, src_r02, offset); + s5 = amd_bytealign_S (src_r00, src_r01, offset); + s4 = amd_bytealign_S ( 0, src_r00, offset); s3 = 0; s2 = 0; s1 = 0; @@ -852,10 +851,9 @@ static void append_block8 (const u32 offset, u32 buf0[4], u32 buf1[4], const u32 break; case 5: - s8 = amd_bytealign ( 0, src_r0[2], offset_minus_4); - s7 = amd_bytealign (src_r0[2], src_r0[1], offset_minus_4); - s6 = amd_bytealign (src_r0[1], src_r0[0], offset_minus_4); - s5 = amd_bytealign (src_r0[0], 0, offset_minus_4); + s7 = amd_bytealign_S (src_r01, src_r02, offset); + s6 = amd_bytealign_S (src_r00, src_r01, offset); + s5 = amd_bytealign_S ( 0, src_r00, offset); s4 = 0; s3 = 0; s2 = 0; @@ -864,9 +862,8 @@ static void append_block8 (const u32 offset, u32 buf0[4], u32 buf1[4], const u32 break; case 6: - s8 = amd_bytealign ( 0, src_r0[1], offset_minus_4); - s7 = amd_bytealign (src_r0[1], src_r0[0], offset_minus_4); - s6 = amd_bytealign (src_r0[0], 0, offset_minus_4); + s7 = amd_bytealign_S (src_r00, src_r01, offset); + s6 = amd_bytealign_S ( 0, src_r00, offset); s5 = 0; s4 = 0; s3 = 0; @@ -876,8 +873,7 @@ static void append_block8 (const u32 offset, u32 buf0[4], u32 buf1[4], const u32 break; case 7: - s8 = amd_bytealign ( 0, src_r0[0], offset_minus_4); - s7 = amd_bytealign (src_r0[0], 0, offset_minus_4); + s7 = amd_bytealign_S ( 0, src_r00, offset); s6 = 0; s5 = 0; s4 = 0; @@ -888,28 +884,23 @@ static void append_block8 (const u32 offset, u32 buf0[4], u32 buf1[4], const u32 break; } - if (offset_mod_4 == 0) - { - buf0[0] = src_l0[0] | s1; - buf0[1] = src_l0[1] | s2; - buf0[2] = src_l0[2] | s3; - buf0[3] = src_l0[3] | s4; - buf1[0] = src_l1[0] | s5; - buf1[1] = src_l1[1] | s6; - buf1[2] = src_l1[2] | s7; - buf1[3] = src_l1[3] | s8; - } - else - { - buf0[0] = src_l0[0] | s0; - buf0[1] = src_l0[1] | s1; - buf0[2] = src_l0[2] | s2; - buf0[3] = src_l0[3] | s3; - buf1[0] = src_l1[0] | s4; - buf1[1] = src_l1[1] | s5; - buf1[2] = src_l1[2] | s6; - buf1[3] = src_l1[3] | s7; - } + s0 = swap32_S (s0); + s1 = swap32_S (s1); + s2 = swap32_S (s2); + s3 = swap32_S (s3); + s4 = swap32_S (s4); + s5 = swap32_S (s5); + s6 = swap32_S (s6); + s7 = swap32_S (s7); + + buf0[0] = src_l0[0] | s0; + buf0[1] = src_l0[1] | s1; + buf0[2] = src_l0[2] | s2; + buf0[3] = src_l0[3] | s3; + buf1[0] = src_l1[0] | s4; + buf1[1] = src_l1[1] | s5; + buf1[2] = src_l1[2] | s6; + buf1[3] = src_l1[3] | s7; } static void reverse_block (u32 in0[4], u32 in1[4], u32 out0[4], u32 out1[4], const u32 len) @@ -1032,19 +1023,7 @@ static u32 rule_op_mangle_dupeword (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED cons u32 out_len = in_len; - u32 tib40[4]; - u32 tib41[4]; - - tib40[0] = buf0[0]; - tib40[1] = buf0[1]; - tib40[2] = buf0[2]; - tib40[3] = buf0[3]; - tib41[0] = buf1[0]; - tib41[1] = buf1[1]; - tib41[2] = buf1[2]; - tib41[3] = buf1[3]; - - append_block8 (out_len, buf0, buf1, buf0, buf1, tib40, tib41); + append_block8 (out_len, buf0, buf1, buf0, buf1, buf0, buf1); out_len += in_len; @@ -1149,7 +1128,7 @@ static u32 rule_op_mangle_rotate_left (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED c return in_len; } -static u32 rule_op_mangle_rotate_right (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) +static u32 rule_op_mangle_rotate_right (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 buf0[4], MAYBE_UNUSED u32 buf1[4], const u32 in_len) { if (in_len == 0) return (in_len); @@ -1221,7 +1200,7 @@ static u32 rule_op_mangle_delete_at (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED con lshift_block (buf0, buf1, tib40, tib41); - const u32 ml = (1u << ((p0 & 3) * 8)) - 1; + const u32 ml = (1 << ((p0 & 3) * 8)) - 1; const u32 mr = ~ml; switch (p0 / 4) @@ -1322,7 +1301,7 @@ static u32 rule_op_mangle_omit (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u3 lshift_block_N (buf0, buf1, tib40, tib41, p1); - const u32 ml = (1u << ((p0 & 3) * 8)) - 1; + const u32 ml = (1 << ((p0 & 3) * 8)) - 1; const u32 mr = ~ml; switch (p0 / 4) @@ -1401,7 +1380,7 @@ static u32 rule_op_mangle_insert (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const const u32 p1n = p1 << ((p0 & 3) * 8); - const u32 ml = (1u << ((p0 & 3) * 8)) - 1; + const u32 ml = (1 << ((p0 & 3) * 8)) - 1; const u32 mr = 0xffffff00 << ((p0 & 3) * 8); @@ -1538,7 +1517,7 @@ static u32 rule_op_mangle_purgechar (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED con { u32 out_len = 0; - u32 buf_in[8] = { 0 }; + u32 buf_in[8]; buf_in[0] = buf0[0]; buf_in[1] = buf0[1]; @@ -1575,14 +1554,6 @@ static u32 rule_op_mangle_purgechar (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED con return out_len; } -/* -static u32 rule_op_mangle_togglecase_rec (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 buf0[4], MAYBE_UNUSED u32 buf1[4], const u32 in_len) -{ - // TODO - return in_len; -} -*/ - static u32 rule_op_mangle_dupechar_first (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 buf0[4], MAYBE_UNUSED u32 buf1[4], const u32 in_len) { if ( in_len == 0) return (in_len); @@ -2568,7 +2539,7 @@ u32 apply_rule_optimized (const u32 name, const u32 p0, const u32 p1, u32 buf0[4 return out_len; } -u32 apply_rules_optimized (u32 *cmds, u32 buf0[4], u32 buf1[4], const u32 len) +u32 apply_rules_optimized (const u32 *cmds, u32 buf0[4], u32 buf1[4], const u32 len) { u32 out_len = len; From f32e1139429e0608e6526324fb730e99f50cba59 Mon Sep 17 00:00:00 2001 From: jsteube Date: Sun, 20 Aug 2017 15:08:51 +0200 Subject: [PATCH 06/12] Add missing case in append_block() in pure kernel rule engine --- OpenCL/inc_rp.cl | 1 + src/rp_kernel_on_cpu.c | 1 + 2 files changed, 2 insertions(+) diff --git a/OpenCL/inc_rp.cl b/OpenCL/inc_rp.cl index f263d281a..0365d59dc 100644 --- a/OpenCL/inc_rp.cl +++ b/OpenCL/inc_rp.cl @@ -116,6 +116,7 @@ static void append_block (const u32 *buf_src, const int off_src, u32 *buf_dst, c switch (left) { + case 4: append_four_byte (buf_src, off_src + i, buf_dst, off_dst + i); break; case 3: append_three_byte (buf_src, off_src + i, buf_dst, off_dst + i); break; case 2: append_two_byte (buf_src, off_src + i, buf_dst, off_dst + i); break; case 1: append_one_byte (buf_src, off_src + i, buf_dst, off_dst + i); break; diff --git a/src/rp_kernel_on_cpu.c b/src/rp_kernel_on_cpu.c index 2d6f30037..27927644e 100644 --- a/src/rp_kernel_on_cpu.c +++ b/src/rp_kernel_on_cpu.c @@ -141,6 +141,7 @@ static void append_block (const u32 *buf_src, const int off_src, u32 *buf_dst, c switch (left) { + case 4: append_four_byte (buf_src, off_src + i, buf_dst, off_dst + i); break; case 3: append_three_byte (buf_src, off_src + i, buf_dst, off_dst + i); break; case 2: append_two_byte (buf_src, off_src + i, buf_dst, off_dst + i); break; case 1: append_one_byte (buf_src, off_src + i, buf_dst, off_dst + i); break; From a4616e6c6d2b69aa4e4c39f6bccc339eea9a8655 Mon Sep 17 00:00:00 2001 From: philsmd Date: Mon, 21 Aug 2017 09:14:49 +0200 Subject: [PATCH 07/12] fixes #1338: hexify also all password of format $HEX[] --- docs/changes.txt | 1 + src/convert.c | 13 +++++++++++++ 2 files changed, 14 insertions(+) diff --git a/docs/changes.txt b/docs/changes.txt index edee965e8..a5a6f9543 100644 --- a/docs/changes.txt +++ b/docs/changes.txt @@ -26,6 +26,7 @@ - Fixed an invalid progress value in status view if words from the base wordlist get rejected because of length - Fixed a parser error for mode -m 9820 = MS Office <= 2003 $3, SHA1 + RC4, collider #2 - Fixed a problem with changed current working directory, for instance by using --restore together with --remove +- Fixed a problem with the conversion to the $HEX[] format: convert/hexify also all passwords of the format $HEX[] - Fixed the calculation of device_name_chksum; should be done for each iteration - Fixed the estimated time value whenever the value is very large and overflows - Fixed the parsing of command line options. It doesn't show two times the same error about an invalid option anymore diff --git a/src/convert.c b/src/convert.c index 3c2f591ff..079043780 100644 --- a/src/convert.c +++ b/src/convert.c @@ -152,6 +152,19 @@ bool need_hexify (const u8 *buf, const int len, const char separator, bool alway } } + // also test if the password is of the format $HEX[]: + + if (rc == false) + { + if ((len & 1) == 0) + { + if (is_hexify (buf, len)) + { + rc = true; + } + } + } + return rc; } From 8853884f2ab64521a3023408a293cc0de0d3d08e Mon Sep 17 00:00:00 2001 From: jsteube Date: Mon, 21 Aug 2017 16:04:43 +0200 Subject: [PATCH 08/12] Fix append_four_byte() in case sm8 is 0 --- OpenCL/inc_rp.cl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/OpenCL/inc_rp.cl b/OpenCL/inc_rp.cl index 0365d59dc..97383fb8c 100644 --- a/OpenCL/inc_rp.cl +++ b/OpenCL/inc_rp.cl @@ -29,6 +29,7 @@ static void append_four_byte (const u32 *buf_src, const int off_src, u32 *buf_ds u64 t64 = hl32_to_64 (buf_src[sd + 1], buf_src[sd + 0]); t64 >>= sm8; + t64 &= 0xffffffff; t64 <<= dm8; const u32 t0 = l32_from_64_S (t64); @@ -107,7 +108,7 @@ static void append_block (const u32 *buf_src, const int off_src, u32 *buf_dst, c { int i; - for (i = 0; i < len - 4; i += 4) + for (i = 0; i < len - 3; i += 4) { append_four_byte (buf_src, off_src + i, buf_dst, off_dst + i); } @@ -116,7 +117,6 @@ static void append_block (const u32 *buf_src, const int off_src, u32 *buf_dst, c switch (left) { - case 4: append_four_byte (buf_src, off_src + i, buf_dst, off_dst + i); break; case 3: append_three_byte (buf_src, off_src + i, buf_dst, off_dst + i); break; case 2: append_two_byte (buf_src, off_src + i, buf_dst, off_dst + i); break; case 1: append_one_byte (buf_src, off_src + i, buf_dst, off_dst + i); break; From a8f339234429c7b993b164fd9c5db9dc526518c3 Mon Sep 17 00:00:00 2001 From: jsteube Date: Tue, 22 Aug 2017 10:08:28 +0200 Subject: [PATCH 09/12] Startup: Show some attack-specific optimizer constraints on start, eg: minimum and maximum support password- and salt-length --- docs/changes.txt | 1 + src/main.c | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/docs/changes.txt b/docs/changes.txt index a5a6f9543..94481a3d6 100644 --- a/docs/changes.txt +++ b/docs/changes.txt @@ -47,6 +47,7 @@ - OpenCL Kernels: Updated default scrypt TMTO to be ideal for latest NV and AMD top models - OpenCL Kernels: Vectorized tons of slow kernels to improve CPU cracking speed - OpenCL Runtime: Updated AMD ROCm driver version check, warn if version < 1.1 +- Startup: Show some attack-specific optimizer constraints on start, eg: minimum and maximum support password- and salt-length - WPA cracking: Improved nonce-error-corrections mode to use a both positive and negative corrections ## diff --git a/src/main.c b/src/main.c index 5f3e91a16..480ce9926 100644 --- a/src/main.c +++ b/src/main.c @@ -468,6 +468,24 @@ static void main_outerloop_mainscreen (MAYBE_UNUSED hashcat_ctx_t *hashcat_ctx, event_log_info (hashcat_ctx, NULL); + /** + * Optimizer constraints + */ + + event_log_info (hashcat_ctx, "Password length minimum: %u", hashconfig->pw_min); + event_log_info (hashcat_ctx, "Password length maximum: %u", hashconfig->pw_max); + + if (hashconfig->is_salted) + { + if (hashconfig->opti_type & OPTI_TYPE_RAW_HASH) + { + event_log_info (hashcat_ctx, "Salt length minimum: %u", hashconfig->salt_min); + event_log_info (hashcat_ctx, "Salt length maximum: %u", hashconfig->salt_max); + } + } + + event_log_info (hashcat_ctx, NULL); + /** * Watchdog and Temperature balance */ From 12d95fd22c7cab80e0e466946d5a3129eeb15b52 Mon Sep 17 00:00:00 2001 From: jsteube Date: Tue, 22 Aug 2017 11:09:46 +0200 Subject: [PATCH 10/12] Added option --example-hashes to show an example hash for each hash-mode --- docs/changes.txt | 1 + extra/tab_completion/hashcat.sh | 4 +- include/terminal.h | 2 + include/types.h | 116 +++++++++++++++++--------------- src/bitmap.c | 13 ++-- src/combinator.c | 11 +-- src/cpt.c | 13 ++-- src/debugfile.c | 23 ++++--- src/dictstat.c | 15 +++-- src/hashcat.c | 7 +- src/hashes.c | 3 + src/hwmon.c | 1 + src/induct.c | 21 +++--- src/interface.c | 51 +++++++------- src/loopback.c | 21 +++--- src/main.c | 16 +++-- src/mpsp.c | 11 +-- src/opencl.c | 11 +-- src/outfile_check.c | 11 +-- src/potfile.c | 1 + src/restore.c | 1 + src/straight.c | 11 +-- src/terminal.c | 63 +++++++++++++++++ src/tuningdb.c | 13 ++-- src/usage.c | 1 + src/user_options.c | 48 ++++++++++--- src/wordlist.c | 11 +-- 27 files changed, 317 insertions(+), 183 deletions(-) diff --git a/docs/changes.txt b/docs/changes.txt index 94481a3d6..328e886b8 100644 --- a/docs/changes.txt +++ b/docs/changes.txt @@ -9,6 +9,7 @@ - Added self-test functionality to detect broken OpenCL runtimes on startup - Added option --self-test-disable to disable self-test functionality on startup - Added option --wordlist-autohex-disable to disable the automatical conversion of $HEX[] words from the word list +- Added option --example-hashes to show an example hash for each hash-mode ## ## Algorithms diff --git a/extra/tab_completion/hashcat.sh b/extra/tab_completion/hashcat.sh index 96d654e81..9fd3968dd 100644 --- a/extra/tab_completion/hashcat.sh +++ b/extra/tab_completion/hashcat.sh @@ -189,8 +189,8 @@ _hashcat () local BUILD_IN_CHARSETS='?l ?u ?d ?a ?b ?s ?h ?H' local SHORT_OPTS="-m -a -V -v -h -b -t -o -p -c -d -w -n -u -j -k -r -g -1 -2 -3 -4 -i -I -s -l -O" - local LONG_OPTS="--hash-type --attack-mode --version --help --quiet --benchmark --hex-salt --hex-wordlist --hex-charset --force --status --status-timer --machine-readable --loopback --weak-hash-threshold --markov-hcstat --markov-disable --markov-classic --markov-threshold --runtime --session --speed-only --progress-only --restore --restore-file-path --restore-disable --outfile --outfile-format --outfile-autohex-disable --outfile-check-timer --outfile-check-dir --wordlist-autohex-disable --separator --show --left --username --remove --remove-timer --potfile-disable --potfile-path --debug-mode --debug-file --induction-dir --segment-size --bitmap-min --bitmap-max --cpu-affinity --opencl-info --opencl-devices --opencl-platforms --opencl-device-types --opencl-vector-width --workload-profile --kernel-accel --kernel-loops --nvidia-spin-damp --gpu-temp-disable --gpu-temp-abort --gpu-temp-retain --powertune-enable --skip --limit --keyspace --rule-left --rule-right --rules-file --generate-rules --generate-rules-func-min --generate-rules-func-max --generate-rules-seed --custom-charset1 --custom-charset2 --custom-charset3 --custom-charset4 --increment --increment-min --increment-max --logfile-disable --scrypt-tmto --truecrypt-keyfiles --stdout --keep-guessing --hccapx-message-pair --nonce-error-corrections --encoding-from --encoding-to --optimized-kernel-enable --self-test-disable" - local OPTIONS="-m -a -t -o -p -c -d -w -n -u -j -k -r -g -1 -2 -3 -4 -s -l --hash-type --attack-mode --status-timer --weak-hash-threshold --markov-hcstat --markov-threshold --runtime --session --timer --outfile --outfile-format --outfile-check-timer --outfile-check-dir --separator --remove-timer --potfile-path --restore-file-path --debug-mode --debug-file --induction-dir --segment-size --bitmap-min --bitmap-max --cpu-affinity --opencl-devices --opencl-platforms --opencl-device-types --opencl-vector-width --workload-profile --kernel-accel --kernel-loops --nvidia-spin-damp --gpu-temp-abort --gpu-temp-retain -disable --skip --limit --rule-left --rule-right --rules-file --generate-rules --generate-rules-func-min --generate-rules-func-max --generate-rules-seed --custom-charset1 --custom-charset2 --custom-charset3 --custom-charset4 --increment-min --increment-max --scrypt-tmto --truecrypt-keyfiles --hccapx-message-pair --nonce-error-corrections --encoding-from --encoding-to --optimized-kernel-enable --self-test-disable" + local LONG_OPTS="--hash-type --attack-mode --version --help --quiet --benchmark --hex-salt --hex-wordlist --hex-charset --force --status --status-timer --machine-readable --loopback --weak-hash-threshold --markov-hcstat --markov-disable --markov-classic --markov-threshold --runtime --session --speed-only --progress-only --restore --restore-file-path --restore-disable --outfile --outfile-format --outfile-autohex-disable --outfile-check-timer --outfile-check-dir --wordlist-autohex-disable --separator --show --left --username --remove --remove-timer --potfile-disable --potfile-path --debug-mode --debug-file --induction-dir --segment-size --bitmap-min --bitmap-max --cpu-affinity --example-hashes --opencl-info --opencl-devices --opencl-platforms --opencl-device-types --opencl-vector-width --workload-profile --kernel-accel --kernel-loops --nvidia-spin-damp --gpu-temp-disable --gpu-temp-abort --gpu-temp-retain --powertune-enable --skip --limit --keyspace --rule-left --rule-right --rules-file --generate-rules --generate-rules-func-min --generate-rules-func-max --generate-rules-seed --custom-charset1 --custom-charset2 --custom-charset3 --custom-charset4 --increment --increment-min --increment-max --logfile-disable --scrypt-tmto --truecrypt-keyfiles --stdout --keep-guessing --hccapx-message-pair --nonce-error-corrections --encoding-from --encoding-to --optimized-kernel-enable --self-test-disable" + local OPTIONS="-m -a -t -o -p -c -d -w -n -u -j -k -r -g -1 -2 -3 -4 -s -l --hash-type --attack-mode --status-timer --weak-hash-threshold --markov-hcstat --markov-threshold --runtime --session --timer --outfile --outfile-format --outfile-check-timer --outfile-check-dir --separator --remove-timer --potfile-path --restore-file-path --debug-mode --debug-file --induction-dir --segment-size --bitmap-min --bitmap-max --cpu-affinity --opencl-devices --opencl-platforms --opencl-device-types --opencl-vector-width --workload-profile --kernel-accel --kernel-loops --nvidia-spin-damp --gpu-temp-abort --gpu-temp-retain --skip --limit --rule-left --rule-right --rules-file --generate-rules --generate-rules-func-min --generate-rules-func-max --generate-rules-seed --custom-charset1 --custom-charset2 --custom-charset3 --custom-charset4 --increment-min --increment-max --scrypt-tmto --truecrypt-keyfiles --hccapx-message-pair --nonce-error-corrections --encoding-from --encoding-to" COMPREPLY=() local cur="${COMP_WORDS[COMP_CWORD]}" diff --git a/include/terminal.h b/include/terminal.h index 4cf4747d2..7bbcc3acf 100644 --- a/include/terminal.h +++ b/include/terminal.h @@ -41,6 +41,8 @@ int tty_fix(void); void compress_terminal_line_length (char *out_buf, const size_t keep_from_beginning, const size_t keep_from_end); +void example_hashes (hashcat_ctx_t *hashcat_ctx); + void opencl_info (hashcat_ctx_t *hashcat_ctx); void opencl_info_compact (hashcat_ctx_t *hashcat_ctx); diff --git a/include/types.h b/include/types.h index f66ca7f36..b2b39004f 100644 --- a/include/types.h +++ b/include/types.h @@ -523,6 +523,7 @@ typedef enum user_options_defaults BITMAP_MAX = 24, BITMAP_MIN = 16, DEBUG_MODE = 0, + EXAMPLE_HASHES = false, FORCE = false, GPU_TEMP_ABORT = 90, GPU_TEMP_DISABLE = false, @@ -549,6 +550,7 @@ typedef enum user_options_defaults MARKOV_THRESHOLD = 0, NONCE_ERROR_CORRECTIONS = 8, NVIDIA_SPIN_DAMP = 100, + OPENCL_INFO = false, OPENCL_VECTOR_WIDTH = 0, OPTIMIZED_KERNEL_ENABLE = false, OUTFILE_AUTOHEX = true, @@ -603,83 +605,84 @@ typedef enum user_options_map IDX_DEBUG_MODE = 0xff05, IDX_ENCODING_FROM = 0xff06, IDX_ENCODING_TO = 0xff07, - IDX_FORCE = 0xff08, - IDX_GPU_TEMP_ABORT = 0xff09, - IDX_GPU_TEMP_DISABLE = 0xff0a, - IDX_GPU_TEMP_RETAIN = 0xff0b, + IDX_EXAMPLE_HASHES = 0xff08, + IDX_FORCE = 0xff09, + IDX_GPU_TEMP_ABORT = 0xff0a, + IDX_GPU_TEMP_DISABLE = 0xff0b, + IDX_GPU_TEMP_RETAIN = 0xff0c, IDX_HASH_MODE = 'm', - IDX_HCCAPX_MESSAGE_PAIR = 0xff0c, + IDX_HCCAPX_MESSAGE_PAIR = 0xff0d, IDX_HELP = 'h', - IDX_HEX_CHARSET = 0xff0d, - IDX_HEX_SALT = 0xff0e, - IDX_HEX_WORDLIST = 0xff0f, + IDX_HEX_CHARSET = 0xff0e, + IDX_HEX_SALT = 0xff0f, + IDX_HEX_WORDLIST = 0xff10, IDX_INCREMENT = 'i', - IDX_INCREMENT_MAX = 0xff10, - IDX_INCREMENT_MIN = 0xff11, - IDX_INDUCTION_DIR = 0xff12, - IDX_KEEP_GUESSING = 0xff13, + IDX_INCREMENT_MAX = 0xff11, + IDX_INCREMENT_MIN = 0xff12, + IDX_INDUCTION_DIR = 0xff13, + IDX_KEEP_GUESSING = 0xff14, IDX_KERNEL_ACCEL = 'n', IDX_KERNEL_LOOPS = 'u', - IDX_KEYSPACE = 0xff14, - IDX_LEFT = 0xff15, + IDX_KEYSPACE = 0xff15, + IDX_LEFT = 0xff16, IDX_LIMIT = 'l', - IDX_LOGFILE_DISABLE = 0xff16, - IDX_LOOPBACK = 0xff17, - IDX_MACHINE_READABLE = 0xff18, - IDX_MARKOV_CLASSIC = 0xff19, - IDX_MARKOV_DISABLE = 0xff1a, - IDX_MARKOV_HCSTAT = 0xff1b, + IDX_LOGFILE_DISABLE = 0xff17, + IDX_LOOPBACK = 0xff18, + IDX_MACHINE_READABLE = 0xff19, + IDX_MARKOV_CLASSIC = 0xff1a, + IDX_MARKOV_DISABLE = 0xff1b, + IDX_MARKOV_HCSTAT = 0xff1c, IDX_MARKOV_THRESHOLD = 't', - IDX_NONCE_ERROR_CORRECTIONS = 0xff1c, - IDX_NVIDIA_SPIN_DAMP = 0xff1d, + IDX_NONCE_ERROR_CORRECTIONS = 0xff1d, + IDX_NVIDIA_SPIN_DAMP = 0xff1e, IDX_OPENCL_DEVICES = 'd', IDX_OPENCL_DEVICE_TYPES = 'D', IDX_OPENCL_INFO = 'I', - IDX_OPENCL_PLATFORMS = 0xff1e, - IDX_OPENCL_VECTOR_WIDTH = 0xff1f, + IDX_OPENCL_PLATFORMS = 0xff1f, + IDX_OPENCL_VECTOR_WIDTH = 0xff20, IDX_OPTIMIZED_KERNEL_ENABLE = 'O', - IDX_OUTFILE_AUTOHEX_DISABLE = 0xff20, - IDX_OUTFILE_CHECK_DIR = 0xff21, - IDX_OUTFILE_CHECK_TIMER = 0xff22, - IDX_OUTFILE_FORMAT = 0xff23, + IDX_OUTFILE_AUTOHEX_DISABLE = 0xff21, + IDX_OUTFILE_CHECK_DIR = 0xff22, + IDX_OUTFILE_CHECK_TIMER = 0xff23, + IDX_OUTFILE_FORMAT = 0xff24, IDX_OUTFILE = 'o', - IDX_WORDLIST_AUTOHEX_DISABLE = 0xff24, - IDX_POTFILE_DISABLE = 0xff25, - IDX_POTFILE_PATH = 0xff26, - IDX_POWERTUNE_ENABLE = 0xff27, - IDX_QUIET = 0xff28, - IDX_REMOVE = 0xff29, - IDX_REMOVE_TIMER = 0xff2a, - IDX_RESTORE = 0xff2b, - IDX_RESTORE_DISABLE = 0xff2c, - IDX_RESTORE_FILE_PATH = 0xff2d, + IDX_WORDLIST_AUTOHEX_DISABLE = 0xff25, + IDX_POTFILE_DISABLE = 0xff26, + IDX_POTFILE_PATH = 0xff27, + IDX_POWERTUNE_ENABLE = 0xff28, + IDX_QUIET = 0xff29, + IDX_REMOVE = 0xff2a, + IDX_REMOVE_TIMER = 0xff2b, + IDX_RESTORE = 0xff2c, + IDX_RESTORE_DISABLE = 0xff2d, + IDX_RESTORE_FILE_PATH = 0xff2e, IDX_RP_FILE = 'r', - IDX_RP_GEN_FUNC_MAX = 0xff2e, - IDX_RP_GEN_FUNC_MIN = 0xff2f, + IDX_RP_GEN_FUNC_MAX = 0xff2f, + IDX_RP_GEN_FUNC_MIN = 0xff30, IDX_RP_GEN = 'g', - IDX_RP_GEN_SEED = 0xff30, + IDX_RP_GEN_SEED = 0xff31, IDX_RULE_BUF_L = 'j', IDX_RULE_BUF_R = 'k', - IDX_RUNTIME = 0xff31, - IDX_SCRYPT_TMTO = 0xff32, - IDX_SELF_TEST_DISABLE = 0xff33, + IDX_RUNTIME = 0xff32, + IDX_SCRYPT_TMTO = 0xff33, + IDX_SELF_TEST_DISABLE = 0xff34, IDX_SEGMENT_SIZE = 'c', IDX_SEPARATOR = 'p', - IDX_SESSION = 0xff34, - IDX_SHOW = 0xff35, + IDX_SESSION = 0xff35, + IDX_SHOW = 0xff36, IDX_SKIP = 's', - IDX_STATUS = 0xff36, - IDX_STATUS_TIMER = 0xff37, - IDX_STDOUT_FLAG = 0xff38, - IDX_SPEED_ONLY = 0xff39, - IDX_PROGRESS_ONLY = 0xff3a, - IDX_TRUECRYPT_KEYFILES = 0xff3b, - IDX_USERNAME = 0xff3c, - IDX_VERACRYPT_KEYFILES = 0xff3d, - IDX_VERACRYPT_PIM = 0xff3e, + IDX_STATUS = 0xff37, + IDX_STATUS_TIMER = 0xff38, + IDX_STDOUT_FLAG = 0xff39, + IDX_SPEED_ONLY = 0xff3a, + IDX_PROGRESS_ONLY = 0xff3b, + IDX_TRUECRYPT_KEYFILES = 0xff3c, + IDX_USERNAME = 0xff3d, + IDX_VERACRYPT_KEYFILES = 0xff3e, + IDX_VERACRYPT_PIM = 0xff3f, IDX_VERSION_LOWER = 'v', IDX_VERSION = 'V', - IDX_WEAK_HASH_THRESHOLD = 0xff3f, + IDX_WEAK_HASH_THRESHOLD = 0xff40, IDX_WORKLOAD_PROFILE = 'w' } user_options_map_t; @@ -1471,6 +1474,7 @@ typedef struct user_options bool advice_disable; bool benchmark; + bool example_hashes; bool force; bool gpu_temp_disable; bool hex_charset; diff --git a/src/bitmap.c b/src/bitmap.c index 5de583830..a51e96e9f 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -78,12 +78,13 @@ int bitmap_ctx_init (hashcat_ctx_t *hashcat_ctx) bitmap_ctx->enabled = false; - if (user_options->keyspace == true) return 0; - if (user_options->left == true) return 0; - if (user_options->opencl_info == true) return 0; - if (user_options->show == true) return 0; - if (user_options->usage == true) return 0; - if (user_options->version == true) return 0; + if (user_options->example_hashes == true) return 0; + if (user_options->keyspace == true) return 0; + if (user_options->left == true) return 0; + if (user_options->opencl_info == true) return 0; + if (user_options->show == true) return 0; + if (user_options->usage == true) return 0; + if (user_options->version == true) return 0; bitmap_ctx->enabled = true; diff --git a/src/combinator.c b/src/combinator.c index 316a0568a..608290730 100644 --- a/src/combinator.c +++ b/src/combinator.c @@ -20,11 +20,12 @@ int combinator_ctx_init (hashcat_ctx_t *hashcat_ctx) combinator_ctx->enabled = false; - if (user_options->left == true) return 0; - if (user_options->opencl_info == true) return 0; - if (user_options->show == true) return 0; - if (user_options->usage == true) return 0; - if (user_options->version == true) return 0; + if (user_options->example_hashes == true) return 0; + if (user_options->left == true) return 0; + if (user_options->opencl_info == true) return 0; + if (user_options->show == true) return 0; + if (user_options->usage == true) return 0; + if (user_options->version == true) return 0; if ((user_options->attack_mode != ATTACK_MODE_COMBI) && (user_options->attack_mode != ATTACK_MODE_HYBRID1) diff --git a/src/cpt.c b/src/cpt.c index 72a7887c6..9d67d8220 100644 --- a/src/cpt.c +++ b/src/cpt.c @@ -16,12 +16,13 @@ int cpt_ctx_init (hashcat_ctx_t *hashcat_ctx) cpt_ctx->enabled = false; - if (user_options->keyspace == true) return 0; - if (user_options->left == true) return 0; - if (user_options->opencl_info == true) return 0; - if (user_options->show == true) return 0; - if (user_options->usage == true) return 0; - if (user_options->version == true) return 0; + if (user_options->example_hashes == true) return 0; + if (user_options->keyspace == true) return 0; + if (user_options->left == true) return 0; + if (user_options->opencl_info == true) return 0; + if (user_options->show == true) return 0; + if (user_options->usage == true) return 0; + if (user_options->version == true) return 0; cpt_ctx->enabled = true; diff --git a/src/debugfile.c b/src/debugfile.c index 97cfe6836..11a521c15 100644 --- a/src/debugfile.c +++ b/src/debugfile.c @@ -87,17 +87,18 @@ int debugfile_init (hashcat_ctx_t *hashcat_ctx) debugfile_ctx->enabled = false; - if (user_options->benchmark == true) return 0; - if (user_options->keyspace == true) return 0; - if (user_options->left == true) return 0; - if (user_options->opencl_info == true) return 0; - if (user_options->show == true) return 0; - if (user_options->stdout_flag == true) return 0; - if (user_options->speed_only == true) return 0; - if (user_options->progress_only == true) return 0; - if (user_options->usage == true) return 0; - if (user_options->version == true) return 0; - if (user_options->debug_mode == 0) return 0; + if (user_options->benchmark == true) return 0; + if (user_options->example_hashes == true) return 0; + if (user_options->keyspace == true) return 0; + if (user_options->left == true) return 0; + if (user_options->opencl_info == true) return 0; + if (user_options->show == true) return 0; + if (user_options->stdout_flag == true) return 0; + if (user_options->speed_only == true) return 0; + if (user_options->progress_only == true) return 0; + if (user_options->usage == true) return 0; + if (user_options->version == true) return 0; + if (user_options->debug_mode == 0) return 0; debugfile_ctx->enabled = true; diff --git a/src/dictstat.c b/src/dictstat.c index febdf385b..bc5ab6763 100644 --- a/src/dictstat.c +++ b/src/dictstat.c @@ -37,13 +37,14 @@ int dictstat_init (hashcat_ctx_t *hashcat_ctx) dictstat_ctx->enabled = false; - if (user_options->benchmark == true) return 0; - if (user_options->keyspace == true) return 0; - if (user_options->left == true) return 0; - if (user_options->opencl_info == true) return 0; - if (user_options->show == true) return 0; - if (user_options->usage == true) return 0; - if (user_options->version == true) return 0; + if (user_options->benchmark == true) return 0; + if (user_options->example_hashes == true) return 0; + if (user_options->keyspace == true) return 0; + if (user_options->left == true) return 0; + if (user_options->opencl_info == true) return 0; + if (user_options->show == true) return 0; + if (user_options->usage == true) return 0; + if (user_options->version == true) return 0; if (user_options->attack_mode == ATTACK_MODE_BF) return 0; diff --git a/src/hashcat.c b/src/hashcat.c index 495a5f573..7ffe38c5d 100644 --- a/src/hashcat.c +++ b/src/hashcat.c @@ -415,7 +415,12 @@ static int outer_loop (hashcat_ctx_t *hashcat_ctx) const int rc_hashconfig = hashconfig_init (hashcat_ctx); - if (rc_hashconfig == -1) return -1; + if (rc_hashconfig == -1) + { + event_log_error (hashcat_ctx, "Unknown hash-type '%u' selected.", user_options->hash_mode); + + return -1; + } /** * load hashes, stage 1 diff --git a/src/hashes.c b/src/hashes.c index dac9e5e2b..5af9c7f56 100644 --- a/src/hashes.c +++ b/src/hashes.c @@ -672,6 +672,9 @@ int hashes_init_stage1 (hashcat_ctx_t *hashcat_ctx) hashes_cnt = 1; } + else if (user_options->example_hashes == true) + { + } else if (user_options->keyspace == true) { } diff --git a/src/hwmon.c b/src/hwmon.c index 10edb0592..d6c933ed4 100644 --- a/src/hwmon.c +++ b/src/hwmon.c @@ -3328,6 +3328,7 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx) hwmon_ctx->enabled = false; + if (user_options->example_hashes == true) return 0; if (user_options->keyspace == true) return 0; if (user_options->left == true) return 0; if (user_options->opencl_info == true) return 0; diff --git a/src/induct.c b/src/induct.c index a2022386f..ba02be91d 100644 --- a/src/induct.c +++ b/src/induct.c @@ -36,16 +36,17 @@ int induct_ctx_init (hashcat_ctx_t *hashcat_ctx) induct_ctx->enabled = false; - if (user_options->benchmark == true) return 0; - if (user_options->keyspace == true) return 0; - if (user_options->left == true) return 0; - if (user_options->opencl_info == true) return 0; - if (user_options->show == true) return 0; - if (user_options->stdout_flag == true) return 0; - if (user_options->speed_only == true) return 0; - if (user_options->progress_only == true) return 0; - if (user_options->usage == true) return 0; - if (user_options->version == true) return 0; + if (user_options->benchmark == true) return 0; + if (user_options->example_hashes == true) return 0; + if (user_options->keyspace == true) return 0; + if (user_options->left == true) return 0; + if (user_options->opencl_info == true) return 0; + if (user_options->show == true) return 0; + if (user_options->stdout_flag == true) return 0; + if (user_options->speed_only == true) return 0; + if (user_options->progress_only == true) return 0; + if (user_options->usage == true) return 0; + if (user_options->version == true) return 0; if (user_options->attack_mode == ATTACK_MODE_BF) return 0; if (user_options->attack_mode == ATTACK_MODE_COMBI) return 0; diff --git a/src/interface.c b/src/interface.c index 2d25676d1..f0bbff074 100644 --- a/src/interface.c +++ b/src/interface.c @@ -34,7 +34,6 @@ static char ST_PASS_BIN_09710[] = "\x91\xb2\xe0\x62\xb9"; static char ST_PASS_BIN_09810[] = "\xb8\xf6\x36\x19\xca"; static char ST_PASS_BIN_10410[] = "\x6a\x8a\xed\xcc\xb7"; - /** * Missing self-test hashes: * @@ -24312,8 +24311,7 @@ int hashconfig_init (hashcat_ctx_t *hashcat_ctx) hashconfig->st_pass = ST_PASS_HASHCAT_PLAIN; break; - default: event_log_error (hashcat_ctx, "Unknown hash-type '%u' selected.", hashconfig->hash_mode); - return -1; + default: return -1; } if (user_options->hex_salt) @@ -24338,36 +24336,39 @@ int hashconfig_init (hashcat_ctx_t *hashcat_ctx) // some kernels do not have an optimized kernel, simply because they do not need them // or because they are not yet converted, for them we should switch off optimized mode - if (user_options->optimized_kernel_enable == true) + if (user_options->example_hashes == false) { - char source_file[256] = { 0 }; - - generate_source_kernel_filename (hashconfig->attack_exec, user_options_extra->attack_kern, hashconfig->kern_type, true, folder_config->shared_dir, source_file); - - if (hc_path_read (source_file) == false) + if (user_options->optimized_kernel_enable == true) { - if (user_options->quiet == false) event_log_warning (hashcat_ctx, "%s: Optimized kernel not found, falling back to pure kernel", source_file); + char source_file[256] = { 0 }; + + generate_source_kernel_filename (hashconfig->attack_exec, user_options_extra->attack_kern, hashconfig->kern_type, true, folder_config->shared_dir, source_file); + + if (hc_path_read (source_file) == false) + { + if (user_options->quiet == false) event_log_warning (hashcat_ctx, "%s: Optimized kernel not found, falling back to pure kernel", source_file); + } + else + { + hashconfig->opti_type |= OPTI_TYPE_OPTIMIZED_KERNEL; + } } else { - hashconfig->opti_type |= OPTI_TYPE_OPTIMIZED_KERNEL; - } - } - else - { - char source_file[256] = { 0 }; + char source_file[256] = { 0 }; - generate_source_kernel_filename (hashconfig->attack_exec, user_options_extra->attack_kern, hashconfig->kern_type, false, folder_config->shared_dir, source_file); + generate_source_kernel_filename (hashconfig->attack_exec, user_options_extra->attack_kern, hashconfig->kern_type, false, folder_config->shared_dir, source_file); - if (hc_path_read (source_file) == false) - { - if (user_options->quiet == false) event_log_warning (hashcat_ctx, "%s: Pure kernel not found, falling back to optimized kernel", source_file); + if (hc_path_read (source_file) == false) + { + if (user_options->quiet == false) event_log_warning (hashcat_ctx, "%s: Pure kernel not found, falling back to optimized kernel", source_file); - hashconfig->opti_type |= OPTI_TYPE_OPTIMIZED_KERNEL; - } - else - { - // nothing to do + hashconfig->opti_type |= OPTI_TYPE_OPTIMIZED_KERNEL; + } + else + { + // nothing to do + } } } diff --git a/src/loopback.c b/src/loopback.c index 7712e52ab..12b8c1998 100644 --- a/src/loopback.c +++ b/src/loopback.c @@ -60,16 +60,17 @@ int loopback_init (hashcat_ctx_t *hashcat_ctx) loopback_ctx->enabled = false; - if (user_options->benchmark == true) return 0; - if (user_options->keyspace == true) return 0; - if (user_options->left == true) return 0; - if (user_options->opencl_info == true) return 0; - if (user_options->show == true) return 0; - if (user_options->stdout_flag == true) return 0; - if (user_options->speed_only == true) return 0; - if (user_options->progress_only == true) return 0; - if (user_options->usage == true) return 0; - if (user_options->version == true) return 0; + if (user_options->benchmark == true) return 0; + if (user_options->example_hashes == true) return 0; + if (user_options->keyspace == true) return 0; + if (user_options->left == true) return 0; + if (user_options->opencl_info == true) return 0; + if (user_options->show == true) return 0; + if (user_options->stdout_flag == true) return 0; + if (user_options->speed_only == true) return 0; + if (user_options->progress_only == true) return 0; + if (user_options->usage == true) return 0; + if (user_options->version == true) return 0; loopback_ctx->enabled = true; loopback_ctx->fp = NULL; diff --git a/src/main.c b/src/main.c index 480ce9926..50586284f 100644 --- a/src/main.c +++ b/src/main.c @@ -187,7 +187,7 @@ static void main_outerloop_starting (MAYBE_UNUSED hashcat_ctx_t *hashcat_ctx, MA status_ctx->shutdown_outer = false; - if ((user_options->keyspace == false) && (user_options->stdout_flag == false) && (user_options->opencl_info == false) && (user_options->speed_only == false)) + if ((user_options->example_hashes == false) && (user_options->keyspace == false) && (user_options->stdout_flag == false) && (user_options->opencl_info == false) && (user_options->speed_only == false)) { if ((user_options_extra->wordlist_mode == WL_MODE_FILE) || (user_options_extra->wordlist_mode == WL_MODE_MASK)) { @@ -250,9 +250,10 @@ static void main_cracker_finished (MAYBE_UNUSED hashcat_ctx_t *hashcat_ctx, MAYB const user_options_t *user_options = hashcat_ctx->user_options; const user_options_extra_t *user_options_extra = hashcat_ctx->user_options_extra; - if (user_options->keyspace == true) return; - if (user_options->opencl_info == true) return; - if (user_options->stdout_flag == true) return; + if (user_options->example_hashes == true) return; + if (user_options->keyspace == true) return; + if (user_options->opencl_info == true) return; + if (user_options->stdout_flag == true) return; // if we had a prompt, clear it @@ -1035,6 +1036,13 @@ int main (int argc, char **argv) return 0; } + if (user_options->example_hashes == true) + { + example_hashes (hashcat_ctx); + + return 0; + } + // init a hashcat session; this initializes opencl devices, hwmon, etc welcome_screen (hashcat_ctx, VERSION_TAG); diff --git a/src/mpsp.c b/src/mpsp.c index 8cc0cc5c8..0e753494d 100644 --- a/src/mpsp.c +++ b/src/mpsp.c @@ -1372,11 +1372,12 @@ int mask_ctx_init (hashcat_ctx_t *hashcat_ctx) mask_ctx->enabled = false; - if (user_options->left == true) return 0; - if (user_options->opencl_info == true) return 0; - if (user_options->show == true) return 0; - if (user_options->usage == true) return 0; - if (user_options->version == true) return 0; + if (user_options->example_hashes == true) return 0; + if (user_options->left == true) return 0; + if (user_options->opencl_info == true) return 0; + if (user_options->show == true) return 0; + if (user_options->usage == true) return 0; + if (user_options->version == true) return 0; if (user_options->attack_mode == ATTACK_MODE_STRAIGHT) return 0; if (user_options->attack_mode == ATTACK_MODE_COMBI) return 0; diff --git a/src/opencl.c b/src/opencl.c index b000d8c4e..6b1e3271c 100644 --- a/src/opencl.c +++ b/src/opencl.c @@ -2360,11 +2360,12 @@ int opencl_ctx_init (hashcat_ctx_t *hashcat_ctx) opencl_ctx->enabled = false; - if (user_options->keyspace == true) return 0; - if (user_options->left == true) return 0; - if (user_options->show == true) return 0; - if (user_options->usage == true) return 0; - if (user_options->version == true) return 0; + if (user_options->example_hashes == true) return 0; + if (user_options->keyspace == true) return 0; + if (user_options->left == true) return 0; + if (user_options->show == true) return 0; + if (user_options->usage == true) return 0; + if (user_options->version == true) return 0; hc_device_param_t *devices_param = (hc_device_param_t *) hccalloc (DEVICES_MAX, sizeof (hc_device_param_t)); diff --git a/src/outfile_check.c b/src/outfile_check.c index d3bdcdd8d..50f947e6e 100644 --- a/src/outfile_check.c +++ b/src/outfile_check.c @@ -311,11 +311,12 @@ int outcheck_ctx_init (hashcat_ctx_t *hashcat_ctx) outcheck_ctx->enabled = false; - if (user_options->keyspace == true) return 0; - if (user_options->benchmark == true) return 0; - if (user_options->speed_only == true) return 0; - if (user_options->progress_only == true) return 0; - if (user_options->opencl_info == true) return 0; + if (user_options->keyspace == true) return 0; + if (user_options->benchmark == true) return 0; + if (user_options->example_hashes == true) return 0; + if (user_options->speed_only == true) return 0; + if (user_options->progress_only == true) return 0; + if (user_options->opencl_info == true) return 0; if (user_options->outfile_check_timer == 0) return 0; diff --git a/src/potfile.c b/src/potfile.c index 0939ee759..01ddd610e 100644 --- a/src/potfile.c +++ b/src/potfile.c @@ -64,6 +64,7 @@ int potfile_init (hashcat_ctx_t *hashcat_ctx) potfile_ctx->enabled = false; if (user_options->benchmark == true) return 0; + if (user_options->example_hashes == true) return 0; if (user_options->keyspace == true) return 0; if (user_options->opencl_info == true) return 0; if (user_options->stdout_flag == true) return 0; diff --git a/src/restore.c b/src/restore.c index d7d621452..042555433 100644 --- a/src/restore.c +++ b/src/restore.c @@ -301,6 +301,7 @@ int restore_ctx_init (hashcat_ctx_t *hashcat_ctx, int argc, char **argv) restore_ctx->enabled = false; if (user_options->benchmark == true) return 0; + if (user_options->example_hashes == true) return 0; if (user_options->keyspace == true) return 0; if (user_options->left == true) return 0; if (user_options->opencl_info == true) return 0; diff --git a/src/straight.c b/src/straight.c index b8d0918c2..dfc49602f 100644 --- a/src/straight.c +++ b/src/straight.c @@ -207,11 +207,12 @@ int straight_ctx_init (hashcat_ctx_t *hashcat_ctx) straight_ctx->enabled = false; - if (user_options->left == true) return 0; - if (user_options->opencl_info == true) return 0; - if (user_options->show == true) return 0; - if (user_options->usage == true) return 0; - if (user_options->version == true) return 0; + if (user_options->example_hashes == true) return 0; + if (user_options->left == true) return 0; + if (user_options->opencl_info == true) return 0; + if (user_options->show == true) return 0; + if (user_options->usage == true) return 0; + if (user_options->version == true) return 0; if (user_options->attack_mode == ATTACK_MODE_BF) return 0; diff --git a/src/terminal.c b/src/terminal.c index 764caa9df..fc47805a8 100644 --- a/src/terminal.c +++ b/src/terminal.c @@ -513,6 +513,69 @@ void compress_terminal_line_length (char *out_buf, const size_t keep_from_beginn *ptr1 = 0; } +void example_hashes (hashcat_ctx_t *hashcat_ctx) +{ + user_options_t *user_options = hashcat_ctx->user_options; + + if (user_options->hash_mode_chgd == true) + { + const int rc = hashconfig_init (hashcat_ctx); + + if (rc == 0) + { + hashconfig_t *hashconfig = hashcat_ctx->hashconfig; + + event_log_info (hashcat_ctx, "MODE: %s", strhashtype (hashconfig->hash_mode)); + + if ((hashconfig->st_hash != NULL) && (hashconfig->st_pass != NULL)) + { + event_log_info (hashcat_ctx, "HASH: %s", hashconfig->st_hash); + event_log_info (hashcat_ctx, "PASS: %s", hashconfig->st_pass); + } + else + { + event_log_info (hashcat_ctx, "HASH: not stored"); + event_log_info (hashcat_ctx, "PASS: not stored"); + } + + event_log_info (hashcat_ctx, NULL); + } + + hashconfig_destroy (hashcat_ctx); + } + else + { + for (int i = 0; i < 100000; i++) + { + user_options->hash_mode = i; + + const int rc = hashconfig_init (hashcat_ctx); + + if (rc == 0) + { + hashconfig_t *hashconfig = hashcat_ctx->hashconfig; + + event_log_info (hashcat_ctx, "MODE: %s", strhashtype (hashconfig->hash_mode)); + + if ((hashconfig->st_hash != NULL) && (hashconfig->st_pass != NULL)) + { + event_log_info (hashcat_ctx, "HASH: %s", hashconfig->st_hash); + event_log_info (hashcat_ctx, "PASS: %s", hashconfig->st_pass); + } + else + { + event_log_info (hashcat_ctx, "HASH: not stored"); + event_log_info (hashcat_ctx, "PASS: not stored"); + } + + event_log_info (hashcat_ctx, NULL); + } + + hashconfig_destroy (hashcat_ctx); + } + } +} + void opencl_info (hashcat_ctx_t *hashcat_ctx) { const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx; diff --git a/src/tuningdb.c b/src/tuningdb.c index 39de14732..ffb699d64 100644 --- a/src/tuningdb.c +++ b/src/tuningdb.c @@ -59,12 +59,13 @@ int tuning_db_init (hashcat_ctx_t *hashcat_ctx) tuning_db->enabled = false; - if (user_options->keyspace == true) return 0; - if (user_options->left == true) return 0; - if (user_options->opencl_info == true) return 0; - if (user_options->show == true) return 0; - if (user_options->usage == true) return 0; - if (user_options->version == true) return 0; + if (user_options->example_hashes == true) return 0; + if (user_options->keyspace == true) return 0; + if (user_options->left == true) return 0; + if (user_options->opencl_info == true) return 0; + if (user_options->show == true) return 0; + if (user_options->usage == true) return 0; + if (user_options->version == true) return 0; tuning_db->enabled = true; diff --git a/src/usage.c b/src/usage.c index 0265d7e50..5fc35b3a1 100644 --- a/src/usage.c +++ b/src/usage.c @@ -84,6 +84,7 @@ static const char *USAGE_BIG[] = " --bitmap-min | Num | Sets minimum bits allowed for bitmaps to X | --bitmap-min=24", " --bitmap-max | Num | Sets maximum bits allowed for bitmaps to X | --bitmap-max=24", " --cpu-affinity | Str | Locks to CPU devices, separated with commas | --cpu-affinity=1,2,3", + " --example-hashes | | Show an example hash for each hash-mode |", " -I, --opencl-info | | Show info about detected OpenCL platforms/devices | -I", " --opencl-platforms | Str | OpenCL platforms to use, separated with commas | --opencl-platforms=2", " -d, --opencl-devices | Str | OpenCL devices to use, separated with commas | -d 1", diff --git a/src/user_options.c b/src/user_options.c index 766bf7fab..dc0c8b46e 100644 --- a/src/user_options.c +++ b/src/user_options.c @@ -32,6 +32,7 @@ static const struct option long_options[] = {"debug-mode", required_argument, 0, IDX_DEBUG_MODE}, {"encoding-from", required_argument, 0, IDX_ENCODING_FROM}, {"encoding-to", required_argument, 0, IDX_ENCODING_TO}, + {"example-hashes", no_argument, 0, IDX_EXAMPLE_HASHES}, {"force", no_argument, 0, IDX_FORCE}, {"generate-rules-func-max", required_argument, 0, IDX_RP_GEN_FUNC_MAX}, {"generate-rules-func-min", required_argument, 0, IDX_RP_GEN_FUNC_MIN}, @@ -139,6 +140,7 @@ int user_options_init (hashcat_ctx_t *hashcat_ctx) user_options->custom_charset_4 = NULL; user_options->debug_file = NULL; user_options->debug_mode = DEBUG_MODE; + user_options->example_hashes = EXAMPLE_HASHES; user_options->encoding_from = ENCODING_FROM; user_options->encoding_to = ENCODING_TO; user_options->force = FORCE; @@ -171,7 +173,7 @@ int user_options_init (hashcat_ctx_t *hashcat_ctx) user_options->nvidia_spin_damp = NVIDIA_SPIN_DAMP; user_options->opencl_devices = NULL; user_options->opencl_device_types = NULL; - user_options->opencl_info = 0; + user_options->opencl_info = OPENCL_INFO; user_options->opencl_platforms = NULL; user_options->opencl_vector_width = OPENCL_VECTOR_WIDTH; user_options->optimized_kernel_enable = OPTIMIZED_KERNEL_ENABLE; @@ -335,6 +337,7 @@ int user_options_getopt (hashcat_ctx_t *hashcat_ctx, int argc, char **argv) case IDX_ENCODING_TO: user_options->encoding_to = optarg; break; case IDX_INDUCTION_DIR: user_options->induction_dir = optarg; break; case IDX_OUTFILE_CHECK_DIR: user_options->outfile_check_dir = optarg; break; + case IDX_EXAMPLE_HASHES: user_options->example_hashes = true; break; case IDX_FORCE: user_options->force = true; break; case IDX_SELF_TEST_DISABLE: user_options->self_test_disable = true; break; case IDX_SKIP: user_options->skip = atoll (optarg); break; @@ -1028,6 +1031,13 @@ int user_options_sanity (hashcat_ctx_t *hashcat_ctx) show_error = false; } } + else if (user_options->example_hashes == true) + { + if (user_options->hc_argc == 0) + { + show_error = false; + } + } else if (user_options->opencl_info == true) { if (user_options->hc_argc == 0) @@ -1177,14 +1187,19 @@ void user_options_session_auto (hashcat_ctx_t *hashcat_ctx) user_options->session = "benchmark"; } + if (user_options->example_hashes == true) + { + user_options->session = "example_hashes"; + } + if (user_options->speed_only == true) { - user_options->session = "speed-only"; + user_options->session = "speed_only"; } if (user_options->progress_only == true) { - user_options->session = "progress-only"; + user_options->session = "progress_only"; } if (user_options->keyspace == true) @@ -1227,11 +1242,12 @@ void user_options_preprocess (hashcat_ctx_t *hashcat_ctx) // some options can influence or overwrite other options - if (user_options->opencl_info == true - || user_options->keyspace == true - || user_options->stdout_flag == true - || user_options->speed_only == true - || user_options->progress_only == true) + if (user_options->example_hashes == true + || user_options->opencl_info == true + || user_options->keyspace == true + || user_options->stdout_flag == true + || user_options->speed_only == true + || user_options->progress_only == true) { user_options->gpu_temp_disable = true; user_options->left = false; @@ -1277,6 +1293,11 @@ void user_options_preprocess (hashcat_ctx_t *hashcat_ctx) } } + if (user_options->example_hashes == true) + { + user_options->quiet = true; + } + if (user_options->progress_only == true) { user_options->speed_only = true; @@ -1373,7 +1394,11 @@ void user_options_preprocess (hashcat_ctx_t *hashcat_ctx) if (user_options->attack_mode == ATTACK_MODE_BF) { - if (user_options->opencl_info == true) + if (user_options->example_hashes == true) + { + + } + else if (user_options->opencl_info == true) { } @@ -1462,6 +1487,10 @@ void user_options_extra_init (hashcat_ctx_t *hashcat_ctx) if (user_options->benchmark == true) { + } + else if (user_options->example_hashes == true) + { + } else if (user_options->opencl_info == true) { @@ -2107,6 +2136,7 @@ void user_options_logger (hashcat_ctx_t *hashcat_ctx) logfile_top_uint (user_options->bitmap_max); logfile_top_uint (user_options->bitmap_min); logfile_top_uint (user_options->debug_mode); + logfile_top_uint (user_options->example_hashes); logfile_top_uint (user_options->force); logfile_top_uint (user_options->gpu_temp_abort); logfile_top_uint (user_options->gpu_temp_disable); diff --git a/src/wordlist.c b/src/wordlist.c index 30597754e..82395c964 100644 --- a/src/wordlist.c +++ b/src/wordlist.c @@ -498,11 +498,12 @@ int wl_data_init (hashcat_ctx_t *hashcat_ctx) wl_data->enabled = false; - if (user_options->benchmark == true) return 0; - if (user_options->left == true) return 0; - if (user_options->opencl_info == true) return 0; - if (user_options->usage == true) return 0; - if (user_options->version == true) return 0; + if (user_options->benchmark == true) return 0; + if (user_options->example_hashes == true) return 0; + if (user_options->left == true) return 0; + if (user_options->opencl_info == true) return 0; + if (user_options->usage == true) return 0; + if (user_options->version == true) return 0; wl_data->enabled = true; From 18c8de3428e7a75130f5c52da1a57562eccd80fc Mon Sep 17 00:00:00 2001 From: jsteube Date: Tue, 22 Aug 2017 15:19:25 +0200 Subject: [PATCH 11/12] Use zero-copy for main password buffer using CL_MEM_ALLOC_HOST_PTR and hc_clEnqueueMapBuffer() --- src/opencl.c | 62 ++++++++++++++++++++++++++++++++++++++++++-------- src/wordlist.c | 18 +++++++-------- 2 files changed, 61 insertions(+), 19 deletions(-) diff --git a/src/opencl.c b/src/opencl.c index 6b1e3271c..e1a4babc0 100644 --- a/src/opencl.c +++ b/src/opencl.c @@ -975,7 +975,7 @@ int hc_clReleaseContext (hashcat_ctx_t *hashcat_ctx, cl_context context) return 0; } -int hc_clEnqueueMapBuffer (hashcat_ctx_t *hashcat_ctx, cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_read, cl_map_flags map_flags, size_t offset, size_t cb, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event, void **buf) +int hc_clEnqueueMapBuffer (hashcat_ctx_t *hashcat_ctx, cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_map, cl_map_flags map_flags, size_t offset, size_t size, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event, void **buf) { opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx; @@ -983,7 +983,7 @@ int hc_clEnqueueMapBuffer (hashcat_ctx_t *hashcat_ctx, cl_command_queue command_ cl_int CL_err; - *buf = ocl->clEnqueueMapBuffer (command_queue, buffer, blocking_read, map_flags, offset, cb, num_events_in_wait_list, event_wait_list, event, &CL_err); + *buf = ocl->clEnqueueMapBuffer (command_queue, buffer, blocking_map, map_flags, offset, size, num_events_in_wait_list, event_wait_list, event, &CL_err); if (CL_err != CL_SUCCESS) { @@ -1794,7 +1794,17 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT) { - const int CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_pws_buf, CL_TRUE, 0, pws_cnt * sizeof (pw_t), device_param->pws_buf, 0, NULL, NULL); + int CL_rc; + + CL_rc = hc_clEnqueueUnmapMemObject (hashcat_ctx, device_param->command_queue, device_param->d_pws_buf, device_param->pws_buf, 0, NULL, NULL); + + if (CL_rc == -1) return -1; + + CL_rc = hc_clFinish (hashcat_ctx, device_param->command_queue); + + if (CL_rc == -1) return -1; + + CL_rc = hc_clEnqueueMapBuffer (hashcat_ctx, device_param->command_queue, device_param->d_pws_buf, CL_TRUE, CL_MAP_WRITE, 0, device_param->size_pws, 0, NULL, NULL, (void **) &device_param->pws_buf); if (CL_rc == -1) return -1; } @@ -1856,7 +1866,17 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const } } - const int CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_pws_buf, CL_TRUE, 0, pws_cnt * sizeof (pw_t), device_param->pws_buf, 0, NULL, NULL); + int CL_rc; + + CL_rc = hc_clEnqueueUnmapMemObject (hashcat_ctx, device_param->command_queue, device_param->d_pws_buf, device_param->pws_buf, 0, NULL, NULL); + + if (CL_rc == -1) return -1; + + CL_rc = hc_clFinish (hashcat_ctx, device_param->command_queue); + + if (CL_rc == -1) return -1; + + CL_rc = hc_clEnqueueMapBuffer (hashcat_ctx, device_param->command_queue, device_param->d_pws_buf, CL_TRUE, CL_MAP_WRITE, 0, device_param->size_pws, 0, NULL, NULL, (void **) &device_param->pws_buf); if (CL_rc == -1) return -1; } @@ -1864,13 +1884,33 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const { if (user_options->attack_mode == ATTACK_MODE_COMBI) { - const int CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_pws_buf, CL_TRUE, 0, pws_cnt * sizeof (pw_t), device_param->pws_buf, 0, NULL, NULL); + int CL_rc; + + CL_rc = hc_clEnqueueUnmapMemObject (hashcat_ctx, device_param->command_queue, device_param->d_pws_buf, device_param->pws_buf, 0, NULL, NULL); + + if (CL_rc == -1) return -1; + + CL_rc = hc_clFinish (hashcat_ctx, device_param->command_queue); + + if (CL_rc == -1) return -1; + + CL_rc = hc_clEnqueueMapBuffer (hashcat_ctx, device_param->command_queue, device_param->d_pws_buf, CL_TRUE, CL_MAP_WRITE, 0, device_param->size_pws, 0, NULL, NULL, (void **) &device_param->pws_buf); if (CL_rc == -1) return -1; } else if (user_options->attack_mode == ATTACK_MODE_HYBRID1) { - const int CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_pws_buf, CL_TRUE, 0, pws_cnt * sizeof (pw_t), device_param->pws_buf, 0, NULL, NULL); + int CL_rc; + + CL_rc = hc_clEnqueueUnmapMemObject (hashcat_ctx, device_param->command_queue, device_param->d_pws_buf, device_param->pws_buf, 0, NULL, NULL); + + if (CL_rc == -1) return -1; + + CL_rc = hc_clFinish (hashcat_ctx, device_param->command_queue); + + if (CL_rc == -1) return -1; + + CL_rc = hc_clEnqueueMapBuffer (hashcat_ctx, device_param->command_queue, device_param->d_pws_buf, CL_TRUE, CL_MAP_WRITE, 0, device_param->size_pws, 0, NULL, NULL, (void **) &device_param->pws_buf); if (CL_rc == -1) return -1; } @@ -4791,7 +4831,8 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx) * global buffers */ - CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->context, CL_MEM_READ_ONLY, size_pws, NULL, &device_param->d_pws_buf); if (CL_rc == -1) return -1; + CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->context, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, + size_pws, NULL, &device_param->d_pws_buf); if (CL_rc == -1) return -1; CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->context, CL_MEM_READ_ONLY, size_pws_amp, NULL, &device_param->d_pws_amp_buf); if (CL_rc == -1) return -1; CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->context, CL_MEM_READ_WRITE, size_tmps, NULL, &device_param->d_tmps); if (CL_rc == -1) return -1; CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->context, CL_MEM_READ_WRITE, size_hooks, NULL, &device_param->d_hooks); if (CL_rc == -1) return -1; @@ -4885,9 +4926,9 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx) * main host data */ - pw_t *pws_buf = (pw_t *) hcmalloc (size_pws); + CL_rc = hc_clEnqueueMapBuffer (hashcat_ctx, device_param->command_queue, device_param->d_pws_buf, CL_TRUE, CL_MAP_WRITE, 0, device_param->size_pws, 0, NULL, NULL, (void **) &device_param->pws_buf); - device_param->pws_buf = pws_buf; + if (CL_rc == -1) return -1; pw_t *combs_buf = (pw_t *) hccalloc (KERNEL_COMBS, sizeof (pw_t)); @@ -5499,7 +5540,8 @@ void opencl_session_destroy (hashcat_ctx_t *hashcat_ctx) if (device_param->skipped == true) continue; - hcfree (device_param->pws_buf); + hc_clEnqueueUnmapMemObject (hashcat_ctx, device_param->command_queue, device_param->d_pws_buf, device_param->pws_buf, 0, NULL, NULL); + hcfree (device_param->combs_buf); hcfree (device_param->hooks_buf); diff --git a/src/wordlist.c b/src/wordlist.c index 82395c964..60a5f59fb 100644 --- a/src/wordlist.c +++ b/src/wordlist.c @@ -240,8 +240,8 @@ void get_next_word (hashcat_ctx_t *hashcat_ctx, FILE *fd, char **out_buf, u32 *o void pw_add (hc_device_param_t *device_param, const u8 *pw_buf, const int pw_len) { - //if (device_param->pws_cnt < device_param->kernel_power) - //{ + if (device_param->pws_cnt < device_param->kernel_power) + { pw_t *pw = (pw_t *) device_param->pws_buf + device_param->pws_cnt; u8 *ptr = (u8 *) pw->i; @@ -253,13 +253,13 @@ void pw_add (hc_device_param_t *device_param, const u8 *pw_buf, const int pw_len pw->pw_len = pw_len; device_param->pws_cnt++; - //} - //else - //{ - // fprintf (stderr, "BUG pw_add()!!\n"); - // - // return; - //} + } + else + { + fprintf (stderr, "BUG pw_add()!!\n"); + + return; + } } int count_words (hashcat_ctx_t *hashcat_ctx, FILE *fd, const char *dictfile, u64 *result) From 7b71fb803beffda794109b2eb397121ccbb9b5a2 Mon Sep 17 00:00:00 2001 From: jsteube Date: Tue, 22 Aug 2017 15:52:41 +0200 Subject: [PATCH 12/12] Add hash-mode to --example-hashes and use $HEX[] in case the password is in binary --- src/terminal.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 56 insertions(+), 4 deletions(-) diff --git a/src/terminal.c b/src/terminal.c index fc47805a8..d34f30585 100644 --- a/src/terminal.c +++ b/src/terminal.c @@ -525,12 +525,38 @@ void example_hashes (hashcat_ctx_t *hashcat_ctx) { hashconfig_t *hashconfig = hashcat_ctx->hashconfig; - event_log_info (hashcat_ctx, "MODE: %s", strhashtype (hashconfig->hash_mode)); + event_log_info (hashcat_ctx, "MODE: %u", hashconfig->hash_mode); + event_log_info (hashcat_ctx, "TYPE: %s", strhashtype (hashconfig->hash_mode)); if ((hashconfig->st_hash != NULL) && (hashconfig->st_pass != NULL)) { event_log_info (hashcat_ctx, "HASH: %s", hashconfig->st_hash); - event_log_info (hashcat_ctx, "PASS: %s", hashconfig->st_pass); + + if (need_hexify ((const u8 *) hashconfig->st_pass, strlen (hashconfig->st_pass), user_options->separator, 0)) + { + char tmp_buf[HCBUFSIZ_LARGE]; + + int tmp_len = 0; + + tmp_buf[tmp_len++] = '$'; + tmp_buf[tmp_len++] = 'H'; + tmp_buf[tmp_len++] = 'E'; + tmp_buf[tmp_len++] = 'X'; + tmp_buf[tmp_len++] = '['; + + exec_hexify ((const u8 *) hashconfig->st_pass, strlen (hashconfig->st_pass), (u8 *) tmp_buf + tmp_len); + + tmp_len += strlen (hashconfig->st_pass) * 2; + + tmp_buf[tmp_len++] = ']'; + tmp_buf[tmp_len++] = 0; + + event_log_info (hashcat_ctx, "PASS: %s", tmp_buf); + } + else + { + event_log_info (hashcat_ctx, "PASS: %s", hashconfig->st_pass); + } } else { @@ -555,12 +581,38 @@ void example_hashes (hashcat_ctx_t *hashcat_ctx) { hashconfig_t *hashconfig = hashcat_ctx->hashconfig; - event_log_info (hashcat_ctx, "MODE: %s", strhashtype (hashconfig->hash_mode)); + event_log_info (hashcat_ctx, "MODE: %u", hashconfig->hash_mode); + event_log_info (hashcat_ctx, "TYPE: %s", strhashtype (hashconfig->hash_mode)); if ((hashconfig->st_hash != NULL) && (hashconfig->st_pass != NULL)) { event_log_info (hashcat_ctx, "HASH: %s", hashconfig->st_hash); - event_log_info (hashcat_ctx, "PASS: %s", hashconfig->st_pass); + + if (need_hexify ((const u8 *) hashconfig->st_pass, strlen (hashconfig->st_pass), user_options->separator, 0)) + { + char tmp_buf[HCBUFSIZ_LARGE]; + + int tmp_len = 0; + + tmp_buf[tmp_len++] = '$'; + tmp_buf[tmp_len++] = 'H'; + tmp_buf[tmp_len++] = 'E'; + tmp_buf[tmp_len++] = 'X'; + tmp_buf[tmp_len++] = '['; + + exec_hexify ((const u8 *) hashconfig->st_pass, strlen (hashconfig->st_pass), (u8 *) tmp_buf + tmp_len); + + tmp_len += strlen (hashconfig->st_pass) * 2; + + tmp_buf[tmp_len++] = ']'; + tmp_buf[tmp_len++] = 0; + + event_log_info (hashcat_ctx, "PASS: %s", tmp_buf); + } + else + { + event_log_info (hashcat_ctx, "PASS: %s", hashconfig->st_pass); + } } else {