From f96e35649d5dd39f70d3ca9969a9bc590256783f Mon Sep 17 00:00:00 2001 From: Jens Steube Date: Sat, 22 Feb 2020 07:59:58 +0100 Subject: [PATCH] Change bitsliced kernels from 3d to 2d invocation mode for slightly better performance --- OpenCL/m01500_a3-pure.cl | 1108 +++++++++++++++++----------------- OpenCL/m03000_a3-pure.cl | 1142 ++++++++++++++++++------------------ OpenCL/m14000_a3-pure.cl | 918 +++++++++++++---------------- include/types.h | 66 +-- src/backend.c | 132 ++--- src/modules/module_01500.c | 11 +- src/modules/module_03000.c | 2 +- src/modules/module_14000.c | 2 +- src/selftest.c | 2 +- 9 files changed, 1642 insertions(+), 1741 deletions(-) diff --git a/OpenCL/m01500_a3-pure.cl b/OpenCL/m01500_a3-pure.cl index ca612828d..6c45f125b 100644 --- a/OpenCL/m01500_a3-pure.cl +++ b/OpenCL/m01500_a3-pure.cl @@ -1998,296 +1998,293 @@ KERNEL_FQ void m01500_mxx (KERN_ATTR_BITSLICE ()) * inner loop */ - #ifdef IS_CUDA - const u32 pc_pos = (blockIdx.y * blockDim.y) + threadIdx.y; - #else - const u32 pc_pos = get_global_id (1); - #endif - - const u32 il_pos = pc_pos * 32; - - u32 k00 = K00; - u32 k01 = K01; - u32 k02 = K02; - u32 k03 = K03; - u32 k04 = K04; - u32 k05 = K05; - u32 k06 = K06; - u32 k07 = K07; - u32 k08 = K08; - u32 k09 = K09; - u32 k10 = K10; - u32 k11 = K11; - u32 k12 = K12; - u32 k13 = K13; - u32 k14 = K14; - u32 k15 = K15; - u32 k16 = K16; - u32 k17 = K17; - u32 k18 = K18; - u32 k19 = K19; - u32 k20 = K20; - u32 k21 = K21; - u32 k22 = K22; - u32 k23 = K23; - u32 k24 = K24; - u32 k25 = K25; - u32 k26 = K26; - u32 k27 = K27; - - k00 |= words_buf_s[pc_pos].b[ 0]; - k01 |= words_buf_s[pc_pos].b[ 1]; - k02 |= words_buf_s[pc_pos].b[ 2]; - k03 |= words_buf_s[pc_pos].b[ 3]; - k04 |= words_buf_s[pc_pos].b[ 4]; - k05 |= words_buf_s[pc_pos].b[ 5]; - k06 |= words_buf_s[pc_pos].b[ 6]; - k07 |= words_buf_s[pc_pos].b[ 7]; - k08 |= words_buf_s[pc_pos].b[ 8]; - k09 |= words_buf_s[pc_pos].b[ 9]; - k10 |= words_buf_s[pc_pos].b[10]; - k11 |= words_buf_s[pc_pos].b[11]; - k12 |= words_buf_s[pc_pos].b[12]; - k13 |= words_buf_s[pc_pos].b[13]; - k14 |= words_buf_s[pc_pos].b[14]; - k15 |= words_buf_s[pc_pos].b[15]; - k16 |= words_buf_s[pc_pos].b[16]; - k17 |= words_buf_s[pc_pos].b[17]; - k18 |= words_buf_s[pc_pos].b[18]; - k19 |= words_buf_s[pc_pos].b[19]; - k20 |= words_buf_s[pc_pos].b[20]; - k21 |= words_buf_s[pc_pos].b[21]; - k22 |= words_buf_s[pc_pos].b[22]; - k23 |= words_buf_s[pc_pos].b[23]; - k24 |= words_buf_s[pc_pos].b[24]; - k25 |= words_buf_s[pc_pos].b[25]; - k26 |= words_buf_s[pc_pos].b[26]; - k27 |= words_buf_s[pc_pos].b[27]; - - u32 D00 = 0; - u32 D01 = 0; - u32 D02 = 0; - u32 D03 = 0; - u32 D04 = 0; - u32 D05 = 0; - u32 D06 = 0; - u32 D07 = 0; - u32 D08 = 0; - u32 D09 = 0; - u32 D10 = 0; - u32 D11 = 0; - u32 D12 = 0; - u32 D13 = 0; - u32 D14 = 0; - u32 D15 = 0; - u32 D16 = 0; - u32 D17 = 0; - u32 D18 = 0; - u32 D19 = 0; - u32 D20 = 0; - u32 D21 = 0; - u32 D22 = 0; - u32 D23 = 0; - u32 D24 = 0; - u32 D25 = 0; - u32 D26 = 0; - u32 D27 = 0; - u32 D28 = 0; - u32 D29 = 0; - u32 D30 = 0; - u32 D31 = 0; - u32 D32 = 0; - u32 D33 = 0; - u32 D34 = 0; - u32 D35 = 0; - u32 D36 = 0; - u32 D37 = 0; - u32 D38 = 0; - u32 D39 = 0; - u32 D40 = 0; - u32 D41 = 0; - u32 D42 = 0; - u32 D43 = 0; - u32 D44 = 0; - u32 D45 = 0; - u32 D46 = 0; - u32 D47 = 0; - u32 D48 = 0; - u32 D49 = 0; - u32 D50 = 0; - u32 D51 = 0; - u32 D52 = 0; - u32 D53 = 0; - u32 D54 = 0; - u32 D55 = 0; - u32 D56 = 0; - u32 D57 = 0; - u32 D58 = 0; - u32 D59 = 0; - u32 D60 = 0; - u32 D61 = 0; - u32 D62 = 0; - u32 D63 = 0; - - DESCrypt - ( - salt, - k00, k01, k02, k03, k04, k05, k06, - k07, k08, k09, k10, k11, k12, k13, - k14, k15, k16, k17, k18, k19, k20, - k21, k22, k23, k24, k25, k26, k27, - K28, K29, K30, K31, K32, K33, K34, - K35, K36, K37, K38, K39, K40, K41, - K42, K43, K44, K45, K46, K47, K48, - K49, K50, K51, K52, K53, K54, K55, - &D00, &D01, &D02, &D03, &D04, &D05, &D06, &D07, - &D08, &D09, &D10, &D11, &D12, &D13, &D14, &D15, - &D16, &D17, &D18, &D19, &D20, &D21, &D22, &D23, - &D24, &D25, &D26, &D27, &D28, &D29, &D30, &D31, - &D32, &D33, &D34, &D35, &D36, &D37, &D38, &D39, - &D40, &D41, &D42, &D43, &D44, &D45, &D46, &D47, - &D48, &D49, &D50, &D51, &D52, &D53, &D54, &D55, - &D56, &D57, &D58, &D59, &D60, &D61, &D62, &D63 - ); - - u32 out[64]; - - out[ 0] = D00; - out[ 1] = D01; - out[ 2] = D02; - out[ 3] = D03; - out[ 4] = D04; - out[ 5] = D05; - out[ 6] = D06; - out[ 7] = D07; - out[ 8] = D08; - out[ 9] = D09; - out[10] = D10; - out[11] = D11; - out[12] = D12; - out[13] = D13; - out[14] = D14; - out[15] = D15; - out[16] = D16; - out[17] = D17; - out[18] = D18; - out[19] = D19; - out[20] = D20; - out[21] = D21; - out[22] = D22; - out[23] = D23; - out[24] = D24; - out[25] = D25; - out[26] = D26; - out[27] = D27; - out[28] = D28; - out[29] = D29; - out[30] = D30; - out[31] = D31; - out[32] = D32; - out[33] = D33; - out[34] = D34; - out[35] = D35; - out[36] = D36; - out[37] = D37; - out[38] = D38; - out[39] = D39; - out[40] = D40; - out[41] = D41; - out[42] = D42; - out[43] = D43; - out[44] = D44; - out[45] = D45; - out[46] = D46; - out[47] = D47; - out[48] = D48; - out[49] = D49; - out[50] = D50; - out[51] = D51; - out[52] = D52; - out[53] = D53; - out[54] = D54; - out[55] = D55; - out[56] = D56; - out[57] = D57; - out[58] = D58; - out[59] = D59; - out[60] = D60; - out[61] = D61; - out[62] = D62; - out[63] = D63; - - if (digests_cnt < 16) + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += 32) { - for (u32 d = 0; d < digests_cnt; d++) + u32 k00 = K00; + u32 k01 = K01; + u32 k02 = K02; + u32 k03 = K03; + u32 k04 = K04; + u32 k05 = K05; + u32 k06 = K06; + u32 k07 = K07; + u32 k08 = K08; + u32 k09 = K09; + u32 k10 = K10; + u32 k11 = K11; + u32 k12 = K12; + u32 k13 = K13; + u32 k14 = K14; + u32 k15 = K15; + u32 k16 = K16; + u32 k17 = K17; + u32 k18 = K18; + u32 k19 = K19; + u32 k20 = K20; + u32 k21 = K21; + u32 k22 = K22; + u32 k23 = K23; + u32 k24 = K24; + u32 k25 = K25; + u32 k26 = K26; + u32 k27 = K27; + + const u32 pc_pos = il_pos / 32; + + k00 |= words_buf_s[pc_pos].b[ 0]; + k01 |= words_buf_s[pc_pos].b[ 1]; + k02 |= words_buf_s[pc_pos].b[ 2]; + k03 |= words_buf_s[pc_pos].b[ 3]; + k04 |= words_buf_s[pc_pos].b[ 4]; + k05 |= words_buf_s[pc_pos].b[ 5]; + k06 |= words_buf_s[pc_pos].b[ 6]; + k07 |= words_buf_s[pc_pos].b[ 7]; + k08 |= words_buf_s[pc_pos].b[ 8]; + k09 |= words_buf_s[pc_pos].b[ 9]; + k10 |= words_buf_s[pc_pos].b[10]; + k11 |= words_buf_s[pc_pos].b[11]; + k12 |= words_buf_s[pc_pos].b[12]; + k13 |= words_buf_s[pc_pos].b[13]; + k14 |= words_buf_s[pc_pos].b[14]; + k15 |= words_buf_s[pc_pos].b[15]; + k16 |= words_buf_s[pc_pos].b[16]; + k17 |= words_buf_s[pc_pos].b[17]; + k18 |= words_buf_s[pc_pos].b[18]; + k19 |= words_buf_s[pc_pos].b[19]; + k20 |= words_buf_s[pc_pos].b[20]; + k21 |= words_buf_s[pc_pos].b[21]; + k22 |= words_buf_s[pc_pos].b[22]; + k23 |= words_buf_s[pc_pos].b[23]; + k24 |= words_buf_s[pc_pos].b[24]; + k25 |= words_buf_s[pc_pos].b[25]; + k26 |= words_buf_s[pc_pos].b[26]; + k27 |= words_buf_s[pc_pos].b[27]; + + u32 D00 = 0; + u32 D01 = 0; + u32 D02 = 0; + u32 D03 = 0; + u32 D04 = 0; + u32 D05 = 0; + u32 D06 = 0; + u32 D07 = 0; + u32 D08 = 0; + u32 D09 = 0; + u32 D10 = 0; + u32 D11 = 0; + u32 D12 = 0; + u32 D13 = 0; + u32 D14 = 0; + u32 D15 = 0; + u32 D16 = 0; + u32 D17 = 0; + u32 D18 = 0; + u32 D19 = 0; + u32 D20 = 0; + u32 D21 = 0; + u32 D22 = 0; + u32 D23 = 0; + u32 D24 = 0; + u32 D25 = 0; + u32 D26 = 0; + u32 D27 = 0; + u32 D28 = 0; + u32 D29 = 0; + u32 D30 = 0; + u32 D31 = 0; + u32 D32 = 0; + u32 D33 = 0; + u32 D34 = 0; + u32 D35 = 0; + u32 D36 = 0; + u32 D37 = 0; + u32 D38 = 0; + u32 D39 = 0; + u32 D40 = 0; + u32 D41 = 0; + u32 D42 = 0; + u32 D43 = 0; + u32 D44 = 0; + u32 D45 = 0; + u32 D46 = 0; + u32 D47 = 0; + u32 D48 = 0; + u32 D49 = 0; + u32 D50 = 0; + u32 D51 = 0; + u32 D52 = 0; + u32 D53 = 0; + u32 D54 = 0; + u32 D55 = 0; + u32 D56 = 0; + u32 D57 = 0; + u32 D58 = 0; + u32 D59 = 0; + u32 D60 = 0; + u32 D61 = 0; + u32 D62 = 0; + u32 D63 = 0; + + DESCrypt + ( + salt, + k00, k01, k02, k03, k04, k05, k06, + k07, k08, k09, k10, k11, k12, k13, + k14, k15, k16, k17, k18, k19, k20, + k21, k22, k23, k24, k25, k26, k27, + K28, K29, K30, K31, K32, K33, K34, + K35, K36, K37, K38, K39, K40, K41, + K42, K43, K44, K45, K46, K47, K48, + K49, K50, K51, K52, K53, K54, K55, + &D00, &D01, &D02, &D03, &D04, &D05, &D06, &D07, + &D08, &D09, &D10, &D11, &D12, &D13, &D14, &D15, + &D16, &D17, &D18, &D19, &D20, &D21, &D22, &D23, + &D24, &D25, &D26, &D27, &D28, &D29, &D30, &D31, + &D32, &D33, &D34, &D35, &D36, &D37, &D38, &D39, + &D40, &D41, &D42, &D43, &D44, &D45, &D46, &D47, + &D48, &D49, &D50, &D51, &D52, &D53, &D54, &D55, + &D56, &D57, &D58, &D59, &D60, &D61, &D62, &D63 + ); + + u32 out[64]; + + out[ 0] = D00; + out[ 1] = D01; + out[ 2] = D02; + out[ 3] = D03; + out[ 4] = D04; + out[ 5] = D05; + out[ 6] = D06; + out[ 7] = D07; + out[ 8] = D08; + out[ 9] = D09; + out[10] = D10; + out[11] = D11; + out[12] = D12; + out[13] = D13; + out[14] = D14; + out[15] = D15; + out[16] = D16; + out[17] = D17; + out[18] = D18; + out[19] = D19; + out[20] = D20; + out[21] = D21; + out[22] = D22; + out[23] = D23; + out[24] = D24; + out[25] = D25; + out[26] = D26; + out[27] = D27; + out[28] = D28; + out[29] = D29; + out[30] = D30; + out[31] = D31; + out[32] = D32; + out[33] = D33; + out[34] = D34; + out[35] = D35; + out[36] = D36; + out[37] = D37; + out[38] = D38; + out[39] = D39; + out[40] = D40; + out[41] = D41; + out[42] = D42; + out[43] = D43; + out[44] = D44; + out[45] = D45; + out[46] = D46; + out[47] = D47; + out[48] = D48; + out[49] = D49; + out[50] = D50; + out[51] = D51; + out[52] = D52; + out[53] = D53; + out[54] = D54; + out[55] = D55; + out[56] = D56; + out[57] = D57; + out[58] = D58; + out[59] = D59; + out[60] = D60; + out[61] = D61; + out[62] = D62; + out[63] = D63; + + if (digests_cnt < 16) { - const u32 final_hash_pos = digests_offset + d; + for (u32 d = 0; d < digests_cnt; d++) + { + const u32 final_hash_pos = digests_offset + d; - if (hashes_shown[final_hash_pos]) continue; + if (hashes_shown[final_hash_pos]) continue; - u32 search[2]; + u32 search[2]; - search[0] = digests_buf[final_hash_pos].digest_buf[DGST_R0]; - search[1] = digests_buf[final_hash_pos].digest_buf[DGST_R1]; + search[0] = digests_buf[final_hash_pos].digest_buf[DGST_R0]; + search[1] = digests_buf[final_hash_pos].digest_buf[DGST_R1]; - u32 tmpResult = 0; + u32 tmpResult = 0; + + #ifdef _unroll + #pragma unroll + #endif + for (int i = 0; i < 32; i++) + { + const u32 b0 = -((search[0] >> i) & 1); + const u32 b1 = -((search[1] >> i) & 1); + + tmpResult |= out[ 0 + i] ^ b0; + tmpResult |= out[32 + i] ^ b1; + } + + if (tmpResult == 0xffffffff) continue; + + const u32 slice = ffz (tmpResult); + + const u32 r0 = search[0]; + const u32 r1 = search[1]; + const u32 r2 = 0; + const u32 r3 = 0; + + #ifdef KERNEL_STATIC + #include COMPARE_M + #endif + } + } + else + { + u32 out0[32]; + u32 out1[32]; #ifdef _unroll #pragma unroll #endif for (int i = 0; i < 32; i++) { - const u32 b0 = -((search[0] >> i) & 1); - const u32 b1 = -((search[1] >> i) & 1); - - tmpResult |= out[ 0 + i] ^ b0; - tmpResult |= out[32 + i] ^ b1; + out0[i] = out[ 0 + 31 - i]; + out1[i] = out[32 + 31 - i]; } - if (tmpResult == 0xffffffff) continue; + transpose32c (out0); + transpose32c (out1); - const u32 slice = ffz (tmpResult); - - const u32 r0 = search[0]; - const u32 r1 = search[1]; - const u32 r2 = 0; - const u32 r3 = 0; - - #ifdef KERNEL_STATIC - #include COMPARE_M + #ifdef _unroll + #pragma unroll #endif - } - } - else - { - u32 out0[32]; - u32 out1[32]; + for (int slice = 0; slice < 32; slice++) + { + const u32 r0 = out0[31 - slice]; + const u32 r1 = out1[31 - slice]; + const u32 r2 = 0; + #ifdef KERNEL_STATIC + const u32 r3 = 0; + #endif - #ifdef _unroll - #pragma unroll - #endif - for (int i = 0; i < 32; i++) - { - out0[i] = out[ 0 + 31 - i]; - out1[i] = out[32 + 31 - i]; - } - - transpose32c (out0); - transpose32c (out1); - - #ifdef _unroll - #pragma unroll - #endif - for (int slice = 0; slice < 32; slice++) - { - const u32 r0 = out0[31 - slice]; - const u32 r1 = out1[31 - slice]; - const u32 r2 = 0; - #ifdef KERNEL_STATIC - const u32 r3 = 0; - #endif - - #include COMPARE_M + #include COMPARE_M + } } } } @@ -2314,70 +2311,70 @@ KERNEL_FQ void m01500_sxx (KERN_ATTR_BITSLICE ()) const u32 s0 = digests_buf[0].digest_buf[0]; const u32 s1 = digests_buf[0].digest_buf[1]; - #define S00 (((s0 >> 0) & 1) ? -1 : 0) - #define S01 (((s0 >> 1) & 1) ? -1 : 0) - #define S02 (((s0 >> 2) & 1) ? -1 : 0) - #define S03 (((s0 >> 3) & 1) ? -1 : 0) - #define S04 (((s0 >> 4) & 1) ? -1 : 0) - #define S05 (((s0 >> 5) & 1) ? -1 : 0) - #define S06 (((s0 >> 6) & 1) ? -1 : 0) - #define S07 (((s0 >> 7) & 1) ? -1 : 0) - #define S08 (((s0 >> 8) & 1) ? -1 : 0) - #define S09 (((s0 >> 9) & 1) ? -1 : 0) - #define S10 (((s0 >> 10) & 1) ? -1 : 0) - #define S11 (((s0 >> 11) & 1) ? -1 : 0) - #define S12 (((s0 >> 12) & 1) ? -1 : 0) - #define S13 (((s0 >> 13) & 1) ? -1 : 0) - #define S14 (((s0 >> 14) & 1) ? -1 : 0) - #define S15 (((s0 >> 15) & 1) ? -1 : 0) - #define S16 (((s0 >> 16) & 1) ? -1 : 0) - #define S17 (((s0 >> 17) & 1) ? -1 : 0) - #define S18 (((s0 >> 18) & 1) ? -1 : 0) - #define S19 (((s0 >> 19) & 1) ? -1 : 0) - #define S20 (((s0 >> 20) & 1) ? -1 : 0) - #define S21 (((s0 >> 21) & 1) ? -1 : 0) - #define S22 (((s0 >> 22) & 1) ? -1 : 0) - #define S23 (((s0 >> 23) & 1) ? -1 : 0) - #define S24 (((s0 >> 24) & 1) ? -1 : 0) - #define S25 (((s0 >> 25) & 1) ? -1 : 0) - #define S26 (((s0 >> 26) & 1) ? -1 : 0) - #define S27 (((s0 >> 27) & 1) ? -1 : 0) - #define S28 (((s0 >> 28) & 1) ? -1 : 0) - #define S29 (((s0 >> 29) & 1) ? -1 : 0) - #define S30 (((s0 >> 30) & 1) ? -1 : 0) - #define S31 (((s0 >> 31) & 1) ? -1 : 0) - #define S32 (((s1 >> 0) & 1) ? -1 : 0) - #define S33 (((s1 >> 1) & 1) ? -1 : 0) - #define S34 (((s1 >> 2) & 1) ? -1 : 0) - #define S35 (((s1 >> 3) & 1) ? -1 : 0) - #define S36 (((s1 >> 4) & 1) ? -1 : 0) - #define S37 (((s1 >> 5) & 1) ? -1 : 0) - #define S38 (((s1 >> 6) & 1) ? -1 : 0) - #define S39 (((s1 >> 7) & 1) ? -1 : 0) - #define S40 (((s1 >> 8) & 1) ? -1 : 0) - #define S41 (((s1 >> 9) & 1) ? -1 : 0) - #define S42 (((s1 >> 10) & 1) ? -1 : 0) - #define S43 (((s1 >> 11) & 1) ? -1 : 0) - #define S44 (((s1 >> 12) & 1) ? -1 : 0) - #define S45 (((s1 >> 13) & 1) ? -1 : 0) - #define S46 (((s1 >> 14) & 1) ? -1 : 0) - #define S47 (((s1 >> 15) & 1) ? -1 : 0) - #define S48 (((s1 >> 16) & 1) ? -1 : 0) - #define S49 (((s1 >> 17) & 1) ? -1 : 0) - #define S50 (((s1 >> 18) & 1) ? -1 : 0) - #define S51 (((s1 >> 19) & 1) ? -1 : 0) - #define S52 (((s1 >> 20) & 1) ? -1 : 0) - #define S53 (((s1 >> 21) & 1) ? -1 : 0) - #define S54 (((s1 >> 22) & 1) ? -1 : 0) - #define S55 (((s1 >> 23) & 1) ? -1 : 0) - #define S56 (((s1 >> 24) & 1) ? -1 : 0) - #define S57 (((s1 >> 25) & 1) ? -1 : 0) - #define S58 (((s1 >> 26) & 1) ? -1 : 0) - #define S59 (((s1 >> 27) & 1) ? -1 : 0) - #define S60 (((s1 >> 28) & 1) ? -1 : 0) - #define S61 (((s1 >> 29) & 1) ? -1 : 0) - #define S62 (((s1 >> 30) & 1) ? -1 : 0) - #define S63 (((s1 >> 31) & 1) ? -1 : 0) + const u32 S00 = (((s0 >> 0) & 1) ? -1 : 0); + const u32 S01 = (((s0 >> 1) & 1) ? -1 : 0); + const u32 S02 = (((s0 >> 2) & 1) ? -1 : 0); + const u32 S03 = (((s0 >> 3) & 1) ? -1 : 0); + const u32 S04 = (((s0 >> 4) & 1) ? -1 : 0); + const u32 S05 = (((s0 >> 5) & 1) ? -1 : 0); + const u32 S06 = (((s0 >> 6) & 1) ? -1 : 0); + const u32 S07 = (((s0 >> 7) & 1) ? -1 : 0); + const u32 S08 = (((s0 >> 8) & 1) ? -1 : 0); + const u32 S09 = (((s0 >> 9) & 1) ? -1 : 0); + const u32 S10 = (((s0 >> 10) & 1) ? -1 : 0); + const u32 S11 = (((s0 >> 11) & 1) ? -1 : 0); + const u32 S12 = (((s0 >> 12) & 1) ? -1 : 0); + const u32 S13 = (((s0 >> 13) & 1) ? -1 : 0); + const u32 S14 = (((s0 >> 14) & 1) ? -1 : 0); + const u32 S15 = (((s0 >> 15) & 1) ? -1 : 0); + const u32 S16 = (((s0 >> 16) & 1) ? -1 : 0); + const u32 S17 = (((s0 >> 17) & 1) ? -1 : 0); + const u32 S18 = (((s0 >> 18) & 1) ? -1 : 0); + const u32 S19 = (((s0 >> 19) & 1) ? -1 : 0); + const u32 S20 = (((s0 >> 20) & 1) ? -1 : 0); + const u32 S21 = (((s0 >> 21) & 1) ? -1 : 0); + const u32 S22 = (((s0 >> 22) & 1) ? -1 : 0); + const u32 S23 = (((s0 >> 23) & 1) ? -1 : 0); + const u32 S24 = (((s0 >> 24) & 1) ? -1 : 0); + const u32 S25 = (((s0 >> 25) & 1) ? -1 : 0); + const u32 S26 = (((s0 >> 26) & 1) ? -1 : 0); + const u32 S27 = (((s0 >> 27) & 1) ? -1 : 0); + const u32 S28 = (((s0 >> 28) & 1) ? -1 : 0); + const u32 S29 = (((s0 >> 29) & 1) ? -1 : 0); + const u32 S30 = (((s0 >> 30) & 1) ? -1 : 0); + const u32 S31 = (((s0 >> 31) & 1) ? -1 : 0); + const u32 S32 = (((s1 >> 0) & 1) ? -1 : 0); + const u32 S33 = (((s1 >> 1) & 1) ? -1 : 0); + const u32 S34 = (((s1 >> 2) & 1) ? -1 : 0); + const u32 S35 = (((s1 >> 3) & 1) ? -1 : 0); + const u32 S36 = (((s1 >> 4) & 1) ? -1 : 0); + const u32 S37 = (((s1 >> 5) & 1) ? -1 : 0); + const u32 S38 = (((s1 >> 6) & 1) ? -1 : 0); + const u32 S39 = (((s1 >> 7) & 1) ? -1 : 0); + const u32 S40 = (((s1 >> 8) & 1) ? -1 : 0); + const u32 S41 = (((s1 >> 9) & 1) ? -1 : 0); + const u32 S42 = (((s1 >> 10) & 1) ? -1 : 0); + const u32 S43 = (((s1 >> 11) & 1) ? -1 : 0); + const u32 S44 = (((s1 >> 12) & 1) ? -1 : 0); + const u32 S45 = (((s1 >> 13) & 1) ? -1 : 0); + const u32 S46 = (((s1 >> 14) & 1) ? -1 : 0); + const u32 S47 = (((s1 >> 15) & 1) ? -1 : 0); + const u32 S48 = (((s1 >> 16) & 1) ? -1 : 0); + const u32 S49 = (((s1 >> 17) & 1) ? -1 : 0); + const u32 S50 = (((s1 >> 18) & 1) ? -1 : 0); + const u32 S51 = (((s1 >> 19) & 1) ? -1 : 0); + const u32 S52 = (((s1 >> 20) & 1) ? -1 : 0); + const u32 S53 = (((s1 >> 21) & 1) ? -1 : 0); + const u32 S54 = (((s1 >> 22) & 1) ? -1 : 0); + const u32 S55 = (((s1 >> 23) & 1) ? -1 : 0); + const u32 S56 = (((s1 >> 24) & 1) ? -1 : 0); + const u32 S57 = (((s1 >> 25) & 1) ? -1 : 0); + const u32 S58 = (((s1 >> 26) & 1) ? -1 : 0); + const u32 S59 = (((s1 >> 27) & 1) ? -1 : 0); + const u32 S60 = (((s1 >> 28) & 1) ? -1 : 0); + const u32 S61 = (((s1 >> 29) & 1) ? -1 : 0); + const u32 S62 = (((s1 >> 30) & 1) ? -1 : 0); + const u32 S63 = (((s1 >> 31) & 1) ? -1 : 0); /** * base @@ -2450,239 +2447,236 @@ KERNEL_FQ void m01500_sxx (KERN_ATTR_BITSLICE ()) * inner loop */ - #ifdef IS_CUDA - const u32 pc_pos = (blockIdx.y * blockDim.y) + threadIdx.y; - #else - const u32 pc_pos = get_global_id (1); - #endif + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += 32) + { + u32 k00 = K00; + u32 k01 = K01; + u32 k02 = K02; + u32 k03 = K03; + u32 k04 = K04; + u32 k05 = K05; + u32 k06 = K06; + u32 k07 = K07; + u32 k08 = K08; + u32 k09 = K09; + u32 k10 = K10; + u32 k11 = K11; + u32 k12 = K12; + u32 k13 = K13; + u32 k14 = K14; + u32 k15 = K15; + u32 k16 = K16; + u32 k17 = K17; + u32 k18 = K18; + u32 k19 = K19; + u32 k20 = K20; + u32 k21 = K21; + u32 k22 = K22; + u32 k23 = K23; + u32 k24 = K24; + u32 k25 = K25; + u32 k26 = K26; + u32 k27 = K27; - const u32 il_pos = pc_pos * 32; + const u32 pc_pos = il_pos / 32; - u32 k00 = K00; - u32 k01 = K01; - u32 k02 = K02; - u32 k03 = K03; - u32 k04 = K04; - u32 k05 = K05; - u32 k06 = K06; - u32 k07 = K07; - u32 k08 = K08; - u32 k09 = K09; - u32 k10 = K10; - u32 k11 = K11; - u32 k12 = K12; - u32 k13 = K13; - u32 k14 = K14; - u32 k15 = K15; - u32 k16 = K16; - u32 k17 = K17; - u32 k18 = K18; - u32 k19 = K19; - u32 k20 = K20; - u32 k21 = K21; - u32 k22 = K22; - u32 k23 = K23; - u32 k24 = K24; - u32 k25 = K25; - u32 k26 = K26; - u32 k27 = K27; + k00 |= words_buf_s[pc_pos].b[ 0]; + k01 |= words_buf_s[pc_pos].b[ 1]; + k02 |= words_buf_s[pc_pos].b[ 2]; + k03 |= words_buf_s[pc_pos].b[ 3]; + k04 |= words_buf_s[pc_pos].b[ 4]; + k05 |= words_buf_s[pc_pos].b[ 5]; + k06 |= words_buf_s[pc_pos].b[ 6]; + k07 |= words_buf_s[pc_pos].b[ 7]; + k08 |= words_buf_s[pc_pos].b[ 8]; + k09 |= words_buf_s[pc_pos].b[ 9]; + k10 |= words_buf_s[pc_pos].b[10]; + k11 |= words_buf_s[pc_pos].b[11]; + k12 |= words_buf_s[pc_pos].b[12]; + k13 |= words_buf_s[pc_pos].b[13]; + k14 |= words_buf_s[pc_pos].b[14]; + k15 |= words_buf_s[pc_pos].b[15]; + k16 |= words_buf_s[pc_pos].b[16]; + k17 |= words_buf_s[pc_pos].b[17]; + k18 |= words_buf_s[pc_pos].b[18]; + k19 |= words_buf_s[pc_pos].b[19]; + k20 |= words_buf_s[pc_pos].b[20]; + k21 |= words_buf_s[pc_pos].b[21]; + k22 |= words_buf_s[pc_pos].b[22]; + k23 |= words_buf_s[pc_pos].b[23]; + k24 |= words_buf_s[pc_pos].b[24]; + k25 |= words_buf_s[pc_pos].b[25]; + k26 |= words_buf_s[pc_pos].b[26]; + k27 |= words_buf_s[pc_pos].b[27]; - k00 |= words_buf_s[pc_pos].b[ 0]; - k01 |= words_buf_s[pc_pos].b[ 1]; - k02 |= words_buf_s[pc_pos].b[ 2]; - k03 |= words_buf_s[pc_pos].b[ 3]; - k04 |= words_buf_s[pc_pos].b[ 4]; - k05 |= words_buf_s[pc_pos].b[ 5]; - k06 |= words_buf_s[pc_pos].b[ 6]; - k07 |= words_buf_s[pc_pos].b[ 7]; - k08 |= words_buf_s[pc_pos].b[ 8]; - k09 |= words_buf_s[pc_pos].b[ 9]; - k10 |= words_buf_s[pc_pos].b[10]; - k11 |= words_buf_s[pc_pos].b[11]; - k12 |= words_buf_s[pc_pos].b[12]; - k13 |= words_buf_s[pc_pos].b[13]; - k14 |= words_buf_s[pc_pos].b[14]; - k15 |= words_buf_s[pc_pos].b[15]; - k16 |= words_buf_s[pc_pos].b[16]; - k17 |= words_buf_s[pc_pos].b[17]; - k18 |= words_buf_s[pc_pos].b[18]; - k19 |= words_buf_s[pc_pos].b[19]; - k20 |= words_buf_s[pc_pos].b[20]; - k21 |= words_buf_s[pc_pos].b[21]; - k22 |= words_buf_s[pc_pos].b[22]; - k23 |= words_buf_s[pc_pos].b[23]; - k24 |= words_buf_s[pc_pos].b[24]; - k25 |= words_buf_s[pc_pos].b[25]; - k26 |= words_buf_s[pc_pos].b[26]; - k27 |= words_buf_s[pc_pos].b[27]; + u32 D00 = 0; + u32 D01 = 0; + u32 D02 = 0; + u32 D03 = 0; + u32 D04 = 0; + u32 D05 = 0; + u32 D06 = 0; + u32 D07 = 0; + u32 D08 = 0; + u32 D09 = 0; + u32 D10 = 0; + u32 D11 = 0; + u32 D12 = 0; + u32 D13 = 0; + u32 D14 = 0; + u32 D15 = 0; + u32 D16 = 0; + u32 D17 = 0; + u32 D18 = 0; + u32 D19 = 0; + u32 D20 = 0; + u32 D21 = 0; + u32 D22 = 0; + u32 D23 = 0; + u32 D24 = 0; + u32 D25 = 0; + u32 D26 = 0; + u32 D27 = 0; + u32 D28 = 0; + u32 D29 = 0; + u32 D30 = 0; + u32 D31 = 0; + u32 D32 = 0; + u32 D33 = 0; + u32 D34 = 0; + u32 D35 = 0; + u32 D36 = 0; + u32 D37 = 0; + u32 D38 = 0; + u32 D39 = 0; + u32 D40 = 0; + u32 D41 = 0; + u32 D42 = 0; + u32 D43 = 0; + u32 D44 = 0; + u32 D45 = 0; + u32 D46 = 0; + u32 D47 = 0; + u32 D48 = 0; + u32 D49 = 0; + u32 D50 = 0; + u32 D51 = 0; + u32 D52 = 0; + u32 D53 = 0; + u32 D54 = 0; + u32 D55 = 0; + u32 D56 = 0; + u32 D57 = 0; + u32 D58 = 0; + u32 D59 = 0; + u32 D60 = 0; + u32 D61 = 0; + u32 D62 = 0; + u32 D63 = 0; - u32 D00 = 0; - u32 D01 = 0; - u32 D02 = 0; - u32 D03 = 0; - u32 D04 = 0; - u32 D05 = 0; - u32 D06 = 0; - u32 D07 = 0; - u32 D08 = 0; - u32 D09 = 0; - u32 D10 = 0; - u32 D11 = 0; - u32 D12 = 0; - u32 D13 = 0; - u32 D14 = 0; - u32 D15 = 0; - u32 D16 = 0; - u32 D17 = 0; - u32 D18 = 0; - u32 D19 = 0; - u32 D20 = 0; - u32 D21 = 0; - u32 D22 = 0; - u32 D23 = 0; - u32 D24 = 0; - u32 D25 = 0; - u32 D26 = 0; - u32 D27 = 0; - u32 D28 = 0; - u32 D29 = 0; - u32 D30 = 0; - u32 D31 = 0; - u32 D32 = 0; - u32 D33 = 0; - u32 D34 = 0; - u32 D35 = 0; - u32 D36 = 0; - u32 D37 = 0; - u32 D38 = 0; - u32 D39 = 0; - u32 D40 = 0; - u32 D41 = 0; - u32 D42 = 0; - u32 D43 = 0; - u32 D44 = 0; - u32 D45 = 0; - u32 D46 = 0; - u32 D47 = 0; - u32 D48 = 0; - u32 D49 = 0; - u32 D50 = 0; - u32 D51 = 0; - u32 D52 = 0; - u32 D53 = 0; - u32 D54 = 0; - u32 D55 = 0; - u32 D56 = 0; - u32 D57 = 0; - u32 D58 = 0; - u32 D59 = 0; - u32 D60 = 0; - u32 D61 = 0; - u32 D62 = 0; - u32 D63 = 0; + DESCrypt + ( + salt, + k00, k01, k02, k03, k04, k05, k06, + k07, k08, k09, k10, k11, k12, k13, + k14, k15, k16, k17, k18, k19, k20, + k21, k22, k23, k24, k25, k26, k27, + K28, K29, K30, K31, K32, K33, K34, + K35, K36, K37, K38, K39, K40, K41, + K42, K43, K44, K45, K46, K47, K48, + K49, K50, K51, K52, K53, K54, K55, + &D00, &D01, &D02, &D03, &D04, &D05, &D06, &D07, + &D08, &D09, &D10, &D11, &D12, &D13, &D14, &D15, + &D16, &D17, &D18, &D19, &D20, &D21, &D22, &D23, + &D24, &D25, &D26, &D27, &D28, &D29, &D30, &D31, + &D32, &D33, &D34, &D35, &D36, &D37, &D38, &D39, + &D40, &D41, &D42, &D43, &D44, &D45, &D46, &D47, + &D48, &D49, &D50, &D51, &D52, &D53, &D54, &D55, + &D56, &D57, &D58, &D59, &D60, &D61, &D62, &D63 + ); - DESCrypt - ( - salt, - k00, k01, k02, k03, k04, k05, k06, - k07, k08, k09, k10, k11, k12, k13, - k14, k15, k16, k17, k18, k19, k20, - k21, k22, k23, k24, k25, k26, k27, - K28, K29, K30, K31, K32, K33, K34, - K35, K36, K37, K38, K39, K40, K41, - K42, K43, K44, K45, K46, K47, K48, - K49, K50, K51, K52, K53, K54, K55, - &D00, &D01, &D02, &D03, &D04, &D05, &D06, &D07, - &D08, &D09, &D10, &D11, &D12, &D13, &D14, &D15, - &D16, &D17, &D18, &D19, &D20, &D21, &D22, &D23, - &D24, &D25, &D26, &D27, &D28, &D29, &D30, &D31, - &D32, &D33, &D34, &D35, &D36, &D37, &D38, &D39, - &D40, &D41, &D42, &D43, &D44, &D45, &D46, &D47, - &D48, &D49, &D50, &D51, &D52, &D53, &D54, &D55, - &D56, &D57, &D58, &D59, &D60, &D61, &D62, &D63 - ); + u32 tmpResult = 0; - u32 tmpResult = 0; + tmpResult |= D00 ^ S00; + tmpResult |= D01 ^ S01; + tmpResult |= D02 ^ S02; + tmpResult |= D03 ^ S03; + tmpResult |= D04 ^ S04; + tmpResult |= D05 ^ S05; + tmpResult |= D06 ^ S06; + tmpResult |= D07 ^ S07; + tmpResult |= D08 ^ S08; + tmpResult |= D09 ^ S09; + tmpResult |= D10 ^ S10; + tmpResult |= D11 ^ S11; + tmpResult |= D12 ^ S12; + tmpResult |= D13 ^ S13; + tmpResult |= D14 ^ S14; + tmpResult |= D15 ^ S15; - tmpResult |= D00 ^ S00; - tmpResult |= D01 ^ S01; - tmpResult |= D02 ^ S02; - tmpResult |= D03 ^ S03; - tmpResult |= D04 ^ S04; - tmpResult |= D05 ^ S05; - tmpResult |= D06 ^ S06; - tmpResult |= D07 ^ S07; - tmpResult |= D08 ^ S08; - tmpResult |= D09 ^ S09; - tmpResult |= D10 ^ S10; - tmpResult |= D11 ^ S11; - tmpResult |= D12 ^ S12; - tmpResult |= D13 ^ S13; - tmpResult |= D14 ^ S14; - tmpResult |= D15 ^ S15; + if (tmpResult == 0xffffffff) continue; - if (tmpResult == 0xffffffff) return; + tmpResult |= D16 ^ S16; + tmpResult |= D17 ^ S17; + tmpResult |= D18 ^ S18; + tmpResult |= D19 ^ S19; + tmpResult |= D20 ^ S20; + tmpResult |= D21 ^ S21; + tmpResult |= D22 ^ S22; + tmpResult |= D23 ^ S23; + tmpResult |= D24 ^ S24; + tmpResult |= D25 ^ S25; + tmpResult |= D26 ^ S26; + tmpResult |= D27 ^ S27; + tmpResult |= D28 ^ S28; + tmpResult |= D29 ^ S29; + tmpResult |= D30 ^ S30; + tmpResult |= D31 ^ S31; - tmpResult |= D16 ^ S16; - tmpResult |= D17 ^ S17; - tmpResult |= D18 ^ S18; - tmpResult |= D19 ^ S19; - tmpResult |= D20 ^ S20; - tmpResult |= D21 ^ S21; - tmpResult |= D22 ^ S22; - tmpResult |= D23 ^ S23; - tmpResult |= D24 ^ S24; - tmpResult |= D25 ^ S25; - tmpResult |= D26 ^ S26; - tmpResult |= D27 ^ S27; - tmpResult |= D28 ^ S28; - tmpResult |= D29 ^ S29; - tmpResult |= D30 ^ S30; - tmpResult |= D31 ^ S31; + if (tmpResult == 0xffffffff) continue; - if (tmpResult == 0xffffffff) return; + tmpResult |= D32 ^ S32; + tmpResult |= D33 ^ S33; + tmpResult |= D34 ^ S34; + tmpResult |= D35 ^ S35; + tmpResult |= D36 ^ S36; + tmpResult |= D37 ^ S37; + tmpResult |= D38 ^ S38; + tmpResult |= D39 ^ S39; + tmpResult |= D40 ^ S40; + tmpResult |= D41 ^ S41; + tmpResult |= D42 ^ S42; + tmpResult |= D43 ^ S43; + tmpResult |= D44 ^ S44; + tmpResult |= D45 ^ S45; + tmpResult |= D46 ^ S46; + tmpResult |= D47 ^ S47; - tmpResult |= D32 ^ S32; - tmpResult |= D33 ^ S33; - tmpResult |= D34 ^ S34; - tmpResult |= D35 ^ S35; - tmpResult |= D36 ^ S36; - tmpResult |= D37 ^ S37; - tmpResult |= D38 ^ S38; - tmpResult |= D39 ^ S39; - tmpResult |= D40 ^ S40; - tmpResult |= D41 ^ S41; - tmpResult |= D42 ^ S42; - tmpResult |= D43 ^ S43; - tmpResult |= D44 ^ S44; - tmpResult |= D45 ^ S45; - tmpResult |= D46 ^ S46; - tmpResult |= D47 ^ S47; + if (tmpResult == 0xffffffff) continue; - if (tmpResult == 0xffffffff) return; + tmpResult |= D48 ^ S48; + tmpResult |= D49 ^ S49; + tmpResult |= D50 ^ S50; + tmpResult |= D51 ^ S51; + tmpResult |= D52 ^ S52; + tmpResult |= D53 ^ S53; + tmpResult |= D54 ^ S54; + tmpResult |= D55 ^ S55; + tmpResult |= D56 ^ S56; + tmpResult |= D57 ^ S57; + tmpResult |= D58 ^ S58; + tmpResult |= D59 ^ S59; + tmpResult |= D60 ^ S60; + tmpResult |= D61 ^ S61; + tmpResult |= D62 ^ S62; + tmpResult |= D63 ^ S63; - tmpResult |= D48 ^ S48; - tmpResult |= D49 ^ S49; - tmpResult |= D50 ^ S50; - tmpResult |= D51 ^ S51; - tmpResult |= D52 ^ S52; - tmpResult |= D53 ^ S53; - tmpResult |= D54 ^ S54; - tmpResult |= D55 ^ S55; - tmpResult |= D56 ^ S56; - tmpResult |= D57 ^ S57; - tmpResult |= D58 ^ S58; - tmpResult |= D59 ^ S59; - tmpResult |= D60 ^ S60; - tmpResult |= D61 ^ S61; - tmpResult |= D62 ^ S62; - tmpResult |= D63 ^ S63; + if (tmpResult == 0xffffffff) continue; - if (tmpResult == 0xffffffff) return; + const u32 slice = ffz (tmpResult); - const u32 slice = ffz (tmpResult); - - #ifdef KERNEL_STATIC - #include COMPARE_S - #endif + #ifdef KERNEL_STATIC + #include COMPARE_S + #endif + } } diff --git a/OpenCL/m03000_a3-pure.cl b/OpenCL/m03000_a3-pure.cl index 682edabf4..67f29aa4e 100644 --- a/OpenCL/m03000_a3-pure.cl +++ b/OpenCL/m03000_a3-pure.cl @@ -1830,305 +1830,302 @@ KERNEL_FQ void m03000_mxx (KERN_ATTR_BITSLICE ()) * inner loop */ - #ifdef IS_CUDA - const u32 pc_pos = (blockIdx.y * blockDim.y) + threadIdx.y; - #else - const u32 pc_pos = get_global_id (1); - #endif - - const u32 il_pos = pc_pos * 32; - - u32 k00 = K00; - u32 k01 = K01; - u32 k02 = K02; - u32 k03 = K03; - u32 k04 = K04; - u32 k05 = K05; - u32 k06 = K06; - u32 k07 = K07; - u32 k08 = K08; - u32 k09 = K09; - u32 k10 = K10; - u32 k11 = K11; - u32 k12 = K12; - u32 k13 = K13; - u32 k14 = K14; - u32 k15 = K15; - u32 k16 = K16; - u32 k17 = K17; - u32 k18 = K18; - u32 k19 = K19; - u32 k20 = K20; - u32 k21 = K21; - u32 k22 = K22; - u32 k23 = K23; - u32 k24 = K24; - u32 k25 = K25; - u32 k26 = K26; - u32 k27 = K27; - u32 k28 = K28; - u32 k29 = K29; - u32 k30 = K30; - u32 k31 = K31; - - k00 |= words_buf_s[pc_pos].b[ 0]; - k01 |= words_buf_s[pc_pos].b[ 1]; - k02 |= words_buf_s[pc_pos].b[ 2]; - k03 |= words_buf_s[pc_pos].b[ 3]; - k04 |= words_buf_s[pc_pos].b[ 4]; - k05 |= words_buf_s[pc_pos].b[ 5]; - k06 |= words_buf_s[pc_pos].b[ 6]; - k07 |= words_buf_s[pc_pos].b[ 7]; - k08 |= words_buf_s[pc_pos].b[ 8]; - k09 |= words_buf_s[pc_pos].b[ 9]; - k10 |= words_buf_s[pc_pos].b[10]; - k11 |= words_buf_s[pc_pos].b[11]; - k12 |= words_buf_s[pc_pos].b[12]; - k13 |= words_buf_s[pc_pos].b[13]; - k14 |= words_buf_s[pc_pos].b[14]; - k15 |= words_buf_s[pc_pos].b[15]; - k16 |= words_buf_s[pc_pos].b[16]; - k17 |= words_buf_s[pc_pos].b[17]; - k18 |= words_buf_s[pc_pos].b[18]; - k19 |= words_buf_s[pc_pos].b[19]; - k20 |= words_buf_s[pc_pos].b[20]; - k21 |= words_buf_s[pc_pos].b[21]; - k22 |= words_buf_s[pc_pos].b[22]; - k23 |= words_buf_s[pc_pos].b[23]; - k24 |= words_buf_s[pc_pos].b[24]; - k25 |= words_buf_s[pc_pos].b[25]; - k26 |= words_buf_s[pc_pos].b[26]; - k27 |= words_buf_s[pc_pos].b[27]; - k28 |= words_buf_s[pc_pos].b[28]; - k29 |= words_buf_s[pc_pos].b[29]; - k30 |= words_buf_s[pc_pos].b[30]; - k31 |= words_buf_s[pc_pos].b[31]; - - // KGS!@#$% including IP - - u32 D00 = 0; - u32 D01 = 0; - u32 D02 = 0; - u32 D03 = 0xffffffff; - u32 D04 = 0; - u32 D05 = 0xffffffff; - u32 D06 = 0xffffffff; - u32 D07 = 0xffffffff; - u32 D08 = 0; - u32 D09 = 0; - u32 D10 = 0; - u32 D11 = 0; - u32 D12 = 0; - u32 D13 = 0xffffffff; - u32 D14 = 0; - u32 D15 = 0; - u32 D16 = 0xffffffff; - u32 D17 = 0xffffffff; - u32 D18 = 0; - u32 D19 = 0; - u32 D20 = 0; - u32 D21 = 0; - u32 D22 = 0xffffffff; - u32 D23 = 0; - u32 D24 = 0xffffffff; - u32 D25 = 0; - u32 D26 = 0xffffffff; - u32 D27 = 0; - u32 D28 = 0xffffffff; - u32 D29 = 0xffffffff; - u32 D30 = 0xffffffff; - u32 D31 = 0xffffffff; - u32 D32 = 0; - u32 D33 = 0; - u32 D34 = 0; - u32 D35 = 0; - u32 D36 = 0; - u32 D37 = 0; - u32 D38 = 0; - u32 D39 = 0; - u32 D40 = 0xffffffff; - u32 D41 = 0xffffffff; - u32 D42 = 0xffffffff; - u32 D43 = 0; - u32 D44 = 0xffffffff; - u32 D45 = 0; - u32 D46 = 0; - u32 D47 = 0; - u32 D48 = 0; - u32 D49 = 0; - u32 D50 = 0; - u32 D51 = 0; - u32 D52 = 0; - u32 D53 = 0; - u32 D54 = 0; - u32 D55 = 0xffffffff; - u32 D56 = 0; - u32 D57 = 0; - u32 D58 = 0xffffffff; - u32 D59 = 0; - u32 D60 = 0; - u32 D61 = 0xffffffff; - u32 D62 = 0xffffffff; - u32 D63 = 0xffffffff; - - DES - ( - k00, k01, k02, k03, k04, k05, k06, - k07, k08, k09, k10, k11, k12, k13, - k14, k15, k16, k17, k18, k19, k20, - k21, k22, k23, k24, k25, k26, k27, - k28, k29, k30, k31, K32, K33, K34, - K35, K36, K37, K38, K39, K40, K41, - K42, K43, K44, K45, K46, K47, K48, - K49, K50, K51, K52, K53, K54, K55, - &D00, &D01, &D02, &D03, &D04, &D05, &D06, &D07, - &D08, &D09, &D10, &D11, &D12, &D13, &D14, &D15, - &D16, &D17, &D18, &D19, &D20, &D21, &D22, &D23, - &D24, &D25, &D26, &D27, &D28, &D29, &D30, &D31, - &D32, &D33, &D34, &D35, &D36, &D37, &D38, &D39, - &D40, &D41, &D42, &D43, &D44, &D45, &D46, &D47, - &D48, &D49, &D50, &D51, &D52, &D53, &D54, &D55, - &D56, &D57, &D58, &D59, &D60, &D61, &D62, &D63 - ); - - u32 out[64]; - - out[ 0] = D00; - out[ 1] = D01; - out[ 2] = D02; - out[ 3] = D03; - out[ 4] = D04; - out[ 5] = D05; - out[ 6] = D06; - out[ 7] = D07; - out[ 8] = D08; - out[ 9] = D09; - out[10] = D10; - out[11] = D11; - out[12] = D12; - out[13] = D13; - out[14] = D14; - out[15] = D15; - out[16] = D16; - out[17] = D17; - out[18] = D18; - out[19] = D19; - out[20] = D20; - out[21] = D21; - out[22] = D22; - out[23] = D23; - out[24] = D24; - out[25] = D25; - out[26] = D26; - out[27] = D27; - out[28] = D28; - out[29] = D29; - out[30] = D30; - out[31] = D31; - out[32] = D32; - out[33] = D33; - out[34] = D34; - out[35] = D35; - out[36] = D36; - out[37] = D37; - out[38] = D38; - out[39] = D39; - out[40] = D40; - out[41] = D41; - out[42] = D42; - out[43] = D43; - out[44] = D44; - out[45] = D45; - out[46] = D46; - out[47] = D47; - out[48] = D48; - out[49] = D49; - out[50] = D50; - out[51] = D51; - out[52] = D52; - out[53] = D53; - out[54] = D54; - out[55] = D55; - out[56] = D56; - out[57] = D57; - out[58] = D58; - out[59] = D59; - out[60] = D60; - out[61] = D61; - out[62] = D62; - out[63] = D63; - - if (digests_cnt < 16) + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += 32) { - for (u32 d = 0; d < digests_cnt; d++) + u32 k00 = K00; + u32 k01 = K01; + u32 k02 = K02; + u32 k03 = K03; + u32 k04 = K04; + u32 k05 = K05; + u32 k06 = K06; + u32 k07 = K07; + u32 k08 = K08; + u32 k09 = K09; + u32 k10 = K10; + u32 k11 = K11; + u32 k12 = K12; + u32 k13 = K13; + u32 k14 = K14; + u32 k15 = K15; + u32 k16 = K16; + u32 k17 = K17; + u32 k18 = K18; + u32 k19 = K19; + u32 k20 = K20; + u32 k21 = K21; + u32 k22 = K22; + u32 k23 = K23; + u32 k24 = K24; + u32 k25 = K25; + u32 k26 = K26; + u32 k27 = K27; + u32 k28 = K28; + u32 k29 = K29; + u32 k30 = K30; + u32 k31 = K31; + + const u32 pc_pos = il_pos / 32; + + k00 |= words_buf_s[pc_pos].b[ 0]; + k01 |= words_buf_s[pc_pos].b[ 1]; + k02 |= words_buf_s[pc_pos].b[ 2]; + k03 |= words_buf_s[pc_pos].b[ 3]; + k04 |= words_buf_s[pc_pos].b[ 4]; + k05 |= words_buf_s[pc_pos].b[ 5]; + k06 |= words_buf_s[pc_pos].b[ 6]; + k07 |= words_buf_s[pc_pos].b[ 7]; + k08 |= words_buf_s[pc_pos].b[ 8]; + k09 |= words_buf_s[pc_pos].b[ 9]; + k10 |= words_buf_s[pc_pos].b[10]; + k11 |= words_buf_s[pc_pos].b[11]; + k12 |= words_buf_s[pc_pos].b[12]; + k13 |= words_buf_s[pc_pos].b[13]; + k14 |= words_buf_s[pc_pos].b[14]; + k15 |= words_buf_s[pc_pos].b[15]; + k16 |= words_buf_s[pc_pos].b[16]; + k17 |= words_buf_s[pc_pos].b[17]; + k18 |= words_buf_s[pc_pos].b[18]; + k19 |= words_buf_s[pc_pos].b[19]; + k20 |= words_buf_s[pc_pos].b[20]; + k21 |= words_buf_s[pc_pos].b[21]; + k22 |= words_buf_s[pc_pos].b[22]; + k23 |= words_buf_s[pc_pos].b[23]; + k24 |= words_buf_s[pc_pos].b[24]; + k25 |= words_buf_s[pc_pos].b[25]; + k26 |= words_buf_s[pc_pos].b[26]; + k27 |= words_buf_s[pc_pos].b[27]; + k28 |= words_buf_s[pc_pos].b[28]; + k29 |= words_buf_s[pc_pos].b[29]; + k30 |= words_buf_s[pc_pos].b[30]; + k31 |= words_buf_s[pc_pos].b[31]; + + // KGS!@#$% including IP + + u32 D00 = 0; + u32 D01 = 0; + u32 D02 = 0; + u32 D03 = 0xffffffff; + u32 D04 = 0; + u32 D05 = 0xffffffff; + u32 D06 = 0xffffffff; + u32 D07 = 0xffffffff; + u32 D08 = 0; + u32 D09 = 0; + u32 D10 = 0; + u32 D11 = 0; + u32 D12 = 0; + u32 D13 = 0xffffffff; + u32 D14 = 0; + u32 D15 = 0; + u32 D16 = 0xffffffff; + u32 D17 = 0xffffffff; + u32 D18 = 0; + u32 D19 = 0; + u32 D20 = 0; + u32 D21 = 0; + u32 D22 = 0xffffffff; + u32 D23 = 0; + u32 D24 = 0xffffffff; + u32 D25 = 0; + u32 D26 = 0xffffffff; + u32 D27 = 0; + u32 D28 = 0xffffffff; + u32 D29 = 0xffffffff; + u32 D30 = 0xffffffff; + u32 D31 = 0xffffffff; + u32 D32 = 0; + u32 D33 = 0; + u32 D34 = 0; + u32 D35 = 0; + u32 D36 = 0; + u32 D37 = 0; + u32 D38 = 0; + u32 D39 = 0; + u32 D40 = 0xffffffff; + u32 D41 = 0xffffffff; + u32 D42 = 0xffffffff; + u32 D43 = 0; + u32 D44 = 0xffffffff; + u32 D45 = 0; + u32 D46 = 0; + u32 D47 = 0; + u32 D48 = 0; + u32 D49 = 0; + u32 D50 = 0; + u32 D51 = 0; + u32 D52 = 0; + u32 D53 = 0; + u32 D54 = 0; + u32 D55 = 0xffffffff; + u32 D56 = 0; + u32 D57 = 0; + u32 D58 = 0xffffffff; + u32 D59 = 0; + u32 D60 = 0; + u32 D61 = 0xffffffff; + u32 D62 = 0xffffffff; + u32 D63 = 0xffffffff; + + DES + ( + k00, k01, k02, k03, k04, k05, k06, + k07, k08, k09, k10, k11, k12, k13, + k14, k15, k16, k17, k18, k19, k20, + k21, k22, k23, k24, k25, k26, k27, + k28, k29, k30, k31, K32, K33, K34, + K35, K36, K37, K38, K39, K40, K41, + K42, K43, K44, K45, K46, K47, K48, + K49, K50, K51, K52, K53, K54, K55, + &D00, &D01, &D02, &D03, &D04, &D05, &D06, &D07, + &D08, &D09, &D10, &D11, &D12, &D13, &D14, &D15, + &D16, &D17, &D18, &D19, &D20, &D21, &D22, &D23, + &D24, &D25, &D26, &D27, &D28, &D29, &D30, &D31, + &D32, &D33, &D34, &D35, &D36, &D37, &D38, &D39, + &D40, &D41, &D42, &D43, &D44, &D45, &D46, &D47, + &D48, &D49, &D50, &D51, &D52, &D53, &D54, &D55, + &D56, &D57, &D58, &D59, &D60, &D61, &D62, &D63 + ); + + u32 out[64]; + + out[ 0] = D00; + out[ 1] = D01; + out[ 2] = D02; + out[ 3] = D03; + out[ 4] = D04; + out[ 5] = D05; + out[ 6] = D06; + out[ 7] = D07; + out[ 8] = D08; + out[ 9] = D09; + out[10] = D10; + out[11] = D11; + out[12] = D12; + out[13] = D13; + out[14] = D14; + out[15] = D15; + out[16] = D16; + out[17] = D17; + out[18] = D18; + out[19] = D19; + out[20] = D20; + out[21] = D21; + out[22] = D22; + out[23] = D23; + out[24] = D24; + out[25] = D25; + out[26] = D26; + out[27] = D27; + out[28] = D28; + out[29] = D29; + out[30] = D30; + out[31] = D31; + out[32] = D32; + out[33] = D33; + out[34] = D34; + out[35] = D35; + out[36] = D36; + out[37] = D37; + out[38] = D38; + out[39] = D39; + out[40] = D40; + out[41] = D41; + out[42] = D42; + out[43] = D43; + out[44] = D44; + out[45] = D45; + out[46] = D46; + out[47] = D47; + out[48] = D48; + out[49] = D49; + out[50] = D50; + out[51] = D51; + out[52] = D52; + out[53] = D53; + out[54] = D54; + out[55] = D55; + out[56] = D56; + out[57] = D57; + out[58] = D58; + out[59] = D59; + out[60] = D60; + out[61] = D61; + out[62] = D62; + out[63] = D63; + + if (digests_cnt < 16) { - const u32 final_hash_pos = digests_offset + d; + for (u32 d = 0; d < digests_cnt; d++) + { + const u32 final_hash_pos = digests_offset + d; - if (hashes_shown[final_hash_pos]) continue; + if (hashes_shown[final_hash_pos]) continue; - u32 search[2]; + u32 search[2]; - search[0] = digests_buf[final_hash_pos].digest_buf[DGST_R0]; - search[1] = digests_buf[final_hash_pos].digest_buf[DGST_R1]; + search[0] = digests_buf[final_hash_pos].digest_buf[DGST_R0]; + search[1] = digests_buf[final_hash_pos].digest_buf[DGST_R1]; - u32 tmpResult = 0; + u32 tmpResult = 0; + + #ifdef _unroll + #pragma unroll + #endif + for (int i = 0; i < 32; i++) + { + const u32 b0 = -((search[0] >> i) & 1); + const u32 b1 = -((search[1] >> i) & 1); + + tmpResult |= out[ 0 + i] ^ b0; + tmpResult |= out[32 + i] ^ b1; + } + + if (tmpResult == 0xffffffff) continue; + + const u32 slice = ffz (tmpResult); + + const u32 r0 = search[0]; + const u32 r1 = search[1]; + const u32 r2 = 0; + #ifdef KERNEL_STATIC + const u32 r3 = 0; + #endif + + #include COMPARE_M + } + } + else + { + u32 out0[32]; + u32 out1[32]; #ifdef _unroll #pragma unroll #endif for (int i = 0; i < 32; i++) { - const u32 b0 = -((search[0] >> i) & 1); - const u32 b1 = -((search[1] >> i) & 1); - - tmpResult |= out[ 0 + i] ^ b0; - tmpResult |= out[32 + i] ^ b1; + out0[i] = out[ 0 + 31 - i]; + out1[i] = out[32 + 31 - i]; } - if (tmpResult == 0xffffffff) continue; + transpose32c (out0); + transpose32c (out1); - const u32 slice = ffz (tmpResult); - - const u32 r0 = search[0]; - const u32 r1 = search[1]; - const u32 r2 = 0; - #ifdef KERNEL_STATIC - const u32 r3 = 0; + #ifdef _unroll + #pragma unroll #endif + for (int slice = 0; slice < 32; slice++) + { + const u32 r0 = out0[31 - slice]; + const u32 r1 = out1[31 - slice]; + const u32 r2 = 0; + #ifdef KERNEL_STATIC + const u32 r3 = 0; + #endif - #include COMPARE_M - } - } - else - { - u32 out0[32]; - u32 out1[32]; - - #ifdef _unroll - #pragma unroll - #endif - for (int i = 0; i < 32; i++) - { - out0[i] = out[ 0 + 31 - i]; - out1[i] = out[32 + 31 - i]; - } - - transpose32c (out0); - transpose32c (out1); - - #ifdef _unroll - #pragma unroll - #endif - for (int slice = 0; slice < 32; slice++) - { - const u32 r0 = out0[31 - slice]; - const u32 r1 = out1[31 - slice]; - const u32 r2 = 0; - #ifdef KERNEL_STATIC - const u32 r3 = 0; - #endif - - #include COMPARE_M + #include COMPARE_M + } } } } @@ -2149,70 +2146,70 @@ KERNEL_FQ void m03000_sxx (KERN_ATTR_BITSLICE ()) const u32 s0 = digests_buf[0].digest_buf[0]; const u32 s1 = digests_buf[0].digest_buf[1]; - #define S00 (((s0 >> 0) & 1) ? -1 : 0) - #define S01 (((s0 >> 1) & 1) ? -1 : 0) - #define S02 (((s0 >> 2) & 1) ? -1 : 0) - #define S03 (((s0 >> 3) & 1) ? -1 : 0) - #define S04 (((s0 >> 4) & 1) ? -1 : 0) - #define S05 (((s0 >> 5) & 1) ? -1 : 0) - #define S06 (((s0 >> 6) & 1) ? -1 : 0) - #define S07 (((s0 >> 7) & 1) ? -1 : 0) - #define S08 (((s0 >> 8) & 1) ? -1 : 0) - #define S09 (((s0 >> 9) & 1) ? -1 : 0) - #define S10 (((s0 >> 10) & 1) ? -1 : 0) - #define S11 (((s0 >> 11) & 1) ? -1 : 0) - #define S12 (((s0 >> 12) & 1) ? -1 : 0) - #define S13 (((s0 >> 13) & 1) ? -1 : 0) - #define S14 (((s0 >> 14) & 1) ? -1 : 0) - #define S15 (((s0 >> 15) & 1) ? -1 : 0) - #define S16 (((s0 >> 16) & 1) ? -1 : 0) - #define S17 (((s0 >> 17) & 1) ? -1 : 0) - #define S18 (((s0 >> 18) & 1) ? -1 : 0) - #define S19 (((s0 >> 19) & 1) ? -1 : 0) - #define S20 (((s0 >> 20) & 1) ? -1 : 0) - #define S21 (((s0 >> 21) & 1) ? -1 : 0) - #define S22 (((s0 >> 22) & 1) ? -1 : 0) - #define S23 (((s0 >> 23) & 1) ? -1 : 0) - #define S24 (((s0 >> 24) & 1) ? -1 : 0) - #define S25 (((s0 >> 25) & 1) ? -1 : 0) - #define S26 (((s0 >> 26) & 1) ? -1 : 0) - #define S27 (((s0 >> 27) & 1) ? -1 : 0) - #define S28 (((s0 >> 28) & 1) ? -1 : 0) - #define S29 (((s0 >> 29) & 1) ? -1 : 0) - #define S30 (((s0 >> 30) & 1) ? -1 : 0) - #define S31 (((s0 >> 31) & 1) ? -1 : 0) - #define S32 (((s1 >> 0) & 1) ? -1 : 0) - #define S33 (((s1 >> 1) & 1) ? -1 : 0) - #define S34 (((s1 >> 2) & 1) ? -1 : 0) - #define S35 (((s1 >> 3) & 1) ? -1 : 0) - #define S36 (((s1 >> 4) & 1) ? -1 : 0) - #define S37 (((s1 >> 5) & 1) ? -1 : 0) - #define S38 (((s1 >> 6) & 1) ? -1 : 0) - #define S39 (((s1 >> 7) & 1) ? -1 : 0) - #define S40 (((s1 >> 8) & 1) ? -1 : 0) - #define S41 (((s1 >> 9) & 1) ? -1 : 0) - #define S42 (((s1 >> 10) & 1) ? -1 : 0) - #define S43 (((s1 >> 11) & 1) ? -1 : 0) - #define S44 (((s1 >> 12) & 1) ? -1 : 0) - #define S45 (((s1 >> 13) & 1) ? -1 : 0) - #define S46 (((s1 >> 14) & 1) ? -1 : 0) - #define S47 (((s1 >> 15) & 1) ? -1 : 0) - #define S48 (((s1 >> 16) & 1) ? -1 : 0) - #define S49 (((s1 >> 17) & 1) ? -1 : 0) - #define S50 (((s1 >> 18) & 1) ? -1 : 0) - #define S51 (((s1 >> 19) & 1) ? -1 : 0) - #define S52 (((s1 >> 20) & 1) ? -1 : 0) - #define S53 (((s1 >> 21) & 1) ? -1 : 0) - #define S54 (((s1 >> 22) & 1) ? -1 : 0) - #define S55 (((s1 >> 23) & 1) ? -1 : 0) - #define S56 (((s1 >> 24) & 1) ? -1 : 0) - #define S57 (((s1 >> 25) & 1) ? -1 : 0) - #define S58 (((s1 >> 26) & 1) ? -1 : 0) - #define S59 (((s1 >> 27) & 1) ? -1 : 0) - #define S60 (((s1 >> 28) & 1) ? -1 : 0) - #define S61 (((s1 >> 29) & 1) ? -1 : 0) - #define S62 (((s1 >> 30) & 1) ? -1 : 0) - #define S63 (((s1 >> 31) & 1) ? -1 : 0) + const u32 S00 = (((s0 >> 0) & 1) ? -1 : 0); + const u32 S01 = (((s0 >> 1) & 1) ? -1 : 0); + const u32 S02 = (((s0 >> 2) & 1) ? -1 : 0); + const u32 S03 = (((s0 >> 3) & 1) ? -1 : 0); + const u32 S04 = (((s0 >> 4) & 1) ? -1 : 0); + const u32 S05 = (((s0 >> 5) & 1) ? -1 : 0); + const u32 S06 = (((s0 >> 6) & 1) ? -1 : 0); + const u32 S07 = (((s0 >> 7) & 1) ? -1 : 0); + const u32 S08 = (((s0 >> 8) & 1) ? -1 : 0); + const u32 S09 = (((s0 >> 9) & 1) ? -1 : 0); + const u32 S10 = (((s0 >> 10) & 1) ? -1 : 0); + const u32 S11 = (((s0 >> 11) & 1) ? -1 : 0); + const u32 S12 = (((s0 >> 12) & 1) ? -1 : 0); + const u32 S13 = (((s0 >> 13) & 1) ? -1 : 0); + const u32 S14 = (((s0 >> 14) & 1) ? -1 : 0); + const u32 S15 = (((s0 >> 15) & 1) ? -1 : 0); + const u32 S16 = (((s0 >> 16) & 1) ? -1 : 0); + const u32 S17 = (((s0 >> 17) & 1) ? -1 : 0); + const u32 S18 = (((s0 >> 18) & 1) ? -1 : 0); + const u32 S19 = (((s0 >> 19) & 1) ? -1 : 0); + const u32 S20 = (((s0 >> 20) & 1) ? -1 : 0); + const u32 S21 = (((s0 >> 21) & 1) ? -1 : 0); + const u32 S22 = (((s0 >> 22) & 1) ? -1 : 0); + const u32 S23 = (((s0 >> 23) & 1) ? -1 : 0); + const u32 S24 = (((s0 >> 24) & 1) ? -1 : 0); + const u32 S25 = (((s0 >> 25) & 1) ? -1 : 0); + const u32 S26 = (((s0 >> 26) & 1) ? -1 : 0); + const u32 S27 = (((s0 >> 27) & 1) ? -1 : 0); + const u32 S28 = (((s0 >> 28) & 1) ? -1 : 0); + const u32 S29 = (((s0 >> 29) & 1) ? -1 : 0); + const u32 S30 = (((s0 >> 30) & 1) ? -1 : 0); + const u32 S31 = (((s0 >> 31) & 1) ? -1 : 0); + const u32 S32 = (((s1 >> 0) & 1) ? -1 : 0); + const u32 S33 = (((s1 >> 1) & 1) ? -1 : 0); + const u32 S34 = (((s1 >> 2) & 1) ? -1 : 0); + const u32 S35 = (((s1 >> 3) & 1) ? -1 : 0); + const u32 S36 = (((s1 >> 4) & 1) ? -1 : 0); + const u32 S37 = (((s1 >> 5) & 1) ? -1 : 0); + const u32 S38 = (((s1 >> 6) & 1) ? -1 : 0); + const u32 S39 = (((s1 >> 7) & 1) ? -1 : 0); + const u32 S40 = (((s1 >> 8) & 1) ? -1 : 0); + const u32 S41 = (((s1 >> 9) & 1) ? -1 : 0); + const u32 S42 = (((s1 >> 10) & 1) ? -1 : 0); + const u32 S43 = (((s1 >> 11) & 1) ? -1 : 0); + const u32 S44 = (((s1 >> 12) & 1) ? -1 : 0); + const u32 S45 = (((s1 >> 13) & 1) ? -1 : 0); + const u32 S46 = (((s1 >> 14) & 1) ? -1 : 0); + const u32 S47 = (((s1 >> 15) & 1) ? -1 : 0); + const u32 S48 = (((s1 >> 16) & 1) ? -1 : 0); + const u32 S49 = (((s1 >> 17) & 1) ? -1 : 0); + const u32 S50 = (((s1 >> 18) & 1) ? -1 : 0); + const u32 S51 = (((s1 >> 19) & 1) ? -1 : 0); + const u32 S52 = (((s1 >> 20) & 1) ? -1 : 0); + const u32 S53 = (((s1 >> 21) & 1) ? -1 : 0); + const u32 S54 = (((s1 >> 22) & 1) ? -1 : 0); + const u32 S55 = (((s1 >> 23) & 1) ? -1 : 0); + const u32 S56 = (((s1 >> 24) & 1) ? -1 : 0); + const u32 S57 = (((s1 >> 25) & 1) ? -1 : 0); + const u32 S58 = (((s1 >> 26) & 1) ? -1 : 0); + const u32 S59 = (((s1 >> 27) & 1) ? -1 : 0); + const u32 S60 = (((s1 >> 28) & 1) ? -1 : 0); + const u32 S61 = (((s1 >> 29) & 1) ? -1 : 0); + const u32 S62 = (((s1 >> 30) & 1) ? -1 : 0); + const u32 S63 = (((s1 >> 31) & 1) ? -1 : 0); /** * base @@ -2282,248 +2279,245 @@ KERNEL_FQ void m03000_sxx (KERN_ATTR_BITSLICE ()) * inner loop */ - #ifdef IS_CUDA - const u32 pc_pos = (blockIdx.y * blockDim.y) + threadIdx.y; - #else - const u32 pc_pos = get_global_id (1); - #endif + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += 32) + { + u32 k00 = K00; + u32 k01 = K01; + u32 k02 = K02; + u32 k03 = K03; + u32 k04 = K04; + u32 k05 = K05; + u32 k06 = K06; + u32 k07 = K07; + u32 k08 = K08; + u32 k09 = K09; + u32 k10 = K10; + u32 k11 = K11; + u32 k12 = K12; + u32 k13 = K13; + u32 k14 = K14; + u32 k15 = K15; + u32 k16 = K16; + u32 k17 = K17; + u32 k18 = K18; + u32 k19 = K19; + u32 k20 = K20; + u32 k21 = K21; + u32 k22 = K22; + u32 k23 = K23; + u32 k24 = K24; + u32 k25 = K25; + u32 k26 = K26; + u32 k27 = K27; + u32 k28 = K28; + u32 k29 = K29; + u32 k30 = K30; + u32 k31 = K31; - const u32 il_pos = pc_pos * 32; + const u32 pc_pos = il_pos / 32; - u32 k00 = K00; - u32 k01 = K01; - u32 k02 = K02; - u32 k03 = K03; - u32 k04 = K04; - u32 k05 = K05; - u32 k06 = K06; - u32 k07 = K07; - u32 k08 = K08; - u32 k09 = K09; - u32 k10 = K10; - u32 k11 = K11; - u32 k12 = K12; - u32 k13 = K13; - u32 k14 = K14; - u32 k15 = K15; - u32 k16 = K16; - u32 k17 = K17; - u32 k18 = K18; - u32 k19 = K19; - u32 k20 = K20; - u32 k21 = K21; - u32 k22 = K22; - u32 k23 = K23; - u32 k24 = K24; - u32 k25 = K25; - u32 k26 = K26; - u32 k27 = K27; - u32 k28 = K28; - u32 k29 = K29; - u32 k30 = K30; - u32 k31 = K31; + k00 |= words_buf_s[pc_pos].b[ 0]; + k01 |= words_buf_s[pc_pos].b[ 1]; + k02 |= words_buf_s[pc_pos].b[ 2]; + k03 |= words_buf_s[pc_pos].b[ 3]; + k04 |= words_buf_s[pc_pos].b[ 4]; + k05 |= words_buf_s[pc_pos].b[ 5]; + k06 |= words_buf_s[pc_pos].b[ 6]; + k07 |= words_buf_s[pc_pos].b[ 7]; + k08 |= words_buf_s[pc_pos].b[ 8]; + k09 |= words_buf_s[pc_pos].b[ 9]; + k10 |= words_buf_s[pc_pos].b[10]; + k11 |= words_buf_s[pc_pos].b[11]; + k12 |= words_buf_s[pc_pos].b[12]; + k13 |= words_buf_s[pc_pos].b[13]; + k14 |= words_buf_s[pc_pos].b[14]; + k15 |= words_buf_s[pc_pos].b[15]; + k16 |= words_buf_s[pc_pos].b[16]; + k17 |= words_buf_s[pc_pos].b[17]; + k18 |= words_buf_s[pc_pos].b[18]; + k19 |= words_buf_s[pc_pos].b[19]; + k20 |= words_buf_s[pc_pos].b[20]; + k21 |= words_buf_s[pc_pos].b[21]; + k22 |= words_buf_s[pc_pos].b[22]; + k23 |= words_buf_s[pc_pos].b[23]; + k24 |= words_buf_s[pc_pos].b[24]; + k25 |= words_buf_s[pc_pos].b[25]; + k26 |= words_buf_s[pc_pos].b[26]; + k27 |= words_buf_s[pc_pos].b[27]; + k28 |= words_buf_s[pc_pos].b[28]; + k29 |= words_buf_s[pc_pos].b[29]; + k30 |= words_buf_s[pc_pos].b[30]; + k31 |= words_buf_s[pc_pos].b[31]; - k00 |= words_buf_s[pc_pos].b[ 0]; - k01 |= words_buf_s[pc_pos].b[ 1]; - k02 |= words_buf_s[pc_pos].b[ 2]; - k03 |= words_buf_s[pc_pos].b[ 3]; - k04 |= words_buf_s[pc_pos].b[ 4]; - k05 |= words_buf_s[pc_pos].b[ 5]; - k06 |= words_buf_s[pc_pos].b[ 6]; - k07 |= words_buf_s[pc_pos].b[ 7]; - k08 |= words_buf_s[pc_pos].b[ 8]; - k09 |= words_buf_s[pc_pos].b[ 9]; - k10 |= words_buf_s[pc_pos].b[10]; - k11 |= words_buf_s[pc_pos].b[11]; - k12 |= words_buf_s[pc_pos].b[12]; - k13 |= words_buf_s[pc_pos].b[13]; - k14 |= words_buf_s[pc_pos].b[14]; - k15 |= words_buf_s[pc_pos].b[15]; - k16 |= words_buf_s[pc_pos].b[16]; - k17 |= words_buf_s[pc_pos].b[17]; - k18 |= words_buf_s[pc_pos].b[18]; - k19 |= words_buf_s[pc_pos].b[19]; - k20 |= words_buf_s[pc_pos].b[20]; - k21 |= words_buf_s[pc_pos].b[21]; - k22 |= words_buf_s[pc_pos].b[22]; - k23 |= words_buf_s[pc_pos].b[23]; - k24 |= words_buf_s[pc_pos].b[24]; - k25 |= words_buf_s[pc_pos].b[25]; - k26 |= words_buf_s[pc_pos].b[26]; - k27 |= words_buf_s[pc_pos].b[27]; - k28 |= words_buf_s[pc_pos].b[28]; - k29 |= words_buf_s[pc_pos].b[29]; - k30 |= words_buf_s[pc_pos].b[30]; - k31 |= words_buf_s[pc_pos].b[31]; + // KGS!@#$% including IP - // KGS!@#$% including IP + u32 D00 = 0; + u32 D01 = 0; + u32 D02 = 0; + u32 D03 = 0xffffffff; + u32 D04 = 0; + u32 D05 = 0xffffffff; + u32 D06 = 0xffffffff; + u32 D07 = 0xffffffff; + u32 D08 = 0; + u32 D09 = 0; + u32 D10 = 0; + u32 D11 = 0; + u32 D12 = 0; + u32 D13 = 0xffffffff; + u32 D14 = 0; + u32 D15 = 0; + u32 D16 = 0xffffffff; + u32 D17 = 0xffffffff; + u32 D18 = 0; + u32 D19 = 0; + u32 D20 = 0; + u32 D21 = 0; + u32 D22 = 0xffffffff; + u32 D23 = 0; + u32 D24 = 0xffffffff; + u32 D25 = 0; + u32 D26 = 0xffffffff; + u32 D27 = 0; + u32 D28 = 0xffffffff; + u32 D29 = 0xffffffff; + u32 D30 = 0xffffffff; + u32 D31 = 0xffffffff; + u32 D32 = 0; + u32 D33 = 0; + u32 D34 = 0; + u32 D35 = 0; + u32 D36 = 0; + u32 D37 = 0; + u32 D38 = 0; + u32 D39 = 0; + u32 D40 = 0xffffffff; + u32 D41 = 0xffffffff; + u32 D42 = 0xffffffff; + u32 D43 = 0; + u32 D44 = 0xffffffff; + u32 D45 = 0; + u32 D46 = 0; + u32 D47 = 0; + u32 D48 = 0; + u32 D49 = 0; + u32 D50 = 0; + u32 D51 = 0; + u32 D52 = 0; + u32 D53 = 0; + u32 D54 = 0; + u32 D55 = 0xffffffff; + u32 D56 = 0; + u32 D57 = 0; + u32 D58 = 0xffffffff; + u32 D59 = 0; + u32 D60 = 0; + u32 D61 = 0xffffffff; + u32 D62 = 0xffffffff; + u32 D63 = 0xffffffff; - u32 D00 = 0; - u32 D01 = 0; - u32 D02 = 0; - u32 D03 = 0xffffffff; - u32 D04 = 0; - u32 D05 = 0xffffffff; - u32 D06 = 0xffffffff; - u32 D07 = 0xffffffff; - u32 D08 = 0; - u32 D09 = 0; - u32 D10 = 0; - u32 D11 = 0; - u32 D12 = 0; - u32 D13 = 0xffffffff; - u32 D14 = 0; - u32 D15 = 0; - u32 D16 = 0xffffffff; - u32 D17 = 0xffffffff; - u32 D18 = 0; - u32 D19 = 0; - u32 D20 = 0; - u32 D21 = 0; - u32 D22 = 0xffffffff; - u32 D23 = 0; - u32 D24 = 0xffffffff; - u32 D25 = 0; - u32 D26 = 0xffffffff; - u32 D27 = 0; - u32 D28 = 0xffffffff; - u32 D29 = 0xffffffff; - u32 D30 = 0xffffffff; - u32 D31 = 0xffffffff; - u32 D32 = 0; - u32 D33 = 0; - u32 D34 = 0; - u32 D35 = 0; - u32 D36 = 0; - u32 D37 = 0; - u32 D38 = 0; - u32 D39 = 0; - u32 D40 = 0xffffffff; - u32 D41 = 0xffffffff; - u32 D42 = 0xffffffff; - u32 D43 = 0; - u32 D44 = 0xffffffff; - u32 D45 = 0; - u32 D46 = 0; - u32 D47 = 0; - u32 D48 = 0; - u32 D49 = 0; - u32 D50 = 0; - u32 D51 = 0; - u32 D52 = 0; - u32 D53 = 0; - u32 D54 = 0; - u32 D55 = 0xffffffff; - u32 D56 = 0; - u32 D57 = 0; - u32 D58 = 0xffffffff; - u32 D59 = 0; - u32 D60 = 0; - u32 D61 = 0xffffffff; - u32 D62 = 0xffffffff; - u32 D63 = 0xffffffff; + DES + ( + k00, k01, k02, k03, k04, k05, k06, + k07, k08, k09, k10, k11, k12, k13, + k14, k15, k16, k17, k18, k19, k20, + k21, k22, k23, k24, k25, k26, k27, + k28, k29, k30, k31, K32, K33, K34, + K35, K36, K37, K38, K39, K40, K41, + K42, K43, K44, K45, K46, K47, K48, + K49, K50, K51, K52, K53, K54, K55, + &D00, &D01, &D02, &D03, &D04, &D05, &D06, &D07, + &D08, &D09, &D10, &D11, &D12, &D13, &D14, &D15, + &D16, &D17, &D18, &D19, &D20, &D21, &D22, &D23, + &D24, &D25, &D26, &D27, &D28, &D29, &D30, &D31, + &D32, &D33, &D34, &D35, &D36, &D37, &D38, &D39, + &D40, &D41, &D42, &D43, &D44, &D45, &D46, &D47, + &D48, &D49, &D50, &D51, &D52, &D53, &D54, &D55, + &D56, &D57, &D58, &D59, &D60, &D61, &D62, &D63 + ); - DES - ( - k00, k01, k02, k03, k04, k05, k06, - k07, k08, k09, k10, k11, k12, k13, - k14, k15, k16, k17, k18, k19, k20, - k21, k22, k23, k24, k25, k26, k27, - k28, k29, k30, k31, K32, K33, K34, - K35, K36, K37, K38, K39, K40, K41, - K42, K43, K44, K45, K46, K47, K48, - K49, K50, K51, K52, K53, K54, K55, - &D00, &D01, &D02, &D03, &D04, &D05, &D06, &D07, - &D08, &D09, &D10, &D11, &D12, &D13, &D14, &D15, - &D16, &D17, &D18, &D19, &D20, &D21, &D22, &D23, - &D24, &D25, &D26, &D27, &D28, &D29, &D30, &D31, - &D32, &D33, &D34, &D35, &D36, &D37, &D38, &D39, - &D40, &D41, &D42, &D43, &D44, &D45, &D46, &D47, - &D48, &D49, &D50, &D51, &D52, &D53, &D54, &D55, - &D56, &D57, &D58, &D59, &D60, &D61, &D62, &D63 - ); + u32 tmpResult = 0; - u32 tmpResult = 0; + tmpResult |= D00 ^ S00; + tmpResult |= D01 ^ S01; + tmpResult |= D02 ^ S02; + tmpResult |= D03 ^ S03; + tmpResult |= D04 ^ S04; + tmpResult |= D05 ^ S05; + tmpResult |= D06 ^ S06; + tmpResult |= D07 ^ S07; + tmpResult |= D08 ^ S08; + tmpResult |= D09 ^ S09; + tmpResult |= D10 ^ S10; + tmpResult |= D11 ^ S11; + tmpResult |= D12 ^ S12; + tmpResult |= D13 ^ S13; + tmpResult |= D14 ^ S14; + tmpResult |= D15 ^ S15; - tmpResult |= D00 ^ S00; - tmpResult |= D01 ^ S01; - tmpResult |= D02 ^ S02; - tmpResult |= D03 ^ S03; - tmpResult |= D04 ^ S04; - tmpResult |= D05 ^ S05; - tmpResult |= D06 ^ S06; - tmpResult |= D07 ^ S07; - tmpResult |= D08 ^ S08; - tmpResult |= D09 ^ S09; - tmpResult |= D10 ^ S10; - tmpResult |= D11 ^ S11; - tmpResult |= D12 ^ S12; - tmpResult |= D13 ^ S13; - tmpResult |= D14 ^ S14; - tmpResult |= D15 ^ S15; + if (tmpResult == 0xffffffff) continue; - if (tmpResult == 0xffffffff) return; + tmpResult |= D16 ^ S16; + tmpResult |= D17 ^ S17; + tmpResult |= D18 ^ S18; + tmpResult |= D19 ^ S19; + tmpResult |= D20 ^ S20; + tmpResult |= D21 ^ S21; + tmpResult |= D22 ^ S22; + tmpResult |= D23 ^ S23; + tmpResult |= D24 ^ S24; + tmpResult |= D25 ^ S25; + tmpResult |= D26 ^ S26; + tmpResult |= D27 ^ S27; + tmpResult |= D28 ^ S28; + tmpResult |= D29 ^ S29; + tmpResult |= D30 ^ S30; + tmpResult |= D31 ^ S31; - tmpResult |= D16 ^ S16; - tmpResult |= D17 ^ S17; - tmpResult |= D18 ^ S18; - tmpResult |= D19 ^ S19; - tmpResult |= D20 ^ S20; - tmpResult |= D21 ^ S21; - tmpResult |= D22 ^ S22; - tmpResult |= D23 ^ S23; - tmpResult |= D24 ^ S24; - tmpResult |= D25 ^ S25; - tmpResult |= D26 ^ S26; - tmpResult |= D27 ^ S27; - tmpResult |= D28 ^ S28; - tmpResult |= D29 ^ S29; - tmpResult |= D30 ^ S30; - tmpResult |= D31 ^ S31; + if (tmpResult == 0xffffffff) continue; - if (tmpResult == 0xffffffff) return; + tmpResult |= D32 ^ S32; + tmpResult |= D33 ^ S33; + tmpResult |= D34 ^ S34; + tmpResult |= D35 ^ S35; + tmpResult |= D36 ^ S36; + tmpResult |= D37 ^ S37; + tmpResult |= D38 ^ S38; + tmpResult |= D39 ^ S39; + tmpResult |= D40 ^ S40; + tmpResult |= D41 ^ S41; + tmpResult |= D42 ^ S42; + tmpResult |= D43 ^ S43; + tmpResult |= D44 ^ S44; + tmpResult |= D45 ^ S45; + tmpResult |= D46 ^ S46; + tmpResult |= D47 ^ S47; - tmpResult |= D32 ^ S32; - tmpResult |= D33 ^ S33; - tmpResult |= D34 ^ S34; - tmpResult |= D35 ^ S35; - tmpResult |= D36 ^ S36; - tmpResult |= D37 ^ S37; - tmpResult |= D38 ^ S38; - tmpResult |= D39 ^ S39; - tmpResult |= D40 ^ S40; - tmpResult |= D41 ^ S41; - tmpResult |= D42 ^ S42; - tmpResult |= D43 ^ S43; - tmpResult |= D44 ^ S44; - tmpResult |= D45 ^ S45; - tmpResult |= D46 ^ S46; - tmpResult |= D47 ^ S47; + if (tmpResult == 0xffffffff) continue; - if (tmpResult == 0xffffffff) return; + tmpResult |= D48 ^ S48; + tmpResult |= D49 ^ S49; + tmpResult |= D50 ^ S50; + tmpResult |= D51 ^ S51; + tmpResult |= D52 ^ S52; + tmpResult |= D53 ^ S53; + tmpResult |= D54 ^ S54; + tmpResult |= D55 ^ S55; + tmpResult |= D56 ^ S56; + tmpResult |= D57 ^ S57; + tmpResult |= D58 ^ S58; + tmpResult |= D59 ^ S59; + tmpResult |= D60 ^ S60; + tmpResult |= D61 ^ S61; + tmpResult |= D62 ^ S62; + tmpResult |= D63 ^ S63; - tmpResult |= D48 ^ S48; - tmpResult |= D49 ^ S49; - tmpResult |= D50 ^ S50; - tmpResult |= D51 ^ S51; - tmpResult |= D52 ^ S52; - tmpResult |= D53 ^ S53; - tmpResult |= D54 ^ S54; - tmpResult |= D55 ^ S55; - tmpResult |= D56 ^ S56; - tmpResult |= D57 ^ S57; - tmpResult |= D58 ^ S58; - tmpResult |= D59 ^ S59; - tmpResult |= D60 ^ S60; - tmpResult |= D61 ^ S61; - tmpResult |= D62 ^ S62; - tmpResult |= D63 ^ S63; + if (tmpResult == 0xffffffff) continue; - if (tmpResult == 0xffffffff) return; + const u32 slice = ffz (tmpResult); - const u32 slice = ffz (tmpResult); - - #ifdef KERNEL_STATIC - #include COMPARE_S - #endif + #ifdef KERNEL_STATIC + #include COMPARE_S + #endif + } } diff --git a/OpenCL/m14000_a3-pure.cl b/OpenCL/m14000_a3-pure.cl index 7d1b33e8a..253a6ee86 100644 --- a/OpenCL/m14000_a3-pure.cl +++ b/OpenCL/m14000_a3-pure.cl @@ -1903,78 +1903,6 @@ KERNEL_FQ void m14000_mxx (KERN_ATTR_BITSLICE ()) u32 D62 = d62; u32 D63 = d63; - /** - * digest - */ - - const u32 s0 = digests_buf[0].digest_buf[0]; - const u32 s1 = digests_buf[0].digest_buf[1]; - - #define S00 (((s0 >> 0) & 1) ? -1 : 0) - #define S01 (((s0 >> 1) & 1) ? -1 : 0) - #define S02 (((s0 >> 2) & 1) ? -1 : 0) - #define S03 (((s0 >> 3) & 1) ? -1 : 0) - #define S04 (((s0 >> 4) & 1) ? -1 : 0) - #define S05 (((s0 >> 5) & 1) ? -1 : 0) - #define S06 (((s0 >> 6) & 1) ? -1 : 0) - #define S07 (((s0 >> 7) & 1) ? -1 : 0) - #define S08 (((s0 >> 8) & 1) ? -1 : 0) - #define S09 (((s0 >> 9) & 1) ? -1 : 0) - #define S10 (((s0 >> 10) & 1) ? -1 : 0) - #define S11 (((s0 >> 11) & 1) ? -1 : 0) - #define S12 (((s0 >> 12) & 1) ? -1 : 0) - #define S13 (((s0 >> 13) & 1) ? -1 : 0) - #define S14 (((s0 >> 14) & 1) ? -1 : 0) - #define S15 (((s0 >> 15) & 1) ? -1 : 0) - #define S16 (((s0 >> 16) & 1) ? -1 : 0) - #define S17 (((s0 >> 17) & 1) ? -1 : 0) - #define S18 (((s0 >> 18) & 1) ? -1 : 0) - #define S19 (((s0 >> 19) & 1) ? -1 : 0) - #define S20 (((s0 >> 20) & 1) ? -1 : 0) - #define S21 (((s0 >> 21) & 1) ? -1 : 0) - #define S22 (((s0 >> 22) & 1) ? -1 : 0) - #define S23 (((s0 >> 23) & 1) ? -1 : 0) - #define S24 (((s0 >> 24) & 1) ? -1 : 0) - #define S25 (((s0 >> 25) & 1) ? -1 : 0) - #define S26 (((s0 >> 26) & 1) ? -1 : 0) - #define S27 (((s0 >> 27) & 1) ? -1 : 0) - #define S28 (((s0 >> 28) & 1) ? -1 : 0) - #define S29 (((s0 >> 29) & 1) ? -1 : 0) - #define S30 (((s0 >> 30) & 1) ? -1 : 0) - #define S31 (((s0 >> 31) & 1) ? -1 : 0) - #define S32 (((s1 >> 0) & 1) ? -1 : 0) - #define S33 (((s1 >> 1) & 1) ? -1 : 0) - #define S34 (((s1 >> 2) & 1) ? -1 : 0) - #define S35 (((s1 >> 3) & 1) ? -1 : 0) - #define S36 (((s1 >> 4) & 1) ? -1 : 0) - #define S37 (((s1 >> 5) & 1) ? -1 : 0) - #define S38 (((s1 >> 6) & 1) ? -1 : 0) - #define S39 (((s1 >> 7) & 1) ? -1 : 0) - #define S40 (((s1 >> 8) & 1) ? -1 : 0) - #define S41 (((s1 >> 9) & 1) ? -1 : 0) - #define S42 (((s1 >> 10) & 1) ? -1 : 0) - #define S43 (((s1 >> 11) & 1) ? -1 : 0) - #define S44 (((s1 >> 12) & 1) ? -1 : 0) - #define S45 (((s1 >> 13) & 1) ? -1 : 0) - #define S46 (((s1 >> 14) & 1) ? -1 : 0) - #define S47 (((s1 >> 15) & 1) ? -1 : 0) - #define S48 (((s1 >> 16) & 1) ? -1 : 0) - #define S49 (((s1 >> 17) & 1) ? -1 : 0) - #define S50 (((s1 >> 18) & 1) ? -1 : 0) - #define S51 (((s1 >> 19) & 1) ? -1 : 0) - #define S52 (((s1 >> 20) & 1) ? -1 : 0) - #define S53 (((s1 >> 21) & 1) ? -1 : 0) - #define S54 (((s1 >> 22) & 1) ? -1 : 0) - #define S55 (((s1 >> 23) & 1) ? -1 : 0) - #define S56 (((s1 >> 24) & 1) ? -1 : 0) - #define S57 (((s1 >> 25) & 1) ? -1 : 0) - #define S58 (((s1 >> 26) & 1) ? -1 : 0) - #define S59 (((s1 >> 27) & 1) ? -1 : 0) - #define S60 (((s1 >> 28) & 1) ? -1 : 0) - #define S61 (((s1 >> 29) & 1) ? -1 : 0) - #define S62 (((s1 >> 30) & 1) ? -1 : 0) - #define S63 (((s1 >> 31) & 1) ? -1 : 0) - /** * base */ @@ -2043,230 +1971,227 @@ KERNEL_FQ void m14000_mxx (KERN_ATTR_BITSLICE ()) * inner loop */ - #ifdef IS_CUDA - const u32 pc_pos = (blockIdx.y * blockDim.y) + threadIdx.y; - #else - const u32 pc_pos = get_global_id (1); - #endif - - const u32 il_pos = pc_pos * 32; - - u32 k00 = K00; - u32 k01 = K01; - u32 k02 = K02; - u32 k03 = K03; - u32 k04 = K04; - u32 k05 = K05; - u32 k06 = K06; - u32 k07 = K07; - u32 k08 = K08; - u32 k09 = K09; - u32 k10 = K10; - u32 k11 = K11; - u32 k12 = K12; - u32 k13 = K13; - u32 k14 = K14; - u32 k15 = K15; - u32 k16 = K16; - u32 k17 = K17; - u32 k18 = K18; - u32 k19 = K19; - u32 k20 = K20; - u32 k21 = K21; - u32 k22 = K22; - u32 k23 = K23; - u32 k24 = K24; - u32 k25 = K25; - u32 k26 = K26; - u32 k27 = K27; - - k00 |= words_buf_s[pc_pos].b[ 0]; - k01 |= words_buf_s[pc_pos].b[ 1]; - k02 |= words_buf_s[pc_pos].b[ 2]; - k03 |= words_buf_s[pc_pos].b[ 3]; - k04 |= words_buf_s[pc_pos].b[ 4]; - k05 |= words_buf_s[pc_pos].b[ 5]; - k06 |= words_buf_s[pc_pos].b[ 6]; - k07 |= words_buf_s[pc_pos].b[ 7]; - k08 |= words_buf_s[pc_pos].b[ 8]; - k09 |= words_buf_s[pc_pos].b[ 9]; - k10 |= words_buf_s[pc_pos].b[10]; - k11 |= words_buf_s[pc_pos].b[11]; - k12 |= words_buf_s[pc_pos].b[12]; - k13 |= words_buf_s[pc_pos].b[13]; - k14 |= words_buf_s[pc_pos].b[14]; - k15 |= words_buf_s[pc_pos].b[15]; - k16 |= words_buf_s[pc_pos].b[16]; - k17 |= words_buf_s[pc_pos].b[17]; - k18 |= words_buf_s[pc_pos].b[18]; - k19 |= words_buf_s[pc_pos].b[19]; - k20 |= words_buf_s[pc_pos].b[20]; - k21 |= words_buf_s[pc_pos].b[21]; - k22 |= words_buf_s[pc_pos].b[22]; - k23 |= words_buf_s[pc_pos].b[23]; - k24 |= words_buf_s[pc_pos].b[24]; - k25 |= words_buf_s[pc_pos].b[25]; - k26 |= words_buf_s[pc_pos].b[26]; - k27 |= words_buf_s[pc_pos].b[27]; - - DES - ( - k00, k01, k02, k03, k04, k05, k06, - k07, k08, k09, k10, k11, k12, k13, - k14, k15, k16, k17, k18, k19, k20, - k21, k22, k23, k24, k25, k26, k27, - K28, K29, K30, K31, K32, K33, K34, - K35, K36, K37, K38, K39, K40, K41, - K42, K43, K44, K45, K46, K47, K48, - K49, K50, K51, K52, K53, K54, K55, - &D00, &D01, &D02, &D03, &D04, &D05, &D06, &D07, - &D08, &D09, &D10, &D11, &D12, &D13, &D14, &D15, - &D16, &D17, &D18, &D19, &D20, &D21, &D22, &D23, - &D24, &D25, &D26, &D27, &D28, &D29, &D30, &D31, - &D32, &D33, &D34, &D35, &D36, &D37, &D38, &D39, - &D40, &D41, &D42, &D43, &D44, &D45, &D46, &D47, - &D48, &D49, &D50, &D51, &D52, &D53, &D54, &D55, - &D56, &D57, &D58, &D59, &D60, &D61, &D62, &D63 - ); - - u32 out[64]; - - out[ 0] = D00; - out[ 1] = D01; - out[ 2] = D02; - out[ 3] = D03; - out[ 4] = D04; - out[ 5] = D05; - out[ 6] = D06; - out[ 7] = D07; - out[ 8] = D08; - out[ 9] = D09; - out[10] = D10; - out[11] = D11; - out[12] = D12; - out[13] = D13; - out[14] = D14; - out[15] = D15; - out[16] = D16; - out[17] = D17; - out[18] = D18; - out[19] = D19; - out[20] = D20; - out[21] = D21; - out[22] = D22; - out[23] = D23; - out[24] = D24; - out[25] = D25; - out[26] = D26; - out[27] = D27; - out[28] = D28; - out[29] = D29; - out[30] = D30; - out[31] = D31; - out[32] = D32; - out[33] = D33; - out[34] = D34; - out[35] = D35; - out[36] = D36; - out[37] = D37; - out[38] = D38; - out[39] = D39; - out[40] = D40; - out[41] = D41; - out[42] = D42; - out[43] = D43; - out[44] = D44; - out[45] = D45; - out[46] = D46; - out[47] = D47; - out[48] = D48; - out[49] = D49; - out[50] = D50; - out[51] = D51; - out[52] = D52; - out[53] = D53; - out[54] = D54; - out[55] = D55; - out[56] = D56; - out[57] = D57; - out[58] = D58; - out[59] = D59; - out[60] = D60; - out[61] = D61; - out[62] = D62; - out[63] = D63; - - if (digests_cnt < 16) + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += 32) { - for (u32 d = 0; d < digests_cnt; d++) + u32 k00 = K00; + u32 k01 = K01; + u32 k02 = K02; + u32 k03 = K03; + u32 k04 = K04; + u32 k05 = K05; + u32 k06 = K06; + u32 k07 = K07; + u32 k08 = K08; + u32 k09 = K09; + u32 k10 = K10; + u32 k11 = K11; + u32 k12 = K12; + u32 k13 = K13; + u32 k14 = K14; + u32 k15 = K15; + u32 k16 = K16; + u32 k17 = K17; + u32 k18 = K18; + u32 k19 = K19; + u32 k20 = K20; + u32 k21 = K21; + u32 k22 = K22; + u32 k23 = K23; + u32 k24 = K24; + u32 k25 = K25; + u32 k26 = K26; + u32 k27 = K27; + + const u32 pc_pos = il_pos / 32; + + k00 |= words_buf_s[pc_pos].b[ 0]; + k01 |= words_buf_s[pc_pos].b[ 1]; + k02 |= words_buf_s[pc_pos].b[ 2]; + k03 |= words_buf_s[pc_pos].b[ 3]; + k04 |= words_buf_s[pc_pos].b[ 4]; + k05 |= words_buf_s[pc_pos].b[ 5]; + k06 |= words_buf_s[pc_pos].b[ 6]; + k07 |= words_buf_s[pc_pos].b[ 7]; + k08 |= words_buf_s[pc_pos].b[ 8]; + k09 |= words_buf_s[pc_pos].b[ 9]; + k10 |= words_buf_s[pc_pos].b[10]; + k11 |= words_buf_s[pc_pos].b[11]; + k12 |= words_buf_s[pc_pos].b[12]; + k13 |= words_buf_s[pc_pos].b[13]; + k14 |= words_buf_s[pc_pos].b[14]; + k15 |= words_buf_s[pc_pos].b[15]; + k16 |= words_buf_s[pc_pos].b[16]; + k17 |= words_buf_s[pc_pos].b[17]; + k18 |= words_buf_s[pc_pos].b[18]; + k19 |= words_buf_s[pc_pos].b[19]; + k20 |= words_buf_s[pc_pos].b[20]; + k21 |= words_buf_s[pc_pos].b[21]; + k22 |= words_buf_s[pc_pos].b[22]; + k23 |= words_buf_s[pc_pos].b[23]; + k24 |= words_buf_s[pc_pos].b[24]; + k25 |= words_buf_s[pc_pos].b[25]; + k26 |= words_buf_s[pc_pos].b[26]; + k27 |= words_buf_s[pc_pos].b[27]; + + DES + ( + k00, k01, k02, k03, k04, k05, k06, + k07, k08, k09, k10, k11, k12, k13, + k14, k15, k16, k17, k18, k19, k20, + k21, k22, k23, k24, k25, k26, k27, + K28, K29, K30, K31, K32, K33, K34, + K35, K36, K37, K38, K39, K40, K41, + K42, K43, K44, K45, K46, K47, K48, + K49, K50, K51, K52, K53, K54, K55, + &D00, &D01, &D02, &D03, &D04, &D05, &D06, &D07, + &D08, &D09, &D10, &D11, &D12, &D13, &D14, &D15, + &D16, &D17, &D18, &D19, &D20, &D21, &D22, &D23, + &D24, &D25, &D26, &D27, &D28, &D29, &D30, &D31, + &D32, &D33, &D34, &D35, &D36, &D37, &D38, &D39, + &D40, &D41, &D42, &D43, &D44, &D45, &D46, &D47, + &D48, &D49, &D50, &D51, &D52, &D53, &D54, &D55, + &D56, &D57, &D58, &D59, &D60, &D61, &D62, &D63 + ); + + u32 out[64]; + + out[ 0] = D00; + out[ 1] = D01; + out[ 2] = D02; + out[ 3] = D03; + out[ 4] = D04; + out[ 5] = D05; + out[ 6] = D06; + out[ 7] = D07; + out[ 8] = D08; + out[ 9] = D09; + out[10] = D10; + out[11] = D11; + out[12] = D12; + out[13] = D13; + out[14] = D14; + out[15] = D15; + out[16] = D16; + out[17] = D17; + out[18] = D18; + out[19] = D19; + out[20] = D20; + out[21] = D21; + out[22] = D22; + out[23] = D23; + out[24] = D24; + out[25] = D25; + out[26] = D26; + out[27] = D27; + out[28] = D28; + out[29] = D29; + out[30] = D30; + out[31] = D31; + out[32] = D32; + out[33] = D33; + out[34] = D34; + out[35] = D35; + out[36] = D36; + out[37] = D37; + out[38] = D38; + out[39] = D39; + out[40] = D40; + out[41] = D41; + out[42] = D42; + out[43] = D43; + out[44] = D44; + out[45] = D45; + out[46] = D46; + out[47] = D47; + out[48] = D48; + out[49] = D49; + out[50] = D50; + out[51] = D51; + out[52] = D52; + out[53] = D53; + out[54] = D54; + out[55] = D55; + out[56] = D56; + out[57] = D57; + out[58] = D58; + out[59] = D59; + out[60] = D60; + out[61] = D61; + out[62] = D62; + out[63] = D63; + + if (digests_cnt < 16) { - const u32 final_hash_pos = digests_offset + d; + for (u32 d = 0; d < digests_cnt; d++) + { + const u32 final_hash_pos = digests_offset + d; - if (hashes_shown[final_hash_pos]) continue; + if (hashes_shown[final_hash_pos]) continue; - u32 search[2]; + u32 search[2]; - search[0] = digests_buf[final_hash_pos].digest_buf[DGST_R0]; - search[1] = digests_buf[final_hash_pos].digest_buf[DGST_R1]; + search[0] = digests_buf[final_hash_pos].digest_buf[DGST_R0]; + search[1] = digests_buf[final_hash_pos].digest_buf[DGST_R1]; - u32 tmpResult = 0; + u32 tmpResult = 0; + + #ifdef _unroll + #pragma unroll + #endif + for (int i = 0; i < 32; i++) + { + const u32 b0 = -((search[0] >> i) & 1); + const u32 b1 = -((search[1] >> i) & 1); + + tmpResult |= out[ 0 + i] ^ b0; + tmpResult |= out[32 + i] ^ b1; + } + + if (tmpResult == 0xffffffff) continue; + + const u32 slice = ffz (tmpResult); + + const u32 r0 = search[0]; + const u32 r1 = search[1]; + const u32 r2 = 0; + const u32 r3 = 0; + + #ifdef KERNEL_STATIC + #include COMPARE_M + #endif + } + } + else + { + u32 out0[32]; + u32 out1[32]; #ifdef _unroll #pragma unroll #endif for (int i = 0; i < 32; i++) { - const u32 b0 = -((search[0] >> i) & 1); - const u32 b1 = -((search[1] >> i) & 1); - - tmpResult |= out[ 0 + i] ^ b0; - tmpResult |= out[32 + i] ^ b1; + out0[i] = out[ 0 + i]; + out1[i] = out[32 + i]; } - if (tmpResult == 0xffffffff) continue; + transpose32c (out0); + transpose32c (out1); - const u32 slice = ffz (tmpResult); - - const u32 r0 = search[0]; - const u32 r1 = search[1]; - const u32 r2 = 0; - const u32 r3 = 0; - - #ifdef KERNEL_STATIC - #include COMPARE_M + #ifdef _unroll + #pragma unroll #endif - } - } - else - { - u32 out0[32]; - u32 out1[32]; + for (int slice = 0; slice < 32; slice++) + { + const u32 r0 = out0[slice]; + const u32 r1 = out1[slice]; + const u32 r2 = 0; + const u32 r3 = 0; - #ifdef _unroll - #pragma unroll - #endif - for (int i = 0; i < 32; i++) - { - out0[i] = out[ 0 + i]; - out1[i] = out[32 + i]; - } - - transpose32c (out0); - transpose32c (out1); - - #ifdef _unroll - #pragma unroll - #endif - for (int slice = 0; slice < 32; slice++) - { - const u32 r0 = out0[slice]; - const u32 r1 = out1[slice]; - const u32 r2 = 0; - const u32 r3 = 0; - - #ifdef KERNEL_STATIC - #include COMPARE_M - #endif + #ifdef KERNEL_STATIC + #include COMPARE_M + #endif + } } } } @@ -2426,70 +2351,70 @@ KERNEL_FQ void m14000_sxx (KERN_ATTR_BITSLICE ()) const u32 s0 = digests_buf[0].digest_buf[0]; const u32 s1 = digests_buf[0].digest_buf[1]; - #define S00 (((s0 >> 0) & 1) ? -1 : 0) - #define S01 (((s0 >> 1) & 1) ? -1 : 0) - #define S02 (((s0 >> 2) & 1) ? -1 : 0) - #define S03 (((s0 >> 3) & 1) ? -1 : 0) - #define S04 (((s0 >> 4) & 1) ? -1 : 0) - #define S05 (((s0 >> 5) & 1) ? -1 : 0) - #define S06 (((s0 >> 6) & 1) ? -1 : 0) - #define S07 (((s0 >> 7) & 1) ? -1 : 0) - #define S08 (((s0 >> 8) & 1) ? -1 : 0) - #define S09 (((s0 >> 9) & 1) ? -1 : 0) - #define S10 (((s0 >> 10) & 1) ? -1 : 0) - #define S11 (((s0 >> 11) & 1) ? -1 : 0) - #define S12 (((s0 >> 12) & 1) ? -1 : 0) - #define S13 (((s0 >> 13) & 1) ? -1 : 0) - #define S14 (((s0 >> 14) & 1) ? -1 : 0) - #define S15 (((s0 >> 15) & 1) ? -1 : 0) - #define S16 (((s0 >> 16) & 1) ? -1 : 0) - #define S17 (((s0 >> 17) & 1) ? -1 : 0) - #define S18 (((s0 >> 18) & 1) ? -1 : 0) - #define S19 (((s0 >> 19) & 1) ? -1 : 0) - #define S20 (((s0 >> 20) & 1) ? -1 : 0) - #define S21 (((s0 >> 21) & 1) ? -1 : 0) - #define S22 (((s0 >> 22) & 1) ? -1 : 0) - #define S23 (((s0 >> 23) & 1) ? -1 : 0) - #define S24 (((s0 >> 24) & 1) ? -1 : 0) - #define S25 (((s0 >> 25) & 1) ? -1 : 0) - #define S26 (((s0 >> 26) & 1) ? -1 : 0) - #define S27 (((s0 >> 27) & 1) ? -1 : 0) - #define S28 (((s0 >> 28) & 1) ? -1 : 0) - #define S29 (((s0 >> 29) & 1) ? -1 : 0) - #define S30 (((s0 >> 30) & 1) ? -1 : 0) - #define S31 (((s0 >> 31) & 1) ? -1 : 0) - #define S32 (((s1 >> 0) & 1) ? -1 : 0) - #define S33 (((s1 >> 1) & 1) ? -1 : 0) - #define S34 (((s1 >> 2) & 1) ? -1 : 0) - #define S35 (((s1 >> 3) & 1) ? -1 : 0) - #define S36 (((s1 >> 4) & 1) ? -1 : 0) - #define S37 (((s1 >> 5) & 1) ? -1 : 0) - #define S38 (((s1 >> 6) & 1) ? -1 : 0) - #define S39 (((s1 >> 7) & 1) ? -1 : 0) - #define S40 (((s1 >> 8) & 1) ? -1 : 0) - #define S41 (((s1 >> 9) & 1) ? -1 : 0) - #define S42 (((s1 >> 10) & 1) ? -1 : 0) - #define S43 (((s1 >> 11) & 1) ? -1 : 0) - #define S44 (((s1 >> 12) & 1) ? -1 : 0) - #define S45 (((s1 >> 13) & 1) ? -1 : 0) - #define S46 (((s1 >> 14) & 1) ? -1 : 0) - #define S47 (((s1 >> 15) & 1) ? -1 : 0) - #define S48 (((s1 >> 16) & 1) ? -1 : 0) - #define S49 (((s1 >> 17) & 1) ? -1 : 0) - #define S50 (((s1 >> 18) & 1) ? -1 : 0) - #define S51 (((s1 >> 19) & 1) ? -1 : 0) - #define S52 (((s1 >> 20) & 1) ? -1 : 0) - #define S53 (((s1 >> 21) & 1) ? -1 : 0) - #define S54 (((s1 >> 22) & 1) ? -1 : 0) - #define S55 (((s1 >> 23) & 1) ? -1 : 0) - #define S56 (((s1 >> 24) & 1) ? -1 : 0) - #define S57 (((s1 >> 25) & 1) ? -1 : 0) - #define S58 (((s1 >> 26) & 1) ? -1 : 0) - #define S59 (((s1 >> 27) & 1) ? -1 : 0) - #define S60 (((s1 >> 28) & 1) ? -1 : 0) - #define S61 (((s1 >> 29) & 1) ? -1 : 0) - #define S62 (((s1 >> 30) & 1) ? -1 : 0) - #define S63 (((s1 >> 31) & 1) ? -1 : 0) + const u32 S00 = (((s0 >> 0) & 1) ? -1 : 0); + const u32 S01 = (((s0 >> 1) & 1) ? -1 : 0); + const u32 S02 = (((s0 >> 2) & 1) ? -1 : 0); + const u32 S03 = (((s0 >> 3) & 1) ? -1 : 0); + const u32 S04 = (((s0 >> 4) & 1) ? -1 : 0); + const u32 S05 = (((s0 >> 5) & 1) ? -1 : 0); + const u32 S06 = (((s0 >> 6) & 1) ? -1 : 0); + const u32 S07 = (((s0 >> 7) & 1) ? -1 : 0); + const u32 S08 = (((s0 >> 8) & 1) ? -1 : 0); + const u32 S09 = (((s0 >> 9) & 1) ? -1 : 0); + const u32 S10 = (((s0 >> 10) & 1) ? -1 : 0); + const u32 S11 = (((s0 >> 11) & 1) ? -1 : 0); + const u32 S12 = (((s0 >> 12) & 1) ? -1 : 0); + const u32 S13 = (((s0 >> 13) & 1) ? -1 : 0); + const u32 S14 = (((s0 >> 14) & 1) ? -1 : 0); + const u32 S15 = (((s0 >> 15) & 1) ? -1 : 0); + const u32 S16 = (((s0 >> 16) & 1) ? -1 : 0); + const u32 S17 = (((s0 >> 17) & 1) ? -1 : 0); + const u32 S18 = (((s0 >> 18) & 1) ? -1 : 0); + const u32 S19 = (((s0 >> 19) & 1) ? -1 : 0); + const u32 S20 = (((s0 >> 20) & 1) ? -1 : 0); + const u32 S21 = (((s0 >> 21) & 1) ? -1 : 0); + const u32 S22 = (((s0 >> 22) & 1) ? -1 : 0); + const u32 S23 = (((s0 >> 23) & 1) ? -1 : 0); + const u32 S24 = (((s0 >> 24) & 1) ? -1 : 0); + const u32 S25 = (((s0 >> 25) & 1) ? -1 : 0); + const u32 S26 = (((s0 >> 26) & 1) ? -1 : 0); + const u32 S27 = (((s0 >> 27) & 1) ? -1 : 0); + const u32 S28 = (((s0 >> 28) & 1) ? -1 : 0); + const u32 S29 = (((s0 >> 29) & 1) ? -1 : 0); + const u32 S30 = (((s0 >> 30) & 1) ? -1 : 0); + const u32 S31 = (((s0 >> 31) & 1) ? -1 : 0); + const u32 S32 = (((s1 >> 0) & 1) ? -1 : 0); + const u32 S33 = (((s1 >> 1) & 1) ? -1 : 0); + const u32 S34 = (((s1 >> 2) & 1) ? -1 : 0); + const u32 S35 = (((s1 >> 3) & 1) ? -1 : 0); + const u32 S36 = (((s1 >> 4) & 1) ? -1 : 0); + const u32 S37 = (((s1 >> 5) & 1) ? -1 : 0); + const u32 S38 = (((s1 >> 6) & 1) ? -1 : 0); + const u32 S39 = (((s1 >> 7) & 1) ? -1 : 0); + const u32 S40 = (((s1 >> 8) & 1) ? -1 : 0); + const u32 S41 = (((s1 >> 9) & 1) ? -1 : 0); + const u32 S42 = (((s1 >> 10) & 1) ? -1 : 0); + const u32 S43 = (((s1 >> 11) & 1) ? -1 : 0); + const u32 S44 = (((s1 >> 12) & 1) ? -1 : 0); + const u32 S45 = (((s1 >> 13) & 1) ? -1 : 0); + const u32 S46 = (((s1 >> 14) & 1) ? -1 : 0); + const u32 S47 = (((s1 >> 15) & 1) ? -1 : 0); + const u32 S48 = (((s1 >> 16) & 1) ? -1 : 0); + const u32 S49 = (((s1 >> 17) & 1) ? -1 : 0); + const u32 S50 = (((s1 >> 18) & 1) ? -1 : 0); + const u32 S51 = (((s1 >> 19) & 1) ? -1 : 0); + const u32 S52 = (((s1 >> 20) & 1) ? -1 : 0); + const u32 S53 = (((s1 >> 21) & 1) ? -1 : 0); + const u32 S54 = (((s1 >> 22) & 1) ? -1 : 0); + const u32 S55 = (((s1 >> 23) & 1) ? -1 : 0); + const u32 S56 = (((s1 >> 24) & 1) ? -1 : 0); + const u32 S57 = (((s1 >> 25) & 1) ? -1 : 0); + const u32 S58 = (((s1 >> 26) & 1) ? -1 : 0); + const u32 S59 = (((s1 >> 27) & 1) ? -1 : 0); + const u32 S60 = (((s1 >> 28) & 1) ? -1 : 0); + const u32 S61 = (((s1 >> 29) & 1) ? -1 : 0); + const u32 S62 = (((s1 >> 30) & 1) ? -1 : 0); + const u32 S63 = (((s1 >> 31) & 1) ? -1 : 0); /** * base @@ -2559,173 +2484,170 @@ KERNEL_FQ void m14000_sxx (KERN_ATTR_BITSLICE ()) * inner loop */ - #ifdef IS_CUDA - const u32 pc_pos = (blockIdx.y * blockDim.y) + threadIdx.y; - #else - const u32 pc_pos = get_global_id (1); - #endif + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += 32) + { + u32 k00 = K00; + u32 k01 = K01; + u32 k02 = K02; + u32 k03 = K03; + u32 k04 = K04; + u32 k05 = K05; + u32 k06 = K06; + u32 k07 = K07; + u32 k08 = K08; + u32 k09 = K09; + u32 k10 = K10; + u32 k11 = K11; + u32 k12 = K12; + u32 k13 = K13; + u32 k14 = K14; + u32 k15 = K15; + u32 k16 = K16; + u32 k17 = K17; + u32 k18 = K18; + u32 k19 = K19; + u32 k20 = K20; + u32 k21 = K21; + u32 k22 = K22; + u32 k23 = K23; + u32 k24 = K24; + u32 k25 = K25; + u32 k26 = K26; + u32 k27 = K27; - const u32 il_pos = pc_pos * 32; + const u32 pc_pos = il_pos / 32; - u32 k00 = K00; - u32 k01 = K01; - u32 k02 = K02; - u32 k03 = K03; - u32 k04 = K04; - u32 k05 = K05; - u32 k06 = K06; - u32 k07 = K07; - u32 k08 = K08; - u32 k09 = K09; - u32 k10 = K10; - u32 k11 = K11; - u32 k12 = K12; - u32 k13 = K13; - u32 k14 = K14; - u32 k15 = K15; - u32 k16 = K16; - u32 k17 = K17; - u32 k18 = K18; - u32 k19 = K19; - u32 k20 = K20; - u32 k21 = K21; - u32 k22 = K22; - u32 k23 = K23; - u32 k24 = K24; - u32 k25 = K25; - u32 k26 = K26; - u32 k27 = K27; + k00 |= words_buf_s[pc_pos].b[ 0]; + k01 |= words_buf_s[pc_pos].b[ 1]; + k02 |= words_buf_s[pc_pos].b[ 2]; + k03 |= words_buf_s[pc_pos].b[ 3]; + k04 |= words_buf_s[pc_pos].b[ 4]; + k05 |= words_buf_s[pc_pos].b[ 5]; + k06 |= words_buf_s[pc_pos].b[ 6]; + k07 |= words_buf_s[pc_pos].b[ 7]; + k08 |= words_buf_s[pc_pos].b[ 8]; + k09 |= words_buf_s[pc_pos].b[ 9]; + k10 |= words_buf_s[pc_pos].b[10]; + k11 |= words_buf_s[pc_pos].b[11]; + k12 |= words_buf_s[pc_pos].b[12]; + k13 |= words_buf_s[pc_pos].b[13]; + k14 |= words_buf_s[pc_pos].b[14]; + k15 |= words_buf_s[pc_pos].b[15]; + k16 |= words_buf_s[pc_pos].b[16]; + k17 |= words_buf_s[pc_pos].b[17]; + k18 |= words_buf_s[pc_pos].b[18]; + k19 |= words_buf_s[pc_pos].b[19]; + k20 |= words_buf_s[pc_pos].b[20]; + k21 |= words_buf_s[pc_pos].b[21]; + k22 |= words_buf_s[pc_pos].b[22]; + k23 |= words_buf_s[pc_pos].b[23]; + k24 |= words_buf_s[pc_pos].b[24]; + k25 |= words_buf_s[pc_pos].b[25]; + k26 |= words_buf_s[pc_pos].b[26]; + k27 |= words_buf_s[pc_pos].b[27]; - k00 |= words_buf_s[pc_pos].b[ 0]; - k01 |= words_buf_s[pc_pos].b[ 1]; - k02 |= words_buf_s[pc_pos].b[ 2]; - k03 |= words_buf_s[pc_pos].b[ 3]; - k04 |= words_buf_s[pc_pos].b[ 4]; - k05 |= words_buf_s[pc_pos].b[ 5]; - k06 |= words_buf_s[pc_pos].b[ 6]; - k07 |= words_buf_s[pc_pos].b[ 7]; - k08 |= words_buf_s[pc_pos].b[ 8]; - k09 |= words_buf_s[pc_pos].b[ 9]; - k10 |= words_buf_s[pc_pos].b[10]; - k11 |= words_buf_s[pc_pos].b[11]; - k12 |= words_buf_s[pc_pos].b[12]; - k13 |= words_buf_s[pc_pos].b[13]; - k14 |= words_buf_s[pc_pos].b[14]; - k15 |= words_buf_s[pc_pos].b[15]; - k16 |= words_buf_s[pc_pos].b[16]; - k17 |= words_buf_s[pc_pos].b[17]; - k18 |= words_buf_s[pc_pos].b[18]; - k19 |= words_buf_s[pc_pos].b[19]; - k20 |= words_buf_s[pc_pos].b[20]; - k21 |= words_buf_s[pc_pos].b[21]; - k22 |= words_buf_s[pc_pos].b[22]; - k23 |= words_buf_s[pc_pos].b[23]; - k24 |= words_buf_s[pc_pos].b[24]; - k25 |= words_buf_s[pc_pos].b[25]; - k26 |= words_buf_s[pc_pos].b[26]; - k27 |= words_buf_s[pc_pos].b[27]; + DES + ( + k00, k01, k02, k03, k04, k05, k06, + k07, k08, k09, k10, k11, k12, k13, + k14, k15, k16, k17, k18, k19, k20, + k21, k22, k23, k24, k25, k26, k27, + K28, K29, K30, K31, K32, K33, K34, + K35, K36, K37, K38, K39, K40, K41, + K42, K43, K44, K45, K46, K47, K48, + K49, K50, K51, K52, K53, K54, K55, + &D00, &D01, &D02, &D03, &D04, &D05, &D06, &D07, + &D08, &D09, &D10, &D11, &D12, &D13, &D14, &D15, + &D16, &D17, &D18, &D19, &D20, &D21, &D22, &D23, + &D24, &D25, &D26, &D27, &D28, &D29, &D30, &D31, + &D32, &D33, &D34, &D35, &D36, &D37, &D38, &D39, + &D40, &D41, &D42, &D43, &D44, &D45, &D46, &D47, + &D48, &D49, &D50, &D51, &D52, &D53, &D54, &D55, + &D56, &D57, &D58, &D59, &D60, &D61, &D62, &D63 + ); - DES - ( - k00, k01, k02, k03, k04, k05, k06, - k07, k08, k09, k10, k11, k12, k13, - k14, k15, k16, k17, k18, k19, k20, - k21, k22, k23, k24, k25, k26, k27, - K28, K29, K30, K31, K32, K33, K34, - K35, K36, K37, K38, K39, K40, K41, - K42, K43, K44, K45, K46, K47, K48, - K49, K50, K51, K52, K53, K54, K55, - &D00, &D01, &D02, &D03, &D04, &D05, &D06, &D07, - &D08, &D09, &D10, &D11, &D12, &D13, &D14, &D15, - &D16, &D17, &D18, &D19, &D20, &D21, &D22, &D23, - &D24, &D25, &D26, &D27, &D28, &D29, &D30, &D31, - &D32, &D33, &D34, &D35, &D36, &D37, &D38, &D39, - &D40, &D41, &D42, &D43, &D44, &D45, &D46, &D47, - &D48, &D49, &D50, &D51, &D52, &D53, &D54, &D55, - &D56, &D57, &D58, &D59, &D60, &D61, &D62, &D63 - ); + u32 tmpResult = 0; - u32 tmpResult = 0; + tmpResult |= D00 ^ S00; + tmpResult |= D01 ^ S01; + tmpResult |= D02 ^ S02; + tmpResult |= D03 ^ S03; + tmpResult |= D04 ^ S04; + tmpResult |= D05 ^ S05; + tmpResult |= D06 ^ S06; + tmpResult |= D07 ^ S07; + tmpResult |= D08 ^ S08; + tmpResult |= D09 ^ S09; + tmpResult |= D10 ^ S10; + tmpResult |= D11 ^ S11; + tmpResult |= D12 ^ S12; + tmpResult |= D13 ^ S13; + tmpResult |= D14 ^ S14; + tmpResult |= D15 ^ S15; - tmpResult |= D00 ^ S00; - tmpResult |= D01 ^ S01; - tmpResult |= D02 ^ S02; - tmpResult |= D03 ^ S03; - tmpResult |= D04 ^ S04; - tmpResult |= D05 ^ S05; - tmpResult |= D06 ^ S06; - tmpResult |= D07 ^ S07; - tmpResult |= D08 ^ S08; - tmpResult |= D09 ^ S09; - tmpResult |= D10 ^ S10; - tmpResult |= D11 ^ S11; - tmpResult |= D12 ^ S12; - tmpResult |= D13 ^ S13; - tmpResult |= D14 ^ S14; - tmpResult |= D15 ^ S15; + if (tmpResult == 0xffffffff) continue; - if (tmpResult == 0xffffffff) return; + tmpResult |= D16 ^ S16; + tmpResult |= D17 ^ S17; + tmpResult |= D18 ^ S18; + tmpResult |= D19 ^ S19; + tmpResult |= D20 ^ S20; + tmpResult |= D21 ^ S21; + tmpResult |= D22 ^ S22; + tmpResult |= D23 ^ S23; + tmpResult |= D24 ^ S24; + tmpResult |= D25 ^ S25; + tmpResult |= D26 ^ S26; + tmpResult |= D27 ^ S27; + tmpResult |= D28 ^ S28; + tmpResult |= D29 ^ S29; + tmpResult |= D30 ^ S30; + tmpResult |= D31 ^ S31; - tmpResult |= D16 ^ S16; - tmpResult |= D17 ^ S17; - tmpResult |= D18 ^ S18; - tmpResult |= D19 ^ S19; - tmpResult |= D20 ^ S20; - tmpResult |= D21 ^ S21; - tmpResult |= D22 ^ S22; - tmpResult |= D23 ^ S23; - tmpResult |= D24 ^ S24; - tmpResult |= D25 ^ S25; - tmpResult |= D26 ^ S26; - tmpResult |= D27 ^ S27; - tmpResult |= D28 ^ S28; - tmpResult |= D29 ^ S29; - tmpResult |= D30 ^ S30; - tmpResult |= D31 ^ S31; + if (tmpResult == 0xffffffff) continue; - if (tmpResult == 0xffffffff) return; + tmpResult |= D32 ^ S32; + tmpResult |= D33 ^ S33; + tmpResult |= D34 ^ S34; + tmpResult |= D35 ^ S35; + tmpResult |= D36 ^ S36; + tmpResult |= D37 ^ S37; + tmpResult |= D38 ^ S38; + tmpResult |= D39 ^ S39; + tmpResult |= D40 ^ S40; + tmpResult |= D41 ^ S41; + tmpResult |= D42 ^ S42; + tmpResult |= D43 ^ S43; + tmpResult |= D44 ^ S44; + tmpResult |= D45 ^ S45; + tmpResult |= D46 ^ S46; + tmpResult |= D47 ^ S47; - tmpResult |= D32 ^ S32; - tmpResult |= D33 ^ S33; - tmpResult |= D34 ^ S34; - tmpResult |= D35 ^ S35; - tmpResult |= D36 ^ S36; - tmpResult |= D37 ^ S37; - tmpResult |= D38 ^ S38; - tmpResult |= D39 ^ S39; - tmpResult |= D40 ^ S40; - tmpResult |= D41 ^ S41; - tmpResult |= D42 ^ S42; - tmpResult |= D43 ^ S43; - tmpResult |= D44 ^ S44; - tmpResult |= D45 ^ S45; - tmpResult |= D46 ^ S46; - tmpResult |= D47 ^ S47; + if (tmpResult == 0xffffffff) continue; - if (tmpResult == 0xffffffff) return; + tmpResult |= D48 ^ S48; + tmpResult |= D49 ^ S49; + tmpResult |= D50 ^ S50; + tmpResult |= D51 ^ S51; + tmpResult |= D52 ^ S52; + tmpResult |= D53 ^ S53; + tmpResult |= D54 ^ S54; + tmpResult |= D55 ^ S55; + tmpResult |= D56 ^ S56; + tmpResult |= D57 ^ S57; + tmpResult |= D58 ^ S58; + tmpResult |= D59 ^ S59; + tmpResult |= D60 ^ S60; + tmpResult |= D61 ^ S61; + tmpResult |= D62 ^ S62; + tmpResult |= D63 ^ S63; - tmpResult |= D48 ^ S48; - tmpResult |= D49 ^ S49; - tmpResult |= D50 ^ S50; - tmpResult |= D51 ^ S51; - tmpResult |= D52 ^ S52; - tmpResult |= D53 ^ S53; - tmpResult |= D54 ^ S54; - tmpResult |= D55 ^ S55; - tmpResult |= D56 ^ S56; - tmpResult |= D57 ^ S57; - tmpResult |= D58 ^ S58; - tmpResult |= D59 ^ S59; - tmpResult |= D60 ^ S60; - tmpResult |= D61 ^ S61; - tmpResult |= D62 ^ S62; - tmpResult |= D63 ^ S63; + if (tmpResult == 0xffffffff) continue; - if (tmpResult == 0xffffffff) return; + const u32 slice = ffz (tmpResult); - const u32 slice = ffz (tmpResult); - - #ifdef KERNEL_STATIC - #include COMPARE_S - #endif + #ifdef KERNEL_STATIC + #include COMPARE_S + #endif + } } diff --git a/include/types.h b/include/types.h index ebcd53319..fabf9f063 100644 --- a/include/types.h +++ b/include/types.h @@ -392,40 +392,40 @@ typedef enum opts_type OPTS_TYPE_PT_GENERATE_LE = (1ULL << 9), OPTS_TYPE_PT_GENERATE_BE = (1ULL << 10), OPTS_TYPE_PT_NEVERCRACK = (1ULL << 11), // if we want all possible results - OPTS_TYPE_PT_BITSLICE = (1ULL << 12), - OPTS_TYPE_PT_ALWAYS_ASCII = (1ULL << 13), - OPTS_TYPE_PT_ALWAYS_HEXIFY = (1ULL << 14), - OPTS_TYPE_PT_LM = (1ULL << 15), // special handling: all lower, 7 max, ... - OPTS_TYPE_PT_HEX = (1ULL << 16), // input wordlist (and masks!) are always in hex - OPTS_TYPE_ST_UTF16LE = (1ULL << 17), - OPTS_TYPE_ST_UTF16BE = (1ULL << 18), - OPTS_TYPE_ST_UPPER = (1ULL << 19), - OPTS_TYPE_ST_LOWER = (1ULL << 20), - OPTS_TYPE_ST_ADD01 = (1ULL << 21), - OPTS_TYPE_ST_ADD02 = (1ULL << 22), - OPTS_TYPE_ST_ADD80 = (1ULL << 23), - OPTS_TYPE_ST_ADDBITS14 = (1ULL << 24), - OPTS_TYPE_ST_ADDBITS15 = (1ULL << 25), - OPTS_TYPE_ST_HEX = (1ULL << 26), - OPTS_TYPE_ST_BASE64 = (1ULL << 27), - OPTS_TYPE_ST_HASH_MD5 = (1ULL << 28), - OPTS_TYPE_HASH_COPY = (1ULL << 29), - OPTS_TYPE_HASH_SPLIT = (1ULL << 30), - OPTS_TYPE_LOOP_EXTENDED = (1ULL << 31), // a kernel which is called each time normal _loop kernel finished. + OPTS_TYPE_PT_ALWAYS_ASCII = (1ULL << 12), + OPTS_TYPE_PT_ALWAYS_HEXIFY = (1ULL << 13), + OPTS_TYPE_PT_LM = (1ULL << 14), // special handling: all lower, 7 max, ... + OPTS_TYPE_PT_HEX = (1ULL << 15), // input wordlist (and masks!) are always in hex + OPTS_TYPE_ST_UTF16LE = (1ULL << 16), + OPTS_TYPE_ST_UTF16BE = (1ULL << 17), + OPTS_TYPE_ST_UPPER = (1ULL << 18), + OPTS_TYPE_ST_LOWER = (1ULL << 19), + OPTS_TYPE_ST_ADD01 = (1ULL << 20), + OPTS_TYPE_ST_ADD02 = (1ULL << 21), + OPTS_TYPE_ST_ADD80 = (1ULL << 22), + OPTS_TYPE_ST_ADDBITS14 = (1ULL << 23), + OPTS_TYPE_ST_ADDBITS15 = (1ULL << 24), + OPTS_TYPE_ST_HEX = (1ULL << 25), + OPTS_TYPE_ST_BASE64 = (1ULL << 26), + OPTS_TYPE_ST_HASH_MD5 = (1ULL << 27), + OPTS_TYPE_HASH_COPY = (1ULL << 28), + OPTS_TYPE_HASH_SPLIT = (1ULL << 29), + OPTS_TYPE_LOOP_EXTENDED = (1ULL << 30), // a kernel which is called each time normal _loop kernel finished. // but unlike a hook kernel this kernel is called for every _loop iteration offset - OPTS_TYPE_HOOK12 = (1ULL << 32), - OPTS_TYPE_HOOK23 = (1ULL << 33), - OPTS_TYPE_INIT2 = (1ULL << 34), - OPTS_TYPE_LOOP2 = (1ULL << 35), - OPTS_TYPE_AUX1 = (1ULL << 36), - OPTS_TYPE_AUX2 = (1ULL << 37), - OPTS_TYPE_AUX3 = (1ULL << 38), - OPTS_TYPE_AUX4 = (1ULL << 39), - OPTS_TYPE_BINARY_HASHFILE = (1ULL << 40), - OPTS_TYPE_PREFERED_THREAD = (1ULL << 41), // some algorithms (complicated ones with many branches) benefit from this - OPTS_TYPE_PT_ADD06 = (1ULL << 42), - OPTS_TYPE_KEYBOARD_MAPPING = (1ULL << 43), - OPTS_TYPE_DEEP_COMP_KERNEL = (1ULL << 44), // if we have to iterate through each hash inside the comp kernel, for example if each hash has to be decrypted separately + OPTS_TYPE_HOOK12 = (1ULL << 31), + OPTS_TYPE_HOOK23 = (1ULL << 32), + OPTS_TYPE_INIT2 = (1ULL << 33), + OPTS_TYPE_LOOP2 = (1ULL << 34), + OPTS_TYPE_AUX1 = (1ULL << 35), + OPTS_TYPE_AUX2 = (1ULL << 36), + OPTS_TYPE_AUX3 = (1ULL << 37), + OPTS_TYPE_AUX4 = (1ULL << 38), + OPTS_TYPE_BINARY_HASHFILE = (1ULL << 39), + OPTS_TYPE_PREFERED_THREAD = (1ULL << 40), // some algorithms (complicated ones with many branches) benefit from this + OPTS_TYPE_PT_ADD06 = (1ULL << 41), + OPTS_TYPE_KEYBOARD_MAPPING = (1ULL << 42), + OPTS_TYPE_DEEP_COMP_KERNEL = (1ULL << 43), // if we have to iterate through each hash inside the comp kernel, for example if each hash has to be decrypted separately + OPTS_TYPE_TM_KERNEL = (1ULL << 44), OPTS_TYPE_SUGGEST_KG = (1ULL << 45), // suggest keep guessing for modules the user maybe wants to use --keep-guessing OPTS_TYPE_COPY_TMPS = (1ULL << 46), // if we want to use data from tmps buffer (for example get the PMK in WPA) OPTS_TYPE_POTFILE_NOPASS = (1ULL << 47), // sometimes the password should not be printed to potfile diff --git a/src/backend.c b/src/backend.c index 1ef4a5761..a8d4d2bae 100644 --- a/src/backend.c +++ b/src/backend.c @@ -2868,7 +2868,7 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, } else { - if (hashconfig->opts_type & OPTS_TYPE_PT_BITSLICE) + if (hashconfig->opts_type & OPTS_TYPE_TM_KERNEL) { const u32 size_tm = device_param->size_tm; @@ -3430,7 +3430,6 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con { const hashconfig_t *hashconfig = hashcat_ctx->hashconfig; const status_ctx_t *status_ctx = hashcat_ctx->status_ctx; - const user_options_t *user_options = hashcat_ctx->user_options; u64 kernel_threads = 0; u64 dynamic_shared_mem = 0; @@ -3544,44 +3543,33 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con num_elements = CEILDIV (num_elements, kernel_threads); - if ((hashconfig->opts_type & OPTS_TYPE_PT_BITSLICE) && (user_options->attack_mode == ATTACK_MODE_BF)) + if (kern_run == KERN_RUN_1) { - if (hc_cuEventRecord (hashcat_ctx, device_param->cuda_event1, device_param->cuda_stream) == -1) return -1; - - if (hc_cuLaunchKernel (hashcat_ctx, cuda_function, num_elements, 32, 1, kernel_threads, 1, 1, dynamic_shared_mem, device_param->cuda_stream, device_param->kernel_params, NULL) == -1) return -1; - - if (hc_cuEventRecord (hashcat_ctx, device_param->cuda_event2, device_param->cuda_stream) == -1) return -1; + if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_INIT) + { + num_elements = CEILDIV (num_elements, device_param->vector_width); + } } - else + else if (kern_run == KERN_RUN_2) { - if (kern_run == KERN_RUN_1) + if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_LOOP) { - if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_INIT) - { - num_elements = CEILDIV (num_elements, device_param->vector_width); - } + num_elements = CEILDIV (num_elements, device_param->vector_width); } - else if (kern_run == KERN_RUN_2) - { - if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_LOOP) - { - num_elements = CEILDIV (num_elements, device_param->vector_width); - } - } - else if (kern_run == KERN_RUN_3) - { - if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_COMP) - { - num_elements = CEILDIV (num_elements, device_param->vector_width); - } - } - - if (hc_cuEventRecord (hashcat_ctx, device_param->cuda_event1, device_param->cuda_stream) == -1) return -1; - - if (hc_cuLaunchKernel (hashcat_ctx, cuda_function, num_elements, 1, 1, kernel_threads, 1, 1, dynamic_shared_mem, device_param->cuda_stream, device_param->kernel_params, NULL) == -1) return -1; - - if (hc_cuEventRecord (hashcat_ctx, device_param->cuda_event2, device_param->cuda_stream) == -1) return -1; } + else if (kern_run == KERN_RUN_3) + { + if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_COMP) + { + num_elements = CEILDIV (num_elements, device_param->vector_width); + } + } + + if (hc_cuEventRecord (hashcat_ctx, device_param->cuda_event1, device_param->cuda_stream) == -1) return -1; + + if (hc_cuLaunchKernel (hashcat_ctx, cuda_function, num_elements, 1, 1, kernel_threads, 1, 1, dynamic_shared_mem, device_param->cuda_stream, device_param->kernel_params, NULL) == -1) return -1; + + if (hc_cuEventRecord (hashcat_ctx, device_param->cuda_event2, device_param->cuda_stream) == -1) return -1; if (hc_cuStreamSynchronize (hashcat_ctx, device_param->cuda_stream) == -1) return -1; @@ -3651,44 +3639,34 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con cl_event opencl_event; - if ((hashconfig->opts_type & OPTS_TYPE_PT_BITSLICE) && (user_options->attack_mode == ATTACK_MODE_BF)) + if (kern_run == KERN_RUN_1) { - const size_t global_work_size[3] = { num_elements, 32, 1 }; - const size_t local_work_size[3] = { kernel_threads, 1, 1 }; - - if (hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->opencl_command_queue, opencl_kernel, 2, NULL, global_work_size, local_work_size, 0, NULL, &opencl_event) == -1) return -1; + if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_INIT) + { + num_elements = CEILDIV (num_elements, device_param->vector_width); + } } - else + else if (kern_run == KERN_RUN_2) { - if (kern_run == KERN_RUN_1) + if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_LOOP) { - if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_INIT) - { - num_elements = CEILDIV (num_elements, device_param->vector_width); - } + num_elements = CEILDIV (num_elements, device_param->vector_width); } - else if (kern_run == KERN_RUN_2) - { - if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_LOOP) - { - num_elements = CEILDIV (num_elements, device_param->vector_width); - } - } - else if (kern_run == KERN_RUN_3) - { - if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_COMP) - { - num_elements = CEILDIV (num_elements, device_param->vector_width); - } - } - - num_elements = round_up_multiple_64 (num_elements, kernel_threads); - - const size_t global_work_size[3] = { num_elements, 1, 1 }; - const size_t local_work_size[3] = { kernel_threads, 1, 1 }; - - if (hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->opencl_command_queue, opencl_kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, &opencl_event) == -1) return -1; } + else if (kern_run == KERN_RUN_3) + { + if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_COMP) + { + num_elements = CEILDIV (num_elements, device_param->vector_width); + } + } + + num_elements = round_up_multiple_64 (num_elements, kernel_threads); + + const size_t global_work_size[3] = { num_elements, 1, 1 }; + const size_t local_work_size[3] = { kernel_threads, 1, 1 }; + + if (hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->opencl_command_queue, opencl_kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, &opencl_event) == -1) return -1; if (hc_clFlush (hashcat_ctx, device_param->opencl_command_queue) == -1) return -1; @@ -8839,7 +8817,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) { if (user_options->attack_mode == ATTACK_MODE_BF) { - if (hashconfig->opts_type & OPTS_TYPE_PT_BITSLICE) + if (hashconfig->opts_type & OPTS_TYPE_TM_KERNEL) { snprintf (kernel_name, sizeof (kernel_name), "m%05u_tm", kern_type); @@ -9092,10 +9070,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) device_param->kernel_preferred_wgs_multiple_mp_r = device_param->cuda_warp_size; - if (hashconfig->opts_type & OPTS_TYPE_PT_BITSLICE) + if (user_options->attack_mode == ATTACK_MODE_BF) { - //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_tm, 0, sizeof (cl_mem), device_param->kernel_params_tm[0]); if (CL_rc == -1) return -1; - //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_tm, 1, sizeof (cl_mem), device_param->kernel_params_tm[1]); if (CL_rc == -1) return -1; + if (hashconfig->opts_type & OPTS_TYPE_TM_KERNEL) + { + //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_tm, 0, sizeof (cl_mem), device_param->kernel_params_tm[0]); if (CL_rc == -1) return -1; + //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_tm, 1, sizeof (cl_mem), device_param->kernel_params_tm[1]); if (CL_rc == -1) return -1; + } } } else if (user_options->attack_mode == ATTACK_MODE_HYBRID1) @@ -9422,7 +9403,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) { if (user_options->attack_mode == ATTACK_MODE_BF) { - if (hashconfig->opts_type & OPTS_TYPE_PT_BITSLICE) + if (hashconfig->opts_type & OPTS_TYPE_TM_KERNEL) { snprintf (kernel_name, sizeof (kernel_name), "m%05u_tm", kern_type); @@ -9670,10 +9651,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) if (get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_mp_r, &device_param->kernel_preferred_wgs_multiple_mp_r) == -1) return -1; - if (hashconfig->opts_type & OPTS_TYPE_PT_BITSLICE) + if (user_options->attack_mode == ATTACK_MODE_BF) { - if (hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_tm, 0, sizeof (cl_mem), device_param->kernel_params_tm[0]) == -1) return -1; - if (hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_tm, 1, sizeof (cl_mem), device_param->kernel_params_tm[1]) == -1) return -1; + if (hashconfig->opts_type & OPTS_TYPE_TM_KERNEL) + { + if (hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_tm, 0, sizeof (cl_mem), device_param->kernel_params_tm[0]) == -1) return -1; + if (hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_tm, 1, sizeof (cl_mem), device_param->kernel_params_tm[1]) == -1) return -1; + } } } else if (user_options->attack_mode == ATTACK_MODE_HYBRID1) diff --git a/src/modules/module_01500.c b/src/modules/module_01500.c index e97267a92..d2b325339 100644 --- a/src/modules/module_01500.c +++ b/src/modules/module_01500.c @@ -22,7 +22,7 @@ static const char *HASH_NAME = "descrypt, DES (Unix), Traditional DES"; static const u64 KERN_TYPE = 1500; static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE; static const u64 OPTS_TYPE = OPTS_TYPE_PT_GENERATE_LE - | OPTS_TYPE_PT_BITSLICE; + | OPTS_TYPE_TM_KERNEL; static const u32 SALT_TYPE = SALT_TYPE_EMBEDDED; static const char *ST_PASS = NULL; // the self-test can't work because the salt is not part of the code at compile-time static const char *ST_HASH = "8133vc.5rieNk"; @@ -73,6 +73,13 @@ int module_build_plain_postprocess (MAYBE_UNUSED const hashconfig_t *hashconfig, return src_len; } +u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) +{ + const u32 kernel_threads_max = 64; // performance only optimization + + return kernel_threads_max; +} + u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { u32 kernel_loops_max = KERNEL_LOOPS_MAX; @@ -301,7 +308,7 @@ void module_init (module_ctx_t *module_ctx) module_ctx->module_kernel_accel_min = MODULE_DEFAULT; module_ctx->module_kernel_loops_max = module_kernel_loops_max; module_ctx->module_kernel_loops_min = module_kernel_loops_min; - module_ctx->module_kernel_threads_max = MODULE_DEFAULT; + module_ctx->module_kernel_threads_max = module_kernel_threads_max; module_ctx->module_kernel_threads_min = MODULE_DEFAULT; module_ctx->module_kern_type = module_kern_type; module_ctx->module_kern_type_dynamic = MODULE_DEFAULT; diff --git a/src/modules/module_03000.c b/src/modules/module_03000.c index e98cb294c..8d5e8c7a8 100644 --- a/src/modules/module_03000.c +++ b/src/modules/module_03000.c @@ -23,7 +23,7 @@ static const u64 KERN_TYPE = 3000; static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE; static const u64 OPTS_TYPE = OPTS_TYPE_PT_GENERATE_LE | OPTS_TYPE_PT_UPPER - | OPTS_TYPE_PT_BITSLICE + | OPTS_TYPE_TM_KERNEL | OPTS_TYPE_PT_ALWAYS_ASCII | OPTS_TYPE_PT_LM | OPTS_TYPE_HASH_SPLIT; diff --git a/src/modules/module_14000.c b/src/modules/module_14000.c index a26ce6cf1..e2dabbed1 100644 --- a/src/modules/module_14000.c +++ b/src/modules/module_14000.c @@ -22,7 +22,7 @@ static const char *HASH_NAME = "DES (PT = $salt, key = $pass)"; static const u64 KERN_TYPE = 14000; static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE; static const u64 OPTS_TYPE = OPTS_TYPE_PT_GENERATE_LE - | OPTS_TYPE_PT_BITSLICE + | OPTS_TYPE_TM_KERNEL | OPTS_TYPE_ST_HEX; static const u32 SALT_TYPE = SALT_TYPE_EMBEDDED; static const char *ST_PASS = "hashcat1"; diff --git a/src/selftest.c b/src/selftest.c index 0acb7b2a1..829f40f69 100644 --- a/src/selftest.c +++ b/src/selftest.c @@ -201,7 +201,7 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param { device_param->kernel_params_buf32[30] = 1; - if (hashconfig->opts_type & OPTS_TYPE_PT_BITSLICE) + if (hashconfig->opts_type & OPTS_TYPE_TM_KERNEL) { pw_t pw;