From 71a8459d851d246945343ea59effa1d46b965bf8 Mon Sep 17 00:00:00 2001 From: jsteube Date: Tue, 30 Aug 2016 17:44:14 +0200 Subject: [PATCH] Two new modes added: * Added new hash-mode 14000 = DES (PT = $salt, key = $pass) * Added new hash-mode 14100 = 3DES (PT = $salt, key = $pass) This patch was initially created out of this PR: https://github.com/hashcat/hashcat/pull/452 which is now no longer required. I've replaced the -a 3 kernel with a bitsliced version, which is around 10 times faster. I've also added test.sh and test.pl patches, but some of the test scripts need to be fixed. --- OpenCL/inc_vendor.cl | 6 + OpenCL/m14000_a0.cl | 741 +++++++++++ OpenCL/m14000_a1.cl | 817 +++++++++++++ OpenCL/m14000_a3.cl | 2770 ++++++++++++++++++++++++++++++++++++++++++ OpenCL/m14100_a0.cl | 845 +++++++++++++ OpenCL/m14100_a1.cl | 932 ++++++++++++++ OpenCL/m14100_a3.cl | 1163 ++++++++++++++++++ docs/changes.txt | 9 + docs/readme.txt | 2 + include/shared.h | 10 + src/hashcat.c | 76 +- src/shared.c | 69 ++ tools/test.pl | 56 +- tools/test.sh | 92 +- 14 files changed, 7582 insertions(+), 6 deletions(-) create mode 100644 OpenCL/m14000_a0.cl create mode 100644 OpenCL/m14000_a1.cl create mode 100644 OpenCL/m14000_a3.cl create mode 100644 OpenCL/m14100_a0.cl create mode 100644 OpenCL/m14100_a1.cl create mode 100644 OpenCL/m14100_a3.cl diff --git a/OpenCL/inc_vendor.cl b/OpenCL/inc_vendor.cl index 3adde9629..b302acfc9 100644 --- a/OpenCL/inc_vendor.cl +++ b/OpenCL/inc_vendor.cl @@ -128,6 +128,12 @@ #if KERN_TYPE == 13723 #undef _unroll #endif +#if KERN_TYPE == 14000 +#undef _unroll +#endif +#if KERN_TYPE == 14100 +#undef _unroll +#endif #endif #endif diff --git a/OpenCL/m14000_a0.cl b/OpenCL/m14000_a0.cl new file mode 100644 index 000000000..830c56ad0 --- /dev/null +++ b/OpenCL/m14000_a0.cl @@ -0,0 +1,741 @@ +/** + * Authors.....: Jens Steube + * Gabriele Gristina + * magnum + * Frans Lategan + * + * License.....: MIT + */ + +#define _DES_ + +#define NEW_SIMD_CODE + +#include "inc_vendor.cl" 
+#include "inc_hash_constants.h"
+#include "inc_hash_functions.cl"
+#include "inc_types.cl"
+#include "inc_common.cl"
+#include "inc_rp.h"
+#include "inc_rp.cl"
+#include "inc_simd.cl"
+/* PERM_OP: swap the bits of b selected by mask m with the bits of a selected by (m << n); tt is scratch and is overwritten. */
+#define PERM_OP(a,b,tt,n,m) \
+{                           \
+  tt = (a) >> (n);          \
+  tt = tt ^ (b);            \
+  tt = tt & (m);            \
+  b  = (b) ^ tt;            \
+  tt = tt << (n);           \
+  a  = (a) ^ tt;            \
+}
+/* HPERM_OP: swap, inside a, the bits selected by m with the bits selected by (m >> (16 + n)); tt is scratch and is overwritten. */
+#define HPERM_OP(a,tt,n,m)  \
+{                           \
+  tt = (a) << (16 + (n));   \
+  tt = tt ^ (a);            \
+  tt = tt & (m);            \
+  a  = (a) ^ tt;            \
+  tt = tt >> (16 + (n));    \
+  a  = (a) ^ tt;            \
+}
+/* IP: five PERM_OP swaps over the word pair (l, r). NOTE(review): named like the DES initial permutation; no function in m14000_a0.cl invokes it. */
+#define IP(l,r,tt)                     \
+{                                      \
+  PERM_OP (r, l, tt,  4, 0x0f0f0f0f);  \
+  PERM_OP (l, r, tt, 16, 0x0000ffff);  \
+  PERM_OP (r, l, tt,  2, 0x33333333);  \
+  PERM_OP (l, r, tt,  8, 0x00ff00ff);  \
+  PERM_OP (r, l, tt,  1, 0x55555555);  \
+}
+/* FP: the same five shift/mask pairs as IP, applied in the opposite order with the operands exchanged. Also not invoked in m14000_a0.cl. */
+#define FP(l,r,tt)                     \
+{                                      \
+  PERM_OP (l, r, tt,  1, 0x55555555);  \
+  PERM_OP (r, l, tt,  8, 0x00ff00ff);  \
+  PERM_OP (l, r, tt,  2, 0x33333333);  \
+  PERM_OP (r, l, tt, 16, 0x0000ffff);  \
+  PERM_OP (l, r, tt,  4, 0x0f0f0f0f);  \
+}
+/* c_SPtrans: eight 64-entry tables; the kernels copy them to __local s_SPtrans and _des_crypt_encrypt indexes them with 6-bit values through BOX(). */
+__constant u32 c_SPtrans[8][64] =
+{
+  {
+    /* nibble 0 */
+    0x02080800, 0x00080000, 0x02000002, 0x02080802,
+    0x02000000, 0x00080802, 0x00080002, 0x02000002,
+    0x00080802, 0x02080800, 0x02080000, 0x00000802,
+    0x02000802, 0x02000000, 0x00000000, 0x00080002,
+    0x00080000, 0x00000002, 0x02000800, 0x00080800,
+    0x02080802, 0x02080000, 0x00000802, 0x02000800,
+    0x00000002, 0x00000800, 0x00080800, 0x02080002,
+    0x00000800, 0x02000802, 0x02080002, 0x00000000,
+    0x00000000, 0x02080802, 0x02000800, 0x00080002,
+    0x02080800, 0x00080000, 0x00000802, 0x02000800,
+    0x02080002, 0x00000800, 0x00080800, 0x02000002,
+    0x00080802, 0x00000002, 0x02000002, 0x02080000,
+    0x02080802, 0x00080800, 0x02080000, 0x02000802,
+    0x02000000, 0x00000802, 0x00080002, 0x00000000,
+    0x00080000, 0x02000000, 0x02000802, 0x02080800,
+    0x00000002, 0x02080002, 0x00000800, 0x00080802,
+  },
+  {
+    /* nibble 1 */
+    0x40108010, 0x00000000, 0x00108000, 0x40100000,
+    0x40000010, 0x00008010, 0x40008000, 0x00108000,
+    0x00008000, 0x40100010, 0x00000010, 0x40008000,
+    0x00100010,
0x40108000, 0x40100000, 0x00000010, + 0x00100000, 0x40008010, 0x40100010, 0x00008000, + 0x00108010, 0x40000000, 0x00000000, 0x00100010, + 0x40008010, 0x00108010, 0x40108000, 0x40000010, + 0x40000000, 0x00100000, 0x00008010, 0x40108010, + 0x00100010, 0x40108000, 0x40008000, 0x00108010, + 0x40108010, 0x00100010, 0x40000010, 0x00000000, + 0x40000000, 0x00008010, 0x00100000, 0x40100010, + 0x00008000, 0x40000000, 0x00108010, 0x40008010, + 0x40108000, 0x00008000, 0x00000000, 0x40000010, + 0x00000010, 0x40108010, 0x00108000, 0x40100000, + 0x40100010, 0x00100000, 0x00008010, 0x40008000, + 0x40008010, 0x00000010, 0x40100000, 0x00108000, + }, + { + /* nibble 2 */ + 0x04000001, 0x04040100, 0x00000100, 0x04000101, + 0x00040001, 0x04000000, 0x04000101, 0x00040100, + 0x04000100, 0x00040000, 0x04040000, 0x00000001, + 0x04040101, 0x00000101, 0x00000001, 0x04040001, + 0x00000000, 0x00040001, 0x04040100, 0x00000100, + 0x00000101, 0x04040101, 0x00040000, 0x04000001, + 0x04040001, 0x04000100, 0x00040101, 0x04040000, + 0x00040100, 0x00000000, 0x04000000, 0x00040101, + 0x04040100, 0x00000100, 0x00000001, 0x00040000, + 0x00000101, 0x00040001, 0x04040000, 0x04000101, + 0x00000000, 0x04040100, 0x00040100, 0x04040001, + 0x00040001, 0x04000000, 0x04040101, 0x00000001, + 0x00040101, 0x04000001, 0x04000000, 0x04040101, + 0x00040000, 0x04000100, 0x04000101, 0x00040100, + 0x04000100, 0x00000000, 0x04040001, 0x00000101, + 0x04000001, 0x00040101, 0x00000100, 0x04040000, + }, + { + /* nibble 3 */ + 0x00401008, 0x10001000, 0x00000008, 0x10401008, + 0x00000000, 0x10400000, 0x10001008, 0x00400008, + 0x10401000, 0x10000008, 0x10000000, 0x00001008, + 0x10000008, 0x00401008, 0x00400000, 0x10000000, + 0x10400008, 0x00401000, 0x00001000, 0x00000008, + 0x00401000, 0x10001008, 0x10400000, 0x00001000, + 0x00001008, 0x00000000, 0x00400008, 0x10401000, + 0x10001000, 0x10400008, 0x10401008, 0x00400000, + 0x10400008, 0x00001008, 0x00400000, 0x10000008, + 0x00401000, 0x10001000, 0x00000008, 0x10400000, + 
0x10001008, 0x00000000, 0x00001000, 0x00400008, + 0x00000000, 0x10400008, 0x10401000, 0x00001000, + 0x10000000, 0x10401008, 0x00401008, 0x00400000, + 0x10401008, 0x00000008, 0x10001000, 0x00401008, + 0x00400008, 0x00401000, 0x10400000, 0x10001008, + 0x00001008, 0x10000000, 0x10000008, 0x10401000, + }, + { + /* nibble 4 */ + 0x08000000, 0x00010000, 0x00000400, 0x08010420, + 0x08010020, 0x08000400, 0x00010420, 0x08010000, + 0x00010000, 0x00000020, 0x08000020, 0x00010400, + 0x08000420, 0x08010020, 0x08010400, 0x00000000, + 0x00010400, 0x08000000, 0x00010020, 0x00000420, + 0x08000400, 0x00010420, 0x00000000, 0x08000020, + 0x00000020, 0x08000420, 0x08010420, 0x00010020, + 0x08010000, 0x00000400, 0x00000420, 0x08010400, + 0x08010400, 0x08000420, 0x00010020, 0x08010000, + 0x00010000, 0x00000020, 0x08000020, 0x08000400, + 0x08000000, 0x00010400, 0x08010420, 0x00000000, + 0x00010420, 0x08000000, 0x00000400, 0x00010020, + 0x08000420, 0x00000400, 0x00000000, 0x08010420, + 0x08010020, 0x08010400, 0x00000420, 0x00010000, + 0x00010400, 0x08010020, 0x08000400, 0x00000420, + 0x00000020, 0x00010420, 0x08010000, 0x08000020, + }, + { + /* nibble 5 */ + 0x80000040, 0x00200040, 0x00000000, 0x80202000, + 0x00200040, 0x00002000, 0x80002040, 0x00200000, + 0x00002040, 0x80202040, 0x00202000, 0x80000000, + 0x80002000, 0x80000040, 0x80200000, 0x00202040, + 0x00200000, 0x80002040, 0x80200040, 0x00000000, + 0x00002000, 0x00000040, 0x80202000, 0x80200040, + 0x80202040, 0x80200000, 0x80000000, 0x00002040, + 0x00000040, 0x00202000, 0x00202040, 0x80002000, + 0x00002040, 0x80000000, 0x80002000, 0x00202040, + 0x80202000, 0x00200040, 0x00000000, 0x80002000, + 0x80000000, 0x00002000, 0x80200040, 0x00200000, + 0x00200040, 0x80202040, 0x00202000, 0x00000040, + 0x80202040, 0x00202000, 0x00200000, 0x80002040, + 0x80000040, 0x80200000, 0x00202040, 0x00000000, + 0x00002000, 0x80000040, 0x80002040, 0x80202000, + 0x80200000, 0x00002040, 0x00000040, 0x80200040, + }, + { + /* nibble 6 */ + 0x00004000, 
0x00000200, 0x01000200, 0x01000004,
+    0x01004204, 0x00004004, 0x00004200, 0x00000000,
+    0x01000000, 0x01000204, 0x00000204, 0x01004000,
+    0x00000004, 0x01004200, 0x01004000, 0x00000204,
+    0x01000204, 0x00004000, 0x00004004, 0x01004204,
+    0x00000000, 0x01000200, 0x01000004, 0x00004200,
+    0x01004004, 0x00004204, 0x01004200, 0x00000004,
+    0x00004204, 0x01004004, 0x00000200, 0x01000000,
+    0x00004204, 0x01004000, 0x01004004, 0x00000204,
+    0x00004000, 0x00000200, 0x01000000, 0x01004004,
+    0x01000204, 0x00004204, 0x00004200, 0x00000000,
+    0x00000200, 0x01000004, 0x00000004, 0x01000200,
+    0x00000000, 0x01000204, 0x01000200, 0x00004200,
+    0x00000204, 0x00004000, 0x01004204, 0x01000000,
+    0x01004200, 0x00000004, 0x00004004, 0x01004204,
+    0x01000004, 0x01004200, 0x01004000, 0x00004004,
+  },
+  {
+    /* nibble 7 */
+    0x20800080, 0x20820000, 0x00020080, 0x00000000,
+    0x20020000, 0x00800080, 0x20800000, 0x20820080,
+    0x00000080, 0x20000000, 0x00820000, 0x00020080,
+    0x00820080, 0x20020080, 0x20000080, 0x20800000,
+    0x00020000, 0x00820080, 0x00800080, 0x20020000,
+    0x20820080, 0x20000080, 0x00000000, 0x00820000,
+    0x20000000, 0x00800000, 0x20020080, 0x20800080,
+    0x00800000, 0x00020000, 0x20820000, 0x00000080,
+    0x00800000, 0x00020000, 0x20000080, 0x20820080,
+    0x00020080, 0x20000000, 0x00000000, 0x00820000,
+    0x20800080, 0x20020080, 0x20020000, 0x00800080,
+    0x20820000, 0x00000080, 0x00800080, 0x20020000,
+    0x20820080, 0x00800000, 0x20800000, 0x20000080,
+    0x00820000, 0x00020080, 0x20020080, 0x20800000,
+    0x00000080, 0x20820000, 0x00820080, 0x00000000,
+    0x20000000, 0x20800080, 0x00020000, 0x00820080,
+  },
+};
+/* c_skb: eight 64-entry tables; the kernels copy them to __local s_skb and _des_crypt_keysetup ORs one lookup per table into each round-key pair Kc[i]/Kd[i]. */
+__constant u32 c_skb[8][64] =
+{
+  {
+    0x00000000, 0x00000010, 0x20000000, 0x20000010,
+    0x00010000, 0x00010010, 0x20010000, 0x20010010,
+    0x00000800, 0x00000810, 0x20000800, 0x20000810,
+    0x00010800, 0x00010810, 0x20010800, 0x20010810,
+    0x00000020, 0x00000030, 0x20000020, 0x20000030,
+    0x00010020, 0x00010030, 0x20010020, 0x20010030,
+    0x00000820, 0x00000830, 0x20000820,
0x20000830, + 0x00010820, 0x00010830, 0x20010820, 0x20010830, + 0x00080000, 0x00080010, 0x20080000, 0x20080010, + 0x00090000, 0x00090010, 0x20090000, 0x20090010, + 0x00080800, 0x00080810, 0x20080800, 0x20080810, + 0x00090800, 0x00090810, 0x20090800, 0x20090810, + 0x00080020, 0x00080030, 0x20080020, 0x20080030, + 0x00090020, 0x00090030, 0x20090020, 0x20090030, + 0x00080820, 0x00080830, 0x20080820, 0x20080830, + 0x00090820, 0x00090830, 0x20090820, 0x20090830, + }, + { + 0x00000000, 0x02000000, 0x00002000, 0x02002000, + 0x00200000, 0x02200000, 0x00202000, 0x02202000, + 0x00000004, 0x02000004, 0x00002004, 0x02002004, + 0x00200004, 0x02200004, 0x00202004, 0x02202004, + 0x00000400, 0x02000400, 0x00002400, 0x02002400, + 0x00200400, 0x02200400, 0x00202400, 0x02202400, + 0x00000404, 0x02000404, 0x00002404, 0x02002404, + 0x00200404, 0x02200404, 0x00202404, 0x02202404, + 0x10000000, 0x12000000, 0x10002000, 0x12002000, + 0x10200000, 0x12200000, 0x10202000, 0x12202000, + 0x10000004, 0x12000004, 0x10002004, 0x12002004, + 0x10200004, 0x12200004, 0x10202004, 0x12202004, + 0x10000400, 0x12000400, 0x10002400, 0x12002400, + 0x10200400, 0x12200400, 0x10202400, 0x12202400, + 0x10000404, 0x12000404, 0x10002404, 0x12002404, + 0x10200404, 0x12200404, 0x10202404, 0x12202404, + }, + { + 0x00000000, 0x00000001, 0x00040000, 0x00040001, + 0x01000000, 0x01000001, 0x01040000, 0x01040001, + 0x00000002, 0x00000003, 0x00040002, 0x00040003, + 0x01000002, 0x01000003, 0x01040002, 0x01040003, + 0x00000200, 0x00000201, 0x00040200, 0x00040201, + 0x01000200, 0x01000201, 0x01040200, 0x01040201, + 0x00000202, 0x00000203, 0x00040202, 0x00040203, + 0x01000202, 0x01000203, 0x01040202, 0x01040203, + 0x08000000, 0x08000001, 0x08040000, 0x08040001, + 0x09000000, 0x09000001, 0x09040000, 0x09040001, + 0x08000002, 0x08000003, 0x08040002, 0x08040003, + 0x09000002, 0x09000003, 0x09040002, 0x09040003, + 0x08000200, 0x08000201, 0x08040200, 0x08040201, + 0x09000200, 0x09000201, 0x09040200, 0x09040201, + 0x08000202, 
0x08000203, 0x08040202, 0x08040203, + 0x09000202, 0x09000203, 0x09040202, 0x09040203, + }, + { + 0x00000000, 0x00100000, 0x00000100, 0x00100100, + 0x00000008, 0x00100008, 0x00000108, 0x00100108, + 0x00001000, 0x00101000, 0x00001100, 0x00101100, + 0x00001008, 0x00101008, 0x00001108, 0x00101108, + 0x04000000, 0x04100000, 0x04000100, 0x04100100, + 0x04000008, 0x04100008, 0x04000108, 0x04100108, + 0x04001000, 0x04101000, 0x04001100, 0x04101100, + 0x04001008, 0x04101008, 0x04001108, 0x04101108, + 0x00020000, 0x00120000, 0x00020100, 0x00120100, + 0x00020008, 0x00120008, 0x00020108, 0x00120108, + 0x00021000, 0x00121000, 0x00021100, 0x00121100, + 0x00021008, 0x00121008, 0x00021108, 0x00121108, + 0x04020000, 0x04120000, 0x04020100, 0x04120100, + 0x04020008, 0x04120008, 0x04020108, 0x04120108, + 0x04021000, 0x04121000, 0x04021100, 0x04121100, + 0x04021008, 0x04121008, 0x04021108, 0x04121108, + }, + { + 0x00000000, 0x10000000, 0x00010000, 0x10010000, + 0x00000004, 0x10000004, 0x00010004, 0x10010004, + 0x20000000, 0x30000000, 0x20010000, 0x30010000, + 0x20000004, 0x30000004, 0x20010004, 0x30010004, + 0x00100000, 0x10100000, 0x00110000, 0x10110000, + 0x00100004, 0x10100004, 0x00110004, 0x10110004, + 0x20100000, 0x30100000, 0x20110000, 0x30110000, + 0x20100004, 0x30100004, 0x20110004, 0x30110004, + 0x00001000, 0x10001000, 0x00011000, 0x10011000, + 0x00001004, 0x10001004, 0x00011004, 0x10011004, + 0x20001000, 0x30001000, 0x20011000, 0x30011000, + 0x20001004, 0x30001004, 0x20011004, 0x30011004, + 0x00101000, 0x10101000, 0x00111000, 0x10111000, + 0x00101004, 0x10101004, 0x00111004, 0x10111004, + 0x20101000, 0x30101000, 0x20111000, 0x30111000, + 0x20101004, 0x30101004, 0x20111004, 0x30111004, + }, + { + 0x00000000, 0x08000000, 0x00000008, 0x08000008, + 0x00000400, 0x08000400, 0x00000408, 0x08000408, + 0x00020000, 0x08020000, 0x00020008, 0x08020008, + 0x00020400, 0x08020400, 0x00020408, 0x08020408, + 0x00000001, 0x08000001, 0x00000009, 0x08000009, + 0x00000401, 0x08000401, 
0x00000409, 0x08000409, + 0x00020001, 0x08020001, 0x00020009, 0x08020009, + 0x00020401, 0x08020401, 0x00020409, 0x08020409, + 0x02000000, 0x0A000000, 0x02000008, 0x0A000008, + 0x02000400, 0x0A000400, 0x02000408, 0x0A000408, + 0x02020000, 0x0A020000, 0x02020008, 0x0A020008, + 0x02020400, 0x0A020400, 0x02020408, 0x0A020408, + 0x02000001, 0x0A000001, 0x02000009, 0x0A000009, + 0x02000401, 0x0A000401, 0x02000409, 0x0A000409, + 0x02020001, 0x0A020001, 0x02020009, 0x0A020009, + 0x02020401, 0x0A020401, 0x02020409, 0x0A020409, + }, + { + 0x00000000, 0x00000100, 0x00080000, 0x00080100, + 0x01000000, 0x01000100, 0x01080000, 0x01080100, + 0x00000010, 0x00000110, 0x00080010, 0x00080110, + 0x01000010, 0x01000110, 0x01080010, 0x01080110, + 0x00200000, 0x00200100, 0x00280000, 0x00280100, + 0x01200000, 0x01200100, 0x01280000, 0x01280100, + 0x00200010, 0x00200110, 0x00280010, 0x00280110, + 0x01200010, 0x01200110, 0x01280010, 0x01280110, + 0x00000200, 0x00000300, 0x00080200, 0x00080300, + 0x01000200, 0x01000300, 0x01080200, 0x01080300, + 0x00000210, 0x00000310, 0x00080210, 0x00080310, + 0x01000210, 0x01000310, 0x01080210, 0x01080310, + 0x00200200, 0x00200300, 0x00280200, 0x00280300, + 0x01200200, 0x01200300, 0x01280200, 0x01280300, + 0x00200210, 0x00200310, 0x00280210, 0x00280310, + 0x01200210, 0x01200310, 0x01280210, 0x01280310, + }, + { + 0x00000000, 0x04000000, 0x00040000, 0x04040000, + 0x00000002, 0x04000002, 0x00040002, 0x04040002, + 0x00002000, 0x04002000, 0x00042000, 0x04042000, + 0x00002002, 0x04002002, 0x00042002, 0x04042002, + 0x00000020, 0x04000020, 0x00040020, 0x04040020, + 0x00000022, 0x04000022, 0x00040022, 0x04040022, + 0x00002020, 0x04002020, 0x00042020, 0x04042020, + 0x00002022, 0x04002022, 0x00042022, 0x04042022, + 0x00000800, 0x04000800, 0x00040800, 0x04040800, + 0x00000802, 0x04000802, 0x00040802, 0x04040802, + 0x00002800, 0x04002800, 0x00042800, 0x04042800, + 0x00002802, 0x04002802, 0x00042802, 0x04042802, + 0x00000820, 0x04000820, 0x00040820, 0x04040820, + 
0x00000822, 0x04000822, 0x00040822, 0x04040822,
+    0x00002820, 0x04002820, 0x00042820, 0x04042820,
+    0x00002822, 0x04002822, 0x00042822, 0x04042822
+  }
+};
+/* BOX: per-lane table gather -- yields S[n][i] for every component of the index vector i; the variant is picked by VECT_SIZE. */
+#if VECT_SIZE == 1
+#define BOX(i,n,S) (S)[(n)][(i)]
+#elif VECT_SIZE == 2
+#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
+#elif VECT_SIZE == 4
+#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
+#elif VECT_SIZE == 8
+#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
+#elif VECT_SIZE == 16
+#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
+#endif
+/* BOX1: the same per-lane gather for a one-dimensional table S[i]. */
+#if VECT_SIZE == 1
+#define BOX1(i,S) (S)[(i)]
+#elif VECT_SIZE == 2
+#define BOX1(i,S) (u32x) ((S)[(i).s0], (S)[(i).s1])
+#elif VECT_SIZE == 4
+#define BOX1(i,S) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3])
+#elif VECT_SIZE == 8
+#define BOX1(i,S) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7])
+#elif VECT_SIZE == 16
+#define BOX1(i,S) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7], (S)[(i).s8], (S)[(i).s9], (S)[(i).sa], (S)[(i).sb], (S)[(i).sc], (S)[(i).sd], (S)[(i).se], (S)[(i).sf])
+#endif
+/* _des_crypt_encrypt: runs 16 rounds over data[0..1] with round keys Kc[]/Kd[] and tables s_SPtrans; writes the two result words to iv[0..1]. IP/FP are not applied here. */
+void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __local u32 (*s_SPtrans)[64])
+{
+  u32x r = rotl32 (data[0], 3u);
+  u32x l = rotl32 (data[1], 3u);
+
+  /* each iteration performs two rounds: r is mixed into l, then the updated l is mixed into r */
+
+  #ifdef _unroll
+  #pragma unroll
+  #endif
+  for (u32 i = 0; i < 16; i += 2)
+  {
+    u32x u;
+    u32x t;
+
+    u = Kc[i + 0] ^ r;
+    t = Kd[i + 0] ^ rotl32 (r, 28u);
+    /* eight 6-bit groups (four from u, four from t) index the eight sub-tables; the lookups are OR-ed together */
+    l ^= BOX (((u >>  2) & 0x3f), 0, s_SPtrans)
+       | BOX (((u >> 10) & 0x3f), 2, s_SPtrans)
+       | BOX (((u >> 18) & 0x3f), 4, s_SPtrans)
+       | BOX (((u >> 26) & 0x3f), 6, s_SPtrans)
+       | BOX (((t >>  2) & 0x3f), 1, s_SPtrans)
+       | BOX (((t >> 10) & 0x3f), 3, s_SPtrans)
+       | BOX (((t >> 18) & 0x3f), 5, s_SPtrans)
+       | BOX (((t >> 26) & 0x3f), 7, s_SPtrans);
+
+    u = Kc[i + 1] ^ l;
+    t = Kd[i + 1] ^ rotl32 (l, 28u);
+
+    r ^= BOX (((u >>  2) & 0x3f), 0, s_SPtrans)
+       | BOX (((u >> 10) & 0x3f), 2, s_SPtrans)
+       | BOX (((u >> 18) & 0x3f), 4, s_SPtrans)
+       | BOX (((u >> 26) & 0x3f), 6, s_SPtrans)
+       | BOX (((t >>  2) & 0x3f), 1, s_SPtrans)
+       | BOX (((t >> 10) & 0x3f), 3, s_SPtrans)
+       | BOX (((t >> 18) & 0x3f), 5, s_SPtrans)
+       | BOX (((t >> 26) & 0x3f), 7, s_SPtrans);
+  }
+
+  iv[0] = rotl32 (l, 29u); /* 3 + 29 = 32: presumably undoes the entry rotation (assumes rotl32 is a 32-bit rotate) */
+  iv[1] = rotl32 (r, 29u);
+}
+/* _des_crypt_keysetup: derives the 16 round-key pairs Kc[i]/Kd[i] from the two key words c and d, using the s_skb tables. */
+void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32 (*s_skb)[64])
+{
+  u32x tt;
+
+  PERM_OP  (d, c, tt, 4, 0x0f0f0f0f);
+  HPERM_OP (c,    tt, 2, 0xcccc0000);
+  HPERM_OP (d,    tt, 2, 0xcccc0000);
+  PERM_OP  (d, c, tt, 1, 0x55555555);
+  PERM_OP  (c, d, tt, 8, 0x00ff00ff);
+  PERM_OP  (d, c, tt, 1, 0x55555555);
+
+  d = ((d & 0x000000ff) << 16)
+    | ((d & 0x0000ff00) <<  0)
+    | ((d & 0x00ff0000) >> 16)
+    | ((c & 0xf0000000) >>  4);
+
+  c = c & 0x0fffffff; /* from here on c and d are handled as 28-bit values */
+
+  #ifdef _unroll
+  #pragma unroll
+  #endif
+  for (u32 i = 0; i < 16; i++)
+  {
+    if ((i < 2) || (i == 8) || (i == 15)) /* rounds 0, 1, 8 and 15 rotate by one bit, all others by two */
+    {
+      c = ((c >> 1) | (c << 27));
+      d = ((d >> 1) | (d << 27));
+    }
+    else
+    {
+      c = ((c >> 2) | (c << 26));
+      d = ((d >> 2) | (d << 26));
+    }
+
+    c = c & 0x0fffffff;
+    d = d & 0x0fffffff;
+
+    const u32x c00 = (c >>  0) & 0x0000003f;
+    const u32x c06 = (c >>  6) & 0x00383003;
+    const u32x c07 = (c >>  7) & 0x0000003c;
+    const u32x c13 = (c >> 13) & 0x0000060f;
+    const u32x c20 = (c >> 20) & 0x00000001;
+
+    u32x s = BOX (((c00 >>  0) & 0xff), 0, s_skb)
+           | BOX (((c06 >>  0) & 0xff)
+                 |((c07 >>  0) & 0xff), 1, s_skb)
+           | BOX (((c13 >>  0) & 0xff)
+                 |((c06 >>  8) & 0xff), 2, s_skb)
+           | BOX (((c20 >>  0) & 0xff)
+                 |((c13 >>  8) &
0xff)
+                 |((c06 >> 16) & 0xff), 3, s_skb);
+    /* s collects the four lookups driven by c (tables 0-3); t below collects the four driven by d (tables 4-7) */
+    const u32x d00 = (d >>  0) & 0x00003c3f;
+    const u32x d07 = (d >>  7) & 0x00003f03;
+    const u32x d21 = (d >> 21) & 0x0000000f;
+    const u32x d22 = (d >> 22) & 0x00000030;
+
+    u32x t = BOX (((d00 >>  0) & 0xff), 4, s_skb)
+           | BOX (((d07 >>  0) & 0xff)
+                 |((d00 >>  8) & 0xff), 5, s_skb)
+           | BOX (((d07 >>  8) & 0xff), 6, s_skb)
+           | BOX (((d21 >>  0) & 0xff)
+                 |((d22 >>  0) & 0xff), 7, s_skb);
+    /* interleave: Kc takes the low halves of s and t, Kd the high halves; both are then rotated left by 2 */
+    Kc[i] = ((t << 16) | (s & 0x0000ffff));
+    Kd[i] = ((s >> 16) | (t & 0xffff0000));
+
+    Kc[i] = rotl32 (Kc[i], 2u);
+    Kd[i] = rotl32 (Kd[i], 2u);
+  }
+}
+/* m14000_m04: per work-item, applies each rule batch to the first two password words, uses the result as DES key, encrypts the two salt words and passes the output to COMPARE_M_SIMD. */
+__kernel void m14000_m04 (__global pw_t *pws, __global kernel_rule_t * rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
+{
+  /**
+   * base
+   */
+
+  const u32 gid = get_global_id (0);
+  const u32 lid = get_local_id (0);
+  const u32 lsz = get_local_size (0);
+
+  /**
+   * shared: the work-group copies both constant tables into __local memory, lsz columns per pass
+   */
+
+  __local u32 s_SPtrans[8][64];
+  __local u32 s_skb[8][64];
+
+  for (u32 i = lid; i < 64; i += lsz)
+  {
+    s_SPtrans[0][i] = c_SPtrans[0][i];
+    s_SPtrans[1][i] = c_SPtrans[1][i];
+    s_SPtrans[2][i] = c_SPtrans[2][i];
+    s_SPtrans[3][i] = c_SPtrans[3][i];
+    s_SPtrans[4][i] = c_SPtrans[4][i];
+    s_SPtrans[5][i] = c_SPtrans[5][i];
+    s_SPtrans[6][i] = c_SPtrans[6][i];
+    s_SPtrans[7][i] = c_SPtrans[7][i];
+
+    s_skb[0][i] = c_skb[0][i];
+    s_skb[1][i] = c_skb[1][i];
+    s_skb[2][i] = c_skb[2][i];
+    s_skb[3][i] = c_skb[3][i];
+    s_skb[4][i] = c_skb[4][i];
+    s_skb[5][i] = c_skb[5][i];
+    s_skb[6][i] = c_skb[6][i];
+    s_skb[7][i] = c_skb[7][i];
+  }
+
+  barrier (CLK_LOCAL_MEM_FENCE);
+
+  if (gid >= gid_max) return; /* placed after the barrier, so out-of-range work-items still take part in the table copy */
+
+  /**
+   * base: only the first two password words are loaded, the remaining six are zeroed
+   */
+
+  u32 pw_buf0[4];
+  u32 pw_buf1[4];
+
+  pw_buf0[0] = pws[gid].i[ 0];
+  pw_buf0[1] = pws[gid].i[ 1];
+  pw_buf0[2] = 0;
+  pw_buf0[3] = 0;
+  pw_buf1[0] = 0;
+  pw_buf1[1] = 0;
+  pw_buf1[2] = 0;
+  pw_buf1[3] = 0;
+
+  const u32 pw_len = pws[gid].pw_len;
+
+  /**
+   * salt: two words that serve as the block to encrypt
+   */
+
+  u32 salt_buf0[2];
+
+  salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0];
+  salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1];
+
+  /**
+   * main
+   */
+
+  for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
+  {
+    u32x w0[4] = { 0 };
+    u32x w1[4] = { 0 };
+    u32x w2[4] = { 0 }; /* NOTE(review): w2/w3 are not referenced in the visible code -- confirm COMPARE_M_SIMD does not need them */
+    u32x w3[4] = { 0 };
+
+    apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
+
+    const u32x c = w0[0];
+    const u32x d = w0[1];
+
+    u32x Kc[16];
+    u32x Kd[16];
+
+    _des_crypt_keysetup (c, d, Kc, Kd, s_skb);
+
+    u32x data[2];
+
+    data[0] = salt_buf0[0];
+    data[1] = salt_buf0[1];
+
+    u32x iv[2];
+
+    _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
+
+    u32x z = 0;
+
+    COMPARE_M_SIMD (iv[0], iv[1], z, z); /* only two result words exist; the other two slots are passed as zero */
+  }
+}
+/* m14000_m08: empty kernel body. */
+__kernel void m14000_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs,
__global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
+{
+}
+/* m14000_m16: empty kernel body. */
+__kernel void m14000_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
+{
+}
+/* m14000_s04: same pipeline as m14000_m04, but it also loads one target digest into search[] and finishes with COMPARE_S_SIMD. */
+__kernel void m14000_s04 (__global pw_t *pws, __global kernel_rule_t * rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
+{
+  /**
+   * base
+   */
+
+  const u32 gid = get_global_id (0);
+  const u32 lid = get_local_id (0);
+  const u32 lsz = get_local_size (0);
+
+  /**
+   * shared: the work-group copies both constant tables into __local memory, lsz columns per pass
+   */
+
+  __local u32 s_SPtrans[8][64];
+  __local u32 s_skb[8][64];
+
+  for (u32 i = lid; i < 64; i += lsz)
+  {
+    s_SPtrans[0][i] = c_SPtrans[0][i];
+    s_SPtrans[1][i] = c_SPtrans[1][i];
+    s_SPtrans[2][i] = c_SPtrans[2][i];
+    s_SPtrans[3][i] = c_SPtrans[3][i];
+    s_SPtrans[4][i] = c_SPtrans[4][i];
+    s_SPtrans[5][i] = c_SPtrans[5][i];
+    s_SPtrans[6][i] = c_SPtrans[6][i];
+    s_SPtrans[7][i] = c_SPtrans[7][i];
+
+    s_skb[0][i] = c_skb[0][i];
+    s_skb[1][i] = c_skb[1][i];
+    s_skb[2][i] = c_skb[2][i];
+    s_skb[3][i] = c_skb[3][i];
+    s_skb[4][i] = c_skb[4][i];
+    s_skb[5][i] = c_skb[5][i];
+    s_skb[6][i] = c_skb[6][i];
+    s_skb[7][i] = c_skb[7][i];
+  }
+
+  barrier (CLK_LOCAL_MEM_FENCE);
+
+  if (gid >= gid_max) return; /* placed after the barrier, so out-of-range work-items still take part in the table copy */
+
+  /**
+   * base: only the first two password words are loaded, the remaining six are zeroed
+   */
+
+  u32 pw_buf0[4];
+  u32 pw_buf1[4];
+
+  pw_buf0[0] = pws[gid].i[ 0];
+  pw_buf0[1] = pws[gid].i[ 1];
+  pw_buf0[2] = 0;
+  pw_buf0[3] = 0;
+  pw_buf1[0] = 0;
+  pw_buf1[1] = 0;
+  pw_buf1[2] = 0;
+  pw_buf1[3] = 0;
+
+  const u32 pw_len = pws[gid].pw_len;
+
+  /**
+   * salt: two words that serve as the block to encrypt
+   */
+
+  u32 salt_buf0[2];
+
+  salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0];
+  salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1];
+
+  /**
+   * digest: search[] is not referenced by name below; presumably COMPARE_S_SIMD reads it -- TODO confirm
+   */
+
+  const u32 search[4] =
+  {
+    digests_buf[digests_offset].digest_buf[DGST_R0],
+    digests_buf[digests_offset].digest_buf[DGST_R1],
+    0,
+    0
+  };
+
+  /**
+   * main
+   */
+
+  for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
+  {
+    u32x w0[4] = { 0 };
+    u32x w1[4] = { 0 };
+    u32x w2[4] = { 0 }; /* NOTE(review): w2/w3 are not referenced in the visible code -- confirm COMPARE_S_SIMD does not need them */
+    u32x w3[4] = { 0 };
+
+    apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
+
+    const u32x c = w0[0];
+    const u32x d = w0[1];
+
+    u32x Kc[16];
+    u32x Kd[16];
+
+    _des_crypt_keysetup (c, d, Kc, Kd, s_skb);
+
+    u32x data[2];
+
+    data[0] = salt_buf0[0];
+    data[1] = salt_buf0[1];
+
+    u32x iv[2];
+
+    _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
+
+    u32x z = 0;
+
+    COMPARE_S_SIMD (iv[0], iv[1], z, z); /* only two result words exist; the other two slots are passed as zero */
+  }
+}
+/* m14000_s08: empty kernel body. */
+__kernel void m14000_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
+{
+}
+/* m14000_s16: empty kernel body. */
+__kernel void m14000_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32
*d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +{ +} diff --git a/OpenCL/m14000_a1.cl b/OpenCL/m14000_a1.cl new file mode 100644 index 000000000..c311277e8 --- /dev/null +++ b/OpenCL/m14000_a1.cl @@ -0,0 +1,817 @@ +/** + * Authors.....: Jens Steube + * Gabriele Gristina + * Frans Lategan + * + * License.....: MIT + */ + +#define _DES_ + +#define NEW_SIMD_CODE + +#include "inc_vendor.cl" +#include "inc_hash_constants.h" +#include "inc_hash_functions.cl" +#include "inc_types.cl" +#include "inc_common.cl" +#include "inc_simd.cl" + +#define PERM_OP(a,b,tt,n,m) \ +{ \ + tt = a >> n; \ + tt = tt ^ b; \ + tt = tt & m; \ + b = b ^ tt; \ + tt = tt << n; \ + a = a ^ tt; \ +} + +#define HPERM_OP(a,tt,n,m) \ +{ \ + tt = a << (16 + n); \ + tt = tt ^ a; \ + tt = tt & m; \ + a = a ^ tt; \ + tt = tt >> (16 + n); \ + a = a ^ tt; \ +} + +#define IP(l,r,tt) \ +{ \ + PERM_OP (r, l, tt, 4, 0x0f0f0f0f); \ + PERM_OP (l, r, tt, 16, 0x0000ffff); \ + PERM_OP (r, l, tt, 2, 0x33333333); \ + PERM_OP (l, r, tt, 8, 0x00ff00ff); \ + PERM_OP (r, l, tt, 1, 0x55555555); \ +} + +#define FP(l,r,tt) \ +{ \ + PERM_OP (l, r, tt, 1, 0x55555555); \ + PERM_OP (r, l, tt, 8, 0x00ff00ff); \ + PERM_OP (l, r, tt, 2, 0x33333333); \ + PERM_OP (r, l, tt, 16, 0x0000ffff); \ + PERM_OP (l, r, tt, 4, 0x0f0f0f0f); \ +} + +__constant u32 c_SPtrans[8][64] = +{ + { + 0x02080800, 0x00080000, 0x02000002, 0x02080802, + 0x02000000, 0x00080802, 0x00080002, 0x02000002, + 0x00080802, 0x02080800, 0x02080000, 0x00000802, + 0x02000802, 0x02000000, 0x00000000, 0x00080002, + 0x00080000, 0x00000002, 0x02000800, 0x00080800, + 0x02080802, 0x02080000, 0x00000802, 0x02000800, + 0x00000002, 0x00000800, 0x00080800, 
0x02080002, + 0x00000800, 0x02000802, 0x02080002, 0x00000000, + 0x00000000, 0x02080802, 0x02000800, 0x00080002, + 0x02080800, 0x00080000, 0x00000802, 0x02000800, + 0x02080002, 0x00000800, 0x00080800, 0x02000002, + 0x00080802, 0x00000002, 0x02000002, 0x02080000, + 0x02080802, 0x00080800, 0x02080000, 0x02000802, + 0x02000000, 0x00000802, 0x00080002, 0x00000000, + 0x00080000, 0x02000000, 0x02000802, 0x02080800, + 0x00000002, 0x02080002, 0x00000800, 0x00080802, + }, + { + 0x40108010, 0x00000000, 0x00108000, 0x40100000, + 0x40000010, 0x00008010, 0x40008000, 0x00108000, + 0x00008000, 0x40100010, 0x00000010, 0x40008000, + 0x00100010, 0x40108000, 0x40100000, 0x00000010, + 0x00100000, 0x40008010, 0x40100010, 0x00008000, + 0x00108010, 0x40000000, 0x00000000, 0x00100010, + 0x40008010, 0x00108010, 0x40108000, 0x40000010, + 0x40000000, 0x00100000, 0x00008010, 0x40108010, + 0x00100010, 0x40108000, 0x40008000, 0x00108010, + 0x40108010, 0x00100010, 0x40000010, 0x00000000, + 0x40000000, 0x00008010, 0x00100000, 0x40100010, + 0x00008000, 0x40000000, 0x00108010, 0x40008010, + 0x40108000, 0x00008000, 0x00000000, 0x40000010, + 0x00000010, 0x40108010, 0x00108000, 0x40100000, + 0x40100010, 0x00100000, 0x00008010, 0x40008000, + 0x40008010, 0x00000010, 0x40100000, 0x00108000, + }, + { + 0x04000001, 0x04040100, 0x00000100, 0x04000101, + 0x00040001, 0x04000000, 0x04000101, 0x00040100, + 0x04000100, 0x00040000, 0x04040000, 0x00000001, + 0x04040101, 0x00000101, 0x00000001, 0x04040001, + 0x00000000, 0x00040001, 0x04040100, 0x00000100, + 0x00000101, 0x04040101, 0x00040000, 0x04000001, + 0x04040001, 0x04000100, 0x00040101, 0x04040000, + 0x00040100, 0x00000000, 0x04000000, 0x00040101, + 0x04040100, 0x00000100, 0x00000001, 0x00040000, + 0x00000101, 0x00040001, 0x04040000, 0x04000101, + 0x00000000, 0x04040100, 0x00040100, 0x04040001, + 0x00040001, 0x04000000, 0x04040101, 0x00000001, + 0x00040101, 0x04000001, 0x04000000, 0x04040101, + 0x00040000, 0x04000100, 0x04000101, 0x00040100, + 0x04000100, 
0x00000000, 0x04040001, 0x00000101, + 0x04000001, 0x00040101, 0x00000100, 0x04040000, + }, + { + 0x00401008, 0x10001000, 0x00000008, 0x10401008, + 0x00000000, 0x10400000, 0x10001008, 0x00400008, + 0x10401000, 0x10000008, 0x10000000, 0x00001008, + 0x10000008, 0x00401008, 0x00400000, 0x10000000, + 0x10400008, 0x00401000, 0x00001000, 0x00000008, + 0x00401000, 0x10001008, 0x10400000, 0x00001000, + 0x00001008, 0x00000000, 0x00400008, 0x10401000, + 0x10001000, 0x10400008, 0x10401008, 0x00400000, + 0x10400008, 0x00001008, 0x00400000, 0x10000008, + 0x00401000, 0x10001000, 0x00000008, 0x10400000, + 0x10001008, 0x00000000, 0x00001000, 0x00400008, + 0x00000000, 0x10400008, 0x10401000, 0x00001000, + 0x10000000, 0x10401008, 0x00401008, 0x00400000, + 0x10401008, 0x00000008, 0x10001000, 0x00401008, + 0x00400008, 0x00401000, 0x10400000, 0x10001008, + 0x00001008, 0x10000000, 0x10000008, 0x10401000, + }, + { + 0x08000000, 0x00010000, 0x00000400, 0x08010420, + 0x08010020, 0x08000400, 0x00010420, 0x08010000, + 0x00010000, 0x00000020, 0x08000020, 0x00010400, + 0x08000420, 0x08010020, 0x08010400, 0x00000000, + 0x00010400, 0x08000000, 0x00010020, 0x00000420, + 0x08000400, 0x00010420, 0x00000000, 0x08000020, + 0x00000020, 0x08000420, 0x08010420, 0x00010020, + 0x08010000, 0x00000400, 0x00000420, 0x08010400, + 0x08010400, 0x08000420, 0x00010020, 0x08010000, + 0x00010000, 0x00000020, 0x08000020, 0x08000400, + 0x08000000, 0x00010400, 0x08010420, 0x00000000, + 0x00010420, 0x08000000, 0x00000400, 0x00010020, + 0x08000420, 0x00000400, 0x00000000, 0x08010420, + 0x08010020, 0x08010400, 0x00000420, 0x00010000, + 0x00010400, 0x08010020, 0x08000400, 0x00000420, + 0x00000020, 0x00010420, 0x08010000, 0x08000020, + }, + { + 0x80000040, 0x00200040, 0x00000000, 0x80202000, + 0x00200040, 0x00002000, 0x80002040, 0x00200000, + 0x00002040, 0x80202040, 0x00202000, 0x80000000, + 0x80002000, 0x80000040, 0x80200000, 0x00202040, + 0x00200000, 0x80002040, 0x80200040, 0x00000000, + 0x00002000, 0x00000040, 
0x80202000, 0x80200040, + 0x80202040, 0x80200000, 0x80000000, 0x00002040, + 0x00000040, 0x00202000, 0x00202040, 0x80002000, + 0x00002040, 0x80000000, 0x80002000, 0x00202040, + 0x80202000, 0x00200040, 0x00000000, 0x80002000, + 0x80000000, 0x00002000, 0x80200040, 0x00200000, + 0x00200040, 0x80202040, 0x00202000, 0x00000040, + 0x80202040, 0x00202000, 0x00200000, 0x80002040, + 0x80000040, 0x80200000, 0x00202040, 0x00000000, + 0x00002000, 0x80000040, 0x80002040, 0x80202000, + 0x80200000, 0x00002040, 0x00000040, 0x80200040, + }, + { + 0x00004000, 0x00000200, 0x01000200, 0x01000004, + 0x01004204, 0x00004004, 0x00004200, 0x00000000, + 0x01000000, 0x01000204, 0x00000204, 0x01004000, + 0x00000004, 0x01004200, 0x01004000, 0x00000204, + 0x01000204, 0x00004000, 0x00004004, 0x01004204, + 0x00000000, 0x01000200, 0x01000004, 0x00004200, + 0x01004004, 0x00004204, 0x01004200, 0x00000004, + 0x00004204, 0x01004004, 0x00000200, 0x01000000, + 0x00004204, 0x01004000, 0x01004004, 0x00000204, + 0x00004000, 0x00000200, 0x01000000, 0x01004004, + 0x01000204, 0x00004204, 0x00004200, 0x00000000, + 0x00000200, 0x01000004, 0x00000004, 0x01000200, + 0x00000000, 0x01000204, 0x01000200, 0x00004200, + 0x00000204, 0x00004000, 0x01004204, 0x01000000, + 0x01004200, 0x00000004, 0x00004004, 0x01004204, + 0x01000004, 0x01004200, 0x01004000, 0x00004004, + }, + { + 0x20800080, 0x20820000, 0x00020080, 0x00000000, + 0x20020000, 0x00800080, 0x20800000, 0x20820080, + 0x00000080, 0x20000000, 0x00820000, 0x00020080, + 0x00820080, 0x20020080, 0x20000080, 0x20800000, + 0x00020000, 0x00820080, 0x00800080, 0x20020000, + 0x20820080, 0x20000080, 0x00000000, 0x00820000, + 0x20000000, 0x00800000, 0x20020080, 0x20800080, + 0x00800000, 0x00020000, 0x20820000, 0x00000080, + 0x00800000, 0x00020000, 0x20000080, 0x20820080, + 0x00020080, 0x20000000, 0x00000000, 0x00820000, + 0x20800080, 0x20020080, 0x20020000, 0x00800080, + 0x20820000, 0x00000080, 0x00800080, 0x20020000, + 0x20820080, 0x00800000, 0x20800000, 0x20000080, + 
0x00820000, 0x00020080, 0x20020080, 0x20800000, + 0x00000080, 0x20820000, 0x00820080, 0x00000000, + 0x20000000, 0x20800080, 0x00020000, 0x00820080, + } +}; +/* c_skb: eight tables of 64 u32 entries. The kernels copy it into local memory and _des_crypt_keysetup reads it through BOX to assemble the round-key words (presumably the DES key-schedule selection tables - confirm). */ +__constant u32 c_skb[8][64] = +{ + { + 0x00000000, 0x00000010, 0x20000000, 0x20000010, + 0x00010000, 0x00010010, 0x20010000, 0x20010010, + 0x00000800, 0x00000810, 0x20000800, 0x20000810, + 0x00010800, 0x00010810, 0x20010800, 0x20010810, + 0x00000020, 0x00000030, 0x20000020, 0x20000030, + 0x00010020, 0x00010030, 0x20010020, 0x20010030, + 0x00000820, 0x00000830, 0x20000820, 0x20000830, + 0x00010820, 0x00010830, 0x20010820, 0x20010830, + 0x00080000, 0x00080010, 0x20080000, 0x20080010, + 0x00090000, 0x00090010, 0x20090000, 0x20090010, + 0x00080800, 0x00080810, 0x20080800, 0x20080810, + 0x00090800, 0x00090810, 0x20090800, 0x20090810, + 0x00080020, 0x00080030, 0x20080020, 0x20080030, + 0x00090020, 0x00090030, 0x20090020, 0x20090030, + 0x00080820, 0x00080830, 0x20080820, 0x20080830, + 0x00090820, 0x00090830, 0x20090820, 0x20090830, + }, + { + 0x00000000, 0x02000000, 0x00002000, 0x02002000, + 0x00200000, 0x02200000, 0x00202000, 0x02202000, + 0x00000004, 0x02000004, 0x00002004, 0x02002004, + 0x00200004, 0x02200004, 0x00202004, 0x02202004, + 0x00000400, 0x02000400, 0x00002400, 0x02002400, + 0x00200400, 0x02200400, 0x00202400, 0x02202400, + 0x00000404, 0x02000404, 0x00002404, 0x02002404, + 0x00200404, 0x02200404, 0x00202404, 0x02202404, + 0x10000000, 0x12000000, 0x10002000, 0x12002000, + 0x10200000, 0x12200000, 0x10202000, 0x12202000, + 0x10000004, 0x12000004, 0x10002004, 0x12002004, + 0x10200004, 0x12200004, 0x10202004, 0x12202004, + 0x10000400, 0x12000400, 0x10002400, 0x12002400, + 0x10200400, 0x12200400, 0x10202400, 0x12202400, + 0x10000404, 0x12000404, 0x10002404, 0x12002404, + 0x10200404, 0x12200404, 0x10202404, 0x12202404, + }, + { + 0x00000000, 0x00000001, 0x00040000, 0x00040001, + 0x01000000, 0x01000001, 0x01040000, 0x01040001, + 0x00000002, 0x00000003, 0x00040002, 0x00040003, + 0x01000002, 0x01000003,
0x01040002, 0x01040003, + 0x00000200, 0x00000201, 0x00040200, 0x00040201, + 0x01000200, 0x01000201, 0x01040200, 0x01040201, + 0x00000202, 0x00000203, 0x00040202, 0x00040203, + 0x01000202, 0x01000203, 0x01040202, 0x01040203, + 0x08000000, 0x08000001, 0x08040000, 0x08040001, + 0x09000000, 0x09000001, 0x09040000, 0x09040001, + 0x08000002, 0x08000003, 0x08040002, 0x08040003, + 0x09000002, 0x09000003, 0x09040002, 0x09040003, + 0x08000200, 0x08000201, 0x08040200, 0x08040201, + 0x09000200, 0x09000201, 0x09040200, 0x09040201, + 0x08000202, 0x08000203, 0x08040202, 0x08040203, + 0x09000202, 0x09000203, 0x09040202, 0x09040203, + }, + { + 0x00000000, 0x00100000, 0x00000100, 0x00100100, + 0x00000008, 0x00100008, 0x00000108, 0x00100108, + 0x00001000, 0x00101000, 0x00001100, 0x00101100, + 0x00001008, 0x00101008, 0x00001108, 0x00101108, + 0x04000000, 0x04100000, 0x04000100, 0x04100100, + 0x04000008, 0x04100008, 0x04000108, 0x04100108, + 0x04001000, 0x04101000, 0x04001100, 0x04101100, + 0x04001008, 0x04101008, 0x04001108, 0x04101108, + 0x00020000, 0x00120000, 0x00020100, 0x00120100, + 0x00020008, 0x00120008, 0x00020108, 0x00120108, + 0x00021000, 0x00121000, 0x00021100, 0x00121100, + 0x00021008, 0x00121008, 0x00021108, 0x00121108, + 0x04020000, 0x04120000, 0x04020100, 0x04120100, + 0x04020008, 0x04120008, 0x04020108, 0x04120108, + 0x04021000, 0x04121000, 0x04021100, 0x04121100, + 0x04021008, 0x04121008, 0x04021108, 0x04121108, + }, + { + 0x00000000, 0x10000000, 0x00010000, 0x10010000, + 0x00000004, 0x10000004, 0x00010004, 0x10010004, + 0x20000000, 0x30000000, 0x20010000, 0x30010000, + 0x20000004, 0x30000004, 0x20010004, 0x30010004, + 0x00100000, 0x10100000, 0x00110000, 0x10110000, + 0x00100004, 0x10100004, 0x00110004, 0x10110004, + 0x20100000, 0x30100000, 0x20110000, 0x30110000, + 0x20100004, 0x30100004, 0x20110004, 0x30110004, + 0x00001000, 0x10001000, 0x00011000, 0x10011000, + 0x00001004, 0x10001004, 0x00011004, 0x10011004, + 0x20001000, 0x30001000, 0x20011000, 0x30011000, + 
0x20001004, 0x30001004, 0x20011004, 0x30011004, + 0x00101000, 0x10101000, 0x00111000, 0x10111000, + 0x00101004, 0x10101004, 0x00111004, 0x10111004, + 0x20101000, 0x30101000, 0x20111000, 0x30111000, + 0x20101004, 0x30101004, 0x20111004, 0x30111004, + }, + { + 0x00000000, 0x08000000, 0x00000008, 0x08000008, + 0x00000400, 0x08000400, 0x00000408, 0x08000408, + 0x00020000, 0x08020000, 0x00020008, 0x08020008, + 0x00020400, 0x08020400, 0x00020408, 0x08020408, + 0x00000001, 0x08000001, 0x00000009, 0x08000009, + 0x00000401, 0x08000401, 0x00000409, 0x08000409, + 0x00020001, 0x08020001, 0x00020009, 0x08020009, + 0x00020401, 0x08020401, 0x00020409, 0x08020409, + 0x02000000, 0x0A000000, 0x02000008, 0x0A000008, + 0x02000400, 0x0A000400, 0x02000408, 0x0A000408, + 0x02020000, 0x0A020000, 0x02020008, 0x0A020008, + 0x02020400, 0x0A020400, 0x02020408, 0x0A020408, + 0x02000001, 0x0A000001, 0x02000009, 0x0A000009, + 0x02000401, 0x0A000401, 0x02000409, 0x0A000409, + 0x02020001, 0x0A020001, 0x02020009, 0x0A020009, + 0x02020401, 0x0A020401, 0x02020409, 0x0A020409, + }, + { + 0x00000000, 0x00000100, 0x00080000, 0x00080100, + 0x01000000, 0x01000100, 0x01080000, 0x01080100, + 0x00000010, 0x00000110, 0x00080010, 0x00080110, + 0x01000010, 0x01000110, 0x01080010, 0x01080110, + 0x00200000, 0x00200100, 0x00280000, 0x00280100, + 0x01200000, 0x01200100, 0x01280000, 0x01280100, + 0x00200010, 0x00200110, 0x00280010, 0x00280110, + 0x01200010, 0x01200110, 0x01280010, 0x01280110, + 0x00000200, 0x00000300, 0x00080200, 0x00080300, + 0x01000200, 0x01000300, 0x01080200, 0x01080300, + 0x00000210, 0x00000310, 0x00080210, 0x00080310, + 0x01000210, 0x01000310, 0x01080210, 0x01080310, + 0x00200200, 0x00200300, 0x00280200, 0x00280300, + 0x01200200, 0x01200300, 0x01280200, 0x01280300, + 0x00200210, 0x00200310, 0x00280210, 0x00280310, + 0x01200210, 0x01200310, 0x01280210, 0x01280310, + }, + { + 0x00000000, 0x04000000, 0x00040000, 0x04040000, + 0x00000002, 0x04000002, 0x00040002, 0x04040002, + 0x00002000, 
0x04002000, 0x00042000, 0x04042000, + 0x00002002, 0x04002002, 0x00042002, 0x04042002, + 0x00000020, 0x04000020, 0x00040020, 0x04040020, + 0x00000022, 0x04000022, 0x00040022, 0x04040022, + 0x00002020, 0x04002020, 0x00042020, 0x04042020, + 0x00002022, 0x04002022, 0x00042022, 0x04042022, + 0x00000800, 0x04000800, 0x00040800, 0x04040800, + 0x00000802, 0x04000802, 0x00040802, 0x04040802, + 0x00002800, 0x04002800, 0x00042800, 0x04042800, + 0x00002802, 0x04002802, 0x00042802, 0x04042802, + 0x00000820, 0x04000820, 0x00040820, 0x04040820, + 0x00000822, 0x04000822, 0x00040822, 0x04040822, + 0x00002820, 0x04002820, 0x00042820, 0x04042820, + 0x00002822, 0x04002822, 0x00042822, 0x04042822 + } +}; +/* BOX(i,n,S): table lookup S[n][i]. For VECT_SIZE above 1 the index i is a vector: each lane is looked up on its own and the results are packed into a u32x. */ +#if VECT_SIZE == 1 +#define BOX(i,n,S) (S)[(n)][(i)] +#elif VECT_SIZE == 2 +#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1]) +#elif VECT_SIZE == 4 +#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3]) +#elif VECT_SIZE == 8 +#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7]) +#elif VECT_SIZE == 16 +#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf]) +#endif +/* BOX1(i,S): the same per-lane lookup for a one-dimensional table, S[i]. */ +#if VECT_SIZE == 1 +#define BOX1(i,S) (S)[(i)] +#elif VECT_SIZE == 2 +#define BOX1(i,S) (u32x) ((S)[(i).s0], (S)[(i).s1]) +#elif VECT_SIZE == 4 +#define BOX1(i,S) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3]) +#elif VECT_SIZE == 8 +#define BOX1(i,S) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7]) +#elif VECT_SIZE == 16 +#define BOX1(i,S) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4],
(S)[(i).s5], (S)[(i).s6], (S)[(i).s7], (S)[(i).s8], (S)[(i).s9], (S)[(i).sa], (S)[(i).sb], (S)[(i).sc], (S)[(i).sd], (S)[(i).se], (S)[(i).sf]) +#endif +/* _des_crypt_encrypt: runs 16 rounds over the two words of data, using the round-key words Kc[i] and Kd[i] and the eight s_SPtrans lookup tables, and writes the two result words to iv. The inputs are rotated left by 3 on entry and the results by 29 on exit; neither IP nor FP is applied inside this function. */ +void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __local u32 (*s_SPtrans)[64]) +{ + u32x r = rotl32 (data[0], 3u); + u32x l = rotl32 (data[1], 3u); + + u32x tt; /* NOTE(review): tt is never referenced in this function */ + /* each pass of the loop does two rounds: first l is updated from r, then r from the new l */ + #ifdef _unroll + #pragma unroll + #endif + for (u32 i = 0; i < 16; i += 2) + { + u32x u; + u32x t; + + u = Kc[i + 0] ^ r; + t = Kd[i + 0] ^ rotl32 (r, 28u); + /* four 6-bit fields of u select from the even tables, four of t from the odd tables; the eight hits are OR-ed together */ + l ^= BOX (((u >> 2) & 0x3f), 0, s_SPtrans) + | BOX (((u >> 10) & 0x3f), 2, s_SPtrans) + | BOX (((u >> 18) & 0x3f), 4, s_SPtrans) + | BOX (((u >> 26) & 0x3f), 6, s_SPtrans) + | BOX (((t >> 2) & 0x3f), 1, s_SPtrans) + | BOX (((t >> 10) & 0x3f), 3, s_SPtrans) + | BOX (((t >> 18) & 0x3f), 5, s_SPtrans) + | BOX (((t >> 26) & 0x3f), 7, s_SPtrans); + /* second round of the pass: same lookup pattern with the roles of l and r exchanged */ + u = Kc[i + 1] ^ l; + t = Kd[i + 1] ^ rotl32 (l, 28u); + + r ^= BOX (((u >> 2) & 0x3f), 0, s_SPtrans) + | BOX (((u >> 10) & 0x3f), 2, s_SPtrans) + | BOX (((u >> 18) & 0x3f), 4, s_SPtrans) + | BOX (((u >> 26) & 0x3f), 6, s_SPtrans) + | BOX (((t >> 2) & 0x3f), 1, s_SPtrans) + | BOX (((t >> 10) & 0x3f), 3, s_SPtrans) + | BOX (((t >> 18) & 0x3f), 5, s_SPtrans) + | BOX (((t >> 26) & 0x3f), 7, s_SPtrans); + } + /* l goes to the first output word and r to the second */ + iv[0] = rotl32 (l, 29u); + iv[1] = rotl32 (r, 29u); +} +/* _des_crypt_keysetup: derives the 16 round-key word pairs Kc[i] and Kd[i] from the two 32-bit key words c and d. The words are bit-permuted, reduced to 28 bits each, then for every round rotated by 1 or 2 bits and looked up in the s_skb tables. */ +void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32 (*s_skb)[64]) +{ + u32x tt; + + PERM_OP (d, c, tt, 4, 0x0f0f0f0f); + HPERM_OP (c, tt, 2, 0xcccc0000); + HPERM_OP (d, tt, 2, 0xcccc0000); + PERM_OP (d, c, tt, 1, 0x55555555); + PERM_OP (c, d, tt, 8, 0x00ff00ff); + PERM_OP (d, c, tt, 1, 0x55555555); + /* rebuild d from three of its own bytes (the outer two swapped) plus the top nibble of c, giving a 28-bit value */ + d = ((d & 0x000000ff) << 16) + | ((d & 0x0000ff00) << 0) + | ((d & 0x00ff0000) >> 16) + | ((c & 0xf0000000) >> 4); + + c = c & 0x0fffffff; + /* rounds 0, 1, 8 and 15 rotate the 28-bit halves right by one bit, every other round by two */ + #ifdef _unroll + #pragma unroll + #endif + for (u32 i = 0; i < 16; i++) + { + if ((i < 2) || (i == 8) || (i == 15)) + { + c = ((c >> 1) | (c << 27)); + d = ((d >> 1) | (d << 27)); + } + else + { + c =
((c >> 2) | (c << 26)); + d = ((d >> 2) | (d << 26)); + } + /* drop the bits that the 32-bit shifts pushed above bit 27, completing the 28-bit rotate */ + c = c & 0x0fffffff; + d = d & 0x0fffffff; + /* pick the bit groups of c that are combined into the lookup indices for tables 0 to 3 */ + const u32x c00 = (c >> 0) & 0x0000003f; + const u32x c06 = (c >> 6) & 0x00383003; + const u32x c07 = (c >> 7) & 0x0000003c; + const u32x c13 = (c >> 13) & 0x0000060f; + const u32x c20 = (c >> 20) & 0x00000001; + + u32x s = BOX (((c00 >> 0) & 0xff), 0, s_skb) + | BOX (((c06 >> 0) & 0xff) + |((c07 >> 0) & 0xff), 1, s_skb) + | BOX (((c13 >> 0) & 0xff) + |((c06 >> 8) & 0xff), 2, s_skb) + | BOX (((c20 >> 0) & 0xff) + |((c13 >> 8) & 0xff) + |((c06 >> 16) & 0xff), 3, s_skb); + /* same for d, feeding tables 4 to 7 */ + const u32x d00 = (d >> 0) & 0x00003c3f; + const u32x d07 = (d >> 7) & 0x00003f03; + const u32x d21 = (d >> 21) & 0x0000000f; + const u32x d22 = (d >> 22) & 0x00000030; + + u32x t = BOX (((d00 >> 0) & 0xff), 4, s_skb) + | BOX (((d07 >> 0) & 0xff) + |((d00 >> 8) & 0xff), 5, s_skb) + | BOX (((d07 >> 8) & 0xff), 6, s_skb) + | BOX (((d21 >> 0) & 0xff) + |((d22 >> 0) & 0xff), 7, s_skb); + /* Kc takes the low half of s and the low half of t, Kd the high halves */ + Kc[i] = ((t << 16) | (s & 0x0000ffff)); + Kd[i] = ((s >> 16) | (t & 0xffff0000)); + /* both key words are stored rotated left by 2 */ + Kc[i] = rotl32 (Kc[i], 2u); + Kd[i] = rotl32 (Kd[i], 2u); + } +} + +__kernel void m14000_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32
il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +{ /* m14000_m04: joins the left word from pws[gid] with each right word from combs_buf, uses the first two 32-bit words of the joined candidate as DES key words, encrypts the two salt words with that key and hands the result to COMPARE_M_SIMD (presumably the multi-digest compare - confirm). */ + /** + * base + */ + + const u32 gid = get_global_id (0); + const u32 lid = get_local_id (0); + const u32 lsz = get_local_size (0); + + /** + * shared + */ + + __local u32 s_SPtrans[8][64]; + __local u32 s_skb[8][64]; + /* the work-items of a group share the copy of both constant tables into local memory, each starting at its local id and stepping by the local size */ + for (u32 i = lid; i < 64; i += lsz) + { + s_SPtrans[0][i] = c_SPtrans[0][i]; + s_SPtrans[1][i] = c_SPtrans[1][i]; + s_SPtrans[2][i] = c_SPtrans[2][i]; + s_SPtrans[3][i] = c_SPtrans[3][i]; + s_SPtrans[4][i] = c_SPtrans[4][i]; + s_SPtrans[5][i] = c_SPtrans[5][i]; + s_SPtrans[6][i] = c_SPtrans[6][i]; + s_SPtrans[7][i] = c_SPtrans[7][i]; + + s_skb[0][i] = c_skb[0][i]; + s_skb[1][i] = c_skb[1][i]; + s_skb[2][i] = c_skb[2][i]; + s_skb[3][i] = c_skb[3][i]; + s_skb[4][i] = c_skb[4][i]; + s_skb[5][i] = c_skb[5][i]; + s_skb[6][i] = c_skb[6][i]; + s_skb[7][i] = c_skb[7][i]; + } + + barrier (CLK_LOCAL_MEM_FENCE); + /* the range check sits after the barrier, so out-of-range work-items still take part in the table copy and reach the barrier */ + if (gid >= gid_max) return; + + /** + * base + */ + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + /* only the first two words of the left password are loaded; the other six slots are zeroed */ + pw_buf0[0] = pws[gid].i[ 0]; + pw_buf0[1] = pws[gid].i[ 1]; + pw_buf0[2] = 0; + pw_buf0[3] = 0; + pw_buf1[0] = 0; + pw_buf1[1] = 0; + pw_buf1[2] = 0; + pw_buf1[3] = 0; + + const u32 pw_l_len = pws[gid].pw_len; + + /** + * salt + */ + + u32 salt_buf0[2]; + /* the two salt_buf_pc words are the block that gets encrypted (presumably already permuted on the host - confirm) */ + salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0]; + salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1]; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos); + + const u32x pw_len = pw_l_len + pw_r_len; /* NOTE(review): pw_len is not referenced again in the visible body; possibly read inside the compare macro - confirm */ + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0
}; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + /* eight words of the right-hand candidate are fetched for the current vector of positions */ + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + /* the side that comes second is passed to switch_buffer_by_offset_le_VV with the length of the side that comes first (presumably a byte shift), so the OR below concatenates the two */ + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[2]; + /* only the first two words of the joined candidate are used; they become the DES key */ + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + + const u32x c = w0[0]; + const u32x d = w0[1]; + + u32x Kc[16]; + u32x Kd[16]; + + _des_crypt_keysetup (c, d, Kc, Kd, s_skb); + + u32x data[2]; + + data[0] = salt_buf0[0]; + data[1] = salt_buf0[1]; + + u32x iv[2]; + + _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans); + + u32x z = 0; + /* only two result words exist; z (zero) fills the third and fourth compare arguments */ + COMPARE_M_SIMD (iv[0], iv[1], z, z); + } +} + +__kernel void m14000_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const
u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +{ /* m14000_m08: empty body - this entry point performs no work */ +} + +__kernel void m14000_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +{ /* m14000_m16: empty body - this entry point performs no work */ +} + +__kernel void m14000_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32
digests_offset, const u32 combs_mode, const u32 gid_max) +{ /* m14000_s04: same candidate build-up and DES encryption as m14000_m04, but it also loads one target digest into search and finishes with COMPARE_S_SIMD (presumably the single-digest compare - confirm). */ + /** + * base + */ + + const u32 gid = get_global_id (0); + const u32 lid = get_local_id (0); + const u32 lsz = get_local_size (0); + + /** + * shared + */ + + __local u32 s_SPtrans[8][64]; + __local u32 s_skb[8][64]; + /* the work-items of a group share the copy of both constant tables into local memory, each starting at its local id and stepping by the local size */ + for (u32 i = lid; i < 64; i += lsz) + { + s_SPtrans[0][i] = c_SPtrans[0][i]; + s_SPtrans[1][i] = c_SPtrans[1][i]; + s_SPtrans[2][i] = c_SPtrans[2][i]; + s_SPtrans[3][i] = c_SPtrans[3][i]; + s_SPtrans[4][i] = c_SPtrans[4][i]; + s_SPtrans[5][i] = c_SPtrans[5][i]; + s_SPtrans[6][i] = c_SPtrans[6][i]; + s_SPtrans[7][i] = c_SPtrans[7][i]; + + s_skb[0][i] = c_skb[0][i]; + s_skb[1][i] = c_skb[1][i]; + s_skb[2][i] = c_skb[2][i]; + s_skb[3][i] = c_skb[3][i]; + s_skb[4][i] = c_skb[4][i]; + s_skb[5][i] = c_skb[5][i]; + s_skb[6][i] = c_skb[6][i]; + s_skb[7][i] = c_skb[7][i]; + } + + barrier (CLK_LOCAL_MEM_FENCE); + /* the range check sits after the barrier, so out-of-range work-items still take part in the table copy and reach the barrier */ + if (gid >= gid_max) return; + + /** + * base + */ + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + /* only the first two words of the left password are loaded; the other six slots are zeroed */ + pw_buf0[0] = pws[gid].i[ 0]; + pw_buf0[1] = pws[gid].i[ 1]; + pw_buf0[2] = 0; + pw_buf0[3] = 0; + pw_buf1[0] = 0; + pw_buf1[1] = 0; + pw_buf1[2] = 0; + pw_buf1[3] = 0; + + const u32 pw_l_len = pws[gid].pw_len; + + /** + * salt + */ + + u32 salt_buf0[2]; + /* the two salt_buf_pc words are the block that gets encrypted (presumably already permuted on the host - confirm) */ + salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0]; + salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1]; + + /** + * digest + */ + /* two digest words plus two zero words; search is not named again in the visible body, presumably it is read inside COMPARE_S_SIMD - confirm */ + const u32 search[4] = + { + digests_buf[digests_offset].digest_buf[DGST_R0], + digests_buf[digests_offset].digest_buf[DGST_R1], + 0, + 0 + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos); + + const u32x pw_len = pw_l_len + pw_r_len; /* NOTE(review): pw_len is not referenced again in the visible body; possibly read inside the compare macro - confirm */ + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0];
+ wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + /* eight words of the right-hand candidate are fetched for the current vector of positions */ + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + /* the side that comes second is passed to switch_buffer_by_offset_le_VV with the length of the side that comes first (presumably a byte shift), so the OR below concatenates the two */ + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[2]; + /* only the first two words of the joined candidate are used; they become the DES key */ + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + + + const u32x c = (w0[0]); + const u32x d = (w0[1]); + + u32x Kc[16]; + u32x Kd[16]; + + _des_crypt_keysetup (c, d, Kc, Kd, s_skb); + + u32x data[2]; + + data[0] = salt_buf0[0]; + data[1] = salt_buf0[1]; + + u32x iv[2]; + + _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans); + + u32x z = 0; + /* only two result words exist; z (zero) fills the third and fourth compare arguments */ + COMPARE_S_SIMD (iv[0], iv[1], z, z); + } +} + +__kernel void m14000_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf,
const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +{ /* m14000_s08: empty body - this entry point performs no work */ +} + +__kernel void m14000_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +{ /* m14000_s16: empty body - this entry point performs no work */ +} diff --git a/OpenCL/m14000_a3.cl b/OpenCL/m14000_a3.cl new file mode 100644 index 000000000..3b0e52580 --- /dev/null +++ b/OpenCL/m14000_a3.cl @@ -0,0 +1,2770 @@ +/** + * Author......: Jens Steube + * License.....: MIT + * NOTE........: sboxes for maxwell were taken from DeepLearningJohnDoe, license below + * : sboxes for others were taken from JtR, license below + */ + +#define _DES_ + +#include "inc_vendor.cl" +#include "inc_hash_constants.h" +#include "inc_hash_functions.cl" +#include "inc_types.cl" +#include "inc_common.cl" +/* COMPARE_S and COMPARE_M hold file names of compare snippets; presumably they are pulled in with #include further down in this file - confirm */ +#define COMPARE_S "inc_comp_single_bs.cl" +#define COMPARE_M "inc_comp_multi_bs.cl" +/* KXX_DECL is defined as an empty token in each of the three vendor branches below */ +#ifdef IS_NV +#define KXX_DECL +#endif + +#ifdef IS_AMD +#define KXX_DECL +#endif + +#ifdef IS_GENERIC +#define KXX_DECL +#endif + +#ifdef IS_NV + +#if
CUDA_ARCH >= 500 + +// +// Bitslice DES S-boxes with LOP3.LUT instructions +// For NVIDIA Maxwell architecture and CUDA 7.5 RC +// by DeepLearningJohnDoe, version 0.1.6, 2015/07/19 +// +// Gate counts: 25 24 25 18 25 24 24 23 +// Average: 23.5 +// Depth: 8 7 7 6 8 10 10 8 +// Average: 8 +// +// Note that same S-box function with a lower gate count isn't necessarily faster. +// +// These Boolean expressions corresponding to DES S-boxes were +// discovered by +// +// This file itself is Copyright (c) 2015 by +// Redistribution and use in source and binary forms, with or without +// modification, are permitted. +// +// The underlying mathematical formulas are NOT copyrighted. +// + +#define LUT(a,b,c,d,e) u32 a; asm ("lop3.b32 %0, %1, %2, %3, "#e";" : "=r"(a): "r"(b), "r"(c), "r"(d)); + +void s1 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) +{ + LUT(xAA55AA5500550055, a1, a4, a6, 0xC1) + LUT(xA55AA55AF0F5F0F5, a3, a6, xAA55AA5500550055, 0x9E) + LUT(x5F5F5F5FA5A5A5A5, a1, a3, a6, 0xD6) + LUT(xF5A0F5A0A55AA55A, a4, xAA55AA5500550055, x5F5F5F5FA5A5A5A5, 0x56) + LUT(x947A947AD1E7D1E7, a2, xA55AA55AF0F5F0F5, xF5A0F5A0A55AA55A, 0x6C) + LUT(x5FFF5FFFFFFAFFFA, a6, xAA55AA5500550055, x5F5F5F5FA5A5A5A5, 0x7B) + LUT(xB96CB96C69936993, a2, xF5A0F5A0A55AA55A, x5FFF5FFFFFFAFFFA, 0xD6) + LUT(x3, a5, x947A947AD1E7D1E7, xB96CB96C69936993, 0x6A) + LUT(x55EE55EE55EE55EE, a1, a2, a4, 0x7A) + LUT(x084C084CB77BB77B, a2, a6, xF5A0F5A0A55AA55A, 0xC9) + LUT(x9C329C32E295E295, x947A947AD1E7D1E7, x55EE55EE55EE55EE, x084C084CB77BB77B, 0x72) + LUT(xA51EA51E50E050E0, a3, a6, x55EE55EE55EE55EE, 0x29) + LUT(x4AD34AD3BE3CBE3C, a2, x947A947AD1E7D1E7, xA51EA51E50E050E0, 0x95) + LUT(x2, a5, x9C329C32E295E295, x4AD34AD3BE3CBE3C, 0xC6) + LUT(xD955D95595D195D1, a1, a2, x9C329C32E295E295, 0xD2) + LUT(x8058805811621162, x947A947AD1E7D1E7, x55EE55EE55EE55EE, x084C084CB77BB77B, 0x90) + LUT(x7D0F7D0FC4B3C4B3, xA51EA51E50E050E0, 
xD955D95595D195D1, x8058805811621162, 0x76) + LUT(x0805080500010001, a3, xAA55AA5500550055, xD955D95595D195D1, 0x80) + LUT(x4A964A96962D962D, xB96CB96C69936993, x4AD34AD3BE3CBE3C, x0805080500010001, 0xA6) + LUT(x4, a5, x7D0F7D0FC4B3C4B3, x4A964A96962D962D, 0xA6) + LUT(x148014807B087B08, a1, xAA55AA5500550055, x947A947AD1E7D1E7, 0x21) + LUT(x94D894D86B686B68, xA55AA55AF0F5F0F5, x8058805811621162, x148014807B087B08, 0x6A) + LUT(x5555555540044004, a1, a6, x084C084CB77BB77B, 0x70) + LUT(xAFB4AFB4BF5BBF5B, x5F5F5F5FA5A5A5A5, xA51EA51E50E050E0, x5555555540044004, 0x97) + LUT(x1, a5, x94D894D86B686B68, xAFB4AFB4BF5BBF5B, 0x6C) + + *out1 ^= x1; + *out2 ^= x2; + *out3 ^= x3; + *out4 ^= x4; +} + +void s2 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) +{ + LUT(xEEEEEEEE99999999, a1, a2, a6, 0x97) + LUT(xFFFFEEEE66666666, a5, a6, xEEEEEEEE99999999, 0x67) + LUT(x5555FFFFFFFF0000, a1, a5, a6, 0x76) + LUT(x6666DDDD5555AAAA, a2, xFFFFEEEE66666666, x5555FFFFFFFF0000, 0x69) + LUT(x6969D3D35353ACAC, a3, xFFFFEEEE66666666, x6666DDDD5555AAAA, 0x6A) + LUT(xCFCF3030CFCF3030, a2, a3, a5, 0x65) + LUT(xE4E4EEEE9999F0F0, a3, xEEEEEEEE99999999, x5555FFFFFFFF0000, 0x8D) + LUT(xE5E5BABACDCDB0B0, a1, xCFCF3030CFCF3030, xE4E4EEEE9999F0F0, 0xCA) + LUT(x3, a4, x6969D3D35353ACAC, xE5E5BABACDCDB0B0, 0xC6) + LUT(x3333CCCC00000000, a2, a5, a6, 0x14) + LUT(xCCCCDDDDFFFF0F0F, a5, xE4E4EEEE9999F0F0, x3333CCCC00000000, 0xB5) + LUT(x00000101F0F0F0F0, a3, a6, xFFFFEEEE66666666, 0x1C) + LUT(x9A9A64646A6A9595, a1, xCFCF3030CFCF3030, x00000101F0F0F0F0, 0x96) + LUT(x2, a4, xCCCCDDDDFFFF0F0F, x9A9A64646A6A9595, 0x6A) + LUT(x3333BBBB3333FFFF, a1, a2, x6666DDDD5555AAAA, 0xDE) + LUT(x1414141441410000, a1, a3, xE4E4EEEE9999F0F0, 0x90) + LUT(x7F7FF3F3F5F53939, x6969D3D35353ACAC, x9A9A64646A6A9595, x3333BBBB3333FFFF, 0x79) + LUT(x9494E3E34B4B3939, a5, x1414141441410000, x7F7FF3F3F5F53939, 0x29) + LUT(x1, a4, x3333BBBB3333FFFF, 
x9494E3E34B4B3939, 0xA6) + LUT(xB1B1BBBBCCCCA5A5, a1, a1, xE4E4EEEE9999F0F0, 0x4A) + LUT(xFFFFECECEEEEDDDD, a2, x3333CCCC00000000, x9A9A64646A6A9595, 0xEF) + LUT(xB1B1A9A9DCDC8787, xE5E5BABACDCDB0B0, xB1B1BBBBCCCCA5A5, xFFFFECECEEEEDDDD, 0x8D) + LUT(xFFFFCCCCEEEE4444, a2, a5, xFFFFEEEE66666666, 0x2B) + LUT(x4, a4, xB1B1A9A9DCDC8787, xFFFFCCCCEEEE4444, 0x6C) + + *out1 ^= x1; + *out2 ^= x2; + *out3 ^= x3; + *out4 ^= x4; +} + +void s3 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) +{ + LUT(xA50FA50FA50FA50F, a1, a3, a4, 0xC9) + LUT(xF0F00F0FF0F0F0F0, a3, a5, a6, 0x4B) + LUT(xAF0FA0AAAF0FAF0F, a1, xA50FA50FA50FA50F, xF0F00F0FF0F0F0F0, 0x4D) + LUT(x5AA5A55A5AA55AA5, a1, a4, xF0F00F0FF0F0F0F0, 0x69) + LUT(xAA005FFFAA005FFF, a3, a5, xA50FA50FA50FA50F, 0xD6) + LUT(x5AA5A55A0F5AFAA5, a6, x5AA5A55A5AA55AA5, xAA005FFFAA005FFF, 0x9C) + LUT(x1, a2, xAF0FA0AAAF0FAF0F, x5AA5A55A0F5AFAA5, 0xA6) + LUT(xAA55AA5500AA00AA, a1, a4, a6, 0x49) + LUT(xFAFAA50FFAFAA50F, a1, a5, xA50FA50FA50FA50F, 0x9B) + LUT(x50AF0F5AFA50A5A5, a1, xAA55AA5500AA00AA, xFAFAA50FFAFAA50F, 0x66) + LUT(xAFAFAFAFFAFAFAFA, a1, a3, a6, 0x6F) + LUT(xAFAFFFFFFFFAFAFF, a4, x50AF0F5AFA50A5A5, xAFAFAFAFFAFAFAFA, 0xEB) + LUT(x4, a2, x50AF0F5AFA50A5A5, xAFAFFFFFFFFAFAFF, 0x6C) + LUT(x500F500F500F500F, a1, a3, a4, 0x98) + LUT(xF0505A0505A5050F, x5AA5A55A0F5AFAA5, xAA55AA5500AA00AA, xAFAFAFAFFAFAFAFA, 0x1D) + LUT(xF0505A05AA55AAFF, a6, x500F500F500F500F, xF0505A0505A5050F, 0x9A) + LUT(xFF005F55FF005F55, a1, a4, xAA005FFFAA005FFF, 0xB2) + LUT(xA55F5AF0A55F5AF0, a5, xA50FA50FA50FA50F, x5AA5A55A5AA55AA5, 0x3D) + LUT(x5A5F05A5A55F5AF0, a6, xFF005F55FF005F55, xA55F5AF0A55F5AF0, 0xA6) + LUT(x3, a2, xF0505A05AA55AAFF, x5A5F05A5A55F5AF0, 0xA6) + LUT(x0F0F0F0FA5A5A5A5, a1, a3, a6, 0xC6) + LUT(x5FFFFF5FFFA0FFA0, x5AA5A55A5AA55AA5, xAFAFAFAFFAFAFAFA, x0F0F0F0FA5A5A5A5, 0xDB) + LUT(xF5555AF500A05FFF, a5, xFAFAA50FFAFAA50F, xF0505A0505A5050F, 0xB9) + 
LUT(x05A5AAF55AFA55A5, xF0505A05AA55AAFF, x0F0F0F0FA5A5A5A5, xF5555AF500A05FFF, 0x9B) + LUT(x2, a2, x5FFFFF5FFFA0FFA0, x05A5AAF55AFA55A5, 0xA6) + + *out1 ^= x1; + *out2 ^= x2; + *out3 ^= x3; + *out4 ^= x4; +} + +/* s4: bitsliced DES S-box 4, LUT() variant. Only a1..a5 are used to build the shared intermediates; a6 enters solely in the last four steps, where each of x1..x4 is derived from a6 and two of those intermediates (x4/x3 share one pair, x2/x1 the other). The results are XORed into the words behind out1..out4. NOTE(review): LUT() is defined outside this function - presumably (dest, in, in, in, 8-bit truth table); confirm against the macro. */ void s4 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) +{ + LUT(x55F055F055F055F0, a1, a3, a4, 0x72) + LUT(xA500F5F0A500F5F0, a3, a5, x55F055F055F055F0, 0xAD) + LUT(xF50AF50AF50AF50A, a1, a3, a4, 0x59) + LUT(xF5FA0FFFF5FA0FFF, a3, a5, xF50AF50AF50AF50A, 0xE7) + LUT(x61C8F93C61C8F93C, a2, xA500F5F0A500F5F0, xF5FA0FFFF5FA0FFF, 0xC6) + LUT(x9999666699996666, a1, a2, a5, 0x69) + LUT(x22C022C022C022C0, a2, a4, x55F055F055F055F0, 0x18) + LUT(xB35C94A6B35C94A6, xF5FA0FFFF5FA0FFF, x9999666699996666, x22C022C022C022C0, 0x63) + LUT(x4, a6, x61C8F93C61C8F93C, xB35C94A6B35C94A6, 0x6A) + LUT(x4848484848484848, a1, a2, a3, 0x12) + LUT(x55500AAA55500AAA, a1, a5, xF5FA0FFFF5FA0FFF, 0x28) + LUT(x3C90B3D63C90B3D6, x61C8F93C61C8F93C, x4848484848484848, x55500AAA55500AAA, 0x1E) + LUT(x8484333384843333, a1, x9999666699996666, x4848484848484848, 0x14) + LUT(x4452F1AC4452F1AC, xF50AF50AF50AF50A, xF5FA0FFFF5FA0FFF, xB35C94A6B35C94A6, 0x78) + LUT(x9586CA379586CA37, x55500AAA55500AAA, x8484333384843333, x4452F1AC4452F1AC, 0xD6) + LUT(x2, a6, x3C90B3D63C90B3D6, x9586CA379586CA37, 0x6A) + LUT(x1, a6, x3C90B3D63C90B3D6, x9586CA379586CA37, 0xA9) + LUT(x3, a6, x61C8F93C61C8F93C, xB35C94A6B35C94A6, 0x56) + + *out1 ^= x1; + *out2 ^= x2; + *out3 ^= x3; + *out4 ^= x4; +} + +/* s5: bitsliced DES S-box 5, LUT() variant. Inputs a1..a6 feed a chain of LUT() steps; x1..x4 are each produced by a final step that takes a4, and are then XORed into the words behind out1..out4 (the previous contents are combined, not overwritten). NOTE(review): LUT() is defined outside this function - presumably (dest, in, in, in, 8-bit truth table); confirm against the macro. */ void s5 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) +{ + LUT(xA0A0A0A0FFFFFFFF, a1, a3, a6, 0xAB) + LUT(xFFFF00005555FFFF, a1, a5, a6, 0xB9) + LUT(xB3B320207777FFFF, a2, xA0A0A0A0FFFFFFFF, xFFFF00005555FFFF, 0xE8) + LUT(x50505A5A5A5A5050, a1, a3, xFFFF00005555FFFF, 0x34) + LUT(xA2A2FFFF2222FFFF, a1, a5, xB3B320207777FFFF, 0xCE) + LUT(x2E2E6969A4A46363, a2, 
x50505A5A5A5A5050, xA2A2FFFF2222FFFF, 0x29) + LUT(x3, a4, xB3B320207777FFFF, x2E2E6969A4A46363, 0xA6) + LUT(xA5A50A0AA5A50A0A, a1, a3, a5, 0x49) + LUT(x969639396969C6C6, a2, a6, xA5A50A0AA5A50A0A, 0x96) + LUT(x1B1B1B1B1B1B1B1B, a1, a2, a3, 0xCA) + LUT(xBFBFBFBFF6F6F9F9, a3, xA0A0A0A0FFFFFFFF, x969639396969C6C6, 0x7E) + LUT(x5B5BA4A4B8B81D1D, xFFFF00005555FFFF, x1B1B1B1B1B1B1B1B, xBFBFBFBFF6F6F9F9, 0x96) + LUT(x2, a4, x969639396969C6C6, x5B5BA4A4B8B81D1D, 0xCA) + LUT(x5555BBBBFFFF5555, a1, a2, xFFFF00005555FFFF, 0xE5) + LUT(x6D6D9C9C95956969, x50505A5A5A5A5050, xA2A2FFFF2222FFFF, x969639396969C6C6, 0x97) + LUT(x1A1A67676A6AB4B4, xA5A50A0AA5A50A0A, x5555BBBBFFFF5555, x6D6D9C9C95956969, 0x47) + LUT(xA0A0FFFFAAAA0000, a3, xFFFF00005555FFFF, xA5A50A0AA5A50A0A, 0x3B) + LUT(x36369C9CC1C1D6D6, x969639396969C6C6, x6D6D9C9C95956969, xA0A0FFFFAAAA0000, 0xD9) + LUT(x1, a4, x1A1A67676A6AB4B4, x36369C9CC1C1D6D6, 0xCA) + LUT(x5555F0F0F5F55555, a1, a3, xFFFF00005555FFFF, 0xB1) + LUT(x79790202DCDC0808, xA2A2FFFF2222FFFF, xA5A50A0AA5A50A0A, x969639396969C6C6, 0x47) + LUT(x6C6CF2F229295D5D, xBFBFBFBFF6F6F9F9, x5555F0F0F5F55555, x79790202DCDC0808, 0x6E) + LUT(xA3A3505010101A1A, a2, xA2A2FFFF2222FFFF, x36369C9CC1C1D6D6, 0x94) + LUT(x7676C7C74F4FC7C7, a1, x2E2E6969A4A46363, xA3A3505010101A1A, 0xD9) + LUT(x4, a4, x6C6CF2F229295D5D, x7676C7C74F4FC7C7, 0xC6) + + *out1 ^= x1; + *out2 ^= x2; + *out3 ^= x3; + *out4 ^= x4; +} + +/* s6: bitsliced DES S-box 6, LUT() variant. Inputs a1..a6 feed a chain of LUT() steps; x1..x4 are each produced by a final step that takes a6, and are then XORed into the words behind out1..out4. Unlike the other S-boxes visible here, one intermediate step reads an already finished output (x4) as an operand, so the step order matters. NOTE(review): LUT() is defined outside this function - presumably (dest, in, in, in, 8-bit truth table); confirm against the macro. */ void s6 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) +{ + LUT(x5050F5F55050F5F5, a1, a3, a5, 0xB2) + LUT(x6363C6C66363C6C6, a1, a2, x5050F5F55050F5F5, 0x66) + LUT(xAAAA5555AAAA5555, a1, a1, a5, 0xA9) + LUT(x3A3A65653A3A6565, a3, x6363C6C66363C6C6, xAAAA5555AAAA5555, 0xA9) + LUT(x5963A3C65963A3C6, a4, x6363C6C66363C6C6, x3A3A65653A3A6565, 0xC6) + LUT(xE7E76565E7E76565, a5, x6363C6C66363C6C6, x3A3A65653A3A6565, 0xAD) + LUT(x455D45DF455D45DF, a1, a4, xE7E76565E7E76565, 0xE4) + LUT(x4, a6, 
x5963A3C65963A3C6, x455D45DF455D45DF, 0x6C) + LUT(x1101220211012202, a2, xAAAA5555AAAA5555, x5963A3C65963A3C6, 0x20) + LUT(xF00F0FF0F00F0FF0, a3, a4, a5, 0x69) + LUT(x16E94A9716E94A97, xE7E76565E7E76565, x1101220211012202, xF00F0FF0F00F0FF0, 0x9E) + LUT(x2992922929929229, a1, a2, xF00F0FF0F00F0FF0, 0x49) + LUT(xAFAF9823AFAF9823, a5, x5050F5F55050F5F5, x2992922929929229, 0x93) + LUT(x3, a6, x16E94A9716E94A97, xAFAF9823AFAF9823, 0x6C) + LUT(x4801810248018102, a4, x5963A3C65963A3C6, x1101220211012202, 0xA4) + LUT(x5EE8FFFD5EE8FFFD, a5, x16E94A9716E94A97, x4801810248018102, 0x76) + LUT(xF0FF00FFF0FF00FF, a3, a4, a5, 0xCD) + LUT(x942D9A67942D9A67, x3A3A65653A3A6565, x5EE8FFFD5EE8FFFD, xF0FF00FFF0FF00FF, 0x86) + LUT(x1, a6, x5EE8FFFD5EE8FFFD, x942D9A67942D9A67, 0xA6) + LUT(x6A40D4ED6F4DD4EE, a2, x4, xAFAF9823AFAF9823, 0x2D) + LUT(x6CA89C7869A49C79, x1101220211012202, x16E94A9716E94A97, x6A40D4ED6F4DD4EE, 0x26) + LUT(xD6DE73F9D6DE73F9, a3, x6363C6C66363C6C6, x455D45DF455D45DF, 0x6B) + LUT(x925E63E1965A63E1, x3A3A65653A3A6565, x6CA89C7869A49C79, xD6DE73F9D6DE73F9, 0xA2) + LUT(x2, a6, x6CA89C7869A49C79, x925E63E1965A63E1, 0xCA) + + *out1 ^= x1; + *out2 ^= x2; + *out3 ^= x3; + *out4 ^= x4; +} + +/* s7: bitsliced DES S-box 7, LUT() variant. Only a1..a5 are used to build the intermediates; a6 enters solely in the final step for each of x1..x4. The results are XORed into the words behind out1..out4 (the previous contents are combined, not overwritten). NOTE(review): LUT() is defined outside this function - presumably (dest, in, in, in, 8-bit truth table); confirm against the macro. */ void s7 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) +{ + LUT(x88AA88AA88AA88AA, a1, a2, a4, 0x0B) + LUT(xAAAAFF00AAAAFF00, a1, a4, a5, 0x27) + LUT(xADAFF8A5ADAFF8A5, a3, x88AA88AA88AA88AA, xAAAAFF00AAAAFF00, 0x9E) + LUT(x0A0AF5F50A0AF5F5, a1, a3, a5, 0xA6) + LUT(x6B69C5DC6B69C5DC, a2, xADAFF8A5ADAFF8A5, x0A0AF5F50A0AF5F5, 0x6B) + LUT(x1C69B2DC1C69B2DC, a4, x88AA88AA88AA88AA, x6B69C5DC6B69C5DC, 0xA9) + LUT(x1, a6, xADAFF8A5ADAFF8A5, x1C69B2DC1C69B2DC, 0x6A) + LUT(x9C9C9C9C9C9C9C9C, a1, a2, a3, 0x63) + LUT(xE6E63BFDE6E63BFD, a2, xAAAAFF00AAAAFF00, x0A0AF5F50A0AF5F5, 0xE7) + LUT(x6385639E6385639E, a4, x9C9C9C9C9C9C9C9C, xE6E63BFDE6E63BFD, 0x93) + LUT(x5959C4CE5959C4CE, a2, x6B69C5DC6B69C5DC, 
xE6E63BFDE6E63BFD, 0x5D) + LUT(x5B53F53B5B53F53B, a4, x0A0AF5F50A0AF5F5, x5959C4CE5959C4CE, 0x6E) + LUT(x3, a6, x6385639E6385639E, x5B53F53B5B53F53B, 0xC6) + LUT(xFAF505FAFAF505FA, a3, a4, x0A0AF5F50A0AF5F5, 0x6D) + LUT(x6A65956A6A65956A, a3, x9C9C9C9C9C9C9C9C, xFAF505FAFAF505FA, 0xA6) + LUT(x8888CCCC8888CCCC, a1, a2, a5, 0x23) + LUT(x94E97A9494E97A94, x1C69B2DC1C69B2DC, x6A65956A6A65956A, x8888CCCC8888CCCC, 0x72) + LUT(x4, a6, x6A65956A6A65956A, x94E97A9494E97A94, 0xAC) + LUT(xA050A050A050A050, a1, a3, a4, 0x21) + LUT(xC1B87A2BC1B87A2B, xAAAAFF00AAAAFF00, x5B53F53B5B53F53B, x94E97A9494E97A94, 0xA4) + LUT(xE96016B7E96016B7, x8888CCCC8888CCCC, xA050A050A050A050, xC1B87A2BC1B87A2B, 0x96) + LUT(xE3CF1FD5E3CF1FD5, x88AA88AA88AA88AA, x6A65956A6A65956A, xE96016B7E96016B7, 0x3E) + LUT(x6776675B6776675B, xADAFF8A5ADAFF8A5, x94E97A9494E97A94, xE3CF1FD5E3CF1FD5, 0x6B) + LUT(x2, a6, xE96016B7E96016B7, x6776675B6776675B, 0xC6) + + *out1 ^= x1; + *out2 ^= x2; + *out3 ^= x3; + *out4 ^= x4; +} + +/* s8: bitsliced DES S-box 8, LUT() variant. Only a1..a5 are used to build the intermediates; a6 enters solely in the final step for each of x1..x4. The results are XORed into the words behind out1..out4 (the previous contents are combined, not overwritten). NOTE(review): LUT() is defined outside this function - presumably (dest, in, in, in, 8-bit truth table); confirm against the macro. */ void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) +{ + LUT(xEEEE3333EEEE3333, a1, a2, a5, 0x9D) + LUT(xBBBBBBBBBBBBBBBB, a1, a1, a2, 0x83) + LUT(xDDDDAAAADDDDAAAA, a1, a2, a5, 0x5B) + LUT(x29295A5A29295A5A, a3, xBBBBBBBBBBBBBBBB, xDDDDAAAADDDDAAAA, 0x85) + LUT(xC729695AC729695A, a4, xEEEE3333EEEE3333, x29295A5A29295A5A, 0xA6) + LUT(x3BF77B7B3BF77B7B, a2, a5, xC729695AC729695A, 0xF9) + LUT(x2900FF002900FF00, a4, a5, x29295A5A29295A5A, 0x0E) + LUT(x56B3803F56B3803F, xBBBBBBBBBBBBBBBB, x3BF77B7B3BF77B7B, x2900FF002900FF00, 0x61) + LUT(x4, a6, xC729695AC729695A, x56B3803F56B3803F, 0x6C) + LUT(xFBFBFBFBFBFBFBFB, a1, a2, a3, 0xDF) + LUT(x3012B7B73012B7B7, a2, a5, xC729695AC729695A, 0xD4) + LUT(x34E9B34C34E9B34C, a4, xFBFBFBFBFBFBFBFB, x3012B7B73012B7B7, 0x69) + LUT(xBFEAEBBEBFEAEBBE, a1, x29295A5A29295A5A, x34E9B34C34E9B34C, 0x6F) + LUT(xFFAEAFFEFFAEAFFE, a3, xBBBBBBBBBBBBBBBB, xBFEAEBBEBFEAEBBE, 0xB9) + 
LUT(x2, a6, x34E9B34C34E9B34C, xFFAEAFFEFFAEAFFE, 0xC6) + LUT(xCFDE88BBCFDE88BB, a2, xDDDDAAAADDDDAAAA, x34E9B34C34E9B34C, 0x5C) + LUT(x3055574530555745, a1, xC729695AC729695A, xCFDE88BBCFDE88BB, 0x71) + LUT(x99DDEEEE99DDEEEE, a4, xBBBBBBBBBBBBBBBB, xDDDDAAAADDDDAAAA, 0xB9) + LUT(x693CD926693CD926, x3BF77B7B3BF77B7B, x34E9B34C34E9B34C, x99DDEEEE99DDEEEE, 0x69) + LUT(x3, a6, x3055574530555745, x693CD926693CD926, 0x6A) + LUT(x9955EE559955EE55, a1, a4, x99DDEEEE99DDEEEE, 0xE2) + LUT(x9D48FA949D48FA94, x3BF77B7B3BF77B7B, xBFEAEBBEBFEAEBBE, x9955EE559955EE55, 0x9C) + LUT(x1, a6, xC729695AC729695A, x9D48FA949D48FA94, 0x39) + + *out1 ^= x1; + *out2 ^= x2; + *out3 ^= x3; + *out4 ^= x4; +} + +#else + +/* + * Bitslice DES S-boxes for x86 with MMX/SSE2/AVX and for typical RISC + * architectures. These use AND, OR, XOR, NOT, and AND-NOT gates. + * + * Gate counts: 49 44 46 33 48 46 46 41 + * Average: 44.125 + * + * Several same-gate-count expressions for each S-box are included (for use on + * different CPUs/GPUs). + * + * These Boolean expressions corresponding to DES S-boxes have been generated + * by Roman Rusakov for use in Openwall's + * John the Ripper password cracker: http://www.openwall.com/john/ + * Being mathematical formulas, they are not copyrighted and are free for reuse + * by anyone. + * + * This file (a specific representation of the S-box expressions, surrounding + * logic) is Copyright (c) 2011 by Solar Designer . + * Redistribution and use in source and binary forms, with or without + * modification, are permitted. (This is a heavily cut-down "BSD license".) 
+ * + * The effort has been sponsored by Rapid7: http://www.rapid7.com + */ + +/* s1: bitslice DES S-box 1 in gate form (fallback branch of the preceding #else). a1..a6 are the inputs; each xHHHHHHHH temporary holds one intermediate AND/OR/XOR/NOT/AND-NOT result. Every output is finished by a pair of steps that folds in a2 (x00/x01, x10/x11, x20/x21, x30/x31) and is then XORed into the word behind out1, out2, out3 and out4 respectively, so prior contents are combined rather than replaced. Outputs are emitted in the order out3, out1, out2, out4. */ void s1 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) +{ + u32 x55005500, x5A0F5A0F, x3333FFFF, x66666666, x22226666, x2D2D6969, + x25202160; + u32 x00FFFF00, x33CCCC33, x4803120C, x2222FFFF, x6A21EDF3, x4A01CC93; + u32 x5555FFFF, x7F75FFFF, x00D20096, x7FA7FF69; + u32 x0A0A0000, x0AD80096, x00999900, x0AD99996; + u32 x22332233, x257AA5F0, x054885C0, xFAB77A3F, x2221EDF3, xD89697CC; + u32 x05B77AC0, x05F77AD6, x36C48529, x6391D07C, xBB0747B0; + u32 x4C460000, x4EDF9996, x2D4E49EA, xBBFFFFB0, x96B1B65A; + u32 x5AFF5AFF, x52B11215, x4201C010, x10B0D205; + u32 x00, x01, x10, x11, x20, x21, x30, x31; + + x55005500 = a1 & ~a5; + x5A0F5A0F = a4 ^ x55005500; + x3333FFFF = a3 | a6; + x66666666 = a1 ^ a3; + x22226666 = x3333FFFF & x66666666; + x2D2D6969 = a4 ^ x22226666; + x25202160 = x2D2D6969 & ~x5A0F5A0F; + + x00FFFF00 = a5 ^ a6; + x33CCCC33 = a3 ^ x00FFFF00; + x4803120C = x5A0F5A0F & ~x33CCCC33; + x2222FFFF = a6 | x22226666; + x6A21EDF3 = x4803120C ^ x2222FFFF; + x4A01CC93 = x6A21EDF3 & ~x25202160; + + x5555FFFF = a1 | a6; + x7F75FFFF = x6A21EDF3 | x5555FFFF; + x00D20096 = a5 & ~x2D2D6969; + x7FA7FF69 = x7F75FFFF ^ x00D20096; + + x0A0A0000 = a4 & ~x5555FFFF; + x0AD80096 = x00D20096 ^ x0A0A0000; + x00999900 = x00FFFF00 & ~x66666666; + x0AD99996 = x0AD80096 | x00999900; + + x22332233 = a3 & ~x55005500; + x257AA5F0 = x5A0F5A0F ^ x7F75FFFF; + x054885C0 = x257AA5F0 & ~x22332233; + xFAB77A3F = ~x054885C0; + x2221EDF3 = x3333FFFF & x6A21EDF3; + xD89697CC = xFAB77A3F ^ x2221EDF3; + x20 = x7FA7FF69 & ~a2; + x21 = x20 ^ xD89697CC; + *out3 ^= x21; + + x05B77AC0 = x00FFFF00 ^ x054885C0; + x05F77AD6 = x00D20096 | x05B77AC0; + x36C48529 = x3333FFFF ^ x05F77AD6; + x6391D07C = a1 ^ x36C48529; + xBB0747B0 = xD89697CC ^ x6391D07C; + x00 = x25202160 | a2; + x01 = x00 ^ xBB0747B0; + *out1 ^= x01; + + x4C460000 = x3333FFFF ^ 
x7F75FFFF; + x4EDF9996 = x0AD99996 | x4C460000; + x2D4E49EA = x6391D07C ^ x4EDF9996; + xBBFFFFB0 = x00FFFF00 | xBB0747B0; + x96B1B65A = x2D4E49EA ^ xBBFFFFB0; + x10 = x4A01CC93 | a2; + x11 = x10 ^ x96B1B65A; + *out2 ^= x11; + + x5AFF5AFF = a5 | x5A0F5A0F; + x52B11215 = x5AFF5AFF & ~x2D4E49EA; + x4201C010 = x4A01CC93 & x6391D07C; + x10B0D205 = x52B11215 ^ x4201C010; + x30 = x10B0D205 | a2; + x31 = x30 ^ x0AD99996; + *out4 ^= x31; +} + +/* s2: bitslice DES S-box 2 in gate form. a1..a6 are the inputs; each xHHHHHHHH temporary holds one intermediate AND/OR/XOR/NOT/AND-NOT result. Every output is finished by a pair of steps that folds in a4 (x00/x01, x10/x11, x20/x21, x30/x31) and is then XORed into the word behind out1, out2, out3 and out4 respectively, so prior contents are combined rather than replaced. Outputs are emitted in the order out2, out1, out3, out4. */ void s2 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) +{ + u32 x33CC33CC; + u32 x55550000, x00AA00FF, x33BB33FF; + u32 x33CC0000, x11441144, x11BB11BB, x003311BB; + u32 x00000F0F, x336600FF, x332200FF, x332200F0; + u32 x0302000F, xAAAAAAAA, xA9A8AAA5, x33CCCC33, x33CCC030, x9A646A95; + u32 x00333303, x118822B8, xA8208805, x3CC3C33C, x94E34B39; + u32 x0331330C, x3FF3F33C, xA9DF596A, xA9DF5F6F, x962CAC53; + u32 xA9466A6A, x3DA52153, x29850143, x33C0330C, x1A45324F; + u32 x0A451047, xBBDFDD7B, xB19ACD3C; + u32 x00, x01, x10, x11, x20, x21, x30, x31; + + x33CC33CC = a2 ^ a5; + + x55550000 = a1 & ~a6; + x00AA00FF = a5 & ~x55550000; + x33BB33FF = a2 | x00AA00FF; + + x33CC0000 = x33CC33CC & ~a6; + x11441144 = a1 & x33CC33CC; + x11BB11BB = a5 ^ x11441144; + x003311BB = x11BB11BB & ~x33CC0000; + + x00000F0F = a3 & a6; + x336600FF = x00AA00FF ^ x33CC0000; + x332200FF = x33BB33FF & x336600FF; + x332200F0 = x332200FF & ~x00000F0F; + + x0302000F = a3 & x332200FF; + xAAAAAAAA = ~a1; + xA9A8AAA5 = x0302000F ^ xAAAAAAAA; + x33CCCC33 = a6 ^ x33CC33CC; + x33CCC030 = x33CCCC33 & ~x00000F0F; + x9A646A95 = xA9A8AAA5 ^ x33CCC030; + x10 = a4 & ~x332200F0; + x11 = x10 ^ x9A646A95; + *out2 ^= x11; + + x00333303 = a2 & ~x33CCC030; + x118822B8 = x11BB11BB ^ x00333303; + xA8208805 = xA9A8AAA5 & ~x118822B8; + x3CC3C33C = a3 ^ x33CCCC33; + x94E34B39 = xA8208805 ^ x3CC3C33C; + x00 = x33BB33FF & ~a4; + x01 = x00 ^ x94E34B39; + *out1 ^= x01; + + x0331330C = x0302000F ^ x00333303; + 
x3FF3F33C = x3CC3C33C | x0331330C; + xA9DF596A = x33BB33FF ^ x9A646A95; + xA9DF5F6F = x00000F0F | xA9DF596A; + x962CAC53 = x3FF3F33C ^ xA9DF5F6F; + + xA9466A6A = x332200FF ^ x9A646A95; + x3DA52153 = x94E34B39 ^ xA9466A6A; + x29850143 = xA9DF5F6F & x3DA52153; + x33C0330C = x33CC33CC & x3FF3F33C; + x1A45324F = x29850143 ^ x33C0330C; + x20 = x1A45324F | a4; + x21 = x20 ^ x962CAC53; + *out3 ^= x21; + + x0A451047 = x1A45324F & ~x118822B8; + xBBDFDD7B = x33CCCC33 | xA9DF596A; + xB19ACD3C = x0A451047 ^ xBBDFDD7B; + x30 = x003311BB | a4; + x31 = x30 ^ xB19ACD3C; + *out4 ^= x31; +} + +/* s3: bitslice DES S-box 3 in gate form. a1..a6 are the inputs; each xHHHHHHHH temporary holds one intermediate AND/OR/XOR/NOT/AND-NOT result. Every output is finished by a pair of steps that folds in a5 (x00/x01, x10/x11, x20/x21, x30/x31) and is then XORed into the word behind out1, out2, out3 and out4 respectively, so prior contents are combined rather than replaced. Outputs are emitted in the order out4, out2, out1, out3. */ void s3 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) +{ + u32 x44444444, x0F0FF0F0, x4F4FF4F4, x00FFFF00, x00AAAA00, x4FE55EF4; + u32 x3C3CC3C3, x3C3C0000, x7373F4F4, x0C840A00; + u32 x00005EF4, x00FF5EFF, x00555455, x3C699796; + u32 x000FF000, x55AA55AA, x26D9A15E, x2FDFAF5F, x2FD00F5F; + u32 x55AAFFAA, x28410014, x000000FF, x000000CC, x284100D8; + u32 x204100D0, x3C3CC3FF, x1C3CC32F, x4969967A; + u32 x4CC44CC4, x40C040C0, xC3C33C3C, x9669C396, xD6A98356; + u32 xD6E9C3D6, x4CEEEEC4, x9A072D12, x001A000B, x9A1F2D1B; + u32 x00, x01, x10, x11, x20, x21, x30, x31; + + x44444444 = a1 & ~a2; + x0F0FF0F0 = a3 ^ a6; + x4F4FF4F4 = x44444444 | x0F0FF0F0; + x00FFFF00 = a4 ^ a6; + x00AAAA00 = x00FFFF00 & ~a1; + x4FE55EF4 = x4F4FF4F4 ^ x00AAAA00; + + x3C3CC3C3 = a2 ^ x0F0FF0F0; + x3C3C0000 = x3C3CC3C3 & ~a6; + x7373F4F4 = x4F4FF4F4 ^ x3C3C0000; + x0C840A00 = x4FE55EF4 & ~x7373F4F4; + + x00005EF4 = a6 & x4FE55EF4; + x00FF5EFF = a4 | x00005EF4; + x00555455 = a1 & x00FF5EFF; + x3C699796 = x3C3CC3C3 ^ x00555455; + x30 = x4FE55EF4 & ~a5; + x31 = x30 ^ x3C699796; + *out4 ^= x31; + + x000FF000 = x0F0FF0F0 & x00FFFF00; + x55AA55AA = a1 ^ a4; + x26D9A15E = x7373F4F4 ^ x55AA55AA; + x2FDFAF5F = a3 | x26D9A15E; + x2FD00F5F = x2FDFAF5F & ~x000FF000; + + x55AAFFAA = x00AAAA00 | x55AA55AA; + x28410014 = x3C699796 & ~x55AAFFAA; + 
x000000FF = a4 & a6; + x000000CC = x000000FF & ~a2; + x284100D8 = x28410014 ^ x000000CC; + + x204100D0 = x7373F4F4 & x284100D8; + x3C3CC3FF = x3C3CC3C3 | x000000FF; + x1C3CC32F = x3C3CC3FF & ~x204100D0; + x4969967A = a1 ^ x1C3CC32F; + x10 = x2FD00F5F & a5; + x11 = x10 ^ x4969967A; + *out2 ^= x11; + + x4CC44CC4 = x4FE55EF4 & ~a2; + x40C040C0 = x4CC44CC4 & ~a3; + xC3C33C3C = ~x3C3CC3C3; + x9669C396 = x55AAFFAA ^ xC3C33C3C; + xD6A98356 = x40C040C0 ^ x9669C396; + x00 = a5 & ~x0C840A00; + x01 = x00 ^ xD6A98356; + *out1 ^= x01; + + xD6E9C3D6 = x40C040C0 | x9669C396; + x4CEEEEC4 = x00AAAA00 | x4CC44CC4; + x9A072D12 = xD6E9C3D6 ^ x4CEEEEC4; + x001A000B = a4 & ~x4FE55EF4; + x9A1F2D1B = x9A072D12 | x001A000B; + x20 = a5 & ~x284100D8; + x21 = x20 ^ x9A1F2D1B; + *out3 ^= x21; +} + +/* s4: bitslice DES S-box 4 in gate form. a1..a5 build the xHHHHHHHH intermediates; a6 is used only in the final select step of each output (x00, x10, x20, x30), after which x01/x11/x21/x31 are XORed into the words behind out1, out2, out3 and out4, so prior contents are combined rather than replaced. out2 reuses the complement of the out1 intermediate, and out3/out4 share the same pair of intermediates. */ void s4 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) +{ + u32 x5A5A5A5A, x0F0FF0F0; + u32 x33FF33FF, x33FFCC00, x0C0030F0, x0C0CC0C0, x0CF3C03F, x5EFBDA7F, + x52FBCA0F, x61C8F93C; + u32 x00C0C03C, x0F0F30C0, x3B92A366, x30908326, x3C90B3D6; + u32 x33CC33CC, x0C0CFFFF, x379E5C99, x04124C11, x56E9861E, xA91679E1; + u32 x9586CA37, x8402C833, x84C2C83F, xB35C94A6; + u32 x00, x01, x10, x11, x20, x21, x30, x31; + + x5A5A5A5A = a1 ^ a3; + x0F0FF0F0 = a3 ^ a5; + x33FF33FF = a2 | a4; + x33FFCC00 = a5 ^ x33FF33FF; + x0C0030F0 = x0F0FF0F0 & ~x33FFCC00; + x0C0CC0C0 = x0F0FF0F0 & ~a2; + x0CF3C03F = a4 ^ x0C0CC0C0; + x5EFBDA7F = x5A5A5A5A | x0CF3C03F; + x52FBCA0F = x5EFBDA7F & ~x0C0030F0; + x61C8F93C = a2 ^ x52FBCA0F; + + x00C0C03C = x0CF3C03F & x61C8F93C; + x0F0F30C0 = x0F0FF0F0 & ~x00C0C03C; + x3B92A366 = x5A5A5A5A ^ x61C8F93C; + x30908326 = x3B92A366 & ~x0F0F30C0; + x3C90B3D6 = x0C0030F0 ^ x30908326; + + x33CC33CC = a2 ^ a4; + x0C0CFFFF = a5 | x0C0CC0C0; + x379E5C99 = x3B92A366 ^ x0C0CFFFF; + x04124C11 = x379E5C99 & ~x33CC33CC; + x56E9861E = x52FBCA0F ^ x04124C11; + x00 = a6 & ~x3C90B3D6; + x01 = x00 ^ x56E9861E; + *out1 ^= 
x01; + + xA91679E1 = ~x56E9861E; + x10 = x3C90B3D6 & ~a6; + x11 = x10 ^ xA91679E1; + *out2 ^= x11; + + x9586CA37 = x3C90B3D6 ^ xA91679E1; + x8402C833 = x9586CA37 & ~x33CC33CC; + x84C2C83F = x00C0C03C | x8402C833; + xB35C94A6 = x379E5C99 ^ x84C2C83F; + x20 = x61C8F93C | a6; + x21 = x20 ^ xB35C94A6; + *out3 ^= x21; + + x30 = a6 & x61C8F93C; + x31 = x30 ^ xB35C94A6; + *out4 ^= x31; +} + +/* s5: bitslice DES S-box 5 in gate form. a1..a6 are the inputs; each xHHHHHHHH temporary holds one intermediate AND/OR/XOR/NOT/AND-NOT result. Every output is finished by a pair of steps that folds in a2 (x00/x01, x10/x11, x20/x21, x30/x31) and is then XORed into the word behind out1, out2, out3 and out4 respectively, so prior contents are combined rather than replaced. Outputs are emitted in the order out3, out4, out1, out2. */ void s5 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) +{ + u32 x77777777, x77770000, x22225555, x11116666, x1F1F6F6F; + u32 x70700000, x43433333, x00430033, x55557777, x55167744, x5A19784B; + u32 x5A1987B4, x7A3BD7F5, x003B00F5, x221955A0, x05050707, x271C52A7; + u32 x2A2A82A0, x6969B193, x1FE06F90, x16804E00, xE97FB1FF; + u32 x43403302, x35CAED30, x37DEFFB7, x349ECCB5, x0B01234A; + u32 x101884B4, x0FF8EB24, x41413333, x4FF9FB37, x4FC2FBC2; + u32 x22222222, x16BCEE97, x0F080B04, x19B4E593; + u32 x5C5C5C5C, x4448184C, x2DDABE71, x6992A63D; + u32 x00, x01, x10, x11, x20, x21, x30, x31; + + x77777777 = a1 | a3; + x77770000 = x77777777 & ~a6; + x22225555 = a1 ^ x77770000; + x11116666 = a3 ^ x22225555; + x1F1F6F6F = a4 | x11116666; + + x70700000 = x77770000 & ~a4; + x43433333 = a3 ^ x70700000; + x00430033 = a5 & x43433333; + x55557777 = a1 | x11116666; + x55167744 = x00430033 ^ x55557777; + x5A19784B = a4 ^ x55167744; + + x5A1987B4 = a6 ^ x5A19784B; + x7A3BD7F5 = x22225555 | x5A1987B4; + x003B00F5 = a5 & x7A3BD7F5; + x221955A0 = x22225555 ^ x003B00F5; + x05050707 = a4 & x55557777; + x271C52A7 = x221955A0 ^ x05050707; + + x2A2A82A0 = x7A3BD7F5 & ~a1; + x6969B193 = x43433333 ^ x2A2A82A0; + x1FE06F90 = a5 ^ x1F1F6F6F; + x16804E00 = x1FE06F90 & ~x6969B193; + xE97FB1FF = ~x16804E00; + x20 = xE97FB1FF & ~a2; + x21 = x20 ^ x5A19784B; + *out3 ^= x21; + + x43403302 = x43433333 & ~x003B00F5; + x35CAED30 = x2A2A82A0 ^ x1FE06F90; + x37DEFFB7 = x271C52A7 | x35CAED30; + x349ECCB5 = x37DEFFB7 & ~x43403302; + x0B01234A = 
x1F1F6F6F & ~x349ECCB5; + + x101884B4 = x5A1987B4 & x349ECCB5; + x0FF8EB24 = x1FE06F90 ^ x101884B4; + x41413333 = x43433333 & x55557777; + x4FF9FB37 = x0FF8EB24 | x41413333; + x4FC2FBC2 = x003B00F5 ^ x4FF9FB37; + x30 = x4FC2FBC2 & a2; + x31 = x30 ^ x271C52A7; + *out4 ^= x31; + + x22222222 = a1 ^ x77777777; + x16BCEE97 = x349ECCB5 ^ x22222222; + x0F080B04 = a4 & x0FF8EB24; + x19B4E593 = x16BCEE97 ^ x0F080B04; + x00 = x0B01234A | a2; + x01 = x00 ^ x19B4E593; + *out1 ^= x01; + + x5C5C5C5C = x1F1F6F6F ^ x43433333; + x4448184C = x5C5C5C5C & ~x19B4E593; + x2DDABE71 = x22225555 ^ x0FF8EB24; + x6992A63D = x4448184C ^ x2DDABE71; + x10 = x1F1F6F6F & a2; + x11 = x10 ^ x6992A63D; + *out2 ^= x11; +} + +/* s6: bitslice DES S-box 6 in gate form. a1..a6 are the inputs; each xHHHHHHHH temporary holds one intermediate AND/OR/XOR/NOT/AND-NOT result. Every output is finished by a pair of steps that folds in a4 (x00/x01, x10/x11, x20/x21, x30/x31) and is then XORed into the word behind out1, out2, out3 and out4 respectively, so prior contents are combined rather than replaced. Outputs are emitted in the order out4, out3, out2, out1. */ void s6 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) +{ + u32 x33CC33CC; + u32 x3333FFFF, x11115555, x22DD6699, x22DD9966, x00220099; + u32 x00551144, x33662277, x5A5A5A5A, x7B7E7A7F, x59A31CE6; + u32 x09030C06, x09030000, x336622FF, x3A6522FF; + u32 x484D494C, x0000B6B3, x0F0FB9BC, x00FC00F9, x0FFFB9FD; + u32 x5DF75DF7, x116600F7, x1E69B94B, x1668B94B; + u32 x7B7B7B7B, x411E5984, x1FFFFDFD, x5EE1A479; + u32 x3CB4DFD2, x004B002D, xB7B2B6B3, xCCC9CDC8, xCC82CDE5; + u32 x0055EEBB, x5A5AECE9, x0050ECA9, xC5CAC1CE, xC59A2D67; + u32 x00, x01, x10, x11, x20, x21, x30, x31; + + x33CC33CC = a2 ^ a5; + + x3333FFFF = a2 | a6; + x11115555 = a1 & x3333FFFF; + x22DD6699 = x33CC33CC ^ x11115555; + x22DD9966 = a6 ^ x22DD6699; + x00220099 = a5 & ~x22DD9966; + + x00551144 = a1 & x22DD9966; + x33662277 = a2 ^ x00551144; + x5A5A5A5A = a1 ^ a3; + x7B7E7A7F = x33662277 | x5A5A5A5A; + x59A31CE6 = x22DD6699 ^ x7B7E7A7F; + + x09030C06 = a3 & x59A31CE6; + x09030000 = x09030C06 & ~a6; + x336622FF = x00220099 | x33662277; + x3A6522FF = x09030000 ^ x336622FF; + x30 = x3A6522FF & a4; + x31 = x30 ^ x59A31CE6; + *out4 ^= x31; + + x484D494C = a2 ^ x7B7E7A7F; + x0000B6B3 = a6 & ~x484D494C; + x0F0FB9BC = a3 ^ x0000B6B3; + 
x00FC00F9 = a5 & ~x09030C06; + x0FFFB9FD = x0F0FB9BC | x00FC00F9; + + x5DF75DF7 = a1 | x59A31CE6; + x116600F7 = x336622FF & x5DF75DF7; + x1E69B94B = x0F0FB9BC ^ x116600F7; + x1668B94B = x1E69B94B & ~x09030000; + x20 = x00220099 | a4; + x21 = x20 ^ x1668B94B; + *out3 ^= x21; + + x7B7B7B7B = a2 | x5A5A5A5A; + x411E5984 = x3A6522FF ^ x7B7B7B7B; + x1FFFFDFD = x11115555 | x0FFFB9FD; + x5EE1A479 = x411E5984 ^ x1FFFFDFD; + + x3CB4DFD2 = x22DD6699 ^ x1E69B94B; + x004B002D = a5 & ~x3CB4DFD2; + xB7B2B6B3 = ~x484D494C; + xCCC9CDC8 = x7B7B7B7B ^ xB7B2B6B3; + xCC82CDE5 = x004B002D ^ xCCC9CDC8; + x10 = xCC82CDE5 & ~a4; + x11 = x10 ^ x5EE1A479; + *out2 ^= x11; + + x0055EEBB = a6 ^ x00551144; + x5A5AECE9 = a1 ^ x0F0FB9BC; + x0050ECA9 = x0055EEBB & x5A5AECE9; + xC5CAC1CE = x09030C06 ^ xCCC9CDC8; + xC59A2D67 = x0050ECA9 ^ xC5CAC1CE; + x00 = x0FFFB9FD & ~a4; + x01 = x00 ^ xC59A2D67; + *out1 ^= x01; +} + +/* s7: bitslice DES S-box 7 in gate form. a2..a6 build the xHHHHHHHH intermediates; a1 is used only in the final select step of each output (x00, x10, x20, x30), after which x01/x11/x21/x31 are XORed into the words behind out1, out2, out3 and out4, so prior contents are combined rather than replaced. Outputs are emitted in the order out4, out1, out3, out2. */ void s7 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) +{ + u32 x0FF00FF0, x3CC33CC3, x00003CC3, x0F000F00, x5A555A55, x00001841; + u32 x00000F00, x33333C33, x7B777E77, x0FF0F00F, x74878E78; + u32 x003C003C, x5A7D5A7D, x333300F0, x694E5A8D; + u32 x0FF0CCCC, x000F0303, x5A505854, x33CC000F, x699C585B; + u32 x7F878F78, x21101013, x7F979F7B, x30030CC0, x4F9493BB; + u32 x6F9CDBFB, x0000DBFB, x00005151, x26DAC936, x26DA9867; + u32 x27DA9877, x27DA438C, x2625C9C9, x27FFCBCD; + u32 x27FF1036, x27FF103E, xB06B6C44, x97947C7A; + u32 x00, x01, x10, x11, x20, x21, x30, x31; + + x0FF00FF0 = a4 ^ a5; + x3CC33CC3 = a3 ^ x0FF00FF0; + x00003CC3 = a6 & x3CC33CC3; + x0F000F00 = a4 & x0FF00FF0; + x5A555A55 = a2 ^ x0F000F00; + x00001841 = x00003CC3 & x5A555A55; + + x00000F00 = a6 & x0F000F00; + x33333C33 = a3 ^ x00000F00; + x7B777E77 = x5A555A55 | x33333C33; + x0FF0F00F = a6 ^ x0FF00FF0; + x74878E78 = x7B777E77 ^ x0FF0F00F; + x30 = a1 & ~x00001841; + x31 = x30 ^ x74878E78; + *out4 ^= x31; + + x003C003C = a5 & ~x3CC33CC3; + 
x5A7D5A7D = x5A555A55 | x003C003C; + x333300F0 = x00003CC3 ^ x33333C33; + x694E5A8D = x5A7D5A7D ^ x333300F0; + + x0FF0CCCC = x00003CC3 ^ x0FF0F00F; + x000F0303 = a4 & ~x0FF0CCCC; + x5A505854 = x5A555A55 & ~x000F0303; + x33CC000F = a5 ^ x333300F0; + x699C585B = x5A505854 ^ x33CC000F; + + x7F878F78 = x0F000F00 | x74878E78; + x21101013 = a3 & x699C585B; + x7F979F7B = x7F878F78 | x21101013; + x30030CC0 = x3CC33CC3 & ~x0FF0F00F; + x4F9493BB = x7F979F7B ^ x30030CC0; + x00 = x4F9493BB & ~a1; + x01 = x00 ^ x694E5A8D; + *out1 ^= x01; + + x6F9CDBFB = x699C585B | x4F9493BB; + x0000DBFB = a6 & x6F9CDBFB; + x00005151 = a2 & x0000DBFB; + x26DAC936 = x694E5A8D ^ x4F9493BB; + x26DA9867 = x00005151 ^ x26DAC936; + + x27DA9877 = x21101013 | x26DA9867; + x27DA438C = x0000DBFB ^ x27DA9877; + x2625C9C9 = a5 ^ x26DAC936; + x27FFCBCD = x27DA438C | x2625C9C9; + x20 = x27FFCBCD & a1; + x21 = x20 ^ x699C585B; + *out3 ^= x21; + + x27FF1036 = x0000DBFB ^ x27FFCBCD; + x27FF103E = x003C003C | x27FF1036; + xB06B6C44 = ~x4F9493BB; + x97947C7A = x27FF103E ^ xB06B6C44; + x10 = x97947C7A & ~a1; + x11 = x10 ^ x26DA9867; + *out2 ^= x11; +} + +/* s8: bitslice DES S-box 8 in gate form. a1..a5 build the xHHHHHHHH intermediates; a6 is used only in the final select step of each output (x00, x10, x20, x30), after which x01/x11/x21/x31 are XORed into the words behind out1, out2, out3 and out4, so prior contents are combined rather than replaced. Outputs are emitted in the order out2, out3, out4, out1. */ void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) +{ + u32 x0C0C0C0C, x0000F0F0, x00FFF00F, x00555005, x00515001; + u32 x33000330, x77555775, x30303030, x3030CFCF, x30104745, x30555745; + u32 xFF000FF0, xCF1048B5, x080A080A, xC71A40BF, xCB164CB3; + u32 x9E4319E6, x000019E6, xF429738C, xF4296A6A, xC729695A; + u32 xC47C3D2F, xF77F3F3F, x9E43E619, x693CD926; + u32 xF719A695, xF4FF73FF, x03E6D56A, x56B3803F; + u32 xF700A600, x61008000, x03B7856B, x62B7056B; + u32 x00, x01, x10, x11, x20, x21, x30, x31; + + x0C0C0C0C = a3 & ~a2; + x0000F0F0 = a5 & ~a3; + x00FFF00F = a4 ^ x0000F0F0; + x00555005 = a1 & x00FFF00F; + x00515001 = x00555005 & ~x0C0C0C0C; + + x33000330 = a2 & ~x00FFF00F; + x77555775 = a1 | x33000330; + x30303030 = a2 & ~a3; + x3030CFCF = a5 ^ x30303030; + x30104745 = 
x77555775 & x3030CFCF; + x30555745 = x00555005 | x30104745; + + xFF000FF0 = ~x00FFF00F; + xCF1048B5 = x30104745 ^ xFF000FF0; + x080A080A = a3 & ~x77555775; + xC71A40BF = xCF1048B5 ^ x080A080A; + xCB164CB3 = x0C0C0C0C ^ xC71A40BF; + x10 = x00515001 | a6; + x11 = x10 ^ xCB164CB3; + *out2 ^= x11; + + x9E4319E6 = a1 ^ xCB164CB3; + x000019E6 = a5 & x9E4319E6; + xF429738C = a2 ^ xC71A40BF; + xF4296A6A = x000019E6 ^ xF429738C; + xC729695A = x33000330 ^ xF4296A6A; + + xC47C3D2F = x30555745 ^ xF4296A6A; + xF77F3F3F = a2 | xC47C3D2F; + x9E43E619 = a5 ^ x9E4319E6; + x693CD926 = xF77F3F3F ^ x9E43E619; + x20 = x30555745 & a6; + x21 = x20 ^ x693CD926; + *out3 ^= x21; + + xF719A695 = x3030CFCF ^ xC729695A; + xF4FF73FF = a4 | xF429738C; + x03E6D56A = xF719A695 ^ xF4FF73FF; + x56B3803F = a1 ^ x03E6D56A; + x30 = x56B3803F & a6; + x31 = x30 ^ xC729695A; + *out4 ^= x31; + + xF700A600 = xF719A695 & ~a4; + x61008000 = x693CD926 & xF700A600; + x03B7856B = x00515001 ^ x03E6D56A; + x62B7056B = x61008000 ^ x03B7856B; + x00 = x62B7056B | a6; + x01 = x00 ^ xC729695A; + *out1 ^= x01; +} + +#endif +#endif + +#if defined IS_AMD || defined IS_GENERIC + +/* + * Bitslice DES S-boxes for x86 with MMX/SSE2/AVX and for typical RISC + * architectures. These use AND, OR, XOR, NOT, and AND-NOT gates. + * + * Gate counts: 49 44 46 33 48 46 46 41 + * Average: 44.125 + * + * Several same-gate-count expressions for each S-box are included (for use on + * different CPUs/GPUs). + * + * These Boolean expressions corresponding to DES S-boxes have been generated + * by Roman Rusakov for use in Openwall's + * John the Ripper password cracker: http://www.openwall.com/john/ + * Being mathematical formulas, they are not copyrighted and are free for reuse + * by anyone. + * + * This file (a specific representation of the S-box expressions, surrounding + * logic) is Copyright (c) 2011 by Solar Designer . + * Redistribution and use in source and binary forms, with or without + * modification, are permitted. 
(This is a heavily cut-down "BSD license".) + * + * The effort has been sponsored by Rapid7: http://www.rapid7.com + */ + +/* s1: bitslice DES S-box 1 in gate form, compiled when IS_AMD or IS_GENERIC is defined. a1..a6 are the inputs; each xHHHHHHHH temporary holds one intermediate AND/OR/XOR/NOT/AND-NOT result. Every output is finished by a pair of steps that folds in a2 (x00/x01, x10/x11, x20/x21, x30/x31) and is then XORed into the word behind out1, out2, out3 and out4 respectively, so prior contents are combined rather than replaced. Outputs are emitted in the order out3, out1, out2, out4. */ void s1 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) +{ + u32 x55005500, x5A0F5A0F, x3333FFFF, x66666666, x22226666, x2D2D6969, + x25202160; + u32 x00FFFF00, x33CCCC33, x4803120C, x2222FFFF, x6A21EDF3, x4A01CC93; + u32 x5555FFFF, x7F75FFFF, x00D20096, x7FA7FF69; + u32 x0A0A0000, x0AD80096, x00999900, x0AD99996; + u32 x22332233, x257AA5F0, x054885C0, xFAB77A3F, x2221EDF3, xD89697CC; + u32 x05B77AC0, x05F77AD6, x36C48529, x6391D07C, xBB0747B0; + u32 x4C460000, x4EDF9996, x2D4E49EA, xBBFFFFB0, x96B1B65A; + u32 x5AFF5AFF, x52B11215, x4201C010, x10B0D205; + u32 x00, x01, x10, x11, x20, x21, x30, x31; + + x55005500 = a1 & ~a5; + x5A0F5A0F = a4 ^ x55005500; + x3333FFFF = a3 | a6; + x66666666 = a1 ^ a3; + x22226666 = x3333FFFF & x66666666; + x2D2D6969 = a4 ^ x22226666; + x25202160 = x2D2D6969 & ~x5A0F5A0F; + + x00FFFF00 = a5 ^ a6; + x33CCCC33 = a3 ^ x00FFFF00; + x4803120C = x5A0F5A0F & ~x33CCCC33; + x2222FFFF = a6 | x22226666; + x6A21EDF3 = x4803120C ^ x2222FFFF; + x4A01CC93 = x6A21EDF3 & ~x25202160; + + x5555FFFF = a1 | a6; + x7F75FFFF = x6A21EDF3 | x5555FFFF; + x00D20096 = a5 & ~x2D2D6969; + x7FA7FF69 = x7F75FFFF ^ x00D20096; + + x0A0A0000 = a4 & ~x5555FFFF; + x0AD80096 = x00D20096 ^ x0A0A0000; + x00999900 = x00FFFF00 & ~x66666666; + x0AD99996 = x0AD80096 | x00999900; + + x22332233 = a3 & ~x55005500; + x257AA5F0 = x5A0F5A0F ^ x7F75FFFF; + x054885C0 = x257AA5F0 & ~x22332233; + xFAB77A3F = ~x054885C0; + x2221EDF3 = x3333FFFF & x6A21EDF3; + xD89697CC = xFAB77A3F ^ x2221EDF3; + x20 = x7FA7FF69 & ~a2; + x21 = x20 ^ xD89697CC; + *out3 ^= x21; + + x05B77AC0 = x00FFFF00 ^ x054885C0; + x05F77AD6 = x00D20096 | x05B77AC0; + x36C48529 = x3333FFFF ^ x05F77AD6; + x6391D07C = a1 ^ x36C48529; + xBB0747B0 = xD89697CC ^ x6391D07C; + x00 = x25202160 | a2; + x01 = x00 ^ xBB0747B0; + 
*out1 ^= x01; + + x4C460000 = x3333FFFF ^ x7F75FFFF; + x4EDF9996 = x0AD99996 | x4C460000; + x2D4E49EA = x6391D07C ^ x4EDF9996; + xBBFFFFB0 = x00FFFF00 | xBB0747B0; + x96B1B65A = x2D4E49EA ^ xBBFFFFB0; + x10 = x4A01CC93 | a2; + x11 = x10 ^ x96B1B65A; + *out2 ^= x11; + + x5AFF5AFF = a5 | x5A0F5A0F; + x52B11215 = x5AFF5AFF & ~x2D4E49EA; + x4201C010 = x4A01CC93 & x6391D07C; + x10B0D205 = x52B11215 ^ x4201C010; + x30 = x10B0D205 | a2; + x31 = x30 ^ x0AD99996; + *out4 ^= x31; +} + +/* s2: bitslice DES S-box 2 in gate form. a1..a6 are the inputs; each xHHHHHHHH temporary holds one intermediate AND/OR/XOR/NOT/AND-NOT result. Every output is finished by a pair of steps that folds in a4 (x00/x01, x10/x11, x20/x21, x30/x31) and is then XORed into the word behind out1, out2, out3 and out4 respectively, so prior contents are combined rather than replaced. Outputs are emitted in the order out2, out1, out3, out4. */ void s2 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) +{ + u32 x33CC33CC; + u32 x55550000, x00AA00FF, x33BB33FF; + u32 x33CC0000, x11441144, x11BB11BB, x003311BB; + u32 x00000F0F, x336600FF, x332200FF, x332200F0; + u32 x0302000F, xAAAAAAAA, xA9A8AAA5, x33CCCC33, x33CCC030, x9A646A95; + u32 x00333303, x118822B8, xA8208805, x3CC3C33C, x94E34B39; + u32 x0331330C, x3FF3F33C, xA9DF596A, xA9DF5F6F, x962CAC53; + u32 xA9466A6A, x3DA52153, x29850143, x33C0330C, x1A45324F; + u32 x0A451047, xBBDFDD7B, xB19ACD3C; + u32 x00, x01, x10, x11, x20, x21, x30, x31; + + x33CC33CC = a2 ^ a5; + + x55550000 = a1 & ~a6; + x00AA00FF = a5 & ~x55550000; + x33BB33FF = a2 | x00AA00FF; + + x33CC0000 = x33CC33CC & ~a6; + x11441144 = a1 & x33CC33CC; + x11BB11BB = a5 ^ x11441144; + x003311BB = x11BB11BB & ~x33CC0000; + + x00000F0F = a3 & a6; + x336600FF = x00AA00FF ^ x33CC0000; + x332200FF = x33BB33FF & x336600FF; + x332200F0 = x332200FF & ~x00000F0F; + + x0302000F = a3 & x332200FF; + xAAAAAAAA = ~a1; + xA9A8AAA5 = x0302000F ^ xAAAAAAAA; + x33CCCC33 = a6 ^ x33CC33CC; + x33CCC030 = x33CCCC33 & ~x00000F0F; + x9A646A95 = xA9A8AAA5 ^ x33CCC030; + x10 = a4 & ~x332200F0; + x11 = x10 ^ x9A646A95; + *out2 ^= x11; + + x00333303 = a2 & ~x33CCC030; + x118822B8 = x11BB11BB ^ x00333303; + xA8208805 = xA9A8AAA5 & ~x118822B8; + x3CC3C33C = a3 ^ x33CCCC33; + x94E34B39 = xA8208805 ^ x3CC3C33C; + x00 = x33BB33FF & ~a4; + x01 = x00 ^ x94E34B39; + *out1 ^= x01; + 
+ x0331330C = x0302000F ^ x00333303; + x3FF3F33C = x3CC3C33C | x0331330C; + xA9DF596A = x33BB33FF ^ x9A646A95; + xA9DF5F6F = x00000F0F | xA9DF596A; + x962CAC53 = x3FF3F33C ^ xA9DF5F6F; + + xA9466A6A = x332200FF ^ x9A646A95; + x3DA52153 = x94E34B39 ^ xA9466A6A; + x29850143 = xA9DF5F6F & x3DA52153; + x33C0330C = x33CC33CC & x3FF3F33C; + x1A45324F = x29850143 ^ x33C0330C; + x20 = x1A45324F | a4; + x21 = x20 ^ x962CAC53; + *out3 ^= x21; + + x0A451047 = x1A45324F & ~x118822B8; + xBBDFDD7B = x33CCCC33 | xA9DF596A; + xB19ACD3C = x0A451047 ^ xBBDFDD7B; + x30 = x003311BB | a4; + x31 = x30 ^ xB19ACD3C; + *out4 ^= x31; +} + +void s3 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) +{ + u32 x44444444, x0F0FF0F0, x4F4FF4F4, x00FFFF00, x00AAAA00, x4FE55EF4; + u32 x3C3CC3C3, x3C3C0000, x7373F4F4, x0C840A00; + u32 x00005EF4, x00FF5EFF, x00555455, x3C699796; + u32 x000FF000, x55AA55AA, x26D9A15E, x2FDFAF5F, x2FD00F5F; + u32 x55AAFFAA, x28410014, x000000FF, x000000CC, x284100D8; + u32 x204100D0, x3C3CC3FF, x1C3CC32F, x4969967A; + u32 x4CC44CC4, x40C040C0, xC3C33C3C, x9669C396, xD6A98356; + u32 xD6E9C3D6, x4CEEEEC4, x9A072D12, x001A000B, x9A1F2D1B; + u32 x00, x01, x10, x11, x20, x21, x30, x31; + + x44444444 = a1 & ~a2; + x0F0FF0F0 = a3 ^ a6; + x4F4FF4F4 = x44444444 | x0F0FF0F0; + x00FFFF00 = a4 ^ a6; + x00AAAA00 = x00FFFF00 & ~a1; + x4FE55EF4 = x4F4FF4F4 ^ x00AAAA00; + + x3C3CC3C3 = a2 ^ x0F0FF0F0; + x3C3C0000 = x3C3CC3C3 & ~a6; + x7373F4F4 = x4F4FF4F4 ^ x3C3C0000; + x0C840A00 = x4FE55EF4 & ~x7373F4F4; + + x00005EF4 = a6 & x4FE55EF4; + x00FF5EFF = a4 | x00005EF4; + x00555455 = a1 & x00FF5EFF; + x3C699796 = x3C3CC3C3 ^ x00555455; + x30 = x4FE55EF4 & ~a5; + x31 = x30 ^ x3C699796; + *out4 ^= x31; + + x000FF000 = x0F0FF0F0 & x00FFFF00; + x55AA55AA = a1 ^ a4; + x26D9A15E = x7373F4F4 ^ x55AA55AA; + x2FDFAF5F = a3 | x26D9A15E; + x2FD00F5F = x2FDFAF5F & ~x000FF000; + + x55AAFFAA = x00AAAA00 | x55AA55AA; + 
x28410014 = x3C699796 & ~x55AAFFAA; + x000000FF = a4 & a6; + x000000CC = x000000FF & ~a2; + x284100D8 = x28410014 ^ x000000CC; + + x204100D0 = x7373F4F4 & x284100D8; + x3C3CC3FF = x3C3CC3C3 | x000000FF; + x1C3CC32F = x3C3CC3FF & ~x204100D0; + x4969967A = a1 ^ x1C3CC32F; + x10 = x2FD00F5F & a5; + x11 = x10 ^ x4969967A; + *out2 ^= x11; + + x4CC44CC4 = x4FE55EF4 & ~a2; + x40C040C0 = x4CC44CC4 & ~a3; + xC3C33C3C = ~x3C3CC3C3; + x9669C396 = x55AAFFAA ^ xC3C33C3C; + xD6A98356 = x40C040C0 ^ x9669C396; + x00 = a5 & ~x0C840A00; + x01 = x00 ^ xD6A98356; + *out1 ^= x01; + + xD6E9C3D6 = x40C040C0 | x9669C396; + x4CEEEEC4 = x00AAAA00 | x4CC44CC4; + x9A072D12 = xD6E9C3D6 ^ x4CEEEEC4; + x001A000B = a4 & ~x4FE55EF4; + x9A1F2D1B = x9A072D12 | x001A000B; + x20 = a5 & ~x284100D8; + x21 = x20 ^ x9A1F2D1B; + *out3 ^= x21; +} +/* s4: pure bitwise network (&, |, ^, ~) over the six u32 inputs a1..a6. The four results are XOR-accumulated into *out1, *out2, *out3 and *out4 (written in that order), never plainly assigned. a6 is only used in the final mixing step of each output, and the out3 and out4 results share the same term xB35C94A6. DES() calls it with data words XORed with k18..k23 as a1..a6. Presumably the bitsliced form of DES S-box 4, one independent lane per bit position; TODO confirm against the S-box source. */ +void s4 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) +{ + u32 x5A5A5A5A, x0F0FF0F0; + u32 x33FF33FF, x33FFCC00, x0C0030F0, x0C0CC0C0, x0CF3C03F, x5EFBDA7F, + x52FBCA0F, x61C8F93C; + u32 x00C0C03C, x0F0F30C0, x3B92A366, x30908326, x3C90B3D6; + u32 x33CC33CC, x0C0CFFFF, x379E5C99, x04124C11, x56E9861E, xA91679E1; + u32 x9586CA37, x8402C833, x84C2C83F, xB35C94A6; + u32 x00, x01, x10, x11, x20, x21, x30, x31; + + x5A5A5A5A = a1 ^ a3; + x0F0FF0F0 = a3 ^ a5; + x33FF33FF = a2 | a4; + x33FFCC00 = a5 ^ x33FF33FF; + x0C0030F0 = x0F0FF0F0 & ~x33FFCC00; + x0C0CC0C0 = x0F0FF0F0 & ~a2; + x0CF3C03F = a4 ^ x0C0CC0C0; + x5EFBDA7F = x5A5A5A5A | x0CF3C03F; + x52FBCA0F = x5EFBDA7F & ~x0C0030F0; + x61C8F93C = a2 ^ x52FBCA0F; + + x00C0C03C = x0CF3C03F & x61C8F93C; + x0F0F30C0 = x0F0FF0F0 & ~x00C0C03C; + x3B92A366 = x5A5A5A5A ^ x61C8F93C; + x30908326 = x3B92A366 & ~x0F0F30C0; + x3C90B3D6 = x0C0030F0 ^ x30908326; + + x33CC33CC = a2 ^ a4; + x0C0CFFFF = a5 | x0C0CC0C0; + x379E5C99 = x3B92A366 ^ x0C0CFFFF; + x04124C11 = x379E5C99 & ~x33CC33CC; + x56E9861E = x52FBCA0F ^ x04124C11; + x00 = a6 & ~x3C90B3D6; + 
x01 = x00 ^ x56E9861E; + *out1 ^= x01; + + xA91679E1 = ~x56E9861E; + x10 = x3C90B3D6 & ~a6; + x11 = x10 ^ xA91679E1; + *out2 ^= x11; + + x9586CA37 = x3C90B3D6 ^ xA91679E1; + x8402C833 = x9586CA37 & ~x33CC33CC; + x84C2C83F = x00C0C03C | x8402C833; + xB35C94A6 = x379E5C99 ^ x84C2C83F; + x20 = x61C8F93C | a6; + x21 = x20 ^ xB35C94A6; + *out3 ^= x21; + + x30 = a6 & x61C8F93C; + x31 = x30 ^ xB35C94A6; + *out4 ^= x31; +} +/* s5: pure bitwise network (&, |, ^, ~) over the six u32 inputs a1..a6. The four results are XOR-accumulated into *out3, *out4, *out1 and *out2 (written in that order), never plainly assigned. DES() calls it with data words XORed with k24..k29 as a1..a6. Presumably the bitsliced form of DES S-box 5, one independent lane per bit position; TODO confirm against the S-box source. */ +void s5 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) +{ + u32 x77777777, x77770000, x22225555, x11116666, x1F1F6F6F; + u32 x70700000, x43433333, x00430033, x55557777, x55167744, x5A19784B; + u32 x5A1987B4, x7A3BD7F5, x003B00F5, x221955A0, x05050707, x271C52A7; + u32 x2A2A82A0, x6969B193, x1FE06F90, x16804E00, xE97FB1FF; + u32 x43403302, x35CAED30, x37DEFFB7, x349ECCB5, x0B01234A; + u32 x101884B4, x0FF8EB24, x41413333, x4FF9FB37, x4FC2FBC2; + u32 x22222222, x16BCEE97, x0F080B04, x19B4E593; + u32 x5C5C5C5C, x4448184C, x2DDABE71, x6992A63D; + u32 x00, x01, x10, x11, x20, x21, x30, x31; + + x77777777 = a1 | a3; + x77770000 = x77777777 & ~a6; + x22225555 = a1 ^ x77770000; + x11116666 = a3 ^ x22225555; + x1F1F6F6F = a4 | x11116666; + + x70700000 = x77770000 & ~a4; + x43433333 = a3 ^ x70700000; + x00430033 = a5 & x43433333; + x55557777 = a1 | x11116666; + x55167744 = x00430033 ^ x55557777; + x5A19784B = a4 ^ x55167744; + + x5A1987B4 = a6 ^ x5A19784B; + x7A3BD7F5 = x22225555 | x5A1987B4; + x003B00F5 = a5 & x7A3BD7F5; + x221955A0 = x22225555 ^ x003B00F5; + x05050707 = a4 & x55557777; + x271C52A7 = x221955A0 ^ x05050707; + + x2A2A82A0 = x7A3BD7F5 & ~a1; + x6969B193 = x43433333 ^ x2A2A82A0; + x1FE06F90 = a5 ^ x1F1F6F6F; + x16804E00 = x1FE06F90 & ~x6969B193; + xE97FB1FF = ~x16804E00; + x20 = xE97FB1FF & ~a2; + x21 = x20 ^ x5A19784B; + *out3 ^= x21; + + x43403302 = x43433333 & ~x003B00F5; + x35CAED30 = x2A2A82A0 ^ x1FE06F90; + x37DEFFB7 = x271C52A7 | x35CAED30; + x349ECCB5 = 
x37DEFFB7 & ~x43403302; + x0B01234A = x1F1F6F6F & ~x349ECCB5; + + x101884B4 = x5A1987B4 & x349ECCB5; + x0FF8EB24 = x1FE06F90 ^ x101884B4; + x41413333 = x43433333 & x55557777; + x4FF9FB37 = x0FF8EB24 | x41413333; + x4FC2FBC2 = x003B00F5 ^ x4FF9FB37; + x30 = x4FC2FBC2 & a2; + x31 = x30 ^ x271C52A7; + *out4 ^= x31; + + x22222222 = a1 ^ x77777777; + x16BCEE97 = x349ECCB5 ^ x22222222; + x0F080B04 = a4 & x0FF8EB24; + x19B4E593 = x16BCEE97 ^ x0F080B04; + x00 = x0B01234A | a2; + x01 = x00 ^ x19B4E593; + *out1 ^= x01; + + x5C5C5C5C = x1F1F6F6F ^ x43433333; + x4448184C = x5C5C5C5C & ~x19B4E593; + x2DDABE71 = x22225555 ^ x0FF8EB24; + x6992A63D = x4448184C ^ x2DDABE71; + x10 = x1F1F6F6F & a2; + x11 = x10 ^ x6992A63D; + *out2 ^= x11; +} +/* s6: pure bitwise network (&, |, ^, ~) over the six u32 inputs a1..a6. The four results are XOR-accumulated into *out4, *out3, *out2 and *out1 (written in that order), never plainly assigned. DES() calls it with data words XORed with k30..k35 as a1..a6. Presumably the bitsliced form of DES S-box 6, one independent lane per bit position; TODO confirm against the S-box source. */ +void s6 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) +{ + u32 x33CC33CC; + u32 x3333FFFF, x11115555, x22DD6699, x22DD9966, x00220099; + u32 x00551144, x33662277, x5A5A5A5A, x7B7E7A7F, x59A31CE6; + u32 x09030C06, x09030000, x336622FF, x3A6522FF; + u32 x484D494C, x0000B6B3, x0F0FB9BC, x00FC00F9, x0FFFB9FD; + u32 x5DF75DF7, x116600F7, x1E69B94B, x1668B94B; + u32 x7B7B7B7B, x411E5984, x1FFFFDFD, x5EE1A479; + u32 x3CB4DFD2, x004B002D, xB7B2B6B3, xCCC9CDC8, xCC82CDE5; + u32 x0055EEBB, x5A5AECE9, x0050ECA9, xC5CAC1CE, xC59A2D67; + u32 x00, x01, x10, x11, x20, x21, x30, x31; + + x33CC33CC = a2 ^ a5; + + x3333FFFF = a2 | a6; + x11115555 = a1 & x3333FFFF; + x22DD6699 = x33CC33CC ^ x11115555; + x22DD9966 = a6 ^ x22DD6699; + x00220099 = a5 & ~x22DD9966; + + x00551144 = a1 & x22DD9966; + x33662277 = a2 ^ x00551144; + x5A5A5A5A = a1 ^ a3; + x7B7E7A7F = x33662277 | x5A5A5A5A; + x59A31CE6 = x22DD6699 ^ x7B7E7A7F; + + x09030C06 = a3 & x59A31CE6; + x09030000 = x09030C06 & ~a6; + x336622FF = x00220099 | x33662277; + x3A6522FF = x09030000 ^ x336622FF; + x30 = x3A6522FF & a4; + x31 = x30 ^ x59A31CE6; + *out4 ^= x31; + + x484D494C = a2 ^ x7B7E7A7F; + x0000B6B3 = a6 & ~x484D494C; + 
x0F0FB9BC = a3 ^ x0000B6B3; + x00FC00F9 = a5 & ~x09030C06; + x0FFFB9FD = x0F0FB9BC | x00FC00F9; + + x5DF75DF7 = a1 | x59A31CE6; + x116600F7 = x336622FF & x5DF75DF7; + x1E69B94B = x0F0FB9BC ^ x116600F7; + x1668B94B = x1E69B94B & ~x09030000; + x20 = x00220099 | a4; + x21 = x20 ^ x1668B94B; + *out3 ^= x21; + + x7B7B7B7B = a2 | x5A5A5A5A; + x411E5984 = x3A6522FF ^ x7B7B7B7B; + x1FFFFDFD = x11115555 | x0FFFB9FD; + x5EE1A479 = x411E5984 ^ x1FFFFDFD; + + x3CB4DFD2 = x22DD6699 ^ x1E69B94B; + x004B002D = a5 & ~x3CB4DFD2; + xB7B2B6B3 = ~x484D494C; + xCCC9CDC8 = x7B7B7B7B ^ xB7B2B6B3; + xCC82CDE5 = x004B002D ^ xCCC9CDC8; + x10 = xCC82CDE5 & ~a4; + x11 = x10 ^ x5EE1A479; + *out2 ^= x11; + + x0055EEBB = a6 ^ x00551144; + x5A5AECE9 = a1 ^ x0F0FB9BC; + x0050ECA9 = x0055EEBB & x5A5AECE9; + xC5CAC1CE = x09030C06 ^ xCCC9CDC8; + xC59A2D67 = x0050ECA9 ^ xC5CAC1CE; + x00 = x0FFFB9FD & ~a4; + x01 = x00 ^ xC59A2D67; + *out1 ^= x01; +} +/* s7: pure bitwise network (&, |, ^, ~) over the six u32 inputs a1..a6. The four results are XOR-accumulated into *out4, *out1, *out3 and *out2 (written in that order), never plainly assigned. a1 is only used in the final mixing step of each output. DES() calls it with data words XORed with k36..k41 as a1..a6. Presumably the bitsliced form of DES S-box 7, one independent lane per bit position; TODO confirm against the S-box source. */ +void s7 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) +{ + u32 x0FF00FF0, x3CC33CC3, x00003CC3, x0F000F00, x5A555A55, x00001841; + u32 x00000F00, x33333C33, x7B777E77, x0FF0F00F, x74878E78; + u32 x003C003C, x5A7D5A7D, x333300F0, x694E5A8D; + u32 x0FF0CCCC, x000F0303, x5A505854, x33CC000F, x699C585B; + u32 x7F878F78, x21101013, x7F979F7B, x30030CC0, x4F9493BB; + u32 x6F9CDBFB, x0000DBFB, x00005151, x26DAC936, x26DA9867; + u32 x27DA9877, x27DA438C, x2625C9C9, x27FFCBCD; + u32 x27FF1036, x27FF103E, xB06B6C44, x97947C7A; + u32 x00, x01, x10, x11, x20, x21, x30, x31; + + x0FF00FF0 = a4 ^ a5; + x3CC33CC3 = a3 ^ x0FF00FF0; + x00003CC3 = a6 & x3CC33CC3; + x0F000F00 = a4 & x0FF00FF0; + x5A555A55 = a2 ^ x0F000F00; + x00001841 = x00003CC3 & x5A555A55; + + x00000F00 = a6 & x0F000F00; + x33333C33 = a3 ^ x00000F00; + x7B777E77 = x5A555A55 | x33333C33; + x0FF0F00F = a6 ^ x0FF00FF0; + x74878E78 = x7B777E77 ^ x0FF0F00F; + x30 = a1 & ~x00001841; + x31 = x30 ^ x74878E78; + *out4 ^= x31; + + 
x003C003C = a5 & ~x3CC33CC3; + x5A7D5A7D = x5A555A55 | x003C003C; + x333300F0 = x00003CC3 ^ x33333C33; + x694E5A8D = x5A7D5A7D ^ x333300F0; + + x0FF0CCCC = x00003CC3 ^ x0FF0F00F; + x000F0303 = a4 & ~x0FF0CCCC; + x5A505854 = x5A555A55 & ~x000F0303; + x33CC000F = a5 ^ x333300F0; + x699C585B = x5A505854 ^ x33CC000F; + + x7F878F78 = x0F000F00 | x74878E78; + x21101013 = a3 & x699C585B; + x7F979F7B = x7F878F78 | x21101013; + x30030CC0 = x3CC33CC3 & ~x0FF0F00F; + x4F9493BB = x7F979F7B ^ x30030CC0; + x00 = x4F9493BB & ~a1; + x01 = x00 ^ x694E5A8D; + *out1 ^= x01; + + x6F9CDBFB = x699C585B | x4F9493BB; + x0000DBFB = a6 & x6F9CDBFB; + x00005151 = a2 & x0000DBFB; + x26DAC936 = x694E5A8D ^ x4F9493BB; + x26DA9867 = x00005151 ^ x26DAC936; + + x27DA9877 = x21101013 | x26DA9867; + x27DA438C = x0000DBFB ^ x27DA9877; + x2625C9C9 = a5 ^ x26DAC936; + x27FFCBCD = x27DA438C | x2625C9C9; + x20 = x27FFCBCD & a1; + x21 = x20 ^ x699C585B; + *out3 ^= x21; + + x27FF1036 = x0000DBFB ^ x27FFCBCD; + x27FF103E = x003C003C | x27FF1036; + xB06B6C44 = ~x4F9493BB; + x97947C7A = x27FF103E ^ xB06B6C44; + x10 = x97947C7A & ~a1; + x11 = x10 ^ x26DA9867; + *out2 ^= x11; +} +/* s8: pure bitwise network (&, |, ^, ~) over the six u32 inputs a1..a6. The four results are XOR-accumulated into *out2, *out3, *out4 and *out1 (written in that order), never plainly assigned. a6 is only used in the final mixing step of each output. DES() calls it with data words XORed with k42..k47 as a1..a6. Presumably the bitsliced form of DES S-box 8, one independent lane per bit position; TODO confirm against the S-box source. */ +void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) +{ + u32 x0C0C0C0C, x0000F0F0, x00FFF00F, x00555005, x00515001; + u32 x33000330, x77555775, x30303030, x3030CFCF, x30104745, x30555745; + u32 xFF000FF0, xCF1048B5, x080A080A, xC71A40BF, xCB164CB3; + u32 x9E4319E6, x000019E6, xF429738C, xF4296A6A, xC729695A; + u32 xC47C3D2F, xF77F3F3F, x9E43E619, x693CD926; + u32 xF719A695, xF4FF73FF, x03E6D56A, x56B3803F; + u32 xF700A600, x61008000, x03B7856B, x62B7056B; + u32 x00, x01, x10, x11, x20, x21, x30, x31; + + x0C0C0C0C = a3 & ~a2; + x0000F0F0 = a5 & ~a3; + x00FFF00F = a4 ^ x0000F0F0; + x00555005 = a1 & x00FFF00F; + x00515001 = x00555005 & ~x0C0C0C0C; + + x33000330 = a2 & ~x00FFF00F; + x77555775 = a1 | x33000330; + x30303030 = a2 & ~a3; + x3030CFCF = a5 ^ 
x30303030; + x30104745 = x77555775 & x3030CFCF; + x30555745 = x00555005 | x30104745; + + xFF000FF0 = ~x00FFF00F; + xCF1048B5 = x30104745 ^ xFF000FF0; + x080A080A = a3 & ~x77555775; + xC71A40BF = xCF1048B5 ^ x080A080A; + xCB164CB3 = x0C0C0C0C ^ xC71A40BF; + x10 = x00515001 | a6; + x11 = x10 ^ xCB164CB3; + *out2 ^= x11; + + x9E4319E6 = a1 ^ xCB164CB3; + x000019E6 = a5 & x9E4319E6; + xF429738C = a2 ^ xC71A40BF; + xF4296A6A = x000019E6 ^ xF429738C; + xC729695A = x33000330 ^ xF4296A6A; + + xC47C3D2F = x30555745 ^ xF4296A6A; + xF77F3F3F = a2 | xC47C3D2F; + x9E43E619 = a5 ^ x9E4319E6; + x693CD926 = xF77F3F3F ^ x9E43E619; + x20 = x30555745 & a6; + x21 = x20 ^ x693CD926; + *out3 ^= x21; + + xF719A695 = x3030CFCF ^ xC729695A; + xF4FF73FF = a4 | xF429738C; + x03E6D56A = xF719A695 ^ xF4FF73FF; + x56B3803F = a1 ^ x03E6D56A; + x30 = x56B3803F & a6; + x31 = x30 ^ xC729695A; + *out4 ^= x31; + + xF700A600 = xF719A695 & ~a4; + x61008000 = x693CD926 & xF700A600; + x03B7856B = x00515001 ^ x03E6D56A; + x62B7056B = x61008000 ^ x03B7856B; + x00 = x62B7056B | a6; + x01 = x00 ^ xC729695A; + *out1 ^= x01; +} + +#endif +/* SWAP(a, b): exchanges the two u32 values that the pointers a and b refer to, using a block-local temporary. The arguments are expanded without parentheses, so pass plain pointer names only. DATASWAP applies SWAP to the 32 pairs (D00, D32) through (D31, D63), i.e. it exchanges the first 32 data words with the last 32; it expects pointers named D00..D63 to be in scope. */ +#define SWAP(a, b) { u32 tmp=*a;*a=*b;*b=tmp; } + +#define DATASWAP \ + SWAP(D00, D32); \ + SWAP(D01, D33); \ + SWAP(D02, D34); \ + SWAP(D03, D35); \ + SWAP(D04, D36); \ + SWAP(D05, D37); \ + SWAP(D06, D38); \ + SWAP(D07, D39); \ + SWAP(D08, D40); \ + SWAP(D09, D41); \ + SWAP(D10, D42); \ + SWAP(D11, D43); \ + SWAP(D12, D44); \ + SWAP(D13, D45); \ + SWAP(D14, D46); \ + SWAP(D15, D47); \ + SWAP(D16, D48); \ + SWAP(D17, D49); \ + SWAP(D18, D50); \ + SWAP(D19, D51); \ + SWAP(D20, D52); \ + SWAP(D21, D53); \ + SWAP(D22, D54); \ + SWAP(D23, D55); \ + SWAP(D24, D56); \ + SWAP(D25, D57); \ + SWAP(D26, D58); \ + SWAP(D27, D59); \ + SWAP(D28, D60); \ + SWAP(D29, D61); \ + SWAP(D30, D62); \ + SWAP(D31, D63); +/* KEYSETin (i = 0 or 1, n = 0..7): each macro is a braced block that assigns the 48 locals k00..k47 from a fixed selection of the 56 inputs K00..K55. DES() expands KEYSET1n when its loop counter i is 1 and KEYSET0n when it is 0, once per step n, so the 16 macros are consumed in the order 00..07 then 10..17. Presumably these are the per-round DES subkey bit selections; TODO confirm the tables against the DES key schedule. */ +#define KEYSET00 { k00 = K08; k01 = K44; k02 = K29; k03 = K52; k04 = K42; k05 = K14; k06 = K28; k07 = K49; k08 = K01; k09 = K07; k10 = K16; k11 = K36; k12 = K02; k13 = K30; 
k14 = K22; k15 = K21; k16 = K38; k17 = K50; k18 = K51; k19 = K00; k20 = K31; k21 = K23; k22 = K15; k23 = K35; k24 = K19; k25 = K24; k26 = K34; k27 = K47; k28 = K32; k29 = K03; k30 = K41; k31 = K26; k32 = K04; k33 = K46; k34 = K20; k35 = K25; k36 = K53; k37 = K18; k38 = K33; k39 = K55; k40 = K13; k41 = K17; k42 = K39; k43 = K12; k44 = K11; k45 = K54; k46 = K48; k47 = K27; } +#define KEYSET10 { k00 = K49; k01 = K28; k02 = K45; k03 = K36; k04 = K01; k05 = K30; k06 = K44; k07 = K08; k08 = K42; k09 = K23; k10 = K00; k11 = K52; k12 = K43; k13 = K14; k14 = K38; k15 = K37; k16 = K22; k17 = K09; k18 = K35; k19 = K16; k20 = K15; k21 = K07; k22 = K31; k23 = K51; k24 = K03; k25 = K40; k26 = K46; k27 = K04; k28 = K20; k29 = K19; k30 = K53; k31 = K10; k32 = K47; k33 = K34; k34 = K32; k35 = K13; k36 = K41; k37 = K06; k38 = K17; k39 = K12; k40 = K25; k41 = K33; k42 = K27; k43 = K55; k44 = K54; k45 = K11; k46 = K05; k47 = K39; } +#define KEYSET01 { k00 = K01; k01 = K37; k02 = K22; k03 = K45; k04 = K35; k05 = K07; k06 = K21; k07 = K42; k08 = K51; k09 = K00; k10 = K09; k11 = K29; k12 = K52; k13 = K23; k14 = K15; k15 = K14; k16 = K31; k17 = K43; k18 = K44; k19 = K50; k20 = K49; k21 = K16; k22 = K08; k23 = K28; k24 = K12; k25 = K17; k26 = K27; k27 = K40; k28 = K25; k29 = K55; k30 = K34; k31 = K19; k32 = K24; k33 = K39; k34 = K13; k35 = K18; k36 = K46; k37 = K11; k38 = K26; k39 = K48; k40 = K06; k41 = K10; k42 = K32; k43 = K05; k44 = K04; k45 = K47; k46 = K41; k47 = K20; } +#define KEYSET11 { k00 = K35; k01 = K14; k02 = K31; k03 = K22; k04 = K44; k05 = K16; k06 = K30; k07 = K51; k08 = K28; k09 = K09; k10 = K43; k11 = K38; k12 = K29; k13 = K00; k14 = K49; k15 = K23; k16 = K08; k17 = K52; k18 = K21; k19 = K02; k20 = K01; k21 = K50; k22 = K42; k23 = K37; k24 = K48; k25 = K26; k26 = K32; k27 = K17; k28 = K06; k29 = K05; k30 = K39; k31 = K55; k32 = K33; k33 = K20; k34 = K18; k35 = K54; k36 = K27; k37 = K47; k38 = K03; k39 = K53; k40 = K11; k41 = K19; k42 = K13; k43 = K41; k44 = K40; k45 = 
K24; k46 = K46; k47 = K25; } +#define KEYSET02 { k00 = K44; k01 = K23; k02 = K08; k03 = K31; k04 = K21; k05 = K50; k06 = K07; k07 = K28; k08 = K37; k09 = K43; k10 = K52; k11 = K15; k12 = K38; k13 = K09; k14 = K01; k15 = K00; k16 = K42; k17 = K29; k18 = K30; k19 = K36; k20 = K35; k21 = K02; k22 = K51; k23 = K14; k24 = K53; k25 = K03; k26 = K13; k27 = K26; k28 = K11; k29 = K41; k30 = K20; k31 = K05; k32 = K10; k33 = K25; k34 = K54; k35 = K04; k36 = K32; k37 = K24; k38 = K12; k39 = K34; k40 = K47; k41 = K55; k42 = K18; k43 = K46; k44 = K17; k45 = K33; k46 = K27; k47 = K06; } +#define KEYSET12 { k00 = K21; k01 = K00; k02 = K42; k03 = K08; k04 = K30; k05 = K02; k06 = K16; k07 = K37; k08 = K14; k09 = K52; k10 = K29; k11 = K49; k12 = K15; k13 = K43; k14 = K35; k15 = K09; k16 = K51; k17 = K38; k18 = K07; k19 = K45; k20 = K44; k21 = K36; k22 = K28; k23 = K23; k24 = K34; k25 = K12; k26 = K18; k27 = K03; k28 = K47; k29 = K46; k30 = K25; k31 = K41; k32 = K19; k33 = K06; k34 = K04; k35 = K40; k36 = K13; k37 = K33; k38 = K48; k39 = K39; k40 = K24; k41 = K05; k42 = K54; k43 = K27; k44 = K26; k45 = K10; k46 = K32; k47 = K11; } +#define KEYSET03 { k00 = K30; k01 = K09; k02 = K51; k03 = K42; k04 = K07; k05 = K36; k06 = K50; k07 = K14; k08 = K23; k09 = K29; k10 = K38; k11 = K01; k12 = K49; k13 = K52; k14 = K44; k15 = K43; k16 = K28; k17 = K15; k18 = K16; k19 = K22; k20 = K21; k21 = K45; k22 = K37; k23 = K00; k24 = K39; k25 = K48; k26 = K54; k27 = K12; k28 = K24; k29 = K27; k30 = K06; k31 = K46; k32 = K55; k33 = K11; k34 = K40; k35 = K17; k36 = K18; k37 = K10; k38 = K53; k39 = K20; k40 = K33; k41 = K41; k42 = K04; k43 = K32; k44 = K03; k45 = K19; k46 = K13; k47 = K47; } +#define KEYSET13 { k00 = K07; k01 = K43; k02 = K28; k03 = K51; k04 = K16; k05 = K45; k06 = K02; k07 = K23; k08 = K00; k09 = K38; k10 = K15; k11 = K35; k12 = K01; k13 = K29; k14 = K21; k15 = K52; k16 = K37; k17 = K49; k18 = K50; k19 = K31; k20 = K30; k21 = K22; k22 = K14; k23 = K09; k24 = K20; k25 = K53; k26 = K04; k27 
= K48; k28 = K33; k29 = K32; k30 = K11; k31 = K27; k32 = K05; k33 = K47; k34 = K17; k35 = K26; k36 = K54; k37 = K19; k38 = K34; k39 = K25; k40 = K10; k41 = K46; k42 = K40; k43 = K13; k44 = K12; k45 = K55; k46 = K18; k47 = K24; } +#define KEYSET04 { k00 = K16; k01 = K52; k02 = K37; k03 = K28; k04 = K50; k05 = K22; k06 = K36; k07 = K00; k08 = K09; k09 = K15; k10 = K49; k11 = K44; k12 = K35; k13 = K38; k14 = K30; k15 = K29; k16 = K14; k17 = K01; k18 = K02; k19 = K08; k20 = K07; k21 = K31; k22 = K23; k23 = K43; k24 = K25; k25 = K34; k26 = K40; k27 = K53; k28 = K10; k29 = K13; k30 = K47; k31 = K32; k32 = K41; k33 = K24; k34 = K26; k35 = K03; k36 = K04; k37 = K55; k38 = K39; k39 = K06; k40 = K19; k41 = K27; k42 = K17; k43 = K18; k44 = K48; k45 = K05; k46 = K54; k47 = K33; } +#define KEYSET14 { k00 = K50; k01 = K29; k02 = K14; k03 = K37; k04 = K02; k05 = K31; k06 = K45; k07 = K09; k08 = K43; k09 = K49; k10 = K01; k11 = K21; k12 = K44; k13 = K15; k14 = K07; k15 = K38; k16 = K23; k17 = K35; k18 = K36; k19 = K42; k20 = K16; k21 = K08; k22 = K00; k23 = K52; k24 = K06; k25 = K39; k26 = K17; k27 = K34; k28 = K19; k29 = K18; k30 = K24; k31 = K13; k32 = K46; k33 = K33; k34 = K03; k35 = K12; k36 = K40; k37 = K05; k38 = K20; k39 = K11; k40 = K55; k41 = K32; k42 = K26; k43 = K54; k44 = K53; k45 = K41; k46 = K04; k47 = K10; } +#define KEYSET05 { k00 = K02; k01 = K38; k02 = K23; k03 = K14; k04 = K36; k05 = K08; k06 = K22; k07 = K43; k08 = K52; k09 = K01; k10 = K35; k11 = K30; k12 = K21; k13 = K49; k14 = K16; k15 = K15; k16 = K00; k17 = K44; k18 = K45; k19 = K51; k20 = K50; k21 = K42; k22 = K09; k23 = K29; k24 = K11; k25 = K20; k26 = K26; k27 = K39; k28 = K55; k29 = K54; k30 = K33; k31 = K18; k32 = K27; k33 = K10; k34 = K12; k35 = K48; k36 = K17; k37 = K41; k38 = K25; k39 = K47; k40 = K05; k41 = K13; k42 = K03; k43 = K04; k44 = K34; k45 = K46; k46 = K40; k47 = K19; } +#define KEYSET15 { k00 = K36; k01 = K15; k02 = K00; k03 = K23; k04 = K45; k05 = K42; k06 = K31; k07 = K52; k08 = K29; 
k09 = K35; k10 = K44; k11 = K07; k12 = K30; k13 = K01; k14 = K50; k15 = K49; k16 = K09; k17 = K21; k18 = K22; k19 = K28; k20 = K02; k21 = K51; k22 = K43; k23 = K38; k24 = K47; k25 = K25; k26 = K03; k27 = K20; k28 = K05; k29 = K04; k30 = K10; k31 = K54; k32 = K32; k33 = K19; k34 = K48; k35 = K53; k36 = K26; k37 = K46; k38 = K06; k39 = K24; k40 = K41; k41 = K18; k42 = K12; k43 = K40; k44 = K39; k45 = K27; k46 = K17; k47 = K55; } +#define KEYSET06 { k00 = K45; k01 = K49; k02 = K09; k03 = K00; k04 = K22; k05 = K51; k06 = K08; k07 = K29; k08 = K38; k09 = K44; k10 = K21; k11 = K16; k12 = K07; k13 = K35; k14 = K02; k15 = K01; k16 = K43; k17 = K30; k18 = K31; k19 = K37; k20 = K36; k21 = K28; k22 = K52; k23 = K15; k24 = K24; k25 = K06; k26 = K12; k27 = K25; k28 = K41; k29 = K40; k30 = K19; k31 = K04; k32 = K13; k33 = K55; k34 = K53; k35 = K34; k36 = K03; k37 = K27; k38 = K11; k39 = K33; k40 = K46; k41 = K54; k42 = K48; k43 = K17; k44 = K20; k45 = K32; k46 = K26; k47 = K05; } +#define KEYSET16 { k00 = K22; k01 = K01; k02 = K43; k03 = K09; k04 = K31; k05 = K28; k06 = K42; k07 = K38; k08 = K15; k09 = K21; k10 = K30; k11 = K50; k12 = K16; k13 = K44; k14 = K36; k15 = K35; k16 = K52; k17 = K07; k18 = K08; k19 = K14; k20 = K45; k21 = K37; k22 = K29; k23 = K49; k24 = K33; k25 = K11; k26 = K48; k27 = K06; k28 = K46; k29 = K17; k30 = K55; k31 = K40; k32 = K18; k33 = K05; k34 = K34; k35 = K39; k36 = K12; k37 = K32; k38 = K47; k39 = K10; k40 = K27; k41 = K04; k42 = K53; k43 = K26; k44 = K25; k45 = K13; k46 = K03; k47 = K41; } +#define KEYSET07 { k00 = K31; k01 = K35; k02 = K52; k03 = K43; k04 = K08; k05 = K37; k06 = K51; k07 = K15; k08 = K49; k09 = K30; k10 = K07; k11 = K02; k12 = K50; k13 = K21; k14 = K45; k15 = K44; k16 = K29; k17 = K16; k18 = K42; k19 = K23; k20 = K22; k21 = K14; k22 = K38; k23 = K01; k24 = K10; k25 = K47; k26 = K53; k27 = K11; k28 = K27; k29 = K26; k30 = K05; k31 = K17; k32 = K54; k33 = K41; k34 = K39; k35 = K20; k36 = K48; k37 = K13; k38 = K24; k39 = K19; k40 = 
K32; k41 = K40; k42 = K34; k43 = K03; k44 = K06; k45 = K18; k46 = K12; k47 = K46; } +#define KEYSET17 { k00 = K15; k01 = K51; k02 = K36; k03 = K02; k04 = K49; k05 = K21; k06 = K35; k07 = K31; k08 = K08; k09 = K14; k10 = K23; k11 = K43; k12 = K09; k13 = K37; k14 = K29; k15 = K28; k16 = K45; k17 = K00; k18 = K01; k19 = K07; k20 = K38; k21 = K30; k22 = K22; k23 = K42; k24 = K26; k25 = K04; k26 = K41; k27 = K54; k28 = K39; k29 = K10; k30 = K48; k31 = K33; k32 = K11; k33 = K53; k34 = K27; k35 = K32; k36 = K05; k37 = K25; k38 = K40; k39 = K03; k40 = K20; k41 = K24; k42 = K46; k43 = K19; k44 = K18; k45 = K06; k46 = K55; k47 = K34; } +/* DES: transforms the 64 data words behind D00..D63 in place, keyed by the 56 read-only words K00..K55. The loop runs twice (i = 0, 1) and each iteration performs eight steps, 16 in total. Every step first loads k00..k47 through the matching KEYSETin macro and then calls s1..s8 once each. Steps alternate direction: an even step feeds words from D32..D63 (XORed with k values) into the s-functions, which XOR their outputs into D00..D31; an odd step reads D00..D31 and updates D32..D63. Neighbouring s-calls share two input words (for example D35 and D36 go to both s1 and s2). No reordering of the D words happens inside this function. Presumably these are the 16 Feistel rounds of bitsliced DES with any initial or final permutation left to the caller; TODO confirm at the call sites. */ +void DES (const u32 K00, const u32 K01, const u32 K02, const u32 K03, const u32 K04, const u32 K05, const u32 K06, const u32 K07, const u32 K08, const u32 K09, const u32 K10, const u32 K11, const u32 K12, const u32 K13, const u32 K14, const u32 K15, const u32 K16, const u32 K17, const u32 K18, const u32 K19, const u32 K20, const u32 K21, const u32 K22, const u32 K23, const u32 K24, const u32 K25, const u32 K26, const u32 K27, const u32 K28, const u32 K29, const u32 K30, const u32 K31, const u32 K32, const u32 K33, const u32 K34, const u32 K35, const u32 K36, const u32 K37, const u32 K38, const u32 K39, const u32 K40, const u32 K41, const u32 K42, const u32 K43, const u32 K44, const u32 K45, const u32 K46, const u32 K47, const u32 K48, const u32 K49, const u32 K50, const u32 K51, const u32 K52, const u32 K53, const u32 K54, const u32 K55, u32 *D00, u32 *D01, u32 *D02, u32 *D03, u32 *D04, u32 *D05, u32 *D06, u32 *D07, u32 *D08, u32 *D09, u32 *D10, u32 *D11, u32 *D12, u32 *D13, u32 *D14, u32 *D15, u32 *D16, u32 *D17, u32 *D18, u32 *D19, u32 *D20, u32 *D21, u32 *D22, u32 *D23, u32 *D24, u32 *D25, u32 *D26, u32 *D27, u32 *D28, u32 *D29, u32 *D30, u32 *D31, u32 *D32, u32 *D33, u32 *D34, u32 *D35, u32 *D36, u32 *D37, u32 *D38, u32 *D39, u32 *D40, u32 *D41, u32 *D42, u32 *D43, u32 *D44, u32 *D45, u32 *D46, u32 *D47, u32 *D48, u32 *D49, u32 *D50, 
u32 *D51, u32 *D52, u32 *D53, u32 *D54, u32 *D55, u32 *D56, u32 *D57, u32 *D58, u32 *D59, u32 *D60, u32 *D61, u32 *D62, u32 *D63) +{ + KXX_DECL u32 k00, k01, k02, k03, k04, k05; + KXX_DECL u32 k06, k07, k08, k09, k10, k11; + KXX_DECL u32 k12, k13, k14, k15, k16, k17; + KXX_DECL u32 k18, k19, k20, k21, k22, k23; + KXX_DECL u32 k24, k25, k26, k27, k28, k29; + KXX_DECL u32 k30, k31, k32, k33, k34, k35; + KXX_DECL u32 k36, k37, k38, k39, k40, k41; + KXX_DECL u32 k42, k43, k44, k45, k46, k47; + + #ifdef _unroll + #pragma unroll + #endif + for (u32 i = 0; i < 2; i++) + { + if (i) KEYSET10 else KEYSET00 + + s1(*D63 ^ k00, *D32 ^ k01, *D33 ^ k02, *D34 ^ k03, *D35 ^ k04, *D36 ^ k05, D08, D16, D22, D30); + s2(*D35 ^ k06, *D36 ^ k07, *D37 ^ k08, *D38 ^ k09, *D39 ^ k10, *D40 ^ k11, D12, D27, D01, D17); + s3(*D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05); + s4(*D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00); + s5(*D47 ^ k24, *D48 ^ k25, *D49 ^ k26, *D50 ^ k27, *D51 ^ k28, *D52 ^ k29, D07, D13, D24, D02); + s6(*D51 ^ k30, *D52 ^ k31, *D53 ^ k32, *D54 ^ k33, *D55 ^ k34, *D56 ^ k35, D03, D28, D10, D18); + s7(*D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06); + s8(*D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20); + + if (i) KEYSET11 else KEYSET01 + + s1(*D31 ^ k00, *D00 ^ k01, *D01 ^ k02, *D02 ^ k03, *D03 ^ k04, *D04 ^ k05, D40, D48, D54, D62); + s2(*D03 ^ k06, *D04 ^ k07, *D05 ^ k08, *D06 ^ k09, *D07 ^ k10, *D08 ^ k11, D44, D59, D33, D49); + s3(*D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37); + s4(*D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32); + s5(*D15 ^ k24, *D16 ^ k25, *D17 ^ k26, *D18 ^ k27, *D19 ^ k28, *D20 ^ k29, D39, D45, D56, D34); + s6(*D19 ^ k30, *D20 ^ k31, *D21 ^ k32, *D22 ^ k33, *D23 ^ k34, *D24 ^ k35, D35, 
D60, D42, D50); + s7(*D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38); + s8(*D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52); + + if (i) KEYSET12 else KEYSET02 + + s1(*D63 ^ k00, *D32 ^ k01, *D33 ^ k02, *D34 ^ k03, *D35 ^ k04, *D36 ^ k05, D08, D16, D22, D30); + s2(*D35 ^ k06, *D36 ^ k07, *D37 ^ k08, *D38 ^ k09, *D39 ^ k10, *D40 ^ k11, D12, D27, D01, D17); + s3(*D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05); + s4(*D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00); + s5(*D47 ^ k24, *D48 ^ k25, *D49 ^ k26, *D50 ^ k27, *D51 ^ k28, *D52 ^ k29, D07, D13, D24, D02); + s6(*D51 ^ k30, *D52 ^ k31, *D53 ^ k32, *D54 ^ k33, *D55 ^ k34, *D56 ^ k35, D03, D28, D10, D18); + s7(*D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06); + s8(*D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20); + + if (i) KEYSET13 else KEYSET03 + + s1(*D31 ^ k00, *D00 ^ k01, *D01 ^ k02, *D02 ^ k03, *D03 ^ k04, *D04 ^ k05, D40, D48, D54, D62); + s2(*D03 ^ k06, *D04 ^ k07, *D05 ^ k08, *D06 ^ k09, *D07 ^ k10, *D08 ^ k11, D44, D59, D33, D49); + s3(*D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37); + s4(*D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32); + s5(*D15 ^ k24, *D16 ^ k25, *D17 ^ k26, *D18 ^ k27, *D19 ^ k28, *D20 ^ k29, D39, D45, D56, D34); + s6(*D19 ^ k30, *D20 ^ k31, *D21 ^ k32, *D22 ^ k33, *D23 ^ k34, *D24 ^ k35, D35, D60, D42, D50); + s7(*D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38); + s8(*D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52); + + if (i) KEYSET14 else KEYSET04 + + s1(*D63 ^ k00, *D32 ^ k01, *D33 ^ k02, *D34 ^ k03, *D35 ^ k04, *D36 ^ k05, D08, D16, D22, D30); + s2(*D35 ^ 
k06, *D36 ^ k07, *D37 ^ k08, *D38 ^ k09, *D39 ^ k10, *D40 ^ k11, D12, D27, D01, D17); + s3(*D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05); + s4(*D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00); + s5(*D47 ^ k24, *D48 ^ k25, *D49 ^ k26, *D50 ^ k27, *D51 ^ k28, *D52 ^ k29, D07, D13, D24, D02); + s6(*D51 ^ k30, *D52 ^ k31, *D53 ^ k32, *D54 ^ k33, *D55 ^ k34, *D56 ^ k35, D03, D28, D10, D18); + s7(*D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06); + s8(*D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20); + + if (i) KEYSET15 else KEYSET05 + + s1(*D31 ^ k00, *D00 ^ k01, *D01 ^ k02, *D02 ^ k03, *D03 ^ k04, *D04 ^ k05, D40, D48, D54, D62); + s2(*D03 ^ k06, *D04 ^ k07, *D05 ^ k08, *D06 ^ k09, *D07 ^ k10, *D08 ^ k11, D44, D59, D33, D49); + s3(*D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37); + s4(*D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32); + s5(*D15 ^ k24, *D16 ^ k25, *D17 ^ k26, *D18 ^ k27, *D19 ^ k28, *D20 ^ k29, D39, D45, D56, D34); + s6(*D19 ^ k30, *D20 ^ k31, *D21 ^ k32, *D22 ^ k33, *D23 ^ k34, *D24 ^ k35, D35, D60, D42, D50); + s7(*D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38); + s8(*D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52); + + if (i) KEYSET16 else KEYSET06 + + s1(*D63 ^ k00, *D32 ^ k01, *D33 ^ k02, *D34 ^ k03, *D35 ^ k04, *D36 ^ k05, D08, D16, D22, D30); + s2(*D35 ^ k06, *D36 ^ k07, *D37 ^ k08, *D38 ^ k09, *D39 ^ k10, *D40 ^ k11, D12, D27, D01, D17); + s3(*D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05); + s4(*D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00); + s5(*D47 ^ k24, *D48 ^ k25, *D49 ^ k26, *D50 ^ k27, *D51 ^ k28, *D52 ^ k29, 
D07, D13, D24, D02); + s6(*D51 ^ k30, *D52 ^ k31, *D53 ^ k32, *D54 ^ k33, *D55 ^ k34, *D56 ^ k35, D03, D28, D10, D18); + s7(*D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06); + s8(*D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20); + + if (i) KEYSET17 else KEYSET07 + + s1(*D31 ^ k00, *D00 ^ k01, *D01 ^ k02, *D02 ^ k03, *D03 ^ k04, *D04 ^ k05, D40, D48, D54, D62); + s2(*D03 ^ k06, *D04 ^ k07, *D05 ^ k08, *D06 ^ k09, *D07 ^ k10, *D08 ^ k11, D44, D59, D33, D49); + s3(*D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37); + s4(*D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32); + s5(*D15 ^ k24, *D16 ^ k25, *D17 ^ k26, *D18 ^ k27, *D19 ^ k28, *D20 ^ k29, D39, D45, D56, D34); + s6(*D19 ^ k30, *D20 ^ k31, *D21 ^ k32, *D22 ^ k33, *D23 ^ k34, *D24 ^ k35, D35, D60, D42, D50); + s7(*D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38); + s8(*D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52); + } +} +/* transpose32c: rearranges the bits of the 32 u32 words in data, in place, through a fixed sequence of masked exchanges with shift distances 16, 8, 4, 2 and 1. The helper macro swap(x,y,j,m) exchanges the bits of x selected by mask m with the bits of y selected by m shifted left by j, using the shared temporary t; it is defined inside the function body and is not removed with #undef afterwards, so the name swap stays defined for the rest of the file. Presumably the net effect is a 32x32 bit-matrix transpose, as the name suggests; TODO confirm the resulting bit order against the callers. */ +void transpose32c (u32 data[32]) +{ + #define swap(x,y,j,m) \ + t = ((x) ^ ((y) >> (j))) & (m); \ + (x) = (x) ^ t; \ + (y) = (y) ^ (t << (j)); + + u32 t; + + swap (data[ 0], data[16], 16, 0x0000ffff); + swap (data[ 1], data[17], 16, 0x0000ffff); + swap (data[ 2], data[18], 16, 0x0000ffff); + swap (data[ 3], data[19], 16, 0x0000ffff); + swap (data[ 4], data[20], 16, 0x0000ffff); + swap (data[ 5], data[21], 16, 0x0000ffff); + swap (data[ 6], data[22], 16, 0x0000ffff); + swap (data[ 7], data[23], 16, 0x0000ffff); + swap (data[ 8], data[24], 16, 0x0000ffff); + swap (data[ 9], data[25], 16, 0x0000ffff); + swap (data[10], data[26], 16, 0x0000ffff); + swap (data[11], data[27], 16, 0x0000ffff); + swap (data[12], data[28], 16, 0x0000ffff); + swap (data[13], data[29], 16, 0x0000ffff); + swap (data[14], data[30], 16, 0x0000ffff); + swap (data[15], 
data[31], 16, 0x0000ffff); + swap (data[ 0], data[ 8], 8, 0x00ff00ff); + swap (data[ 1], data[ 9], 8, 0x00ff00ff); + swap (data[ 2], data[10], 8, 0x00ff00ff); + swap (data[ 3], data[11], 8, 0x00ff00ff); + swap (data[ 4], data[12], 8, 0x00ff00ff); + swap (data[ 5], data[13], 8, 0x00ff00ff); + swap (data[ 6], data[14], 8, 0x00ff00ff); + swap (data[ 7], data[15], 8, 0x00ff00ff); + swap (data[ 0], data[ 4], 4, 0x0f0f0f0f); + swap (data[ 1], data[ 5], 4, 0x0f0f0f0f); + swap (data[ 2], data[ 6], 4, 0x0f0f0f0f); + swap (data[ 3], data[ 7], 4, 0x0f0f0f0f); + swap (data[ 0], data[ 2], 2, 0x33333333); + swap (data[ 1], data[ 3], 2, 0x33333333); + swap (data[ 0], data[ 1], 1, 0x55555555); + swap (data[ 2], data[ 3], 1, 0x55555555); + swap (data[ 4], data[ 6], 2, 0x33333333); + swap (data[ 5], data[ 7], 2, 0x33333333); + swap (data[ 4], data[ 5], 1, 0x55555555); + swap (data[ 6], data[ 7], 1, 0x55555555); + swap (data[ 8], data[12], 4, 0x0f0f0f0f); + swap (data[ 9], data[13], 4, 0x0f0f0f0f); + swap (data[10], data[14], 4, 0x0f0f0f0f); + swap (data[11], data[15], 4, 0x0f0f0f0f); + swap (data[ 8], data[10], 2, 0x33333333); + swap (data[ 9], data[11], 2, 0x33333333); + swap (data[ 8], data[ 9], 1, 0x55555555); + swap (data[10], data[11], 1, 0x55555555); + swap (data[12], data[14], 2, 0x33333333); + swap (data[13], data[15], 2, 0x33333333); + swap (data[12], data[13], 1, 0x55555555); + swap (data[14], data[15], 1, 0x55555555); + swap (data[16], data[24], 8, 0x00ff00ff); + swap (data[17], data[25], 8, 0x00ff00ff); + swap (data[18], data[26], 8, 0x00ff00ff); + swap (data[19], data[27], 8, 0x00ff00ff); + swap (data[20], data[28], 8, 0x00ff00ff); + swap (data[21], data[29], 8, 0x00ff00ff); + swap (data[22], data[30], 8, 0x00ff00ff); + swap (data[23], data[31], 8, 0x00ff00ff); + swap (data[16], data[20], 4, 0x0f0f0f0f); + swap (data[17], data[21], 4, 0x0f0f0f0f); + swap (data[18], data[22], 4, 0x0f0f0f0f); + swap (data[19], data[23], 4, 0x0f0f0f0f); + swap (data[16], data[18], 2, 
0x33333333); + swap (data[17], data[19], 2, 0x33333333); + swap (data[16], data[17], 1, 0x55555555); + swap (data[18], data[19], 1, 0x55555555); + swap (data[20], data[22], 2, 0x33333333); + swap (data[21], data[23], 2, 0x33333333); + swap (data[20], data[21], 1, 0x55555555); + swap (data[22], data[23], 1, 0x55555555); + swap (data[24], data[28], 4, 0x0f0f0f0f); + swap (data[25], data[29], 4, 0x0f0f0f0f); + swap (data[26], data[30], 4, 0x0f0f0f0f); + swap (data[27], data[31], 4, 0x0f0f0f0f); + swap (data[24], data[26], 2, 0x33333333); + swap (data[25], data[27], 2, 0x33333333); + swap (data[24], data[25], 1, 0x55555555); + swap (data[26], data[27], 1, 0x55555555); + swap (data[28], data[30], 2, 0x33333333); + swap (data[29], data[31], 2, 0x33333333); + swap (data[28], data[29], 1, 0x55555555); + swap (data[30], data[31], 1, 0x55555555); +} + +void m14000m (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset) +{ + /** + * base + */ + + const u32 gid = get_global_id (0); + const u32 lid = get_local_id (0); + + /** + * salt + */ + + const u32 salt0 = salt_bufs[salt_pos].salt_buf_pc[0]; + const u32 salt1 = 
salt_bufs[salt_pos].salt_buf_pc[1]; + + // salt1 first, because this is a 64 bit value actually + + #define d00 (((salt1 >> 0) & 1) ? -1 : 0) + #define d01 (((salt1 >> 1) & 1) ? -1 : 0) + #define d02 (((salt1 >> 2) & 1) ? -1 : 0) + #define d03 (((salt1 >> 3) & 1) ? -1 : 0) + #define d04 (((salt1 >> 4) & 1) ? -1 : 0) + #define d05 (((salt1 >> 5) & 1) ? -1 : 0) + #define d06 (((salt1 >> 6) & 1) ? -1 : 0) + #define d07 (((salt1 >> 7) & 1) ? -1 : 0) + #define d08 (((salt1 >> 8) & 1) ? -1 : 0) + #define d09 (((salt1 >> 9) & 1) ? -1 : 0) + #define d10 (((salt1 >> 10) & 1) ? -1 : 0) + #define d11 (((salt1 >> 11) & 1) ? -1 : 0) + #define d12 (((salt1 >> 12) & 1) ? -1 : 0) + #define d13 (((salt1 >> 13) & 1) ? -1 : 0) + #define d14 (((salt1 >> 14) & 1) ? -1 : 0) + #define d15 (((salt1 >> 15) & 1) ? -1 : 0) + #define d16 (((salt1 >> 16) & 1) ? -1 : 0) + #define d17 (((salt1 >> 17) & 1) ? -1 : 0) + #define d18 (((salt1 >> 18) & 1) ? -1 : 0) + #define d19 (((salt1 >> 19) & 1) ? -1 : 0) + #define d20 (((salt1 >> 20) & 1) ? -1 : 0) + #define d21 (((salt1 >> 21) & 1) ? -1 : 0) + #define d22 (((salt1 >> 22) & 1) ? -1 : 0) + #define d23 (((salt1 >> 23) & 1) ? -1 : 0) + #define d24 (((salt1 >> 24) & 1) ? -1 : 0) + #define d25 (((salt1 >> 25) & 1) ? -1 : 0) + #define d26 (((salt1 >> 26) & 1) ? -1 : 0) + #define d27 (((salt1 >> 27) & 1) ? -1 : 0) + #define d28 (((salt1 >> 28) & 1) ? -1 : 0) + #define d29 (((salt1 >> 29) & 1) ? -1 : 0) + #define d30 (((salt1 >> 30) & 1) ? -1 : 0) + #define d31 (((salt1 >> 31) & 1) ? -1 : 0) + #define d32 (((salt0 >> 0) & 1) ? -1 : 0) + #define d33 (((salt0 >> 1) & 1) ? -1 : 0) + #define d34 (((salt0 >> 2) & 1) ? -1 : 0) + #define d35 (((salt0 >> 3) & 1) ? -1 : 0) + #define d36 (((salt0 >> 4) & 1) ? -1 : 0) + #define d37 (((salt0 >> 5) & 1) ? -1 : 0) + #define d38 (((salt0 >> 6) & 1) ? -1 : 0) + #define d39 (((salt0 >> 7) & 1) ? -1 : 0) + #define d40 (((salt0 >> 8) & 1) ? -1 : 0) + #define d41 (((salt0 >> 9) & 1) ? 
-1 : 0) + #define d42 (((salt0 >> 10) & 1) ? -1 : 0) + #define d43 (((salt0 >> 11) & 1) ? -1 : 0) + #define d44 (((salt0 >> 12) & 1) ? -1 : 0) + #define d45 (((salt0 >> 13) & 1) ? -1 : 0) + #define d46 (((salt0 >> 14) & 1) ? -1 : 0) + #define d47 (((salt0 >> 15) & 1) ? -1 : 0) + #define d48 (((salt0 >> 16) & 1) ? -1 : 0) + #define d49 (((salt0 >> 17) & 1) ? -1 : 0) + #define d50 (((salt0 >> 18) & 1) ? -1 : 0) + #define d51 (((salt0 >> 19) & 1) ? -1 : 0) + #define d52 (((salt0 >> 20) & 1) ? -1 : 0) + #define d53 (((salt0 >> 21) & 1) ? -1 : 0) + #define d54 (((salt0 >> 22) & 1) ? -1 : 0) + #define d55 (((salt0 >> 23) & 1) ? -1 : 0) + #define d56 (((salt0 >> 24) & 1) ? -1 : 0) + #define d57 (((salt0 >> 25) & 1) ? -1 : 0) + #define d58 (((salt0 >> 26) & 1) ? -1 : 0) + #define d59 (((salt0 >> 27) & 1) ? -1 : 0) + #define d60 (((salt0 >> 28) & 1) ? -1 : 0) + #define d61 (((salt0 >> 29) & 1) ? -1 : 0) + #define d62 (((salt0 >> 30) & 1) ? -1 : 0) + #define d63 (((salt0 >> 31) & 1) ? -1 : 0) + + u32 D00 = d00; + u32 D01 = d01; + u32 D02 = d02; + u32 D03 = d03; + u32 D04 = d04; + u32 D05 = d05; + u32 D06 = d06; + u32 D07 = d07; + u32 D08 = d08; + u32 D09 = d09; + u32 D10 = d10; + u32 D11 = d11; + u32 D12 = d12; + u32 D13 = d13; + u32 D14 = d14; + u32 D15 = d15; + u32 D16 = d16; + u32 D17 = d17; + u32 D18 = d18; + u32 D19 = d19; + u32 D20 = d20; + u32 D21 = d21; + u32 D22 = d22; + u32 D23 = d23; + u32 D24 = d24; + u32 D25 = d25; + u32 D26 = d26; + u32 D27 = d27; + u32 D28 = d28; + u32 D29 = d29; + u32 D30 = d30; + u32 D31 = d31; + u32 D32 = d32; + u32 D33 = d33; + u32 D34 = d34; + u32 D35 = d35; + u32 D36 = d36; + u32 D37 = d37; + u32 D38 = d38; + u32 D39 = d39; + u32 D40 = d40; + u32 D41 = d41; + u32 D42 = d42; + u32 D43 = d43; + u32 D44 = d44; + u32 D45 = d45; + u32 D46 = d46; + u32 D47 = d47; + u32 D48 = d48; + u32 D49 = d49; + u32 D50 = d50; + u32 D51 = d51; + u32 D52 = d52; + u32 D53 = d53; + u32 D54 = d54; + u32 D55 = d55; + u32 D56 = d56; + u32 D57 = d57; + u32 D58 = 
d58; + u32 D59 = d59; + u32 D60 = d60; + u32 D61 = d61; + u32 D62 = d62; + u32 D63 = d63; + + /** + * digest + */ + + const u32 s0 = digests_buf[0].digest_buf[0]; + const u32 s1 = digests_buf[0].digest_buf[1]; + + #define S00 (((s0 >> 0) & 1) ? -1 : 0) + #define S01 (((s0 >> 1) & 1) ? -1 : 0) + #define S02 (((s0 >> 2) & 1) ? -1 : 0) + #define S03 (((s0 >> 3) & 1) ? -1 : 0) + #define S04 (((s0 >> 4) & 1) ? -1 : 0) + #define S05 (((s0 >> 5) & 1) ? -1 : 0) + #define S06 (((s0 >> 6) & 1) ? -1 : 0) + #define S07 (((s0 >> 7) & 1) ? -1 : 0) + #define S08 (((s0 >> 8) & 1) ? -1 : 0) + #define S09 (((s0 >> 9) & 1) ? -1 : 0) + #define S10 (((s0 >> 10) & 1) ? -1 : 0) + #define S11 (((s0 >> 11) & 1) ? -1 : 0) + #define S12 (((s0 >> 12) & 1) ? -1 : 0) + #define S13 (((s0 >> 13) & 1) ? -1 : 0) + #define S14 (((s0 >> 14) & 1) ? -1 : 0) + #define S15 (((s0 >> 15) & 1) ? -1 : 0) + #define S16 (((s0 >> 16) & 1) ? -1 : 0) + #define S17 (((s0 >> 17) & 1) ? -1 : 0) + #define S18 (((s0 >> 18) & 1) ? -1 : 0) + #define S19 (((s0 >> 19) & 1) ? -1 : 0) + #define S20 (((s0 >> 20) & 1) ? -1 : 0) + #define S21 (((s0 >> 21) & 1) ? -1 : 0) + #define S22 (((s0 >> 22) & 1) ? -1 : 0) + #define S23 (((s0 >> 23) & 1) ? -1 : 0) + #define S24 (((s0 >> 24) & 1) ? -1 : 0) + #define S25 (((s0 >> 25) & 1) ? -1 : 0) + #define S26 (((s0 >> 26) & 1) ? -1 : 0) + #define S27 (((s0 >> 27) & 1) ? -1 : 0) + #define S28 (((s0 >> 28) & 1) ? -1 : 0) + #define S29 (((s0 >> 29) & 1) ? -1 : 0) + #define S30 (((s0 >> 30) & 1) ? -1 : 0) + #define S31 (((s0 >> 31) & 1) ? -1 : 0) + #define S32 (((s1 >> 0) & 1) ? -1 : 0) + #define S33 (((s1 >> 1) & 1) ? -1 : 0) + #define S34 (((s1 >> 2) & 1) ? -1 : 0) + #define S35 (((s1 >> 3) & 1) ? -1 : 0) + #define S36 (((s1 >> 4) & 1) ? -1 : 0) + #define S37 (((s1 >> 5) & 1) ? -1 : 0) + #define S38 (((s1 >> 6) & 1) ? -1 : 0) + #define S39 (((s1 >> 7) & 1) ? -1 : 0) + #define S40 (((s1 >> 8) & 1) ? -1 : 0) + #define S41 (((s1 >> 9) & 1) ? -1 : 0) + #define S42 (((s1 >> 10) & 1) ? 
-1 : 0) + #define S43 (((s1 >> 11) & 1) ? -1 : 0) + #define S44 (((s1 >> 12) & 1) ? -1 : 0) + #define S45 (((s1 >> 13) & 1) ? -1 : 0) + #define S46 (((s1 >> 14) & 1) ? -1 : 0) + #define S47 (((s1 >> 15) & 1) ? -1 : 0) + #define S48 (((s1 >> 16) & 1) ? -1 : 0) + #define S49 (((s1 >> 17) & 1) ? -1 : 0) + #define S50 (((s1 >> 18) & 1) ? -1 : 0) + #define S51 (((s1 >> 19) & 1) ? -1 : 0) + #define S52 (((s1 >> 20) & 1) ? -1 : 0) + #define S53 (((s1 >> 21) & 1) ? -1 : 0) + #define S54 (((s1 >> 22) & 1) ? -1 : 0) + #define S55 (((s1 >> 23) & 1) ? -1 : 0) + #define S56 (((s1 >> 24) & 1) ? -1 : 0) + #define S57 (((s1 >> 25) & 1) ? -1 : 0) + #define S58 (((s1 >> 26) & 1) ? -1 : 0) + #define S59 (((s1 >> 27) & 1) ? -1 : 0) + #define S60 (((s1 >> 28) & 1) ? -1 : 0) + #define S61 (((s1 >> 29) & 1) ? -1 : 0) + #define S62 (((s1 >> 30) & 1) ? -1 : 0) + #define S63 (((s1 >> 31) & 1) ? -1 : 0) + + /** + * base + */ + + const u32 w0 = pws[gid].i[0]; + const u32 w1 = pws[gid].i[1]; + + #define K00 (((w0 >> ( 0 + 7)) & 1) ? -1 : 0) + #define K01 (((w0 >> ( 0 + 6)) & 1) ? -1 : 0) + #define K02 (((w0 >> ( 0 + 5)) & 1) ? -1 : 0) + #define K03 (((w0 >> ( 0 + 4)) & 1) ? -1 : 0) + #define K04 (((w0 >> ( 0 + 3)) & 1) ? -1 : 0) + #define K05 (((w0 >> ( 0 + 2)) & 1) ? -1 : 0) + #define K06 (((w0 >> ( 0 + 1)) & 1) ? -1 : 0) + #define K07 (((w0 >> ( 8 + 7)) & 1) ? -1 : 0) + #define K08 (((w0 >> ( 8 + 6)) & 1) ? -1 : 0) + #define K09 (((w0 >> ( 8 + 5)) & 1) ? -1 : 0) + #define K10 (((w0 >> ( 8 + 4)) & 1) ? -1 : 0) + #define K11 (((w0 >> ( 8 + 3)) & 1) ? -1 : 0) + #define K12 (((w0 >> ( 8 + 2)) & 1) ? -1 : 0) + #define K13 (((w0 >> ( 8 + 1)) & 1) ? -1 : 0) + #define K14 (((w0 >> (16 + 7)) & 1) ? -1 : 0) + #define K15 (((w0 >> (16 + 6)) & 1) ? -1 : 0) + #define K16 (((w0 >> (16 + 5)) & 1) ? -1 : 0) + #define K17 (((w0 >> (16 + 4)) & 1) ? -1 : 0) + #define K18 (((w0 >> (16 + 3)) & 1) ? -1 : 0) + #define K19 (((w0 >> (16 + 2)) & 1) ? -1 : 0) + #define K20 (((w0 >> (16 + 1)) & 1) ? 
-1 : 0) + #define K21 (((w0 >> (24 + 7)) & 1) ? -1 : 0) + #define K22 (((w0 >> (24 + 6)) & 1) ? -1 : 0) + #define K23 (((w0 >> (24 + 5)) & 1) ? -1 : 0) + #define K24 (((w0 >> (24 + 4)) & 1) ? -1 : 0) + #define K25 (((w0 >> (24 + 3)) & 1) ? -1 : 0) + #define K26 (((w0 >> (24 + 2)) & 1) ? -1 : 0) + #define K27 (((w0 >> (24 + 1)) & 1) ? -1 : 0) + #define K28 (((w1 >> ( 0 + 7)) & 1) ? -1 : 0) + #define K29 (((w1 >> ( 0 + 6)) & 1) ? -1 : 0) + #define K30 (((w1 >> ( 0 + 5)) & 1) ? -1 : 0) + #define K31 (((w1 >> ( 0 + 4)) & 1) ? -1 : 0) + #define K32 (((w1 >> ( 0 + 3)) & 1) ? -1 : 0) + #define K33 (((w1 >> ( 0 + 2)) & 1) ? -1 : 0) + #define K34 (((w1 >> ( 0 + 1)) & 1) ? -1 : 0) + #define K35 (((w1 >> ( 8 + 7)) & 1) ? -1 : 0) + #define K36 (((w1 >> ( 8 + 6)) & 1) ? -1 : 0) + #define K37 (((w1 >> ( 8 + 5)) & 1) ? -1 : 0) + #define K38 (((w1 >> ( 8 + 4)) & 1) ? -1 : 0) + #define K39 (((w1 >> ( 8 + 3)) & 1) ? -1 : 0) + #define K40 (((w1 >> ( 8 + 2)) & 1) ? -1 : 0) + #define K41 (((w1 >> ( 8 + 1)) & 1) ? -1 : 0) + #define K42 (((w1 >> (16 + 7)) & 1) ? -1 : 0) + #define K43 (((w1 >> (16 + 6)) & 1) ? -1 : 0) + #define K44 (((w1 >> (16 + 5)) & 1) ? -1 : 0) + #define K45 (((w1 >> (16 + 4)) & 1) ? -1 : 0) + #define K46 (((w1 >> (16 + 3)) & 1) ? -1 : 0) + #define K47 (((w1 >> (16 + 2)) & 1) ? -1 : 0) + #define K48 (((w1 >> (16 + 1)) & 1) ? -1 : 0) + #define K49 (((w1 >> (24 + 7)) & 1) ? -1 : 0) + #define K50 (((w1 >> (24 + 6)) & 1) ? -1 : 0) + #define K51 (((w1 >> (24 + 5)) & 1) ? -1 : 0) + #define K52 (((w1 >> (24 + 4)) & 1) ? -1 : 0) + #define K53 (((w1 >> (24 + 3)) & 1) ? -1 : 0) + #define K54 (((w1 >> (24 + 2)) & 1) ? -1 : 0) + #define K55 (((w1 >> (24 + 1)) & 1) ? 
-1 : 0) + + /** + * inner loop + */ + + const u32 pc_pos = get_local_id (1); + + const u32 il_pos = pc_pos * 32; + + u32 k00 = K00; + u32 k01 = K01; + u32 k02 = K02; + u32 k03 = K03; + u32 k04 = K04; + u32 k05 = K05; + u32 k06 = K06; + u32 k07 = K07; + u32 k08 = K08; + u32 k09 = K09; + u32 k10 = K10; + u32 k11 = K11; + u32 k12 = K12; + u32 k13 = K13; + u32 k14 = K14; + u32 k15 = K15; + u32 k16 = K16; + u32 k17 = K17; + u32 k18 = K18; + u32 k19 = K19; + u32 k20 = K20; + u32 k21 = K21; + u32 k22 = K22; + u32 k23 = K23; + u32 k24 = K24; + u32 k25 = K25; + u32 k26 = K26; + u32 k27 = K27; + + k00 |= words_buf_r[pc_pos].b[ 0]; + k01 |= words_buf_r[pc_pos].b[ 1]; + k02 |= words_buf_r[pc_pos].b[ 2]; + k03 |= words_buf_r[pc_pos].b[ 3]; + k04 |= words_buf_r[pc_pos].b[ 4]; + k05 |= words_buf_r[pc_pos].b[ 5]; + k06 |= words_buf_r[pc_pos].b[ 6]; + k07 |= words_buf_r[pc_pos].b[ 7]; + k08 |= words_buf_r[pc_pos].b[ 8]; + k09 |= words_buf_r[pc_pos].b[ 9]; + k10 |= words_buf_r[pc_pos].b[10]; + k11 |= words_buf_r[pc_pos].b[11]; + k12 |= words_buf_r[pc_pos].b[12]; + k13 |= words_buf_r[pc_pos].b[13]; + k14 |= words_buf_r[pc_pos].b[14]; + k15 |= words_buf_r[pc_pos].b[15]; + k16 |= words_buf_r[pc_pos].b[16]; + k17 |= words_buf_r[pc_pos].b[17]; + k18 |= words_buf_r[pc_pos].b[18]; + k19 |= words_buf_r[pc_pos].b[19]; + k20 |= words_buf_r[pc_pos].b[20]; + k21 |= words_buf_r[pc_pos].b[21]; + k22 |= words_buf_r[pc_pos].b[22]; + k23 |= words_buf_r[pc_pos].b[23]; + k24 |= words_buf_r[pc_pos].b[24]; + k25 |= words_buf_r[pc_pos].b[25]; + k26 |= words_buf_r[pc_pos].b[26]; + k27 |= words_buf_r[pc_pos].b[27]; + + DES + ( + k00, k01, k02, k03, k04, k05, k06, + k07, k08, k09, k10, k11, k12, k13, + k14, k15, k16, k17, k18, k19, k20, + k21, k22, k23, k24, k25, k26, k27, + K28, K29, K30, K31, K32, K33, K34, + K35, K36, K37, K38, K39, K40, K41, + K42, K43, K44, K45, K46, K47, K48, + K49, K50, K51, K52, K53, K54, K55, + &D00, &D01, &D02, &D03, &D04, &D05, &D06, &D07, + &D08, &D09, &D10, &D11, &D12, &D13, 
&D14, &D15, + &D16, &D17, &D18, &D19, &D20, &D21, &D22, &D23, + &D24, &D25, &D26, &D27, &D28, &D29, &D30, &D31, + &D32, &D33, &D34, &D35, &D36, &D37, &D38, &D39, + &D40, &D41, &D42, &D43, &D44, &D45, &D46, &D47, + &D48, &D49, &D50, &D51, &D52, &D53, &D54, &D55, + &D56, &D57, &D58, &D59, &D60, &D61, &D62, &D63 + ); + + + u32 out[64]; + + out[ 0] = D00; + out[ 1] = D01; + out[ 2] = D02; + out[ 3] = D03; + out[ 4] = D04; + out[ 5] = D05; + out[ 6] = D06; + out[ 7] = D07; + out[ 8] = D08; + out[ 9] = D09; + out[10] = D10; + out[11] = D11; + out[12] = D12; + out[13] = D13; + out[14] = D14; + out[15] = D15; + out[16] = D16; + out[17] = D17; + out[18] = D18; + out[19] = D19; + out[20] = D20; + out[21] = D21; + out[22] = D22; + out[23] = D23; + out[24] = D24; + out[25] = D25; + out[26] = D26; + out[27] = D27; + out[28] = D28; + out[29] = D29; + out[30] = D30; + out[31] = D31; + out[32] = D32; + out[33] = D33; + out[34] = D34; + out[35] = D35; + out[36] = D36; + out[37] = D37; + out[38] = D38; + out[39] = D39; + out[40] = D40; + out[41] = D41; + out[42] = D42; + out[43] = D43; + out[44] = D44; + out[45] = D45; + out[46] = D46; + out[47] = D47; + out[48] = D48; + out[49] = D49; + out[50] = D50; + out[51] = D51; + out[52] = D52; + out[53] = D53; + out[54] = D54; + out[55] = D55; + out[56] = D56; + out[57] = D57; + out[58] = D58; + out[59] = D59; + out[60] = D60; + out[61] = D61; + out[62] = D62; + out[63] = D63; + + if (digests_cnt < 16) + { + for (u32 d = 0; d < digests_cnt; d++) + { + const u32 final_hash_pos = digests_offset + d; + + if (hashes_shown[final_hash_pos]) continue; + + u32 search[2]; + + search[0] = digests_buf[final_hash_pos].digest_buf[DGST_R0]; + search[1] = digests_buf[final_hash_pos].digest_buf[DGST_R1]; + + u32 tmpResult = 0; + + #ifdef _unroll + #pragma unroll + #endif + for (int i = 0; i < 32; i++) + { + const u32 b0 = -((search[0] >> i) & 1); + const u32 b1 = -((search[1] >> i) & 1); + + tmpResult |= out[ 0 + i] ^ b0; + tmpResult |= out[32 + i] ^ b1; + 
} + /* a zero bit in tmpResult marks a lane whose 64 output bits all equal this digest; all ones means no lane matched */ + if (tmpResult == 0xffffffff) continue; + /* index of the highest zero bit of tmpResult, i.e. the highest matching lane */ + const u32 slice = 31 - clz (~tmpResult); + + const u32 r0 = search[0]; + const u32 r1 = search[1]; + const u32 r2 = 0; + const u32 r3 = 0; + + #include COMPARE_M + } + } + else + { + u32 out0[32]; + u32 out1[32]; + /* copy the two 32-word halves of out[] in reversed order before transposing */ + #ifdef _unroll + #pragma unroll + #endif + for (int i = 0; i < 32; i++) + { + out0[i] = out[ 0 + 31 - i]; + out1[i] = out[32 + 31 - i]; + } + /* NOTE(review): transpose32c presumably turns the 32 bitslice words into one 32-bit value per lane - confirm against its definition */ + transpose32c (out0); + transpose32c (out1); + + #ifdef _unroll + #pragma unroll + #endif + for (int slice = 0; slice < 32; slice++) + { + const u32 r0 = out0[31 - slice]; + const u32 r1 = out1[31 - slice]; + const u32 r2 = 0; + const u32 r3 = 0; + + #include COMPARE_M + } + } +} +/** + * m14000s: single-hash search routine of the bitsliced DES kernel (called from m14000_s04). + * + * Plaintext and key bitslices are built the same way as in m14000m and passed through DES (). + * The 64 output words D00..D63 are then XOR-ed against S00..S63, the expanded bits of + * digests_buf[0].digest_buf[0..1], and OR-ed into tmpResult. The function returns early after + * each group of 16 bit positions once tmpResult is all ones (no lane can match any more). + * A surviving zero bit selects the lane (slice) that is handed to COMPARE_S. + */ +void m14000s (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset) +{ + /** + * base + */ + + const u32 gid = get_global_id (0); + const u32 lid = get_local_id (0); + + /** + * salt: the two 32-bit halves of the 64-bit block that is fed to DES as plaintext + */ + + const u32 salt0 = salt_bufs[salt_pos].salt_buf_pc[0]; + const u32 salt1 = salt_bufs[salt_pos].salt_buf_pc[1]; + + // salt1 first, because this is a 64 bit value actually + + #define d00 (((salt1 >> 0) & 1) ? -1 : 0) + #define d01 (((salt1 >> 1) & 1) ?
-1 : 0) + #define d02 (((salt1 >> 2) & 1) ? -1 : 0) + #define d03 (((salt1 >> 3) & 1) ? -1 : 0) + #define d04 (((salt1 >> 4) & 1) ? -1 : 0) + #define d05 (((salt1 >> 5) & 1) ? -1 : 0) + #define d06 (((salt1 >> 6) & 1) ? -1 : 0) + #define d07 (((salt1 >> 7) & 1) ? -1 : 0) + #define d08 (((salt1 >> 8) & 1) ? -1 : 0) + #define d09 (((salt1 >> 9) & 1) ? -1 : 0) + #define d10 (((salt1 >> 10) & 1) ? -1 : 0) + #define d11 (((salt1 >> 11) & 1) ? -1 : 0) + #define d12 (((salt1 >> 12) & 1) ? -1 : 0) + #define d13 (((salt1 >> 13) & 1) ? -1 : 0) + #define d14 (((salt1 >> 14) & 1) ? -1 : 0) + #define d15 (((salt1 >> 15) & 1) ? -1 : 0) + #define d16 (((salt1 >> 16) & 1) ? -1 : 0) + #define d17 (((salt1 >> 17) & 1) ? -1 : 0) + #define d18 (((salt1 >> 18) & 1) ? -1 : 0) + #define d19 (((salt1 >> 19) & 1) ? -1 : 0) + #define d20 (((salt1 >> 20) & 1) ? -1 : 0) + #define d21 (((salt1 >> 21) & 1) ? -1 : 0) + #define d22 (((salt1 >> 22) & 1) ? -1 : 0) + #define d23 (((salt1 >> 23) & 1) ? -1 : 0) + #define d24 (((salt1 >> 24) & 1) ? -1 : 0) + #define d25 (((salt1 >> 25) & 1) ? -1 : 0) + #define d26 (((salt1 >> 26) & 1) ? -1 : 0) + #define d27 (((salt1 >> 27) & 1) ? -1 : 0) + #define d28 (((salt1 >> 28) & 1) ? -1 : 0) + #define d29 (((salt1 >> 29) & 1) ? -1 : 0) + #define d30 (((salt1 >> 30) & 1) ? -1 : 0) + #define d31 (((salt1 >> 31) & 1) ? -1 : 0) + #define d32 (((salt0 >> 0) & 1) ? -1 : 0) + #define d33 (((salt0 >> 1) & 1) ? -1 : 0) + #define d34 (((salt0 >> 2) & 1) ? -1 : 0) + #define d35 (((salt0 >> 3) & 1) ? -1 : 0) + #define d36 (((salt0 >> 4) & 1) ? -1 : 0) + #define d37 (((salt0 >> 5) & 1) ? -1 : 0) + #define d38 (((salt0 >> 6) & 1) ? -1 : 0) + #define d39 (((salt0 >> 7) & 1) ? -1 : 0) + #define d40 (((salt0 >> 8) & 1) ? -1 : 0) + #define d41 (((salt0 >> 9) & 1) ? -1 : 0) + #define d42 (((salt0 >> 10) & 1) ? -1 : 0) + #define d43 (((salt0 >> 11) & 1) ? -1 : 0) + #define d44 (((salt0 >> 12) & 1) ? -1 : 0) + #define d45 (((salt0 >> 13) & 1) ? 
-1 : 0) + #define d46 (((salt0 >> 14) & 1) ? -1 : 0) + #define d47 (((salt0 >> 15) & 1) ? -1 : 0) + #define d48 (((salt0 >> 16) & 1) ? -1 : 0) + #define d49 (((salt0 >> 17) & 1) ? -1 : 0) + #define d50 (((salt0 >> 18) & 1) ? -1 : 0) + #define d51 (((salt0 >> 19) & 1) ? -1 : 0) + #define d52 (((salt0 >> 20) & 1) ? -1 : 0) + #define d53 (((salt0 >> 21) & 1) ? -1 : 0) + #define d54 (((salt0 >> 22) & 1) ? -1 : 0) + #define d55 (((salt0 >> 23) & 1) ? -1 : 0) + #define d56 (((salt0 >> 24) & 1) ? -1 : 0) + #define d57 (((salt0 >> 25) & 1) ? -1 : 0) + #define d58 (((salt0 >> 26) & 1) ? -1 : 0) + #define d59 (((salt0 >> 27) & 1) ? -1 : 0) + #define d60 (((salt0 >> 28) & 1) ? -1 : 0) + #define d61 (((salt0 >> 29) & 1) ? -1 : 0) + #define d62 (((salt0 >> 30) & 1) ? -1 : 0) + #define d63 (((salt0 >> 31) & 1) ? -1 : 0) + + u32 D00 = d00; + u32 D01 = d01; + u32 D02 = d02; + u32 D03 = d03; + u32 D04 = d04; + u32 D05 = d05; + u32 D06 = d06; + u32 D07 = d07; + u32 D08 = d08; + u32 D09 = d09; + u32 D10 = d10; + u32 D11 = d11; + u32 D12 = d12; + u32 D13 = d13; + u32 D14 = d14; + u32 D15 = d15; + u32 D16 = d16; + u32 D17 = d17; + u32 D18 = d18; + u32 D19 = d19; + u32 D20 = d20; + u32 D21 = d21; + u32 D22 = d22; + u32 D23 = d23; + u32 D24 = d24; + u32 D25 = d25; + u32 D26 = d26; + u32 D27 = d27; + u32 D28 = d28; + u32 D29 = d29; + u32 D30 = d30; + u32 D31 = d31; + u32 D32 = d32; + u32 D33 = d33; + u32 D34 = d34; + u32 D35 = d35; + u32 D36 = d36; + u32 D37 = d37; + u32 D38 = d38; + u32 D39 = d39; + u32 D40 = d40; + u32 D41 = d41; + u32 D42 = d42; + u32 D43 = d43; + u32 D44 = d44; + u32 D45 = d45; + u32 D46 = d46; + u32 D47 = d47; + u32 D48 = d48; + u32 D49 = d49; + u32 D50 = d50; + u32 D51 = d51; + u32 D52 = d52; + u32 D53 = d53; + u32 D54 = d54; + u32 D55 = d55; + u32 D56 = d56; + u32 D57 = d57; + u32 D58 = d58; + u32 D59 = d59; + u32 D60 = d60; + u32 D61 = d61; + u32 D62 = d62; + u32 D63 = d63; + + /** + * digest + */ + + const u32 s0 = digests_buf[0].digest_buf[0]; + const u32 s1 = 
digests_buf[0].digest_buf[1]; + + #define S00 (((s0 >> 0) & 1) ? -1 : 0) + #define S01 (((s0 >> 1) & 1) ? -1 : 0) + #define S02 (((s0 >> 2) & 1) ? -1 : 0) + #define S03 (((s0 >> 3) & 1) ? -1 : 0) + #define S04 (((s0 >> 4) & 1) ? -1 : 0) + #define S05 (((s0 >> 5) & 1) ? -1 : 0) + #define S06 (((s0 >> 6) & 1) ? -1 : 0) + #define S07 (((s0 >> 7) & 1) ? -1 : 0) + #define S08 (((s0 >> 8) & 1) ? -1 : 0) + #define S09 (((s0 >> 9) & 1) ? -1 : 0) + #define S10 (((s0 >> 10) & 1) ? -1 : 0) + #define S11 (((s0 >> 11) & 1) ? -1 : 0) + #define S12 (((s0 >> 12) & 1) ? -1 : 0) + #define S13 (((s0 >> 13) & 1) ? -1 : 0) + #define S14 (((s0 >> 14) & 1) ? -1 : 0) + #define S15 (((s0 >> 15) & 1) ? -1 : 0) + #define S16 (((s0 >> 16) & 1) ? -1 : 0) + #define S17 (((s0 >> 17) & 1) ? -1 : 0) + #define S18 (((s0 >> 18) & 1) ? -1 : 0) + #define S19 (((s0 >> 19) & 1) ? -1 : 0) + #define S20 (((s0 >> 20) & 1) ? -1 : 0) + #define S21 (((s0 >> 21) & 1) ? -1 : 0) + #define S22 (((s0 >> 22) & 1) ? -1 : 0) + #define S23 (((s0 >> 23) & 1) ? -1 : 0) + #define S24 (((s0 >> 24) & 1) ? -1 : 0) + #define S25 (((s0 >> 25) & 1) ? -1 : 0) + #define S26 (((s0 >> 26) & 1) ? -1 : 0) + #define S27 (((s0 >> 27) & 1) ? -1 : 0) + #define S28 (((s0 >> 28) & 1) ? -1 : 0) + #define S29 (((s0 >> 29) & 1) ? -1 : 0) + #define S30 (((s0 >> 30) & 1) ? -1 : 0) + #define S31 (((s0 >> 31) & 1) ? -1 : 0) + #define S32 (((s1 >> 0) & 1) ? -1 : 0) + #define S33 (((s1 >> 1) & 1) ? -1 : 0) + #define S34 (((s1 >> 2) & 1) ? -1 : 0) + #define S35 (((s1 >> 3) & 1) ? -1 : 0) + #define S36 (((s1 >> 4) & 1) ? -1 : 0) + #define S37 (((s1 >> 5) & 1) ? -1 : 0) + #define S38 (((s1 >> 6) & 1) ? -1 : 0) + #define S39 (((s1 >> 7) & 1) ? -1 : 0) + #define S40 (((s1 >> 8) & 1) ? -1 : 0) + #define S41 (((s1 >> 9) & 1) ? -1 : 0) + #define S42 (((s1 >> 10) & 1) ? -1 : 0) + #define S43 (((s1 >> 11) & 1) ? -1 : 0) + #define S44 (((s1 >> 12) & 1) ? -1 : 0) + #define S45 (((s1 >> 13) & 1) ? -1 : 0) + #define S46 (((s1 >> 14) & 1) ? 
-1 : 0) + #define S47 (((s1 >> 15) & 1) ? -1 : 0) + #define S48 (((s1 >> 16) & 1) ? -1 : 0) + #define S49 (((s1 >> 17) & 1) ? -1 : 0) + #define S50 (((s1 >> 18) & 1) ? -1 : 0) + #define S51 (((s1 >> 19) & 1) ? -1 : 0) + #define S52 (((s1 >> 20) & 1) ? -1 : 0) + #define S53 (((s1 >> 21) & 1) ? -1 : 0) + #define S54 (((s1 >> 22) & 1) ? -1 : 0) + #define S55 (((s1 >> 23) & 1) ? -1 : 0) + #define S56 (((s1 >> 24) & 1) ? -1 : 0) + #define S57 (((s1 >> 25) & 1) ? -1 : 0) + #define S58 (((s1 >> 26) & 1) ? -1 : 0) + #define S59 (((s1 >> 27) & 1) ? -1 : 0) + #define S60 (((s1 >> 28) & 1) ? -1 : 0) + #define S61 (((s1 >> 29) & 1) ? -1 : 0) + #define S62 (((s1 >> 30) & 1) ? -1 : 0) + #define S63 (((s1 >> 31) & 1) ? -1 : 0) + + /** + * base + */ + + const u32 w0 = pws[gid].i[0]; + const u32 w1 = pws[gid].i[1]; + + #define K00 (((w0 >> ( 0 + 7)) & 1) ? -1 : 0) + #define K01 (((w0 >> ( 0 + 6)) & 1) ? -1 : 0) + #define K02 (((w0 >> ( 0 + 5)) & 1) ? -1 : 0) + #define K03 (((w0 >> ( 0 + 4)) & 1) ? -1 : 0) + #define K04 (((w0 >> ( 0 + 3)) & 1) ? -1 : 0) + #define K05 (((w0 >> ( 0 + 2)) & 1) ? -1 : 0) + #define K06 (((w0 >> ( 0 + 1)) & 1) ? -1 : 0) + #define K07 (((w0 >> ( 8 + 7)) & 1) ? -1 : 0) + #define K08 (((w0 >> ( 8 + 6)) & 1) ? -1 : 0) + #define K09 (((w0 >> ( 8 + 5)) & 1) ? -1 : 0) + #define K10 (((w0 >> ( 8 + 4)) & 1) ? -1 : 0) + #define K11 (((w0 >> ( 8 + 3)) & 1) ? -1 : 0) + #define K12 (((w0 >> ( 8 + 2)) & 1) ? -1 : 0) + #define K13 (((w0 >> ( 8 + 1)) & 1) ? -1 : 0) + #define K14 (((w0 >> (16 + 7)) & 1) ? -1 : 0) + #define K15 (((w0 >> (16 + 6)) & 1) ? -1 : 0) + #define K16 (((w0 >> (16 + 5)) & 1) ? -1 : 0) + #define K17 (((w0 >> (16 + 4)) & 1) ? -1 : 0) + #define K18 (((w0 >> (16 + 3)) & 1) ? -1 : 0) + #define K19 (((w0 >> (16 + 2)) & 1) ? -1 : 0) + #define K20 (((w0 >> (16 + 1)) & 1) ? -1 : 0) + #define K21 (((w0 >> (24 + 7)) & 1) ? -1 : 0) + #define K22 (((w0 >> (24 + 6)) & 1) ? -1 : 0) + #define K23 (((w0 >> (24 + 5)) & 1) ? 
-1 : 0) + #define K24 (((w0 >> (24 + 4)) & 1) ? -1 : 0) + #define K25 (((w0 >> (24 + 3)) & 1) ? -1 : 0) + #define K26 (((w0 >> (24 + 2)) & 1) ? -1 : 0) + #define K27 (((w0 >> (24 + 1)) & 1) ? -1 : 0) + #define K28 (((w1 >> ( 0 + 7)) & 1) ? -1 : 0) + #define K29 (((w1 >> ( 0 + 6)) & 1) ? -1 : 0) + #define K30 (((w1 >> ( 0 + 5)) & 1) ? -1 : 0) + #define K31 (((w1 >> ( 0 + 4)) & 1) ? -1 : 0) + #define K32 (((w1 >> ( 0 + 3)) & 1) ? -1 : 0) + #define K33 (((w1 >> ( 0 + 2)) & 1) ? -1 : 0) + #define K34 (((w1 >> ( 0 + 1)) & 1) ? -1 : 0) + #define K35 (((w1 >> ( 8 + 7)) & 1) ? -1 : 0) + #define K36 (((w1 >> ( 8 + 6)) & 1) ? -1 : 0) + #define K37 (((w1 >> ( 8 + 5)) & 1) ? -1 : 0) + #define K38 (((w1 >> ( 8 + 4)) & 1) ? -1 : 0) + #define K39 (((w1 >> ( 8 + 3)) & 1) ? -1 : 0) + #define K40 (((w1 >> ( 8 + 2)) & 1) ? -1 : 0) + #define K41 (((w1 >> ( 8 + 1)) & 1) ? -1 : 0) + #define K42 (((w1 >> (16 + 7)) & 1) ? -1 : 0) + #define K43 (((w1 >> (16 + 6)) & 1) ? -1 : 0) + #define K44 (((w1 >> (16 + 5)) & 1) ? -1 : 0) + #define K45 (((w1 >> (16 + 4)) & 1) ? -1 : 0) + #define K46 (((w1 >> (16 + 3)) & 1) ? -1 : 0) + #define K47 (((w1 >> (16 + 2)) & 1) ? -1 : 0) + #define K48 (((w1 >> (16 + 1)) & 1) ? -1 : 0) + #define K49 (((w1 >> (24 + 7)) & 1) ? -1 : 0) + #define K50 (((w1 >> (24 + 6)) & 1) ? -1 : 0) + #define K51 (((w1 >> (24 + 5)) & 1) ? -1 : 0) + #define K52 (((w1 >> (24 + 4)) & 1) ? -1 : 0) + #define K53 (((w1 >> (24 + 3)) & 1) ? -1 : 0) + #define K54 (((w1 >> (24 + 2)) & 1) ? -1 : 0) + #define K55 (((w1 >> (24 + 1)) & 1) ? 
-1 : 0) + + /** + * inner loop + */ + + const u32 pc_pos = get_local_id (1); + + const u32 il_pos = pc_pos * 32; + + u32 k00 = K00; + u32 k01 = K01; + u32 k02 = K02; + u32 k03 = K03; + u32 k04 = K04; + u32 k05 = K05; + u32 k06 = K06; + u32 k07 = K07; + u32 k08 = K08; + u32 k09 = K09; + u32 k10 = K10; + u32 k11 = K11; + u32 k12 = K12; + u32 k13 = K13; + u32 k14 = K14; + u32 k15 = K15; + u32 k16 = K16; + u32 k17 = K17; + u32 k18 = K18; + u32 k19 = K19; + u32 k20 = K20; + u32 k21 = K21; + u32 k22 = K22; + u32 k23 = K23; + u32 k24 = K24; + u32 k25 = K25; + u32 k26 = K26; + u32 k27 = K27; + + k00 |= words_buf_r[pc_pos].b[ 0]; + k01 |= words_buf_r[pc_pos].b[ 1]; + k02 |= words_buf_r[pc_pos].b[ 2]; + k03 |= words_buf_r[pc_pos].b[ 3]; + k04 |= words_buf_r[pc_pos].b[ 4]; + k05 |= words_buf_r[pc_pos].b[ 5]; + k06 |= words_buf_r[pc_pos].b[ 6]; + k07 |= words_buf_r[pc_pos].b[ 7]; + k08 |= words_buf_r[pc_pos].b[ 8]; + k09 |= words_buf_r[pc_pos].b[ 9]; + k10 |= words_buf_r[pc_pos].b[10]; + k11 |= words_buf_r[pc_pos].b[11]; + k12 |= words_buf_r[pc_pos].b[12]; + k13 |= words_buf_r[pc_pos].b[13]; + k14 |= words_buf_r[pc_pos].b[14]; + k15 |= words_buf_r[pc_pos].b[15]; + k16 |= words_buf_r[pc_pos].b[16]; + k17 |= words_buf_r[pc_pos].b[17]; + k18 |= words_buf_r[pc_pos].b[18]; + k19 |= words_buf_r[pc_pos].b[19]; + k20 |= words_buf_r[pc_pos].b[20]; + k21 |= words_buf_r[pc_pos].b[21]; + k22 |= words_buf_r[pc_pos].b[22]; + k23 |= words_buf_r[pc_pos].b[23]; + k24 |= words_buf_r[pc_pos].b[24]; + k25 |= words_buf_r[pc_pos].b[25]; + k26 |= words_buf_r[pc_pos].b[26]; + k27 |= words_buf_r[pc_pos].b[27]; + + DES + ( + k00, k01, k02, k03, k04, k05, k06, + k07, k08, k09, k10, k11, k12, k13, + k14, k15, k16, k17, k18, k19, k20, + k21, k22, k23, k24, k25, k26, k27, + K28, K29, K30, K31, K32, K33, K34, + K35, K36, K37, K38, K39, K40, K41, + K42, K43, K44, K45, K46, K47, K48, + K49, K50, K51, K52, K53, K54, K55, + &D00, &D01, &D02, &D03, &D04, &D05, &D06, &D07, + &D08, &D09, &D10, &D11, &D12, &D13, 
&D14, &D15, + &D16, &D17, &D18, &D19, &D20, &D21, &D22, &D23, + &D24, &D25, &D26, &D27, &D28, &D29, &D30, &D31, + &D32, &D33, &D34, &D35, &D36, &D37, &D38, &D39, + &D40, &D41, &D42, &D43, &D44, &D45, &D46, &D47, + &D48, &D49, &D50, &D51, &D52, &D53, &D54, &D55, + &D56, &D57, &D58, &D59, &D60, &D61, &D62, &D63 + ); + + u32 tmpResult = 0; + + tmpResult |= D00 ^ S00; + tmpResult |= D01 ^ S01; + tmpResult |= D02 ^ S02; + tmpResult |= D03 ^ S03; + tmpResult |= D04 ^ S04; + tmpResult |= D05 ^ S05; + tmpResult |= D06 ^ S06; + tmpResult |= D07 ^ S07; + tmpResult |= D08 ^ S08; + tmpResult |= D09 ^ S09; + tmpResult |= D10 ^ S10; + tmpResult |= D11 ^ S11; + tmpResult |= D12 ^ S12; + tmpResult |= D13 ^ S13; + tmpResult |= D14 ^ S14; + tmpResult |= D15 ^ S15; + + if (tmpResult == 0xffffffff) return; + + tmpResult |= D16 ^ S16; + tmpResult |= D17 ^ S17; + tmpResult |= D18 ^ S18; + tmpResult |= D19 ^ S19; + tmpResult |= D20 ^ S20; + tmpResult |= D21 ^ S21; + tmpResult |= D22 ^ S22; + tmpResult |= D23 ^ S23; + tmpResult |= D24 ^ S24; + tmpResult |= D25 ^ S25; + tmpResult |= D26 ^ S26; + tmpResult |= D27 ^ S27; + tmpResult |= D28 ^ S28; + tmpResult |= D29 ^ S29; + tmpResult |= D30 ^ S30; + tmpResult |= D31 ^ S31; + + if (tmpResult == 0xffffffff) return; + + tmpResult |= D32 ^ S32; + tmpResult |= D33 ^ S33; + tmpResult |= D34 ^ S34; + tmpResult |= D35 ^ S35; + tmpResult |= D36 ^ S36; + tmpResult |= D37 ^ S37; + tmpResult |= D38 ^ S38; + tmpResult |= D39 ^ S39; + tmpResult |= D40 ^ S40; + tmpResult |= D41 ^ S41; + tmpResult |= D42 ^ S42; + tmpResult |= D43 ^ S43; + tmpResult |= D44 ^ S44; + tmpResult |= D45 ^ S45; + tmpResult |= D46 ^ S46; + tmpResult |= D47 ^ S47; + + if (tmpResult == 0xffffffff) return; + + tmpResult |= D48 ^ S48; + tmpResult |= D49 ^ S49; + tmpResult |= D50 ^ S50; + tmpResult |= D51 ^ S51; + tmpResult |= D52 ^ S52; + tmpResult |= D53 ^ S53; + tmpResult |= D54 ^ S54; + tmpResult |= D55 ^ S55; + tmpResult |= D56 ^ S56; + tmpResult |= D57 ^ S57; + tmpResult |= D58 ^ 
S58; + tmpResult |= D59 ^ S59; + tmpResult |= D60 ^ S60; + tmpResult |= D61 ^ S61; + tmpResult |= D62 ^ S62; + tmpResult |= D63 ^ S63; + + if (tmpResult == 0xffffffff) return; + + const u32 slice = 31 - clz (~tmpResult); + + #include COMPARE_S +} + +// + // transpose bitslice mod : attention race conditions, need different buffers for *in and *out + // + /** + * m14000_tm: scatter the candidate words in mod[] into bitslice layout. + * + * Work-item gid reads w0 = mod[gid]; block = gid / 32 picks the bs_word_t and slice = gid % 32 + * the bit lane inside its words. For each of the four bytes of w0 (i = 0, 8, 16, 24) the bits + * 7..1 are moved to bit position slice of b[j + 0] .. b[j + 6] with j = 0, 7, 14, 21, so b[0..27] + * receive 28 key bits; bit 0 of every byte is skipped. + * + * atomic_or is used because the 32 work-items that share one block update the same words. + * NOTE(review): the words are only OR-ed into, so words_buf_r presumably has to be zeroed by the + * host before this kernel runs - confirm. + */ +__kernel void m14000_tm (__global u32 *mod, __global bs_word_t *words_buf_r) +{ + const u32 gid = get_global_id (0); + /* 32 consecutive work-items fill the 32 bit lanes of one bs_word_t */ + const u32 block = gid / 32; + const u32 slice = gid % 32; + + const u32 w0 = mod[gid]; + /* i walks the bytes of w0 (8 bits each), j walks the output words (7 key bits per byte) */ + #ifdef _unroll + #pragma unroll + #endif + for (int i = 0, j = 0; i < 32; i += 8, j += 7) + { + atomic_or (&words_buf_r[block].b[j + 0], (((w0 >> (i + 7)) & 1) << slice)); + atomic_or (&words_buf_r[block].b[j + 1], (((w0 >> (i + 6)) & 1) << slice)); + atomic_or (&words_buf_r[block].b[j + 2], (((w0 >> (i + 5)) & 1) << slice)); + atomic_or (&words_buf_r[block].b[j + 3], (((w0 >> (i + 4)) & 1) << slice)); + atomic_or (&words_buf_r[block].b[j + 4], (((w0 >> (i + 3)) & 1) << slice)); + atomic_or (&words_buf_r[block].b[j + 5], (((w0 >> (i + 2)) & 1) << slice)); + atomic_or (&words_buf_r[block].b[j + 6], (((w0 >> (i + 1)) & 1) << slice)); + } +} +/** + * m14000_m04: kernel entry point for the multi-hash search. Work-items with gid >= gid_max + * return immediately; all others forward every argument except combs_mode and gid_max to m14000m. + */ +__kernel void m14000_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32
bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +{ + /** + * base: work-item ids; lid is read but not used inside this wrapper + */ + + const u32 gid = get_global_id (0); + const u32 lid = get_local_id (0); + /* work-items at or beyond gid_max do nothing */ + if (gid >= gid_max) return; + + /** + * main: run the multi-hash search; combs_mode and gid_max are not forwarded + */ + + m14000m (pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset); +} +/** + * m14000_m08: takes the same parameter list as m14000_m04 but has an empty body, so launching + * it does nothing. + * NOTE(review): presumably declared only so that the host finds all three kernel names - confirm. + */ +__kernel void m14000_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +{ +} +/** + * m14000_m16: like m14000_m08, declared with the common parameter list and an empty body. + */ +__kernel void m14000_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks,
__global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
+{
+}
+// m14000_s04: bounds-checks the work-item against gid_max and forwards every argument except combs_mode and gid_max to m14000s()
+__kernel void m14000_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
+{
+  /**
+   * base: work-item ids (lid is fetched but not used in this wrapper)
+   */
+
+  const u32 gid = get_global_id (0);
+  const u32 lid = get_local_id (0);
+
+  if (gid >= gid_max) return; // work-items at or beyond gid_max do nothing
+
+  /**
+   * main: the whole computation is delegated to m14000s()
+   */
+
+  m14000s (pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
+}
+// m14000_s08: empty body, the kernel does nothing when launched (presumably only the _s04 variant is needed for this mode - TODO confirm)
+__kernel void m14000_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
+{
+}
+// m14000_s16: empty body as well, the kernel does nothing when launched
+__kernel void m14000_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32
*d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
+{
+}
diff --git a/OpenCL/m14100_a0.cl b/OpenCL/m14100_a0.cl
new file mode 100644
index 000000000..bff2ea0c2
--- /dev/null
+++ b/OpenCL/m14100_a0.cl
@@ -0,0 +1,845 @@
+/**
+ * Authors.....: Jens Steube
+ *               Gabriele Gristina
+ *               magnum
+ *               Frans Lategan
+ *
+ * License.....: MIT
+ */
+// Kernels of this file: each rule-modified candidate supplies three 8-byte DES keys; the 8-byte salt block is encrypted, decrypted and encrypted again with them and the result is compared against the digest
+#define _DES_
+
+#define NEW_SIMD_CODE
+
+#include "inc_vendor.cl"
+#include "inc_hash_constants.h"
+#include "inc_hash_functions.cl"
+#include "inc_types.cl"
+#include "inc_common.cl"
+#include "inc_rp.h"
+#include "inc_rp.cl"
+#include "inc_simd.cl"
+// PERM_OP: exchanges the bits of b selected by mask m with the bits of a selected by (m << n); tt is a scratch variable supplied by the caller
+#define PERM_OP(a,b,tt,n,m) \
+{                           \
+  tt = a >> n;              \
+  tt = tt ^ b;              \
+  tt = tt & m;              \
+  b = b ^ tt;               \
+  tt = tt << n;             \
+  a = a ^ tt;               \
+}
+// HPERM_OP: exchanges, inside a, the bits selected by mask m with the bits located (16 + n) positions below them; tt is a scratch variable supplied by the caller
+#define HPERM_OP(a,tt,n,m)  \
+{                           \
+  tt = a << (16 + n);       \
+  tt = tt ^ a;              \
+  tt = tt & m;              \
+  a = a ^ tt;               \
+  tt = tt >> (16 + n);      \
+  a = a ^ tt;               \
+}
+// IP: five PERM_OP steps over the pair (l, r), by its name the DES initial permutation; NOTE(review): nothing in m14100_a0.cl invokes IP
+#define IP(l,r,tt)                     \
+{                                      \
+  PERM_OP (r, l, tt,  4, 0x0f0f0f0f);  \
+  PERM_OP (l, r, tt, 16, 0x0000ffff);  \
+  PERM_OP (r, l, tt,  2, 0x33333333);  \
+  PERM_OP (l, r, tt,  8, 0x00ff00ff);  \
+  PERM_OP (r, l, tt,  1, 0x55555555);  \
+}
+// FP: the PERM_OP steps of IP in reverse order with the roles of l and r exchanged; NOTE(review): nothing in m14100_a0.cl invokes FP
+#define FP(l,r,tt)                     \
+{                                      \
+  PERM_OP (l, r, tt,  1, 0x55555555);  \
+  PERM_OP (r, l, tt,  8, 0x00ff00ff);  \
+  PERM_OP (l, r, tt,  2, 0x33333333);  \
+  PERM_OP (r, l, tt, 16, 0x0000ffff);  \
+  PERM_OP (l, r, tt,  4, 0x0f0f0f0f);  \
+}
+// c_SPtrans: eight 64-entry tables read by the round functions through BOX() with 6-bit indices; presumably the DES S-boxes merged with the P permutation - TODO confirm
+__constant u32 c_SPtrans[8][64] =
+{
+  {
+    /* nibble 0 */
+    0x02080800, 0x00080000, 0x02000002, 0x02080802,
+    0x02000000, 0x00080802, 0x00080002, 0x02000002,
+    0x00080802, 0x02080800, 0x02080000, 0x00000802,
+    0x02000802, 0x02000000, 0x00000000, 0x00080002,
+    0x00080000, 0x00000002, 0x02000800, 0x00080800,
+    0x02080802,
0x02080000, 0x00000802, 0x02000800, + 0x00000002, 0x00000800, 0x00080800, 0x02080002, + 0x00000800, 0x02000802, 0x02080002, 0x00000000, + 0x00000000, 0x02080802, 0x02000800, 0x00080002, + 0x02080800, 0x00080000, 0x00000802, 0x02000800, + 0x02080002, 0x00000800, 0x00080800, 0x02000002, + 0x00080802, 0x00000002, 0x02000002, 0x02080000, + 0x02080802, 0x00080800, 0x02080000, 0x02000802, + 0x02000000, 0x00000802, 0x00080002, 0x00000000, + 0x00080000, 0x02000000, 0x02000802, 0x02080800, + 0x00000002, 0x02080002, 0x00000800, 0x00080802, + }, + { + /* nibble 1 */ + 0x40108010, 0x00000000, 0x00108000, 0x40100000, + 0x40000010, 0x00008010, 0x40008000, 0x00108000, + 0x00008000, 0x40100010, 0x00000010, 0x40008000, + 0x00100010, 0x40108000, 0x40100000, 0x00000010, + 0x00100000, 0x40008010, 0x40100010, 0x00008000, + 0x00108010, 0x40000000, 0x00000000, 0x00100010, + 0x40008010, 0x00108010, 0x40108000, 0x40000010, + 0x40000000, 0x00100000, 0x00008010, 0x40108010, + 0x00100010, 0x40108000, 0x40008000, 0x00108010, + 0x40108010, 0x00100010, 0x40000010, 0x00000000, + 0x40000000, 0x00008010, 0x00100000, 0x40100010, + 0x00008000, 0x40000000, 0x00108010, 0x40008010, + 0x40108000, 0x00008000, 0x00000000, 0x40000010, + 0x00000010, 0x40108010, 0x00108000, 0x40100000, + 0x40100010, 0x00100000, 0x00008010, 0x40008000, + 0x40008010, 0x00000010, 0x40100000, 0x00108000, + }, + { + /* nibble 2 */ + 0x04000001, 0x04040100, 0x00000100, 0x04000101, + 0x00040001, 0x04000000, 0x04000101, 0x00040100, + 0x04000100, 0x00040000, 0x04040000, 0x00000001, + 0x04040101, 0x00000101, 0x00000001, 0x04040001, + 0x00000000, 0x00040001, 0x04040100, 0x00000100, + 0x00000101, 0x04040101, 0x00040000, 0x04000001, + 0x04040001, 0x04000100, 0x00040101, 0x04040000, + 0x00040100, 0x00000000, 0x04000000, 0x00040101, + 0x04040100, 0x00000100, 0x00000001, 0x00040000, + 0x00000101, 0x00040001, 0x04040000, 0x04000101, + 0x00000000, 0x04040100, 0x00040100, 0x04040001, + 0x00040001, 0x04000000, 0x04040101, 0x00000001, + 
0x00040101, 0x04000001, 0x04000000, 0x04040101, + 0x00040000, 0x04000100, 0x04000101, 0x00040100, + 0x04000100, 0x00000000, 0x04040001, 0x00000101, + 0x04000001, 0x00040101, 0x00000100, 0x04040000, + }, + { + /* nibble 3 */ + 0x00401008, 0x10001000, 0x00000008, 0x10401008, + 0x00000000, 0x10400000, 0x10001008, 0x00400008, + 0x10401000, 0x10000008, 0x10000000, 0x00001008, + 0x10000008, 0x00401008, 0x00400000, 0x10000000, + 0x10400008, 0x00401000, 0x00001000, 0x00000008, + 0x00401000, 0x10001008, 0x10400000, 0x00001000, + 0x00001008, 0x00000000, 0x00400008, 0x10401000, + 0x10001000, 0x10400008, 0x10401008, 0x00400000, + 0x10400008, 0x00001008, 0x00400000, 0x10000008, + 0x00401000, 0x10001000, 0x00000008, 0x10400000, + 0x10001008, 0x00000000, 0x00001000, 0x00400008, + 0x00000000, 0x10400008, 0x10401000, 0x00001000, + 0x10000000, 0x10401008, 0x00401008, 0x00400000, + 0x10401008, 0x00000008, 0x10001000, 0x00401008, + 0x00400008, 0x00401000, 0x10400000, 0x10001008, + 0x00001008, 0x10000000, 0x10000008, 0x10401000, + }, + { + /* nibble 4 */ + 0x08000000, 0x00010000, 0x00000400, 0x08010420, + 0x08010020, 0x08000400, 0x00010420, 0x08010000, + 0x00010000, 0x00000020, 0x08000020, 0x00010400, + 0x08000420, 0x08010020, 0x08010400, 0x00000000, + 0x00010400, 0x08000000, 0x00010020, 0x00000420, + 0x08000400, 0x00010420, 0x00000000, 0x08000020, + 0x00000020, 0x08000420, 0x08010420, 0x00010020, + 0x08010000, 0x00000400, 0x00000420, 0x08010400, + 0x08010400, 0x08000420, 0x00010020, 0x08010000, + 0x00010000, 0x00000020, 0x08000020, 0x08000400, + 0x08000000, 0x00010400, 0x08010420, 0x00000000, + 0x00010420, 0x08000000, 0x00000400, 0x00010020, + 0x08000420, 0x00000400, 0x00000000, 0x08010420, + 0x08010020, 0x08010400, 0x00000420, 0x00010000, + 0x00010400, 0x08010020, 0x08000400, 0x00000420, + 0x00000020, 0x00010420, 0x08010000, 0x08000020, + }, + { + /* nibble 5 */ + 0x80000040, 0x00200040, 0x00000000, 0x80202000, + 0x00200040, 0x00002000, 0x80002040, 0x00200000, + 0x00002040, 
0x80202040, 0x00202000, 0x80000000, + 0x80002000, 0x80000040, 0x80200000, 0x00202040, + 0x00200000, 0x80002040, 0x80200040, 0x00000000, + 0x00002000, 0x00000040, 0x80202000, 0x80200040, + 0x80202040, 0x80200000, 0x80000000, 0x00002040, + 0x00000040, 0x00202000, 0x00202040, 0x80002000, + 0x00002040, 0x80000000, 0x80002000, 0x00202040, + 0x80202000, 0x00200040, 0x00000000, 0x80002000, + 0x80000000, 0x00002000, 0x80200040, 0x00200000, + 0x00200040, 0x80202040, 0x00202000, 0x00000040, + 0x80202040, 0x00202000, 0x00200000, 0x80002040, + 0x80000040, 0x80200000, 0x00202040, 0x00000000, + 0x00002000, 0x80000040, 0x80002040, 0x80202000, + 0x80200000, 0x00002040, 0x00000040, 0x80200040, + }, + { + /* nibble 6 */ + 0x00004000, 0x00000200, 0x01000200, 0x01000004, + 0x01004204, 0x00004004, 0x00004200, 0x00000000, + 0x01000000, 0x01000204, 0x00000204, 0x01004000, + 0x00000004, 0x01004200, 0x01004000, 0x00000204, + 0x01000204, 0x00004000, 0x00004004, 0x01004204, + 0x00000000, 0x01000200, 0x01000004, 0x00004200, + 0x01004004, 0x00004204, 0x01004200, 0x00000004, + 0x00004204, 0x01004004, 0x00000200, 0x01000000, + 0x00004204, 0x01004000, 0x01004004, 0x00000204, + 0x00004000, 0x00000200, 0x01000000, 0x01004004, + 0x01000204, 0x00004204, 0x00004200, 0x00000000, + 0x00000200, 0x01000004, 0x00000004, 0x01000200, + 0x00000000, 0x01000204, 0x01000200, 0x00004200, + 0x00000204, 0x00004000, 0x01004204, 0x01000000, + 0x01004200, 0x00000004, 0x00004004, 0x01004204, + 0x01000004, 0x01004200, 0x01004000, 0x00004004, + }, + { + /* nibble 7 */ + 0x20800080, 0x20820000, 0x00020080, 0x00000000, + 0x20020000, 0x00800080, 0x20800000, 0x20820080, + 0x00000080, 0x20000000, 0x00820000, 0x00020080, + 0x00820080, 0x20020080, 0x20000080, 0x20800000, + 0x00020000, 0x00820080, 0x00800080, 0x20020000, + 0x20820080, 0x20000080, 0x00000000, 0x00820000, + 0x20000000, 0x00800000, 0x20020080, 0x20800080, + 0x00800000, 0x00020000, 0x20820000, 0x00000080, + 0x00800000, 0x00020000, 0x20000080, 0x20820080, + 
0x00020080, 0x20000000, 0x00000000, 0x00820000, + 0x20800080, 0x20020080, 0x20020000, 0x00800080, + 0x20820000, 0x00000080, 0x00800080, 0x20020000, + 0x20820080, 0x00800000, 0x20800000, 0x20000080, + 0x00820000, 0x00020080, 0x20020080, 0x20800000, + 0x00000080, 0x20820000, 0x00820080, 0x00000000, + 0x20000000, 0x20800080, 0x00020000, 0x00820080, + }, +}; + +__constant u32 c_skb[8][64] = +{ + { + 0x00000000, 0x00000010, 0x20000000, 0x20000010, + 0x00010000, 0x00010010, 0x20010000, 0x20010010, + 0x00000800, 0x00000810, 0x20000800, 0x20000810, + 0x00010800, 0x00010810, 0x20010800, 0x20010810, + 0x00000020, 0x00000030, 0x20000020, 0x20000030, + 0x00010020, 0x00010030, 0x20010020, 0x20010030, + 0x00000820, 0x00000830, 0x20000820, 0x20000830, + 0x00010820, 0x00010830, 0x20010820, 0x20010830, + 0x00080000, 0x00080010, 0x20080000, 0x20080010, + 0x00090000, 0x00090010, 0x20090000, 0x20090010, + 0x00080800, 0x00080810, 0x20080800, 0x20080810, + 0x00090800, 0x00090810, 0x20090800, 0x20090810, + 0x00080020, 0x00080030, 0x20080020, 0x20080030, + 0x00090020, 0x00090030, 0x20090020, 0x20090030, + 0x00080820, 0x00080830, 0x20080820, 0x20080830, + 0x00090820, 0x00090830, 0x20090820, 0x20090830, + }, + { + 0x00000000, 0x02000000, 0x00002000, 0x02002000, + 0x00200000, 0x02200000, 0x00202000, 0x02202000, + 0x00000004, 0x02000004, 0x00002004, 0x02002004, + 0x00200004, 0x02200004, 0x00202004, 0x02202004, + 0x00000400, 0x02000400, 0x00002400, 0x02002400, + 0x00200400, 0x02200400, 0x00202400, 0x02202400, + 0x00000404, 0x02000404, 0x00002404, 0x02002404, + 0x00200404, 0x02200404, 0x00202404, 0x02202404, + 0x10000000, 0x12000000, 0x10002000, 0x12002000, + 0x10200000, 0x12200000, 0x10202000, 0x12202000, + 0x10000004, 0x12000004, 0x10002004, 0x12002004, + 0x10200004, 0x12200004, 0x10202004, 0x12202004, + 0x10000400, 0x12000400, 0x10002400, 0x12002400, + 0x10200400, 0x12200400, 0x10202400, 0x12202400, + 0x10000404, 0x12000404, 0x10002404, 0x12002404, + 0x10200404, 0x12200404, 0x10202404, 
0x12202404, + }, + { + 0x00000000, 0x00000001, 0x00040000, 0x00040001, + 0x01000000, 0x01000001, 0x01040000, 0x01040001, + 0x00000002, 0x00000003, 0x00040002, 0x00040003, + 0x01000002, 0x01000003, 0x01040002, 0x01040003, + 0x00000200, 0x00000201, 0x00040200, 0x00040201, + 0x01000200, 0x01000201, 0x01040200, 0x01040201, + 0x00000202, 0x00000203, 0x00040202, 0x00040203, + 0x01000202, 0x01000203, 0x01040202, 0x01040203, + 0x08000000, 0x08000001, 0x08040000, 0x08040001, + 0x09000000, 0x09000001, 0x09040000, 0x09040001, + 0x08000002, 0x08000003, 0x08040002, 0x08040003, + 0x09000002, 0x09000003, 0x09040002, 0x09040003, + 0x08000200, 0x08000201, 0x08040200, 0x08040201, + 0x09000200, 0x09000201, 0x09040200, 0x09040201, + 0x08000202, 0x08000203, 0x08040202, 0x08040203, + 0x09000202, 0x09000203, 0x09040202, 0x09040203, + }, + { + 0x00000000, 0x00100000, 0x00000100, 0x00100100, + 0x00000008, 0x00100008, 0x00000108, 0x00100108, + 0x00001000, 0x00101000, 0x00001100, 0x00101100, + 0x00001008, 0x00101008, 0x00001108, 0x00101108, + 0x04000000, 0x04100000, 0x04000100, 0x04100100, + 0x04000008, 0x04100008, 0x04000108, 0x04100108, + 0x04001000, 0x04101000, 0x04001100, 0x04101100, + 0x04001008, 0x04101008, 0x04001108, 0x04101108, + 0x00020000, 0x00120000, 0x00020100, 0x00120100, + 0x00020008, 0x00120008, 0x00020108, 0x00120108, + 0x00021000, 0x00121000, 0x00021100, 0x00121100, + 0x00021008, 0x00121008, 0x00021108, 0x00121108, + 0x04020000, 0x04120000, 0x04020100, 0x04120100, + 0x04020008, 0x04120008, 0x04020108, 0x04120108, + 0x04021000, 0x04121000, 0x04021100, 0x04121100, + 0x04021008, 0x04121008, 0x04021108, 0x04121108, + }, + { + 0x00000000, 0x10000000, 0x00010000, 0x10010000, + 0x00000004, 0x10000004, 0x00010004, 0x10010004, + 0x20000000, 0x30000000, 0x20010000, 0x30010000, + 0x20000004, 0x30000004, 0x20010004, 0x30010004, + 0x00100000, 0x10100000, 0x00110000, 0x10110000, + 0x00100004, 0x10100004, 0x00110004, 0x10110004, + 0x20100000, 0x30100000, 0x20110000, 0x30110000, + 
0x20100004, 0x30100004, 0x20110004, 0x30110004, + 0x00001000, 0x10001000, 0x00011000, 0x10011000, + 0x00001004, 0x10001004, 0x00011004, 0x10011004, + 0x20001000, 0x30001000, 0x20011000, 0x30011000, + 0x20001004, 0x30001004, 0x20011004, 0x30011004, + 0x00101000, 0x10101000, 0x00111000, 0x10111000, + 0x00101004, 0x10101004, 0x00111004, 0x10111004, + 0x20101000, 0x30101000, 0x20111000, 0x30111000, + 0x20101004, 0x30101004, 0x20111004, 0x30111004, + }, + { + 0x00000000, 0x08000000, 0x00000008, 0x08000008, + 0x00000400, 0x08000400, 0x00000408, 0x08000408, + 0x00020000, 0x08020000, 0x00020008, 0x08020008, + 0x00020400, 0x08020400, 0x00020408, 0x08020408, + 0x00000001, 0x08000001, 0x00000009, 0x08000009, + 0x00000401, 0x08000401, 0x00000409, 0x08000409, + 0x00020001, 0x08020001, 0x00020009, 0x08020009, + 0x00020401, 0x08020401, 0x00020409, 0x08020409, + 0x02000000, 0x0A000000, 0x02000008, 0x0A000008, + 0x02000400, 0x0A000400, 0x02000408, 0x0A000408, + 0x02020000, 0x0A020000, 0x02020008, 0x0A020008, + 0x02020400, 0x0A020400, 0x02020408, 0x0A020408, + 0x02000001, 0x0A000001, 0x02000009, 0x0A000009, + 0x02000401, 0x0A000401, 0x02000409, 0x0A000409, + 0x02020001, 0x0A020001, 0x02020009, 0x0A020009, + 0x02020401, 0x0A020401, 0x02020409, 0x0A020409, + }, + { + 0x00000000, 0x00000100, 0x00080000, 0x00080100, + 0x01000000, 0x01000100, 0x01080000, 0x01080100, + 0x00000010, 0x00000110, 0x00080010, 0x00080110, + 0x01000010, 0x01000110, 0x01080010, 0x01080110, + 0x00200000, 0x00200100, 0x00280000, 0x00280100, + 0x01200000, 0x01200100, 0x01280000, 0x01280100, + 0x00200010, 0x00200110, 0x00280010, 0x00280110, + 0x01200010, 0x01200110, 0x01280010, 0x01280110, + 0x00000200, 0x00000300, 0x00080200, 0x00080300, + 0x01000200, 0x01000300, 0x01080200, 0x01080300, + 0x00000210, 0x00000310, 0x00080210, 0x00080310, + 0x01000210, 0x01000310, 0x01080210, 0x01080310, + 0x00200200, 0x00200300, 0x00280200, 0x00280300, + 0x01200200, 0x01200300, 0x01280200, 0x01280300, + 0x00200210, 0x00200310, 
0x00280210, 0x00280310,
+    0x01200210, 0x01200310, 0x01280210, 0x01280310,
+  },
+  {
+    0x00000000, 0x04000000, 0x00040000, 0x04040000,
+    0x00000002, 0x04000002, 0x00040002, 0x04040002,
+    0x00002000, 0x04002000, 0x00042000, 0x04042000,
+    0x00002002, 0x04002002, 0x00042002, 0x04042002,
+    0x00000020, 0x04000020, 0x00040020, 0x04040020,
+    0x00000022, 0x04000022, 0x00040022, 0x04040022,
+    0x00002020, 0x04002020, 0x00042020, 0x04042020,
+    0x00002022, 0x04002022, 0x00042022, 0x04042022,
+    0x00000800, 0x04000800, 0x00040800, 0x04040800,
+    0x00000802, 0x04000802, 0x00040802, 0x04040802,
+    0x00002800, 0x04002800, 0x00042800, 0x04042800,
+    0x00002802, 0x04002802, 0x00042802, 0x04042802,
+    0x00000820, 0x04000820, 0x00040820, 0x04040820,
+    0x00000822, 0x04000822, 0x00040822, 0x04040822,
+    0x00002820, 0x04002820, 0x00042820, 0x04042820,
+    0x00002822, 0x04002822, 0x00042822, 0x04042822
+  }
+};
+// BOX(i,n,S): reads S[n][i]; when u32x is a vector type (VECT_SIZE > 1) every lane of i is looked up on its own and the results are packed back into a u32x
+#if VECT_SIZE == 1
+#define BOX(i,n,S) (S)[(n)][(i)]
+#elif VECT_SIZE == 2
+#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
+#elif VECT_SIZE == 4
+#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
+#elif VECT_SIZE == 8
+#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
+#elif VECT_SIZE == 16
+#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
+#endif
+// BOX1(i,S): the same per-lane lookup for a one-dimensional table S; NOTE(review): nothing in m14100_a0.cl uses BOX1
+#if VECT_SIZE == 1
+#define BOX1(i,S) (S)[(i)]
+#elif VECT_SIZE == 2
+#define BOX1(i,S) (u32x) ((S)[(i).s0], (S)[(i).s1])
+#elif VECT_SIZE == 4
+#define BOX1(i,S) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3])
+#elif VECT_SIZE == 8
+#define BOX1(i,S) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7])
+#elif VECT_SIZE == 16
+#define BOX1(i,S) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7], (S)[(i).s8], (S)[(i).s9], (S)[(i).sa], (S)[(i).sb], (S)[(i).sc], (S)[(i).sd], (S)[(i).se], (S)[(i).sf])
+#endif
+// _des_crypt_encrypt: runs 16 rounds over data[] using the round keys Kc[0..15] / Kd[0..15] in ascending order and stores the result in iv[]; neither IP nor FP is applied here, so callers presumably work on already-permuted blocks - TODO confirm
+void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __local u32 (*s_SPtrans)[64])
+{
+  u32x r = rotl32 (data[0], 3u); // both halves stay rotated left by 3 for the duration of the rounds
+  u32x l = rotl32 (data[1], 3u);
+
+  u32x tt; // NOTE(review): declared but never used in this function
+
+  #ifdef _unroll
+  #pragma unroll
+  #endif
+  for (u32 i = 0; i < 16; i += 2) // two rounds per pass: the first updates l from r, the second updates r from l
+  {
+    u32x u;
+    u32x t;
+
+    u = Kc[i + 0] ^ r;
+    t = Kd[i + 0] ^ rotl32 (r, 28u);
+
+    l ^= BOX (((u >>  2) & 0x3f), 0, s_SPtrans) // u feeds the even tables, t the odd ones
+       | BOX (((u >> 10) & 0x3f), 2, s_SPtrans)
+       | BOX (((u >> 18) & 0x3f), 4, s_SPtrans)
+       | BOX (((u >> 26) & 0x3f), 6, s_SPtrans)
+       | BOX (((t >>  2) & 0x3f), 1, s_SPtrans)
+       | BOX (((t >> 10) & 0x3f), 3, s_SPtrans)
+       | BOX (((t >> 18) & 0x3f), 5, s_SPtrans)
+       | BOX (((t >> 26) & 0x3f), 7, s_SPtrans);
+
+    u = Kc[i + 1] ^ l;
+    t = Kd[i + 1] ^ rotl32 (l, 28u);
+
+    r ^= BOX (((u >>  2) & 0x3f), 0, s_SPtrans)
+       | BOX (((u >> 10) & 0x3f), 2, s_SPtrans)
+       | BOX (((u >> 18) & 0x3f), 4, s_SPtrans)
+       | BOX (((u >> 26) & 0x3f), 6, s_SPtrans)
+       | BOX (((t >>  2) & 0x3f), 1, s_SPtrans)
+       | BOX (((t >> 10) & 0x3f), 3, s_SPtrans)
+       | BOX (((t >> 18) & 0x3f), 5, s_SPtrans)
+       | BOX (((t >> 26) & 0x3f), 7, s_SPtrans);
+  }
+
+  iv[0] = rotl32 (l, 29u); // rotating by 29 cancels the rotation by 3 from the top; the halves leave in swapped order (data[0] entered as r, iv[0] receives l)
+  iv[1] = rotl32 (r, 29u);
+}
+// _des_crypt_decrypt: same round structure as _des_crypt_encrypt, but the round keys are consumed in descending order, starting with Kc[15] / Kd[15]
+void _des_crypt_decrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __local u32 (*s_SPtrans)[64])
+{
+  u32x r = rotl32 (data[0], 3u);
+  u32x l = rotl32 (data[1], 3u);
+
+  u32x tt; // NOTE(review): declared but never used in this function
+
+  #ifdef _unroll
+  #pragma unroll
+  #endif
+  for (u32 i = 16; i > 0; i -= 2) // i counts down from 16, so the indices used are i - 1 and i - 2
+  {
+    u32x u;
+    u32x t;
+
+    u = Kc[i - 1] ^ r;
+    t = Kd[i - 1] ^ rotl32 (r, 28u);
+
+    l ^= BOX (((u >>  2) & 0x3f), 0, s_SPtrans)
+       | BOX (((u >> 10) & 0x3f), 2, s_SPtrans)
+       | BOX (((u >> 18) & 0x3f), 4, s_SPtrans)
+       | BOX (((u >> 26) & 0x3f), 6, s_SPtrans)
+       | BOX (((t >>  2) & 0x3f), 1, s_SPtrans)
+       | BOX (((t >> 10) & 0x3f), 3, s_SPtrans)
+       | BOX (((t >> 18) & 0x3f), 5, s_SPtrans)
+       | BOX (((t >> 26) & 0x3f), 7, s_SPtrans);
+
+    u = Kc[i - 2] ^ l;
+    t = Kd[i - 2] ^ rotl32 (l, 28u);
+
+    r ^= BOX (((u >>  2) & 0x3f), 0, s_SPtrans)
+       | BOX (((u >> 10) & 0x3f), 2, s_SPtrans)
+       | BOX (((u >> 18) & 0x3f), 4, s_SPtrans)
+       | BOX (((u >> 26) & 0x3f), 6, s_SPtrans)
+       | BOX (((t >>  2) & 0x3f), 1, s_SPtrans)
+       | BOX (((t >> 10) & 0x3f), 3, s_SPtrans)
+       | BOX (((t >> 18) & 0x3f), 5, s_SPtrans)
+       | BOX (((t >> 26) & 0x3f), 7, s_SPtrans);
+  }
+
+  iv[0] = rotl32 (l, 29u);
+  iv[1] = rotl32 (r, 29u);
+}
+// _des_crypt_keysetup: expands the two 32-bit key words c and d into 16 round-key pairs Kc[i] / Kd[i] with the help of the s_skb lookup tables
+void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32 (*s_skb)[64])
+{
+  u32x tt; // scratch variable required by PERM_OP / HPERM_OP
+
+  PERM_OP  (d, c, tt, 4, 0x0f0f0f0f);
+  HPERM_OP (c,    tt, 2, 0xcccc0000);
+  HPERM_OP (d,    tt, 2, 0xcccc0000);
+  PERM_OP  (d, c, tt, 1, 0x55555555);
+  PERM_OP  (c, d, tt, 8, 0x00ff00ff);
+  PERM_OP  (d, c, tt, 1, 0x55555555);
+
+  d = ((d & 0x000000ff) << 16) // reverse the three low bytes of d and append the top nibble of c as bits 24..27
+    | ((d & 0x0000ff00) <<  0)
+    | ((d & 0x00ff0000) >> 16)
+    | ((c & 0xf0000000) >>  4);
+
+  c = c & 0x0fffffff; // from here on c and d are 28-bit quantities
+
+  #ifdef _unroll
+  #pragma unroll
+  #endif
+  for (u32 i = 0; i < 16; i++) // one round key pair per pass
+  {
+    if ((i < 2) || (i == 8) || (i == 15)) // rounds 0, 1, 8 and 15 rotate by one bit, all others by two
+    {
+      c = ((c >> 1) | (c << 27));
+      d = ((d >> 1) | (d << 27));
+    }
+    else
+    {
+      c = ((c >> 2) | (c << 26));
+      d = ((d >> 2) | (d << 26));
+    }
+
+    c = c & 0x0fffffff; // drop the bits shifted above bit 27, which completes the rotation inside 28 bits
+    d = d & 0x0fffffff;
+
+    const u32x c00 = (c >>  0) & 0x0000003f;
+    const u32x c06 = (c >>  6) & 0x00383003;
+    const u32x c07 = (c >>  7) & 0x0000003c;
+    const u32x c13 = (c >> 13) & 0x0000060f;
+    const u32x c20 = (c >> 20) & 0x00000001;
+
+    u32x s = BOX (((c00 >>  0) & 0xff), 0, s_skb) // tables 0..3 are indexed with bit groups gathered from c
+           | BOX (((c06 >>  0) & 0xff)
+                 |((c07 >>  0) & 0xff), 1, s_skb)
+           | BOX (((c13 >>  0) & 0xff)
+                 |((c06 >>  8) & 0xff), 2, s_skb)
+           | BOX (((c20 >>  0) & 0xff)
+                 |((c13 >>  8) & 0xff)
+                 |((c06 >> 16) & 0xff), 3, s_skb);
+
+    const u32x d00 = (d >>  0) & 0x00003c3f;
+    const u32x d07 = (d >>  7) & 0x00003f03;
+    const u32x d21 = (d >> 21) & 0x0000000f;
+    const u32x d22 = (d >> 22) & 0x00000030;
+
+    u32x t = BOX (((d00 >>  0) & 0xff), 4, s_skb) // tables 4..7 are indexed with bit groups gathered from d
+           | BOX (((d07 >>  0) & 0xff)
+                 |((d00 >>  8) & 0xff), 5, s_skb)
+           | BOX (((d07 >>  8) & 0xff), 6, s_skb)
+           | BOX (((d21 >>  0) & 0xff)
+                 |((d22 >>  0) & 0xff), 7, s_skb);
+
+    Kc[i] = ((t << 16) | (s & 0x0000ffff)); // low halves of s and t
+    Kd[i] = ((s >> 16) | (t & 0xffff0000)); // high halves of s and t
+
+    Kc[i] = rotl32 (Kc[i], 2u);
+    Kd[i] = rotl32 (Kd[i], 2u);
+  }
+}
+// m14100_m04: for every rule-modified candidate, encrypts the salt block with key 1, decrypts with key 2, encrypts with key 3 and passes the result to COMPARE_M_SIMD
+__kernel void m14100_m04 (__global pw_t *pws, __global kernel_rule_t * rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
+{
+  /**
+   * base: global id, local id and work-group size
+   */
+
+  const u32 gid = get_global_id (0);
+  const u32 lid = get_local_id (0);
+  const u32 lsz = get_local_size (0);
+
+  /**
+   * shared: copy both constant tables into __local memory, the 64 columns being split between the work-items of the group
+   */
+
+  __local u32 s_SPtrans[8][64];
+  __local u32 s_skb[8][64];
+
+  for (u32 i = lid; i < 64; i += lsz) // each work-item copies columns lid, lid + lsz, ...
+  {
+    s_SPtrans[0][i] = c_SPtrans[0][i];
+    s_SPtrans[1][i] = c_SPtrans[1][i];
+    s_SPtrans[2][i] = c_SPtrans[2][i];
+    s_SPtrans[3][i] = c_SPtrans[3][i];
+    s_SPtrans[4][i] = c_SPtrans[4][i];
+    s_SPtrans[5][i] = c_SPtrans[5][i];
+    s_SPtrans[6][i] = c_SPtrans[6][i];
+    s_SPtrans[7][i] = c_SPtrans[7][i];
+
+    s_skb[0][i] = c_skb[0][i];
+    s_skb[1][i] = c_skb[1][i];
+    s_skb[2][i] = c_skb[2][i];
+    s_skb[3][i] = c_skb[3][i];
+    s_skb[4][i] = c_skb[4][i];
+    s_skb[5][i] = c_skb[5][i];
+    s_skb[6][i] = c_skb[6][i];
+    s_skb[7][i] = c_skb[7][i];
+  }
+
+  barrier (CLK_LOCAL_MEM_FENCE);
+
+  if (gid >= gid_max) return; // checked only after the barrier, so surplus work-items still take part in the table copy
+
+  /**
+   * base: load the first six words (24 bytes) of the candidate; the last two words of pw_buf1 are zero
+   */
+
+  u32 pw_buf0[4];
+  u32 pw_buf1[4];
+
+  pw_buf0[0] = pws[gid].i[ 0];
+  pw_buf0[1] = pws[gid].i[ 1];
+  pw_buf0[2] = pws[gid].i[ 2];
+  pw_buf0[3] = pws[gid].i[ 3];
+  pw_buf1[0] = pws[gid].i[ 4];
+  pw_buf1[1] = pws[gid].i[ 5];
+  pw_buf1[2] = 0;
+  pw_buf1[3] = 0;
+
+  const u32 pw_len = pws[gid].pw_len;
+
+  /**
+   * salt: the two salt_buf_pc words of the selected salt are the data block encrypted below
+   */
+
+  u32 salt_buf0[2];
+
+  salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0];
+  salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1];
+
+  /**
+   * main: one pass per group of VECT_SIZE rules
+   */
+
+  for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
+  {
+    u32x w0[4] = { 0 };
+    u32x w1[4] = { 0 };
+    u32x w2[4] = { 0 };
+    u32x w3[4] = { 0 }; // NOTE(review): w2 and w3 are initialised but never read in this loop
+
+    apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
+
+    /* First Pass: encrypt the salt block with the key taken from w0[0], w0[1] */
+
+    const u32x a = w0[0];
+    const u32x b = w0[1];
+
+    u32x Ka[16];
+    u32x Kb[16];
+
+    _des_crypt_keysetup (a, b, Ka, Kb, s_skb);
+
+    u32x data[2];
+
+    data[0] = salt_buf0[0];
+    data[1] = salt_buf0[1];
+
+    u32x p1[2];
+
+    _des_crypt_encrypt (p1, data, Ka, Kb, s_SPtrans);
+
+    /* Second Pass: decrypt p1 with the key taken from w0[2], w0[3] */
+
+    const u32x c = w0[2];
+    const u32x d = w0[3];
+
+    u32x Kc[16];
+    u32x Kd[16];
+
+    _des_crypt_keysetup (c, d, Kc, Kd, s_skb);
+
+    u32x p2[2];
+
+    _des_crypt_decrypt (p2, p1, Kc, Kd, s_SPtrans);
+
+    /* Third Pass: encrypt p2 with the key taken from w1[0], w1[1] */
+
+    const u32x e = w1[0];
+    const u32x f = w1[1];
+
+    u32x Ke[16];
+    u32x Kf[16];
+
+    _des_crypt_keysetup (e, f, Ke, Kf, s_skb);
+
+    u32x iv[2];
+
+    _des_crypt_encrypt (iv, p2, Ke, Kf, s_SPtrans);
+
+    u32x z = 0; // the result has only two words; the remaining two compare slots receive zero
+
+    COMPARE_M_SIMD (iv[0], iv[1], z, z);
+  }
+}
+// m14100_m08: empty body, the kernel does nothing when launched (presumably only the _m04 variant is needed for this mode - TODO confirm)
+__kernel void m14100_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf,
__global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
+{
+}
+// m14100_m16: empty body, the kernel does nothing when launched
+__kernel void m14100_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
+{
+}
+// m14100_s04: same encrypt / decrypt / encrypt computation as m14100_m04, but the result goes to COMPARE_S_SIMD and the digest at digests_offset is loaded into search[] beforehand
+__kernel void m14100_s04 (__global pw_t *pws, __global kernel_rule_t * rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
+{
+  /**
+   * base: global id, local id and work-group size
+   */
+
+  const u32 gid = get_global_id (0);
+  const u32 lid = get_local_id (0);
+  const u32 lsz = get_local_size (0);
+
+  /**
+   * shared: copy both constant tables into __local memory, the 64 columns being split between the work-items of the group
+   */
+
+  __local u32 s_SPtrans[8][64];
+  __local u32 s_skb[8][64];
+
+  for (u32 i = lid; i < 64; i += lsz) // each work-item copies columns lid, lid + lsz, ...
+  {
+    s_SPtrans[0][i] = c_SPtrans[0][i];
+    s_SPtrans[1][i] = c_SPtrans[1][i];
+    s_SPtrans[2][i] = c_SPtrans[2][i];
+    s_SPtrans[3][i] = c_SPtrans[3][i];
+    s_SPtrans[4][i] = c_SPtrans[4][i];
+    s_SPtrans[5][i] = c_SPtrans[5][i];
+    s_SPtrans[6][i] = c_SPtrans[6][i];
+    s_SPtrans[7][i] = c_SPtrans[7][i];
+
+    s_skb[0][i] = c_skb[0][i];
+    s_skb[1][i] = c_skb[1][i];
+    s_skb[2][i] = c_skb[2][i];
+    s_skb[3][i] = c_skb[3][i];
+    s_skb[4][i] = c_skb[4][i];
+    s_skb[5][i] = c_skb[5][i];
+    s_skb[6][i] = c_skb[6][i];
+    s_skb[7][i] = c_skb[7][i];
+  }
+
+  barrier (CLK_LOCAL_MEM_FENCE);
+
+  if (gid >= gid_max) return; // checked only after the barrier, so surplus work-items still take part in the table copy
+
+  /**
+   * base: load the first six words (24 bytes) of the candidate; the last two words of pw_buf1 are zero
+   */
+
+  u32 pw_buf0[4];
+  u32 pw_buf1[4];
+
+  pw_buf0[0] = pws[gid].i[ 0];
+  pw_buf0[1] = pws[gid].i[ 1];
+  pw_buf0[2] = pws[gid].i[ 2];
+  pw_buf0[3] = pws[gid].i[ 3];
+  pw_buf1[0] = pws[gid].i[ 4];
+  pw_buf1[1] = pws[gid].i[ 5];
+  pw_buf1[2] = 0;
+  pw_buf1[3] = 0;
+
+  const u32 pw_len = pws[gid].pw_len;
+
+  /**
+   * salt: the two salt_buf_pc words of the selected salt are the data block encrypted below
+   */
+
+  u32 salt_buf0[2];
+
+  salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0];
+  salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1];
+
+  /**
+   * digest: search[] is not referenced by name below; presumably COMPARE_S_SIMD expands to code that reads it - TODO confirm
+   */
+
+  const u32 search[4] =
+  {
+    digests_buf[digests_offset].digest_buf[DGST_R0],
+    digests_buf[digests_offset].digest_buf[DGST_R1],
+    0,
+    0
+  };
+
+  /**
+   * main: one pass per group of VECT_SIZE rules
+   */
+
+  for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
+  {
+    u32x w0[4] = { 0 };
+    u32x w1[4] = { 0 };
+    u32x w2[4] = { 0 };
+    u32x w3[4] = { 0 }; // NOTE(review): w2 and w3 are initialised but never read in this loop
+
+    apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
+
+    /* First Pass: encrypt the salt block with the key taken from w0[0], w0[1] */
+
+    const u32x a = w0[0];
+    const u32x b = w0[1];
+
+    u32x Ka[16];
+    u32x Kb[16];
+
+    _des_crypt_keysetup (a, b, Ka, Kb, s_skb);
+
+    u32x data[2];
+
+    data[0] = salt_buf0[0];
+    data[1] = salt_buf0[1];
+
+    u32x p1[2];
+
+    _des_crypt_encrypt (p1, data, Ka, Kb, s_SPtrans);
+
+    /* Second Pass: decrypt p1 with the key taken from w0[2], w0[3] */
+
+    const u32x c = w0[2];
+    const u32x d = w0[3];
+
+    u32x Kc[16];
+    u32x Kd[16];
+
+    _des_crypt_keysetup (c, d, Kc, Kd, s_skb);
+
+    u32x p2[2];
+
+    _des_crypt_decrypt (p2, p1, Kc, Kd, s_SPtrans);
+
+    /* Third Pass: encrypt p2 with the key taken from w1[0], w1[1] */
+
+    const u32x e = w1[0];
+    const u32x f = w1[1];
+
+    u32x Ke[16];
+    u32x Kf[16];
+
+    _des_crypt_keysetup (e, f, Ke, Kf, s_skb);
+
+    u32x iv[2];
+
+    _des_crypt_encrypt (iv, p2, Ke, Kf, s_SPtrans);
+
+    u32x z = 0; // the result has only two words; the remaining two compare slots receive zero
+
+    COMPARE_S_SIMD (iv[0], iv[1], z, z);
+  }
+}
+// m14100_s08: empty body, the kernel does nothing when launched (presumably only the _s04 variant is needed for this mode - TODO confirm)
+__kernel void m14100_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32
*d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +{ +} +/* m14100_s16: kernel entry point whose body is empty, so launching it performs no work. */ +__kernel void m14100_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +{ +} diff --git a/OpenCL/m14100_a1.cl b/OpenCL/m14100_a1.cl new file mode 100644 index 000000000..bc314c805 --- /dev/null +++ b/OpenCL/m14100_a1.cl @@ -0,0 +1,932 @@ +/** + * Authors.....: Jens Steube + * Gabriele Gristina + * Frans Lategan + * + * License.....: MIT + */ + +#define _DES_ + +#define NEW_SIMD_CODE + +#include "inc_vendor.cl" +#include "inc_hash_constants.h" +#include "inc_hash_functions.cl" +#include "inc_types.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +/* PERM_OP(a,b,tt,n,m): exchanges the bits of b selected by mask m with the bits of a selected by (m << n). tt is a scratch variable and is overwritten. a and b are modified in place. */ +#define PERM_OP(a,b,tt,n,m) \ +{ \ + tt = a >> n; \ + tt = tt ^ b; \ + tt = tt & m; \ + b = b ^ tt; \ + tt = tt << n; \ + a = a ^ tt; \ +} +/* HPERM_OP(a,tt,n,m): swap inside a single word; each bit of a selected by mask m is exchanged with the bit (16 + n) positions below it. tt is scratch. NOTE(review): a clean swap assumes m and (m >> (16 + n)) do not overlap, which holds for the one mask used in this file (0xcccc0000 with n = 2). */ +#define HPERM_OP(a,tt,n,m) \ +{ \ + tt = a << (16 + n); \ + tt = tt ^ a; \ +
tt = tt & m; \ + a = a ^ tt; \ + tt = tt >> (16 + n); \ + a = a ^ tt; \ +} +/* IP(l,r,tt): five PERM_OP bit exchanges between l and r, with tt as scratch. NOTE(review): the DES helpers visible in this file do not invoke IP or FP; presumably the plaintext block and digest arrive already permuted from the host - confirm. */ +#define IP(l,r,tt) \ +{ \ + PERM_OP (r, l, tt, 4, 0x0f0f0f0f); \ + PERM_OP (l, r, tt, 16, 0x0000ffff); \ + PERM_OP (r, l, tt, 2, 0x33333333); \ + PERM_OP (l, r, tt, 8, 0x00ff00ff); \ + PERM_OP (r, l, tt, 1, 0x55555555); \ +} +/* FP(l,r,tt): uses the same shift/mask pairs as IP, applied in the opposite order and with l and r exchanged in every step. */ +#define FP(l,r,tt) \ +{ \ + PERM_OP (l, r, tt, 1, 0x55555555); \ + PERM_OP (r, l, tt, 8, 0x00ff00ff); \ + PERM_OP (l, r, tt, 2, 0x33333333); \ + PERM_OP (r, l, tt, 16, 0x0000ffff); \ + PERM_OP (l, r, tt, 4, 0x0f0f0f0f); \ +} +/* c_SPtrans: 8 x 64 constant lookup table. The kernels copy it into the local array s_SPtrans, and the DES round code indexes sub-table n with 6-bit values through BOX. */ +__constant u32 c_SPtrans[8][64] = +{ + { + 0x02080800, 0x00080000, 0x02000002, 0x02080802, + 0x02000000, 0x00080802, 0x00080002, 0x02000002, + 0x00080802, 0x02080800, 0x02080000, 0x00000802, + 0x02000802, 0x02000000, 0x00000000, 0x00080002, + 0x00080000, 0x00000002, 0x02000800, 0x00080800, + 0x02080802, 0x02080000, 0x00000802, 0x02000800, + 0x00000002, 0x00000800, 0x00080800, 0x02080002, + 0x00000800, 0x02000802, 0x02080002, 0x00000000, + 0x00000000, 0x02080802, 0x02000800, 0x00080002, + 0x02080800, 0x00080000, 0x00000802, 0x02000800, + 0x02080002, 0x00000800, 0x00080800, 0x02000002, + 0x00080802, 0x00000002, 0x02000002, 0x02080000, + 0x02080802, 0x00080800, 0x02080000, 0x02000802, + 0x02000000, 0x00000802, 0x00080002, 0x00000000, + 0x00080000, 0x02000000, 0x02000802, 0x02080800, + 0x00000002, 0x02080002, 0x00000800, 0x00080802, + }, + { + 0x40108010, 0x00000000, 0x00108000, 0x40100000, + 0x40000010, 0x00008010, 0x40008000, 0x00108000, + 0x00008000, 0x40100010, 0x00000010, 0x40008000, + 0x00100010, 0x40108000, 0x40100000, 0x00000010, + 0x00100000, 0x40008010, 0x40100010, 0x00008000, + 0x00108010, 0x40000000, 0x00000000, 0x00100010, + 0x40008010, 0x00108010, 0x40108000, 0x40000010, + 0x40000000, 0x00100000, 0x00008010, 0x40108010, + 0x00100010, 0x40108000, 0x40008000, 0x00108010, + 0x40108010, 0x00100010, 0x40000010, 0x00000000, + 0x40000000, 0x00008010, 0x00100000, 0x40100010, + 0x00008000, 0x40000000, 0x00108010, 0x40008010, + 0x40108000,
0x00008000, 0x00000000, 0x40000010, + 0x00000010, 0x40108010, 0x00108000, 0x40100000, + 0x40100010, 0x00100000, 0x00008010, 0x40008000, + 0x40008010, 0x00000010, 0x40100000, 0x00108000, + }, + { + 0x04000001, 0x04040100, 0x00000100, 0x04000101, + 0x00040001, 0x04000000, 0x04000101, 0x00040100, + 0x04000100, 0x00040000, 0x04040000, 0x00000001, + 0x04040101, 0x00000101, 0x00000001, 0x04040001, + 0x00000000, 0x00040001, 0x04040100, 0x00000100, + 0x00000101, 0x04040101, 0x00040000, 0x04000001, + 0x04040001, 0x04000100, 0x00040101, 0x04040000, + 0x00040100, 0x00000000, 0x04000000, 0x00040101, + 0x04040100, 0x00000100, 0x00000001, 0x00040000, + 0x00000101, 0x00040001, 0x04040000, 0x04000101, + 0x00000000, 0x04040100, 0x00040100, 0x04040001, + 0x00040001, 0x04000000, 0x04040101, 0x00000001, + 0x00040101, 0x04000001, 0x04000000, 0x04040101, + 0x00040000, 0x04000100, 0x04000101, 0x00040100, + 0x04000100, 0x00000000, 0x04040001, 0x00000101, + 0x04000001, 0x00040101, 0x00000100, 0x04040000, + }, + { + 0x00401008, 0x10001000, 0x00000008, 0x10401008, + 0x00000000, 0x10400000, 0x10001008, 0x00400008, + 0x10401000, 0x10000008, 0x10000000, 0x00001008, + 0x10000008, 0x00401008, 0x00400000, 0x10000000, + 0x10400008, 0x00401000, 0x00001000, 0x00000008, + 0x00401000, 0x10001008, 0x10400000, 0x00001000, + 0x00001008, 0x00000000, 0x00400008, 0x10401000, + 0x10001000, 0x10400008, 0x10401008, 0x00400000, + 0x10400008, 0x00001008, 0x00400000, 0x10000008, + 0x00401000, 0x10001000, 0x00000008, 0x10400000, + 0x10001008, 0x00000000, 0x00001000, 0x00400008, + 0x00000000, 0x10400008, 0x10401000, 0x00001000, + 0x10000000, 0x10401008, 0x00401008, 0x00400000, + 0x10401008, 0x00000008, 0x10001000, 0x00401008, + 0x00400008, 0x00401000, 0x10400000, 0x10001008, + 0x00001008, 0x10000000, 0x10000008, 0x10401000, + }, + { + 0x08000000, 0x00010000, 0x00000400, 0x08010420, + 0x08010020, 0x08000400, 0x00010420, 0x08010000, + 0x00010000, 0x00000020, 0x08000020, 0x00010400, + 0x08000420, 0x08010020, 
0x08010400, 0x00000000, + 0x00010400, 0x08000000, 0x00010020, 0x00000420, + 0x08000400, 0x00010420, 0x00000000, 0x08000020, + 0x00000020, 0x08000420, 0x08010420, 0x00010020, + 0x08010000, 0x00000400, 0x00000420, 0x08010400, + 0x08010400, 0x08000420, 0x00010020, 0x08010000, + 0x00010000, 0x00000020, 0x08000020, 0x08000400, + 0x08000000, 0x00010400, 0x08010420, 0x00000000, + 0x00010420, 0x08000000, 0x00000400, 0x00010020, + 0x08000420, 0x00000400, 0x00000000, 0x08010420, + 0x08010020, 0x08010400, 0x00000420, 0x00010000, + 0x00010400, 0x08010020, 0x08000400, 0x00000420, + 0x00000020, 0x00010420, 0x08010000, 0x08000020, + }, + { + 0x80000040, 0x00200040, 0x00000000, 0x80202000, + 0x00200040, 0x00002000, 0x80002040, 0x00200000, + 0x00002040, 0x80202040, 0x00202000, 0x80000000, + 0x80002000, 0x80000040, 0x80200000, 0x00202040, + 0x00200000, 0x80002040, 0x80200040, 0x00000000, + 0x00002000, 0x00000040, 0x80202000, 0x80200040, + 0x80202040, 0x80200000, 0x80000000, 0x00002040, + 0x00000040, 0x00202000, 0x00202040, 0x80002000, + 0x00002040, 0x80000000, 0x80002000, 0x00202040, + 0x80202000, 0x00200040, 0x00000000, 0x80002000, + 0x80000000, 0x00002000, 0x80200040, 0x00200000, + 0x00200040, 0x80202040, 0x00202000, 0x00000040, + 0x80202040, 0x00202000, 0x00200000, 0x80002040, + 0x80000040, 0x80200000, 0x00202040, 0x00000000, + 0x00002000, 0x80000040, 0x80002040, 0x80202000, + 0x80200000, 0x00002040, 0x00000040, 0x80200040, + }, + { + 0x00004000, 0x00000200, 0x01000200, 0x01000004, + 0x01004204, 0x00004004, 0x00004200, 0x00000000, + 0x01000000, 0x01000204, 0x00000204, 0x01004000, + 0x00000004, 0x01004200, 0x01004000, 0x00000204, + 0x01000204, 0x00004000, 0x00004004, 0x01004204, + 0x00000000, 0x01000200, 0x01000004, 0x00004200, + 0x01004004, 0x00004204, 0x01004200, 0x00000004, + 0x00004204, 0x01004004, 0x00000200, 0x01000000, + 0x00004204, 0x01004000, 0x01004004, 0x00000204, + 0x00004000, 0x00000200, 0x01000000, 0x01004004, + 0x01000204, 0x00004204, 0x00004200, 0x00000000, + 
0x00000200, 0x01000004, 0x00000004, 0x01000200, + 0x00000000, 0x01000204, 0x01000200, 0x00004200, + 0x00000204, 0x00004000, 0x01004204, 0x01000000, + 0x01004200, 0x00000004, 0x00004004, 0x01004204, + 0x01000004, 0x01004200, 0x01004000, 0x00004004, + }, + { + 0x20800080, 0x20820000, 0x00020080, 0x00000000, + 0x20020000, 0x00800080, 0x20800000, 0x20820080, + 0x00000080, 0x20000000, 0x00820000, 0x00020080, + 0x00820080, 0x20020080, 0x20000080, 0x20800000, + 0x00020000, 0x00820080, 0x00800080, 0x20020000, + 0x20820080, 0x20000080, 0x00000000, 0x00820000, + 0x20000000, 0x00800000, 0x20020080, 0x20800080, + 0x00800000, 0x00020000, 0x20820000, 0x00000080, + 0x00800000, 0x00020000, 0x20000080, 0x20820080, + 0x00020080, 0x20000000, 0x00000000, 0x00820000, + 0x20800080, 0x20020080, 0x20020000, 0x00800080, + 0x20820000, 0x00000080, 0x00800080, 0x20020000, + 0x20820080, 0x00800000, 0x20800000, 0x20000080, + 0x00820000, 0x00020080, 0x20020080, 0x20800000, + 0x00000080, 0x20820000, 0x00820080, 0x00000000, + 0x20000000, 0x20800080, 0x00020000, 0x00820080, + } +}; +/* c_skb: 8 x 64 constant lookup table. The kernels copy it into the local array s_skb, and _des_crypt_keysetup indexes sub-tables 0..7 with 6-bit values through BOX to assemble the round keys. */ +__constant u32 c_skb[8][64] = +{ + { + 0x00000000, 0x00000010, 0x20000000, 0x20000010, + 0x00010000, 0x00010010, 0x20010000, 0x20010010, + 0x00000800, 0x00000810, 0x20000800, 0x20000810, + 0x00010800, 0x00010810, 0x20010800, 0x20010810, + 0x00000020, 0x00000030, 0x20000020, 0x20000030, + 0x00010020, 0x00010030, 0x20010020, 0x20010030, + 0x00000820, 0x00000830, 0x20000820, 0x20000830, + 0x00010820, 0x00010830, 0x20010820, 0x20010830, + 0x00080000, 0x00080010, 0x20080000, 0x20080010, + 0x00090000, 0x00090010, 0x20090000, 0x20090010, + 0x00080800, 0x00080810, 0x20080800, 0x20080810, + 0x00090800, 0x00090810, 0x20090800, 0x20090810, + 0x00080020, 0x00080030, 0x20080020, 0x20080030, + 0x00090020, 0x00090030, 0x20090020, 0x20090030, + 0x00080820, 0x00080830, 0x20080820, 0x20080830, + 0x00090820, 0x00090830, 0x20090820, 0x20090830, + }, + { + 0x00000000, 0x02000000, 0x00002000, 0x02002000, + 0x00200000, 0x02200000,
0x00202000, 0x02202000, + 0x00000004, 0x02000004, 0x00002004, 0x02002004, + 0x00200004, 0x02200004, 0x00202004, 0x02202004, + 0x00000400, 0x02000400, 0x00002400, 0x02002400, + 0x00200400, 0x02200400, 0x00202400, 0x02202400, + 0x00000404, 0x02000404, 0x00002404, 0x02002404, + 0x00200404, 0x02200404, 0x00202404, 0x02202404, + 0x10000000, 0x12000000, 0x10002000, 0x12002000, + 0x10200000, 0x12200000, 0x10202000, 0x12202000, + 0x10000004, 0x12000004, 0x10002004, 0x12002004, + 0x10200004, 0x12200004, 0x10202004, 0x12202004, + 0x10000400, 0x12000400, 0x10002400, 0x12002400, + 0x10200400, 0x12200400, 0x10202400, 0x12202400, + 0x10000404, 0x12000404, 0x10002404, 0x12002404, + 0x10200404, 0x12200404, 0x10202404, 0x12202404, + }, + { + 0x00000000, 0x00000001, 0x00040000, 0x00040001, + 0x01000000, 0x01000001, 0x01040000, 0x01040001, + 0x00000002, 0x00000003, 0x00040002, 0x00040003, + 0x01000002, 0x01000003, 0x01040002, 0x01040003, + 0x00000200, 0x00000201, 0x00040200, 0x00040201, + 0x01000200, 0x01000201, 0x01040200, 0x01040201, + 0x00000202, 0x00000203, 0x00040202, 0x00040203, + 0x01000202, 0x01000203, 0x01040202, 0x01040203, + 0x08000000, 0x08000001, 0x08040000, 0x08040001, + 0x09000000, 0x09000001, 0x09040000, 0x09040001, + 0x08000002, 0x08000003, 0x08040002, 0x08040003, + 0x09000002, 0x09000003, 0x09040002, 0x09040003, + 0x08000200, 0x08000201, 0x08040200, 0x08040201, + 0x09000200, 0x09000201, 0x09040200, 0x09040201, + 0x08000202, 0x08000203, 0x08040202, 0x08040203, + 0x09000202, 0x09000203, 0x09040202, 0x09040203, + }, + { + 0x00000000, 0x00100000, 0x00000100, 0x00100100, + 0x00000008, 0x00100008, 0x00000108, 0x00100108, + 0x00001000, 0x00101000, 0x00001100, 0x00101100, + 0x00001008, 0x00101008, 0x00001108, 0x00101108, + 0x04000000, 0x04100000, 0x04000100, 0x04100100, + 0x04000008, 0x04100008, 0x04000108, 0x04100108, + 0x04001000, 0x04101000, 0x04001100, 0x04101100, + 0x04001008, 0x04101008, 0x04001108, 0x04101108, + 0x00020000, 0x00120000, 0x00020100, 0x00120100, + 
0x00020008, 0x00120008, 0x00020108, 0x00120108, + 0x00021000, 0x00121000, 0x00021100, 0x00121100, + 0x00021008, 0x00121008, 0x00021108, 0x00121108, + 0x04020000, 0x04120000, 0x04020100, 0x04120100, + 0x04020008, 0x04120008, 0x04020108, 0x04120108, + 0x04021000, 0x04121000, 0x04021100, 0x04121100, + 0x04021008, 0x04121008, 0x04021108, 0x04121108, + }, + { + 0x00000000, 0x10000000, 0x00010000, 0x10010000, + 0x00000004, 0x10000004, 0x00010004, 0x10010004, + 0x20000000, 0x30000000, 0x20010000, 0x30010000, + 0x20000004, 0x30000004, 0x20010004, 0x30010004, + 0x00100000, 0x10100000, 0x00110000, 0x10110000, + 0x00100004, 0x10100004, 0x00110004, 0x10110004, + 0x20100000, 0x30100000, 0x20110000, 0x30110000, + 0x20100004, 0x30100004, 0x20110004, 0x30110004, + 0x00001000, 0x10001000, 0x00011000, 0x10011000, + 0x00001004, 0x10001004, 0x00011004, 0x10011004, + 0x20001000, 0x30001000, 0x20011000, 0x30011000, + 0x20001004, 0x30001004, 0x20011004, 0x30011004, + 0x00101000, 0x10101000, 0x00111000, 0x10111000, + 0x00101004, 0x10101004, 0x00111004, 0x10111004, + 0x20101000, 0x30101000, 0x20111000, 0x30111000, + 0x20101004, 0x30101004, 0x20111004, 0x30111004, + }, + { + 0x00000000, 0x08000000, 0x00000008, 0x08000008, + 0x00000400, 0x08000400, 0x00000408, 0x08000408, + 0x00020000, 0x08020000, 0x00020008, 0x08020008, + 0x00020400, 0x08020400, 0x00020408, 0x08020408, + 0x00000001, 0x08000001, 0x00000009, 0x08000009, + 0x00000401, 0x08000401, 0x00000409, 0x08000409, + 0x00020001, 0x08020001, 0x00020009, 0x08020009, + 0x00020401, 0x08020401, 0x00020409, 0x08020409, + 0x02000000, 0x0A000000, 0x02000008, 0x0A000008, + 0x02000400, 0x0A000400, 0x02000408, 0x0A000408, + 0x02020000, 0x0A020000, 0x02020008, 0x0A020008, + 0x02020400, 0x0A020400, 0x02020408, 0x0A020408, + 0x02000001, 0x0A000001, 0x02000009, 0x0A000009, + 0x02000401, 0x0A000401, 0x02000409, 0x0A000409, + 0x02020001, 0x0A020001, 0x02020009, 0x0A020009, + 0x02020401, 0x0A020401, 0x02020409, 0x0A020409, + }, + { + 0x00000000, 
0x00000100, 0x00080000, 0x00080100, + 0x01000000, 0x01000100, 0x01080000, 0x01080100, + 0x00000010, 0x00000110, 0x00080010, 0x00080110, + 0x01000010, 0x01000110, 0x01080010, 0x01080110, + 0x00200000, 0x00200100, 0x00280000, 0x00280100, + 0x01200000, 0x01200100, 0x01280000, 0x01280100, + 0x00200010, 0x00200110, 0x00280010, 0x00280110, + 0x01200010, 0x01200110, 0x01280010, 0x01280110, + 0x00000200, 0x00000300, 0x00080200, 0x00080300, + 0x01000200, 0x01000300, 0x01080200, 0x01080300, + 0x00000210, 0x00000310, 0x00080210, 0x00080310, + 0x01000210, 0x01000310, 0x01080210, 0x01080310, + 0x00200200, 0x00200300, 0x00280200, 0x00280300, + 0x01200200, 0x01200300, 0x01280200, 0x01280300, + 0x00200210, 0x00200310, 0x00280210, 0x00280310, + 0x01200210, 0x01200310, 0x01280210, 0x01280310, + }, + { + 0x00000000, 0x04000000, 0x00040000, 0x04040000, + 0x00000002, 0x04000002, 0x00040002, 0x04040002, + 0x00002000, 0x04002000, 0x00042000, 0x04042000, + 0x00002002, 0x04002002, 0x00042002, 0x04042002, + 0x00000020, 0x04000020, 0x00040020, 0x04040020, + 0x00000022, 0x04000022, 0x00040022, 0x04040022, + 0x00002020, 0x04002020, 0x00042020, 0x04042020, + 0x00002022, 0x04002022, 0x00042022, 0x04042022, + 0x00000800, 0x04000800, 0x00040800, 0x04040800, + 0x00000802, 0x04000802, 0x00040802, 0x04040802, + 0x00002800, 0x04002800, 0x00042800, 0x04042800, + 0x00002802, 0x04002802, 0x00042802, 0x04042802, + 0x00000820, 0x04000820, 0x00040820, 0x04040820, + 0x00000822, 0x04000822, 0x00040822, 0x04040822, + 0x00002820, 0x04002820, 0x00042820, 0x04042820, + 0x00002822, 0x04002822, 0x00042822, 0x04042822 + } +}; +/* BOX(i,n,S): table lookup S[n][i]. When VECT_SIZE is greater than 1 the index i is a vector; every component (.s0, .s1, ...) is looked up on its own and the results are packed into a u32x. */ +#if VECT_SIZE == 1 +#define BOX(i,n,S) (S)[(n)][(i)] +#elif VECT_SIZE == 2 +#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1]) +#elif VECT_SIZE == 4 +#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3]) +#elif VECT_SIZE == 8 +#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3],
(S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7]) +#elif VECT_SIZE == 16 +#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf]) +#endif +/* BOX1(i,S): the same per-component lookup as BOX, but for a one-dimensional table S. NOTE(review): no use of BOX1 is visible in this part of the file. */ +#if VECT_SIZE == 1 +#define BOX1(i,S) (S)[(i)] +#elif VECT_SIZE == 2 +#define BOX1(i,S) (u32x) ((S)[(i).s0], (S)[(i).s1]) +#elif VECT_SIZE == 4 +#define BOX1(i,S) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3]) +#elif VECT_SIZE == 8 +#define BOX1(i,S) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7]) +#elif VECT_SIZE == 16 +#define BOX1(i,S) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7], (S)[(i).s8], (S)[(i).s9], (S)[(i).sa], (S)[(i).sb], (S)[(i).sc], (S)[(i).sd], (S)[(i).se], (S)[(i).sf]) +#endif +/* _des_crypt_encrypt: runs 16 rounds (two per loop iteration) over the block in data[0..1], taking the round-key words Kc[i]/Kd[i] in ascending order, and writes the result to iv[0..1]. data, Kc and Kd are only read. s_SPtrans is the 8 x 64 lookup table in local memory. Both input words pass through rotl32 by 3 on entry and both halves through rotl32 by 29 on exit; no IP/FP permutation is applied inside this function. */ +void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __local u32 (*s_SPtrans)[64]) +{ + u32x r = rotl32 (data[0], 3u); + u32x l = rotl32 (data[1], 3u); + + u32x tt; +/* tt is declared but never read or written in this function. */ + #ifdef _unroll + #pragma unroll + #endif + for (u32 i = 0; i < 16; i += 2) + { + u32x u; + u32x t; +/* Round i: mix r with both round-key words (the second one after rotl32 by 28). Eight 6-bit slices of u and t then index the eight sub-tables, and the OR of the lookups is XORed into l. */ + u = Kc[i + 0] ^ r; + t = Kd[i + 0] ^ rotl32 (r, 28u); + + l ^= BOX (((u >> 2) & 0x3f), 0, s_SPtrans) + | BOX (((u >> 10) & 0x3f), 2, s_SPtrans) + | BOX (((u >> 18) & 0x3f), 4, s_SPtrans) + | BOX (((u >> 26) & 0x3f), 6, s_SPtrans) + | BOX (((t >> 2) & 0x3f), 1, s_SPtrans) + | BOX (((t >> 10) & 0x3f), 3, s_SPtrans) + | BOX (((t >> 18) & 0x3f), 5, s_SPtrans) + | BOX (((t >> 26) & 0x3f), 7, s_SPtrans); +/* Round i + 1: the same computation with the roles of l and r exchanged. */ + u = Kc[i + 1] ^ l; + t = Kd[i + 1] ^ rotl32 (l, 28u); + + r ^= BOX (((u >> 2) & 0x3f), 0, s_SPtrans) + | BOX (((u >> 10) & 0x3f), 2, s_SPtrans) + | BOX (((u >> 18) & 0x3f), 4, s_SPtrans) + | BOX (((u >> 26) & 0x3f), 6, s_SPtrans)
+ | BOX (((t >> 2) & 0x3f), 1, s_SPtrans) + | BOX (((t >> 10) & 0x3f), 3, s_SPtrans) + | BOX (((t >> 18) & 0x3f), 5, s_SPtrans) + | BOX (((t >> 26) & 0x3f), 7, s_SPtrans); + } +/* Store the halves swapped relative to how they were loaded (l into iv[0], r into iv[1]), each passed through rotl32 by 29. */ + iv[0] = rotl32 (l, 29u); + iv[1] = rotl32 (r, 29u); +} +/* _des_crypt_decrypt: same round structure, inputs and outputs as _des_crypt_encrypt, but the round-key words are consumed in descending order (Kc[15]/Kd[15] first, Kc[0]/Kd[0] last). data, Kc and Kd are only read; the result is written to iv[0..1]. */ +void _des_crypt_decrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __local u32 (*s_SPtrans)[64]) +{ + u32x r = rotl32 (data[0], 3u); + u32x l = rotl32 (data[1], 3u); + + u32x tt; +/* tt is declared but never read or written in this function. i runs 16, 14, ..., 2, so the key indices i - 1 and i - 2 stay within 0..15. */ + #ifdef _unroll + #pragma unroll + #endif + for (u32 i = 16; i > 0; i -= 2) + { + u32x u; + u32x t; + + u = Kc[i - 1] ^ r; + t = Kd[i - 1] ^ rotl32 (r, 28u); + + l ^= BOX (((u >> 2) & 0x3f), 0, s_SPtrans) + | BOX (((u >> 10) & 0x3f), 2, s_SPtrans) + | BOX (((u >> 18) & 0x3f), 4, s_SPtrans) + | BOX (((u >> 26) & 0x3f), 6, s_SPtrans) + | BOX (((t >> 2) & 0x3f), 1, s_SPtrans) + | BOX (((t >> 10) & 0x3f), 3, s_SPtrans) + | BOX (((t >> 18) & 0x3f), 5, s_SPtrans) + | BOX (((t >> 26) & 0x3f), 7, s_SPtrans); + + u = Kc[i - 2] ^ l; + t = Kd[i - 2] ^ rotl32 (l, 28u); + + r ^= BOX (((u >> 2) & 0x3f), 0, s_SPtrans) + | BOX (((u >> 10) & 0x3f), 2, s_SPtrans) + | BOX (((u >> 18) & 0x3f), 4, s_SPtrans) + | BOX (((u >> 26) & 0x3f), 6, s_SPtrans) + | BOX (((t >> 2) & 0x3f), 1, s_SPtrans) + | BOX (((t >> 10) & 0x3f), 3, s_SPtrans) + | BOX (((t >> 18) & 0x3f), 5, s_SPtrans) + | BOX (((t >> 26) & 0x3f), 7, s_SPtrans); + } + + iv[0] = rotl32 (l, 29u); + iv[1] = rotl32 (r, 29u); +} +/* _des_crypt_keysetup: expands the two 32-bit key words c and d into 16 pairs of round-key words, written to Kc[0..15] and Kd[0..15]. c and d are passed by value, so the caller's copies are untouched. s_skb is the 8 x 64 lookup table in local memory. */ +void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32 (*s_skb)[64]) +{ + u32x tt; +/* Rearrange the key bits with PERM_OP/HPERM_OP exchanges (tt is their scratch variable). */ + PERM_OP (d, c, tt, 4, 0x0f0f0f0f); + HPERM_OP (c, tt, 2, 0xcccc0000); + HPERM_OP (d, tt, 2, 0xcccc0000); + PERM_OP (d, c, tt, 1, 0x55555555); + PERM_OP (c, d, tt, 8, 0x00ff00ff); + PERM_OP (d, c, tt, 1, 0x55555555); +/* Reduce both words to 28 bits: d is rebuilt from its three low bytes (lowest and third byte exchanged) plus the top nibble of c, and c keeps its low 28 bits. */ + d = ((d & 0x000000ff) << 16) + | ((d & 0x0000ff00) << 0) + | ((d & 0x00ff0000) >> 16) + | ((c & 0xf0000000) >> 4); + + c = c & 0x0fffffff; +/* One pair of round-key words is produced per iteration. */ + #ifdef _unroll + #pragma unroll + #endif + for (u32 i = 0; i < 16; i++) + { + if ((i < 2) || (i == 8) || (i == 15))
+ { + c = ((c >> 1) | (c << 27)); + d = ((d >> 1) | (d << 27)); + } + else + { + c = ((c >> 2) | (c << 26)); + d = ((d >> 2) | (d << 26)); + } +/* c and d were rotated right inside their 28-bit width (by 1 in the branch above, by 2 otherwise); the masks drop the bits that landed above bit 27. */ + c = c & 0x0fffffff; + d = d & 0x0fffffff; +/* Collect scattered bits of c; the masks are chosen so that each BOX index assembled below is a 6-bit value. */ + const u32x c00 = (c >> 0) & 0x0000003f; + const u32x c06 = (c >> 6) & 0x00383003; + const u32x c07 = (c >> 7) & 0x0000003c; + const u32x c13 = (c >> 13) & 0x0000060f; + const u32x c20 = (c >> 20) & 0x00000001; +/* s: OR of the lookups in sub-tables 0..3, all driven by c. */ + u32x s = BOX (((c00 >> 0) & 0xff), 0, s_skb) + | BOX (((c06 >> 0) & 0xff) + |((c07 >> 0) & 0xff), 1, s_skb) + | BOX (((c13 >> 0) & 0xff) + |((c06 >> 8) & 0xff), 2, s_skb) + | BOX (((c20 >> 0) & 0xff) + |((c13 >> 8) & 0xff) + |((c06 >> 16) & 0xff), 3, s_skb); +/* Same scheme for d, using sub-tables 4..7. */ + const u32x d00 = (d >> 0) & 0x00003c3f; + const u32x d07 = (d >> 7) & 0x00003f03; + const u32x d21 = (d >> 21) & 0x0000000f; + const u32x d22 = (d >> 22) & 0x00000030; + + u32x t = BOX (((d00 >> 0) & 0xff), 4, s_skb) + | BOX (((d07 >> 0) & 0xff) + |((d00 >> 8) & 0xff), 5, s_skb) + | BOX (((d07 >> 8) & 0xff), 6, s_skb) + | BOX (((d21 >> 0) & 0xff) + |((d22 >> 0) & 0xff), 7, s_skb); +/* Kc[i] takes the low halves of s and t, Kd[i] the high halves; both words are then passed through rotl32 by 2. */ + Kc[i] = ((t << 16) | (s & 0x0000ffff)); + Kd[i] = ((s >> 16) | (t & 0xffff0000)); + + Kc[i] = rotl32 (Kc[i], 2u); + Kd[i] = rotl32 (Kd[i], 2u); + } +} +/* m14100_m04: kernel that combines the left word in pws[gid] with right words from combs_buf, uses the first six 32-bit words of the result as three DES keys (encrypt, decrypt, encrypt) over the block in salt_bufs[salt_pos].salt_buf_pc[0..1], and passes the two result words to COMPARE_M_SIMD. NOTE(review): presumably the multi-hash counterpart of m14100_s04 - confirm against the host code. */ +__kernel void m14100_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32
bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +{ + /** + * base + */ +/* gid selects this work-item's entry in pws; lid and lsz drive the table copy below. */ + const u32 gid = get_global_id (0); + const u32 lid = get_local_id (0); + const u32 lsz = get_local_size (0); + + /** + * shared + */ + + __local u32 s_SPtrans[8][64]; + __local u32 s_skb[8][64]; +/* Each work-item copies columns lid, lid + lsz, ... (below 64) of all eight rows of both constant tables into the local arrays. */ + for (u32 i = lid; i < 64; i += lsz) + { + s_SPtrans[0][i] = c_SPtrans[0][i]; + s_SPtrans[1][i] = c_SPtrans[1][i]; + s_SPtrans[2][i] = c_SPtrans[2][i]; + s_SPtrans[3][i] = c_SPtrans[3][i]; + s_SPtrans[4][i] = c_SPtrans[4][i]; + s_SPtrans[5][i] = c_SPtrans[5][i]; + s_SPtrans[6][i] = c_SPtrans[6][i]; + s_SPtrans[7][i] = c_SPtrans[7][i]; + + s_skb[0][i] = c_skb[0][i]; + s_skb[1][i] = c_skb[1][i]; + s_skb[2][i] = c_skb[2][i]; + s_skb[3][i] = c_skb[3][i]; + s_skb[4][i] = c_skb[4][i]; + s_skb[5][i] = c_skb[5][i]; + s_skb[6][i] = c_skb[6][i]; + s_skb[7][i] = c_skb[7][i]; + } +/* The barrier comes before the range check, so work-items with gid >= gid_max still take part in the copy and reach the barrier before returning. */ + barrier (CLK_LOCAL_MEM_FENCE); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 pw_buf0[4]; + u32 pw_buf1[4]; +/* Only the first six 32-bit words of the left word are loaded; the last two slots are zeroed. */ + pw_buf0[0] = pws[gid].i[ 0]; + pw_buf0[1] = pws[gid].i[ 1]; + pw_buf0[2] = pws[gid].i[ 2]; + pw_buf0[3] = pws[gid].i[ 3]; + pw_buf1[0] = pws[gid].i[ 4]; + pw_buf1[1] = pws[gid].i[ 5]; + pw_buf1[2] = 0; + pw_buf1[3] = 0; + + + const u32 pw_l_len = pws[gid].pw_len; + + /** + * salt + */ +/* These two words are the 64-bit block handed to the first DES pass in the loop below. */ + u32 salt_buf0[2]; + + salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0]; + salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1]; + + /** + * loop + */ +/* Each iteration handles VECT_SIZE right-hand words starting at il_pos. */ + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos); + + const u32x pw_len = pw_l_len + pw_r_len; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] =
pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; +/* Right-hand word: eight 32-bit words fetched from combs_buf for the current il_pos. */ + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); +/* With COMBINATOR_MODE_BASE_LEFT the right word is moved by the left word's length, otherwise the left word is moved by the right word's length. NOTE(review): switch_buffer_by_offset_le_VV presumably shifts the buffer by that many bytes - confirm in inc_common.cl. */ + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } +/* OR the two halves together; only w0[0..3] and w1[0..1] are assigned and used as key material. */ + u32x w0[4]; + u32x w1[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + + /* First Pass */ +/* Encrypt the salt block under the key words w0[0], w0[1]. */ + const u32x a = w0[0]; + const u32x b = w0[1]; + + u32x Ka[16]; + u32x Kb[16]; + + _des_crypt_keysetup (a, b, Ka, Kb, s_skb); + + u32x data[2]; + + data[0] = salt_buf0[0]; + data[1] = salt_buf0[1]; + + u32x p1[2]; + + _des_crypt_encrypt (p1, data, Ka, Kb, s_SPtrans); + + /* Second Pass */ +/* Decrypt the first-pass output under the key words w0[2], w0[3]. */ + const u32x c = w0[2]; + const u32x d = w0[3]; + + u32x Kc[16]; + u32x Kd[16]; + + _des_crypt_keysetup (c, d, Kc, Kd, s_skb); + + u32x p2[2]; + + _des_crypt_decrypt (p2, p1, Kc, Kd, s_SPtrans); + + /* Third Pass */ +/* Encrypt the second-pass output under the key words w1[0], w1[1]. */ + const u32x e = w1[0]; + const u32x f = w1[1]; + + u32x Ke[16]; + u32x Kf[16]; + + _des_crypt_keysetup (e, f, Ke, Kf, s_skb); + + u32x iv[2]; + + _des_crypt_encrypt (iv, p2, Ke, Kf, s_SPtrans); +/* Only two result words exist, so zero is passed for the macro's third and fourth arguments. */ + u32x z = 0; + + COMPARE_M_SIMD (iv[0], iv[1], z, z); + } +} +/* m14100_m08: kernel entry point whose body is empty, so launching it performs no work. */ +__kernel void m14100_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global
bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +{ +} +/* m14100_m16: kernel entry point whose body is empty, so launching it performs no work. */ +__kernel void m14100_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +{ +} +/* m14100_s04: builds candidates and runs the three DES passes exactly as m14100_m04 does, but also loads digest words DGST_R0 and DGST_R1 of digests_buf[digests_offset] into search[] and finishes with COMPARE_S_SIMD instead of COMPARE_M_SIMD. NOTE(review): presumably the single-hash variant - confirm against the host code. */ +__kernel void m14100_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void
*hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +{ + /** + * base + */ +/* gid selects this work-item's entry in pws; lid and lsz drive the table copy below. */ + const u32 gid = get_global_id (0); + const u32 lid = get_local_id (0); + const u32 lsz = get_local_size (0); + + /** + * shared + */ + + __local u32 s_SPtrans[8][64]; + __local u32 s_skb[8][64]; +/* Each work-item copies columns lid, lid + lsz, ... (below 64) of all eight rows of both constant tables into the local arrays. */ + for (u32 i = lid; i < 64; i += lsz) + { + s_SPtrans[0][i] = c_SPtrans[0][i]; + s_SPtrans[1][i] = c_SPtrans[1][i]; + s_SPtrans[2][i] = c_SPtrans[2][i]; + s_SPtrans[3][i] = c_SPtrans[3][i]; + s_SPtrans[4][i] = c_SPtrans[4][i]; + s_SPtrans[5][i] = c_SPtrans[5][i]; + s_SPtrans[6][i] = c_SPtrans[6][i]; + s_SPtrans[7][i] = c_SPtrans[7][i]; + + s_skb[0][i] = c_skb[0][i]; + s_skb[1][i] = c_skb[1][i]; + s_skb[2][i] = c_skb[2][i]; + s_skb[3][i] = c_skb[3][i]; + s_skb[4][i] = c_skb[4][i]; + s_skb[5][i] = c_skb[5][i]; + s_skb[6][i] = c_skb[6][i]; + s_skb[7][i] = c_skb[7][i]; + } +/* The barrier comes before the range check, so work-items with gid >= gid_max still take part in the copy and reach the barrier before returning. */ + barrier (CLK_LOCAL_MEM_FENCE); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 pw_buf0[4]; + u32 pw_buf1[4]; +/* Only the first six 32-bit words of the left word are loaded; the last two slots are zeroed. */ + pw_buf0[0] = pws[gid].i[ 0]; + pw_buf0[1] = pws[gid].i[ 1]; + pw_buf0[2] = pws[gid].i[ 2]; + pw_buf0[3] = pws[gid].i[ 3]; + pw_buf1[0] = pws[gid].i[ 4]; + pw_buf1[1] = pws[gid].i[ 5]; + pw_buf1[2] = 0; + pw_buf1[3] = 0; + + + const u32 pw_l_len = pws[gid].pw_len; + +
/** + * salt + */ +/* These two words are the 64-bit block handed to the first DES pass in the loop below. */ + u32 salt_buf0[2]; + + salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0]; + salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1]; + + /** + * digest + */ +/* Two digest words are loaded and the remaining two slots are zero. NOTE(review): search is not referenced by name in the rest of this kernel; presumably the COMPARE_S_SIMD macro reads it - confirm in inc_simd.cl. */ + const u32 search[4] = + { + digests_buf[digests_offset].digest_buf[DGST_R0], + digests_buf[digests_offset].digest_buf[DGST_R1], + 0, + 0 + }; + + /** + * loop + */ +/* Each iteration handles VECT_SIZE right-hand words starting at il_pos. */ + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos); + + const u32x pw_len = pw_l_len + pw_r_len; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; +/* Right-hand word: eight 32-bit words fetched from combs_buf for the current il_pos. */ + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); +/* With COMBINATOR_MODE_BASE_LEFT the right word is moved by the left word's length, otherwise the left word is moved by the right word's length. NOTE(review): switch_buffer_by_offset_le_VV presumably shifts the buffer by that many bytes - confirm in inc_common.cl. */ + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } +/* OR the two halves together; only w0[0..3] and w1[0..1] are assigned and used as key material. */ + u32x w0[4]; + u32x w1[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + + /* First Pass */ +/* Encrypt the salt block under the key words w0[0], w0[1]. */ + const u32x a = w0[0]; + const u32x b = w0[1]; + + u32x Ka[16]; + u32x Kb[16]; + + _des_crypt_keysetup
(a, b, Ka, Kb, s_skb); + + u32x data[2]; + + data[0] = salt_buf0[0]; + data[1] = salt_buf0[1]; + + u32x p1[2]; + + _des_crypt_encrypt (p1, data, Ka, Kb, s_SPtrans); + + /* Second Pass */ +/* Decrypt the first-pass output under the key words w0[2], w0[3]. */ + const u32x c = w0[2]; + const u32x d = w0[3]; + + u32x Kc[16]; + u32x Kd[16]; + + _des_crypt_keysetup (c, d, Kc, Kd, s_skb); + + u32x p2[2]; + + _des_crypt_decrypt (p2, p1, Kc, Kd, s_SPtrans); + + /* Third Pass */ +/* Encrypt the second-pass output under the key words w1[0], w1[1]. */ + const u32x e = w1[0]; + const u32x f = w1[1]; + + u32x Ke[16]; + u32x Kf[16]; + + _des_crypt_keysetup (e, f, Ke, Kf, s_skb); + + u32x iv[2]; + + _des_crypt_encrypt (iv, p2, Ke, Kf, s_SPtrans); +/* Only two result words exist, so zero is passed for the macro's third and fourth arguments. */ + u32x z = 0; + + COMPARE_S_SIMD (iv[0], iv[1], z, z); + } +} +/* m14100_s08: kernel entry point whose body is empty, so launching it performs no work. */ +__kernel void m14100_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +{ +} +/* m14100_s16: kernel entry point whose body is empty, so launching it performs no work. */ +__kernel void m14100_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32
*bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +{ +} diff --git a/OpenCL/m14100_a3.cl b/OpenCL/m14100_a3.cl new file mode 100644 index 000000000..522afd51c --- /dev/null +++ b/OpenCL/m14100_a3.cl @@ -0,0 +1,1163 @@ +/** + * Authors.....: Jens Steube + * Gabriele Gristina + * magnum + * Frans Lategan + * + * License.....: MIT + */ + +#define _DES_ + +#define NEW_SIMD_CODE + +#include "inc_vendor.cl" +#include "inc_hash_constants.h" +#include "inc_hash_functions.cl" +#include "inc_types.cl" +#include "inc_common.cl" +#include "inc_simd.cl" + +#define PERM_OP(a,b,tt,n,m) \ +{ \ + tt = a >> n; \ + tt = tt ^ b; \ + tt = tt & m; \ + b = b ^ tt; \ + tt = tt << n; \ + a = a ^ tt; \ +} + +#define HPERM_OP(a,tt,n,m) \ +{ \ + tt = a << (16 + n); \ + tt = tt ^ a; \ + tt = tt & m; \ + a = a ^ tt; \ + tt = tt >> (16 + n); \ + a = a ^ tt; \ +} + +#define IP(l,r,tt) \ +{ \ + PERM_OP (r, l, tt, 4, 0x0f0f0f0f); \ + PERM_OP (l, r, tt, 16, 0x0000ffff); \ + PERM_OP (r, l, tt, 2, 0x33333333); \ + PERM_OP (l, r, tt, 8, 0x00ff00ff); \ + PERM_OP (r, l, tt, 1, 0x55555555); \ +} + +#define FP(l,r,tt) \ +{ \ + PERM_OP (l, r, tt, 1, 0x55555555); \ + PERM_OP (r, l, tt, 8, 0x00ff00ff); \ + PERM_OP (l, r, tt, 2, 0x33333333); \ + PERM_OP (r, l, tt, 16, 0x0000ffff); \ + PERM_OP (l, r, tt, 4, 0x0f0f0f0f); \ +} + +__constant u32 c_SPtrans[8][64] = +{ + { + 0x02080800, 0x00080000, 0x02000002, 0x02080802, + 0x02000000, 0x00080802, 0x00080002, 
0x02000002, + 0x00080802, 0x02080800, 0x02080000, 0x00000802, + 0x02000802, 0x02000000, 0x00000000, 0x00080002, + 0x00080000, 0x00000002, 0x02000800, 0x00080800, + 0x02080802, 0x02080000, 0x00000802, 0x02000800, + 0x00000002, 0x00000800, 0x00080800, 0x02080002, + 0x00000800, 0x02000802, 0x02080002, 0x00000000, + 0x00000000, 0x02080802, 0x02000800, 0x00080002, + 0x02080800, 0x00080000, 0x00000802, 0x02000800, + 0x02080002, 0x00000800, 0x00080800, 0x02000002, + 0x00080802, 0x00000002, 0x02000002, 0x02080000, + 0x02080802, 0x00080800, 0x02080000, 0x02000802, + 0x02000000, 0x00000802, 0x00080002, 0x00000000, + 0x00080000, 0x02000000, 0x02000802, 0x02080800, + 0x00000002, 0x02080002, 0x00000800, 0x00080802, + }, + { + 0x40108010, 0x00000000, 0x00108000, 0x40100000, + 0x40000010, 0x00008010, 0x40008000, 0x00108000, + 0x00008000, 0x40100010, 0x00000010, 0x40008000, + 0x00100010, 0x40108000, 0x40100000, 0x00000010, + 0x00100000, 0x40008010, 0x40100010, 0x00008000, + 0x00108010, 0x40000000, 0x00000000, 0x00100010, + 0x40008010, 0x00108010, 0x40108000, 0x40000010, + 0x40000000, 0x00100000, 0x00008010, 0x40108010, + 0x00100010, 0x40108000, 0x40008000, 0x00108010, + 0x40108010, 0x00100010, 0x40000010, 0x00000000, + 0x40000000, 0x00008010, 0x00100000, 0x40100010, + 0x00008000, 0x40000000, 0x00108010, 0x40008010, + 0x40108000, 0x00008000, 0x00000000, 0x40000010, + 0x00000010, 0x40108010, 0x00108000, 0x40100000, + 0x40100010, 0x00100000, 0x00008010, 0x40008000, + 0x40008010, 0x00000010, 0x40100000, 0x00108000, + }, + { + 0x04000001, 0x04040100, 0x00000100, 0x04000101, + 0x00040001, 0x04000000, 0x04000101, 0x00040100, + 0x04000100, 0x00040000, 0x04040000, 0x00000001, + 0x04040101, 0x00000101, 0x00000001, 0x04040001, + 0x00000000, 0x00040001, 0x04040100, 0x00000100, + 0x00000101, 0x04040101, 0x00040000, 0x04000001, + 0x04040001, 0x04000100, 0x00040101, 0x04040000, + 0x00040100, 0x00000000, 0x04000000, 0x00040101, + 0x04040100, 0x00000100, 0x00000001, 0x00040000, + 0x00000101, 
0x00040001, 0x04040000, 0x04000101, + 0x00000000, 0x04040100, 0x00040100, 0x04040001, + 0x00040001, 0x04000000, 0x04040101, 0x00000001, + 0x00040101, 0x04000001, 0x04000000, 0x04040101, + 0x00040000, 0x04000100, 0x04000101, 0x00040100, + 0x04000100, 0x00000000, 0x04040001, 0x00000101, + 0x04000001, 0x00040101, 0x00000100, 0x04040000, + }, + { + 0x00401008, 0x10001000, 0x00000008, 0x10401008, + 0x00000000, 0x10400000, 0x10001008, 0x00400008, + 0x10401000, 0x10000008, 0x10000000, 0x00001008, + 0x10000008, 0x00401008, 0x00400000, 0x10000000, + 0x10400008, 0x00401000, 0x00001000, 0x00000008, + 0x00401000, 0x10001008, 0x10400000, 0x00001000, + 0x00001008, 0x00000000, 0x00400008, 0x10401000, + 0x10001000, 0x10400008, 0x10401008, 0x00400000, + 0x10400008, 0x00001008, 0x00400000, 0x10000008, + 0x00401000, 0x10001000, 0x00000008, 0x10400000, + 0x10001008, 0x00000000, 0x00001000, 0x00400008, + 0x00000000, 0x10400008, 0x10401000, 0x00001000, + 0x10000000, 0x10401008, 0x00401008, 0x00400000, + 0x10401008, 0x00000008, 0x10001000, 0x00401008, + 0x00400008, 0x00401000, 0x10400000, 0x10001008, + 0x00001008, 0x10000000, 0x10000008, 0x10401000, + }, + { + 0x08000000, 0x00010000, 0x00000400, 0x08010420, + 0x08010020, 0x08000400, 0x00010420, 0x08010000, + 0x00010000, 0x00000020, 0x08000020, 0x00010400, + 0x08000420, 0x08010020, 0x08010400, 0x00000000, + 0x00010400, 0x08000000, 0x00010020, 0x00000420, + 0x08000400, 0x00010420, 0x00000000, 0x08000020, + 0x00000020, 0x08000420, 0x08010420, 0x00010020, + 0x08010000, 0x00000400, 0x00000420, 0x08010400, + 0x08010400, 0x08000420, 0x00010020, 0x08010000, + 0x00010000, 0x00000020, 0x08000020, 0x08000400, + 0x08000000, 0x00010400, 0x08010420, 0x00000000, + 0x00010420, 0x08000000, 0x00000400, 0x00010020, + 0x08000420, 0x00000400, 0x00000000, 0x08010420, + 0x08010020, 0x08010400, 0x00000420, 0x00010000, + 0x00010400, 0x08010020, 0x08000400, 0x00000420, + 0x00000020, 0x00010420, 0x08010000, 0x08000020, + }, + { + 0x80000040, 0x00200040, 
0x00000000, 0x80202000, + 0x00200040, 0x00002000, 0x80002040, 0x00200000, + 0x00002040, 0x80202040, 0x00202000, 0x80000000, + 0x80002000, 0x80000040, 0x80200000, 0x00202040, + 0x00200000, 0x80002040, 0x80200040, 0x00000000, + 0x00002000, 0x00000040, 0x80202000, 0x80200040, + 0x80202040, 0x80200000, 0x80000000, 0x00002040, + 0x00000040, 0x00202000, 0x00202040, 0x80002000, + 0x00002040, 0x80000000, 0x80002000, 0x00202040, + 0x80202000, 0x00200040, 0x00000000, 0x80002000, + 0x80000000, 0x00002000, 0x80200040, 0x00200000, + 0x00200040, 0x80202040, 0x00202000, 0x00000040, + 0x80202040, 0x00202000, 0x00200000, 0x80002040, + 0x80000040, 0x80200000, 0x00202040, 0x00000000, + 0x00002000, 0x80000040, 0x80002040, 0x80202000, + 0x80200000, 0x00002040, 0x00000040, 0x80200040, + }, + { + 0x00004000, 0x00000200, 0x01000200, 0x01000004, + 0x01004204, 0x00004004, 0x00004200, 0x00000000, + 0x01000000, 0x01000204, 0x00000204, 0x01004000, + 0x00000004, 0x01004200, 0x01004000, 0x00000204, + 0x01000204, 0x00004000, 0x00004004, 0x01004204, + 0x00000000, 0x01000200, 0x01000004, 0x00004200, + 0x01004004, 0x00004204, 0x01004200, 0x00000004, + 0x00004204, 0x01004004, 0x00000200, 0x01000000, + 0x00004204, 0x01004000, 0x01004004, 0x00000204, + 0x00004000, 0x00000200, 0x01000000, 0x01004004, + 0x01000204, 0x00004204, 0x00004200, 0x00000000, + 0x00000200, 0x01000004, 0x00000004, 0x01000200, + 0x00000000, 0x01000204, 0x01000200, 0x00004200, + 0x00000204, 0x00004000, 0x01004204, 0x01000000, + 0x01004200, 0x00000004, 0x00004004, 0x01004204, + 0x01000004, 0x01004200, 0x01004000, 0x00004004, + }, + { + 0x20800080, 0x20820000, 0x00020080, 0x00000000, + 0x20020000, 0x00800080, 0x20800000, 0x20820080, + 0x00000080, 0x20000000, 0x00820000, 0x00020080, + 0x00820080, 0x20020080, 0x20000080, 0x20800000, + 0x00020000, 0x00820080, 0x00800080, 0x20020000, + 0x20820080, 0x20000080, 0x00000000, 0x00820000, + 0x20000000, 0x00800000, 0x20020080, 0x20800080, + 0x00800000, 0x00020000, 0x20820000, 0x00000080, + 
0x00800000, 0x00020000, 0x20000080, 0x20820080, + 0x00020080, 0x20000000, 0x00000000, 0x00820000, + 0x20800080, 0x20020080, 0x20020000, 0x00800080, + 0x20820000, 0x00000080, 0x00800080, 0x20020000, + 0x20820080, 0x00800000, 0x20800000, 0x20000080, + 0x00820000, 0x00020080, 0x20020080, 0x20800000, + 0x00000080, 0x20820000, 0x00820080, 0x00000000, + 0x20000000, 0x20800080, 0x00020000, 0x00820080, + } +}; + +__constant u32 c_skb[8][64] = +{ + { + 0x00000000, 0x00000010, 0x20000000, 0x20000010, + 0x00010000, 0x00010010, 0x20010000, 0x20010010, + 0x00000800, 0x00000810, 0x20000800, 0x20000810, + 0x00010800, 0x00010810, 0x20010800, 0x20010810, + 0x00000020, 0x00000030, 0x20000020, 0x20000030, + 0x00010020, 0x00010030, 0x20010020, 0x20010030, + 0x00000820, 0x00000830, 0x20000820, 0x20000830, + 0x00010820, 0x00010830, 0x20010820, 0x20010830, + 0x00080000, 0x00080010, 0x20080000, 0x20080010, + 0x00090000, 0x00090010, 0x20090000, 0x20090010, + 0x00080800, 0x00080810, 0x20080800, 0x20080810, + 0x00090800, 0x00090810, 0x20090800, 0x20090810, + 0x00080020, 0x00080030, 0x20080020, 0x20080030, + 0x00090020, 0x00090030, 0x20090020, 0x20090030, + 0x00080820, 0x00080830, 0x20080820, 0x20080830, + 0x00090820, 0x00090830, 0x20090820, 0x20090830, + }, + { + 0x00000000, 0x02000000, 0x00002000, 0x02002000, + 0x00200000, 0x02200000, 0x00202000, 0x02202000, + 0x00000004, 0x02000004, 0x00002004, 0x02002004, + 0x00200004, 0x02200004, 0x00202004, 0x02202004, + 0x00000400, 0x02000400, 0x00002400, 0x02002400, + 0x00200400, 0x02200400, 0x00202400, 0x02202400, + 0x00000404, 0x02000404, 0x00002404, 0x02002404, + 0x00200404, 0x02200404, 0x00202404, 0x02202404, + 0x10000000, 0x12000000, 0x10002000, 0x12002000, + 0x10200000, 0x12200000, 0x10202000, 0x12202000, + 0x10000004, 0x12000004, 0x10002004, 0x12002004, + 0x10200004, 0x12200004, 0x10202004, 0x12202004, + 0x10000400, 0x12000400, 0x10002400, 0x12002400, + 0x10200400, 0x12200400, 0x10202400, 0x12202400, + 0x10000404, 0x12000404, 0x10002404, 
0x12002404, + 0x10200404, 0x12200404, 0x10202404, 0x12202404, + }, + { + 0x00000000, 0x00000001, 0x00040000, 0x00040001, + 0x01000000, 0x01000001, 0x01040000, 0x01040001, + 0x00000002, 0x00000003, 0x00040002, 0x00040003, + 0x01000002, 0x01000003, 0x01040002, 0x01040003, + 0x00000200, 0x00000201, 0x00040200, 0x00040201, + 0x01000200, 0x01000201, 0x01040200, 0x01040201, + 0x00000202, 0x00000203, 0x00040202, 0x00040203, + 0x01000202, 0x01000203, 0x01040202, 0x01040203, + 0x08000000, 0x08000001, 0x08040000, 0x08040001, + 0x09000000, 0x09000001, 0x09040000, 0x09040001, + 0x08000002, 0x08000003, 0x08040002, 0x08040003, + 0x09000002, 0x09000003, 0x09040002, 0x09040003, + 0x08000200, 0x08000201, 0x08040200, 0x08040201, + 0x09000200, 0x09000201, 0x09040200, 0x09040201, + 0x08000202, 0x08000203, 0x08040202, 0x08040203, + 0x09000202, 0x09000203, 0x09040202, 0x09040203, + }, + { + 0x00000000, 0x00100000, 0x00000100, 0x00100100, + 0x00000008, 0x00100008, 0x00000108, 0x00100108, + 0x00001000, 0x00101000, 0x00001100, 0x00101100, + 0x00001008, 0x00101008, 0x00001108, 0x00101108, + 0x04000000, 0x04100000, 0x04000100, 0x04100100, + 0x04000008, 0x04100008, 0x04000108, 0x04100108, + 0x04001000, 0x04101000, 0x04001100, 0x04101100, + 0x04001008, 0x04101008, 0x04001108, 0x04101108, + 0x00020000, 0x00120000, 0x00020100, 0x00120100, + 0x00020008, 0x00120008, 0x00020108, 0x00120108, + 0x00021000, 0x00121000, 0x00021100, 0x00121100, + 0x00021008, 0x00121008, 0x00021108, 0x00121108, + 0x04020000, 0x04120000, 0x04020100, 0x04120100, + 0x04020008, 0x04120008, 0x04020108, 0x04120108, + 0x04021000, 0x04121000, 0x04021100, 0x04121100, + 0x04021008, 0x04121008, 0x04021108, 0x04121108, + }, + { + 0x00000000, 0x10000000, 0x00010000, 0x10010000, + 0x00000004, 0x10000004, 0x00010004, 0x10010004, + 0x20000000, 0x30000000, 0x20010000, 0x30010000, + 0x20000004, 0x30000004, 0x20010004, 0x30010004, + 0x00100000, 0x10100000, 0x00110000, 0x10110000, + 0x00100004, 0x10100004, 0x00110004, 0x10110004, + 
0x20100000, 0x30100000, 0x20110000, 0x30110000, + 0x20100004, 0x30100004, 0x20110004, 0x30110004, + 0x00001000, 0x10001000, 0x00011000, 0x10011000, + 0x00001004, 0x10001004, 0x00011004, 0x10011004, + 0x20001000, 0x30001000, 0x20011000, 0x30011000, + 0x20001004, 0x30001004, 0x20011004, 0x30011004, + 0x00101000, 0x10101000, 0x00111000, 0x10111000, + 0x00101004, 0x10101004, 0x00111004, 0x10111004, + 0x20101000, 0x30101000, 0x20111000, 0x30111000, + 0x20101004, 0x30101004, 0x20111004, 0x30111004, + }, + { + 0x00000000, 0x08000000, 0x00000008, 0x08000008, + 0x00000400, 0x08000400, 0x00000408, 0x08000408, + 0x00020000, 0x08020000, 0x00020008, 0x08020008, + 0x00020400, 0x08020400, 0x00020408, 0x08020408, + 0x00000001, 0x08000001, 0x00000009, 0x08000009, + 0x00000401, 0x08000401, 0x00000409, 0x08000409, + 0x00020001, 0x08020001, 0x00020009, 0x08020009, + 0x00020401, 0x08020401, 0x00020409, 0x08020409, + 0x02000000, 0x0A000000, 0x02000008, 0x0A000008, + 0x02000400, 0x0A000400, 0x02000408, 0x0A000408, + 0x02020000, 0x0A020000, 0x02020008, 0x0A020008, + 0x02020400, 0x0A020400, 0x02020408, 0x0A020408, + 0x02000001, 0x0A000001, 0x02000009, 0x0A000009, + 0x02000401, 0x0A000401, 0x02000409, 0x0A000409, + 0x02020001, 0x0A020001, 0x02020009, 0x0A020009, + 0x02020401, 0x0A020401, 0x02020409, 0x0A020409, + }, + { + 0x00000000, 0x00000100, 0x00080000, 0x00080100, + 0x01000000, 0x01000100, 0x01080000, 0x01080100, + 0x00000010, 0x00000110, 0x00080010, 0x00080110, + 0x01000010, 0x01000110, 0x01080010, 0x01080110, + 0x00200000, 0x00200100, 0x00280000, 0x00280100, + 0x01200000, 0x01200100, 0x01280000, 0x01280100, + 0x00200010, 0x00200110, 0x00280010, 0x00280110, + 0x01200010, 0x01200110, 0x01280010, 0x01280110, + 0x00000200, 0x00000300, 0x00080200, 0x00080300, + 0x01000200, 0x01000300, 0x01080200, 0x01080300, + 0x00000210, 0x00000310, 0x00080210, 0x00080310, + 0x01000210, 0x01000310, 0x01080210, 0x01080310, + 0x00200200, 0x00200300, 0x00280200, 0x00280300, + 0x01200200, 0x01200300, 
0x01280200, 0x01280300, + 0x00200210, 0x00200310, 0x00280210, 0x00280310, + 0x01200210, 0x01200310, 0x01280210, 0x01280310, + }, + { + 0x00000000, 0x04000000, 0x00040000, 0x04040000, + 0x00000002, 0x04000002, 0x00040002, 0x04040002, + 0x00002000, 0x04002000, 0x00042000, 0x04042000, + 0x00002002, 0x04002002, 0x00042002, 0x04042002, + 0x00000020, 0x04000020, 0x00040020, 0x04040020, + 0x00000022, 0x04000022, 0x00040022, 0x04040022, + 0x00002020, 0x04002020, 0x00042020, 0x04042020, + 0x00002022, 0x04002022, 0x00042022, 0x04042022, + 0x00000800, 0x04000800, 0x00040800, 0x04040800, + 0x00000802, 0x04000802, 0x00040802, 0x04040802, + 0x00002800, 0x04002800, 0x00042800, 0x04042800, + 0x00002802, 0x04002802, 0x00042802, 0x04042802, + 0x00000820, 0x04000820, 0x00040820, 0x04040820, + 0x00000822, 0x04000822, 0x00040822, 0x04040822, + 0x00002820, 0x04002820, 0x00042820, 0x04042820, + 0x00002822, 0x04002822, 0x00042822, 0x04042822 + } +}; + +#if VECT_SIZE == 1 +#define BOX(i,n,S) (S)[(n)][(i)] +#elif VECT_SIZE == 2 +#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1]) +#elif VECT_SIZE == 4 +#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3]) +#elif VECT_SIZE == 8 +#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7]) +#elif VECT_SIZE == 16 +#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf]) +#endif + +#if VECT_SIZE == 1 +#define BOX1(i,S) (S)[(i)] +#elif VECT_SIZE == 2 +#define BOX1(i,S) (u32x) ((S)[(i).s0], (S)[(i).s1]) +#elif VECT_SIZE == 4 +#define BOX1(i,S) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3]) +#elif VECT_SIZE == 8 
#define BOX1(i,S) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7])
#elif VECT_SIZE == 16
#define BOX1(i,S) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7], (S)[(i).s8], (S)[(i).s9], (S)[(i).sa], (S)[(i).sb], (S)[(i).sc], (S)[(i).sd], (S)[(i).se], (S)[(i).sf])
#endif

/**
 * Runs the 16 DES rounds over one 64-bit block, consuming the subkeys in
 * ascending order (0 .. 15).
 *
 * iv        - output: the two result words
 * data      - input: the two words of the block (not modified)
 * Kc, Kd    - 16 subkey word pairs as written by _des_crypt_keysetup
 * s_SPtrans - 8 x 64 lookup table, addressed through BOX (6-bit index, table number)
 *
 * Both input words are rotated left by 3 before the rounds and both output
 * words are rotated left by 29 afterwards; no IP/FP macro is applied here.
 * NOTE(review): the caller presumably supplies and expects data that is
 * already in permuted form - confirm.
 */
void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __local u32 (*s_SPtrans)[64])
{
  u32x r = rotl32 (data[0], 3u);
  u32x l = rotl32 (data[1], 3u);

  #ifdef _unroll
  #pragma unroll
  #endif
  for (u32 i = 0; i < 16; i += 2)
  {
    u32x u;
    u32x t;

    // first round of the pair: l is updated from r and subkey i

    u = Kc[i + 0] ^ r;
    t = Kd[i + 0] ^ rotl32 (r, 28u);

    l ^= BOX (((u >>  2) & 0x3f), 0, s_SPtrans)
       | BOX (((u >> 10) & 0x3f), 2, s_SPtrans)
       | BOX (((u >> 18) & 0x3f), 4, s_SPtrans)
       | BOX (((u >> 26) & 0x3f), 6, s_SPtrans)
       | BOX (((t >>  2) & 0x3f), 1, s_SPtrans)
       | BOX (((t >> 10) & 0x3f), 3, s_SPtrans)
       | BOX (((t >> 18) & 0x3f), 5, s_SPtrans)
       | BOX (((t >> 26) & 0x3f), 7, s_SPtrans);

    // second round of the pair: r is updated from the new l and subkey i + 1

    u = Kc[i + 1] ^ l;
    t = Kd[i + 1] ^ rotl32 (l, 28u);

    r ^= BOX (((u >>  2) & 0x3f), 0, s_SPtrans)
       | BOX (((u >> 10) & 0x3f), 2, s_SPtrans)
       | BOX (((u >> 18) & 0x3f), 4, s_SPtrans)
       | BOX (((u >> 26) & 0x3f), 6, s_SPtrans)
       | BOX (((t >>  2) & 0x3f), 1, s_SPtrans)
       | BOX (((t >> 10) & 0x3f), 3, s_SPtrans)
       | BOX (((t >> 18) & 0x3f), 5, s_SPtrans)
       | BOX (((t >> 26) & 0x3f), 7, s_SPtrans);
  }

  iv[0] = rotl32 (l, 29u);
  iv[1] = rotl32 (r, 29u);
}

/**
 * Same round structure as _des_crypt_encrypt, but the subkeys are consumed in
 * descending order (15 .. 0).
 *
 * iv        - output: the two result words
 * data      - input: the two words of the block (not modified)
 * Kc, Kd    - 16 subkey word pairs as written by _des_crypt_keysetup
 * s_SPtrans - 8 x 64 lookup table, addressed through BOX (6-bit index, table number)
 */
void _des_crypt_decrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __local u32 (*s_SPtrans)[64])
{
  u32x r = rotl32 (data[0], 3u);
  u32x l = rotl32 (data[1], 3u);

  #ifdef _unroll
  #pragma unroll
  #endif
  for (u32 i = 16; i > 0; i -= 2)
  {
    u32x u;
    u32x t;

    // first round of the pair: l is updated from r and subkey i - 1

    u = Kc[i - 1] ^ r;
    t = Kd[i - 1] ^ rotl32 (r, 28u);

    l ^= BOX (((u >>  2) & 0x3f), 0, s_SPtrans)
       | BOX (((u >> 10) & 0x3f), 2, s_SPtrans)
       | BOX (((u >> 18) & 0x3f), 4, s_SPtrans)
       | BOX (((u >> 26) & 0x3f), 6, s_SPtrans)
       | BOX (((t >>  2) & 0x3f), 1, s_SPtrans)
       | BOX (((t >> 10) & 0x3f), 3, s_SPtrans)
       | BOX (((t >> 18) & 0x3f), 5, s_SPtrans)
       | BOX (((t >> 26) & 0x3f), 7, s_SPtrans);

    // second round of the pair: r is updated from the new l and subkey i - 2

    u = Kc[i - 2] ^ l;
    t = Kd[i - 2] ^ rotl32 (l, 28u);

    r ^= BOX (((u >>  2) & 0x3f), 0, s_SPtrans)
       | BOX (((u >> 10) & 0x3f), 2, s_SPtrans)
       | BOX (((u >> 18) & 0x3f), 4, s_SPtrans)
       | BOX (((u >> 26) & 0x3f), 6, s_SPtrans)
       | BOX (((t >>  2) & 0x3f), 1, s_SPtrans)
       | BOX (((t >> 10) & 0x3f), 3, s_SPtrans)
       | BOX (((t >> 18) & 0x3f), 5, s_SPtrans)
       | BOX (((t >> 26) & 0x3f), 7, s_SPtrans);
  }

  iv[0] = rotl32 (l, 29u);
  iv[1] = rotl32 (r, 29u);
}

/**
 * Expands the two key words (c, d) into 16 subkey word pairs Kc[i] / Kd[i].
 *
 * c, d   - the two 32-bit key words (passed by value, modified locally)
 * Kc, Kd - output: 16 subkey words each
 * s_skb  - 8 x 64 lookup table, addressed through BOX (index, table number)
 */
void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32 (*s_skb)[64])
{
  u32x tt; // scratch word required by PERM_OP / HPERM_OP

  PERM_OP  (d, c, tt, 4, 0x0f0f0f0f);
  HPERM_OP (c,    tt, 2, 0xcccc0000);
  HPERM_OP (d,    tt, 2, 0xcccc0000);
  PERM_OP  (d, c, tt, 1, 0x55555555);
  PERM_OP  (c, d, tt, 8, 0x00ff00ff);
  PERM_OP  (d, c, tt, 1, 0x55555555);

  // d: swap bytes 0 and 2, keep byte 1, and take the top nibble of c as bits 24..27

  d = ((d & 0x000000ff) << 16)
    | ((d & 0x0000ff00) <<  0)
    | ((d & 0x00ff0000) >> 16)
    | ((c & 0xf0000000) >>  4);

  // from here on c and d are both 28-bit values

  c = c & 0x0fffffff;

  #ifdef _unroll
  #pragma unroll
  #endif
  for (u32 i = 0; i < 16; i++)
  {
    // 28-bit rotate right: by 1 bit for i = 0, 1, 8, 15 and by 2 bits otherwise

    if ((i < 2) || (i == 8) || (i == 15))
    {
      c = ((c >> 1) | (c << 27));
      d = ((d >> 1) | (d << 27));
    }
    else
    {
      c = ((c >> 2) | (c << 26));
      d = ((d >> 2) | (d << 26));
    }

    c = c & 0x0fffffff;
    d = d & 0x0fffffff;

    // gather the bit groups of c that form the indices into tables 0..3

    const u32x c00 = (c >>  0) & 0x0000003f;
    const u32x c06 = (c >>  6) & 0x00383003;
    const u32x c07 = (c >>  7) & 0x0000003c;
    const u32x c13 = (c >> 13) & 0x0000060f;
    const u32x c20 = (c >> 20) & 0x00000001;

    u32x s = BOX (((c00 >>  0) & 0xff), 0, s_skb)
           | BOX (((c06 >>  0) & 0xff)
                 |((c07 >>  0) & 0xff), 1, s_skb)
           | BOX (((c13 >>  0) & 0xff)
                 |((c06 >>  8) & 0xff), 2, s_skb)
           | BOX (((c20 >>  0) & 0xff)
                 |((c13 >>  8) & 0xff)
                 |((c06 >> 16) & 0xff), 3, s_skb);

    // gather the bit groups of d that form the indices into tables 4..7

    const u32x d00 = (d >>  0) & 0x00003c3f;
    const u32x d07 = (d >>  7) & 0x00003f03;
    const u32x d21 = (d >> 21) & 0x0000000f;
    const u32x d22 = (d >> 22) & 0x00000030;

    u32x t = BOX (((d00 >>  0) & 0xff), 4, s_skb)
           | BOX (((d07 >>  0) & 0xff)
                 |((d00 >>  8) & 0xff), 5, s_skb)
           | BOX (((d07 >>  8) & 0xff), 6, s_skb)
           | BOX (((d21 >>  0) & 0xff)
                 |((d22 >>  0) & 0xff), 7, s_skb);

    // interleave the 16-bit halves of s and t, then rotate each subkey word left by 2

    Kc[i] = ((t << 16) | (s & 0x0000ffff));
    Kd[i] = ((s >> 16) | (t & 0xffff0000));

    Kc[i] = rotl32 (Kc[i], 2u);
    Kd[i] = rotl32 (Kd[i], 2u);
  }
}

/**
 * Candidate loop that ends in COMPARE_M_SIMD.
 *
 * For every step of il_pos the first key word is w[0] OR-ed with
 * words_buf_r[il_pos / VECT_SIZE]; the block salt_buf_pc[0..1] of the selected
 * salt is then encrypted with key (w0, w[1]), decrypted with key (w[2], w[3])
 * and encrypted with key (w[4], w[5]). The two result words are handed to
 * COMPARE_M_SIMD, which is defined outside this chunk.
 *
 * pw_len and most buffer parameters are not referenced directly in this body.
 * NOTE(review): some of them, as well as gid and lid, are presumably consumed
 * by the COMPARE_M_SIMD macro - confirm before removing anything.
 */
void m14100m (__local u32 (*s_SPtrans)[64], __local u32 (*s_skb)[64], u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset)
{
  /**
   * modifier
   */

  const u32 gid = get_global_id (0);
  const u32 lid = get_local_id (0);

  /**
   * salt
   */

  u32 salt_buf0[2];

  salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0];
  salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1];

  /**
   * loop
   */

  u32 w0l = w[0];

  u32 w1 = w[1];

  for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
  {
    // only the first key word varies between iterations

    const u32x w0r = words_buf_r[il_pos / VECT_SIZE];

    const u32x w0 = w0l | w0r;

    /* First Pass */

    const u32x a = (w0);
    const u32x b = (w1);

    u32x Ka[16];
    u32x Kb[16];

    _des_crypt_keysetup (a, b, Ka, Kb, s_skb);

    u32x data[2];

    data[0] = salt_buf0[0];
    data[1] = salt_buf0[1];

    u32x p1[2];

    _des_crypt_encrypt (p1, data, Ka, Kb, s_SPtrans);

    /* Second Pass */

    const u32x c = (w[2]);
    const u32x d = (w[3]);

    u32x Kc[16];
    u32x Kd[16];

    _des_crypt_keysetup (c, d, Kc, Kd, s_skb);

    u32x p2[2];

    _des_crypt_decrypt (p2, p1, Kc, Kd, s_SPtrans);

    /* Third Pass */

    const u32x e = (w[4]);
    const u32x f = (w[5]);

    u32x Ke[16];
    u32x Kf[16];

    _des_crypt_keysetup (e, f, Ke, Kf, s_skb);

    u32x iv[2];

    _des_crypt_encrypt (iv, p2, Ke, Kf, s_SPtrans);

    u32x z = 0;

    COMPARE_M_SIMD (iv[0], iv[1], z, z);
  }
}

/**
 * Candidate loop that ends in COMPARE_S_SIMD.
 *
 * Performs the same encrypt / decrypt / encrypt sequence as m14100m. In
 * addition it loads the words DGST_R0 and DGST_R1 of
 * digests_buf[digests_offset] into the local array search before the loop.
 *
 * search, gid and lid are not referenced directly in this body.
 * NOTE(review): they are presumably consumed by the COMPARE_S_SIMD macro,
 * which is defined outside this chunk - confirm before removing anything.
 */
void m14100s (__local u32 (*s_SPtrans)[64], __local u32 (*s_skb)[64], u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset)
{
  /**
   * modifier
   */

  const u32 gid = get_global_id (0);
  const u32 lid = get_local_id (0);

  /**
   * salt
   */

  u32 salt_buf0[2];

  salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0];
  salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1];

  /**
   * digest
   */

  const u32 search[4] =
  {
    digests_buf[digests_offset].digest_buf[DGST_R0],
    digests_buf[digests_offset].digest_buf[DGST_R1],
    0,
    0
  };

  /**
   * loop
   */

  u32 w0l = w[0];

  u32 w1 = w[1];

  for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
  {
    // only the first key word varies between iterations

    const u32x w0r = words_buf_r[il_pos / VECT_SIZE];

    const u32x w0 = w0l | w0r;

    /* First Pass */

    const u32x a = (w0);
    const u32x b = (w1);

    u32x Ka[16];
    u32x Kb[16];

    _des_crypt_keysetup (a, b, Ka, Kb, s_skb);

    u32x data[2];

    data[0] = salt_buf0[0];
    data[1] = salt_buf0[1];

    u32x p1[2];

    _des_crypt_encrypt (p1, data, Ka, Kb, s_SPtrans);

    /* Second Pass */

    const u32x c = (w[2]);
    const u32x d = (w[3]);

    u32x Kc[16];
    u32x Kd[16];

    _des_crypt_keysetup (c, d, Kc, Kd, s_skb);

    u32x p2[2];

    _des_crypt_decrypt (p2, p1, Kc, Kd, s_SPtrans);

    /* Third Pass */

    const u32x e = (w[4]);
    const u32x f = (w[5]);

    u32x Ke[16];
    u32x Kf[16];

    _des_crypt_keysetup (e, f, Ke, Kf, s_skb);

    u32x iv[2];

    _des_crypt_encrypt (iv, p2, Ke, Kf, s_SPtrans);

    u32x z = 0;

    COMPARE_S_SIMD (iv[0], iv[1], z, z);
  }
}

/**
 * Kernel entry point: stages both lookup tables in __local memory, loads the
 * first six words of pws[gid] and forwards everything to m14100m.
 *
 * combs_mode is accepted but not referenced in this body.
 */
__kernel void m14100_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
  /**
   * base
   */

  const u32 gid = get_global_id (0);
  const u32 lid = get_local_id (0);
  const u32 lsz = get_local_size (0);

  /**
   * shared
   */

  __local u32 s_SPtrans[8][64];
  __local u32 s_skb[8][64];

  // each work-item copies columns lid, lid + lsz, ... of all eight rows of both tables

  for (u32 i = lid; i < 64; i += lsz)
  {
    s_SPtrans[0][i] = c_SPtrans[0][i];
    s_SPtrans[1][i] = c_SPtrans[1][i];
    s_SPtrans[2][i] = c_SPtrans[2][i];
    s_SPtrans[3][i] = c_SPtrans[3][i];
    s_SPtrans[4][i] = c_SPtrans[4][i];
    s_SPtrans[5][i] = c_SPtrans[5][i];
    s_SPtrans[6][i] = c_SPtrans[6][i];
    s_SPtrans[7][i] = c_SPtrans[7][i];

    s_skb[0][i] = c_skb[0][i];
    s_skb[1][i] = c_skb[1][i];
    s_skb[2][i] = c_skb[2][i];
    s_skb[3][i] = c_skb[3][i];
    s_skb[4][i] = c_skb[4][i];
    s_skb[5][i] = c_skb[5][i];
    s_skb[6][i] = c_skb[6][i];
    s_skb[7][i] = c_skb[7][i];
  }

  barrier (CLK_LOCAL_MEM_FENCE);

  // the range check comes after the barrier, so work-items with gid >= gid_max
  // still take part in the table copy and reach the barrier before returning

  if (gid >= gid_max) return;

  /**
   * base
   */

  u32 w[16];

  // words 0..5 are the ones read as the three key pairs by m14100m; the rest are zeroed

  w[ 0] = pws[gid].i[ 0];
  w[ 1] = pws[gid].i[ 1];
  w[ 2] = pws[gid].i[ 2];
  w[ 3] = pws[gid].i[ 3];
  w[ 4] = pws[gid].i[ 4];
  w[ 5] = pws[gid].i[ 5];
  w[ 6] = 0;
  w[ 7] = 0;
  w[ 8] = 0;
  w[ 9] = 0;
  w[10] = 0;
  w[11] = 0;
  w[12] = 0;
  w[13] = 0;
  w[14] = 0;
  w[15] = 0;

  const u32 pw_len = pws[gid].pw_len;

  /**
   * main
   */

  m14100m (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}

__kernel
void m14100_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +{ + /** + * base + */ + + const u32 gid = get_global_id (0); + const u32 lid = get_local_id (0); + const u32 lsz = get_local_size (0); + + /** + * shared + */ + + __local u32 s_SPtrans[8][64]; + __local u32 s_skb[8][64]; + + for (u32 i = lid; i < 64; i += lsz) + { + s_SPtrans[0][i] = c_SPtrans[0][i]; + s_SPtrans[1][i] = c_SPtrans[1][i]; + s_SPtrans[2][i] = c_SPtrans[2][i]; + s_SPtrans[3][i] = c_SPtrans[3][i]; + s_SPtrans[4][i] = c_SPtrans[4][i]; + s_SPtrans[5][i] = c_SPtrans[5][i]; + s_SPtrans[6][i] = c_SPtrans[6][i]; + s_SPtrans[7][i] = c_SPtrans[7][i]; + + s_skb[0][i] = c_skb[0][i]; + s_skb[1][i] = c_skb[1][i]; + s_skb[2][i] = c_skb[2][i]; + s_skb[3][i] = c_skb[3][i]; + s_skb[4][i] = c_skb[4][i]; + s_skb[5][i] = c_skb[5][i]; + s_skb[6][i] = c_skb[6][i]; + s_skb[7][i] = c_skb[7][i]; + } + + barrier (CLK_LOCAL_MEM_FENCE); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; 
+ w[ 5] = pws[gid].i[ 5]; + w[ 6] = 0; + w[ 7] = 0; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = 0; + + const u32 pw_len = pws[gid].pw_len; + + /** + * main + */ + + m14100m (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset); +} + +__kernel void m14100_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +{ + /** + * base + */ + + const u32 gid = get_global_id (0); + const u32 lid = get_local_id (0); + const u32 lsz = get_local_size (0); + + /** + * shared + */ + + __local u32 s_SPtrans[8][64]; + __local u32 s_skb[8][64]; + + for (u32 i = lid; i < 64; i += lsz) + { + s_SPtrans[0][i] = c_SPtrans[0][i]; + 
s_SPtrans[1][i] = c_SPtrans[1][i]; + s_SPtrans[2][i] = c_SPtrans[2][i]; + s_SPtrans[3][i] = c_SPtrans[3][i]; + s_SPtrans[4][i] = c_SPtrans[4][i]; + s_SPtrans[5][i] = c_SPtrans[5][i]; + s_SPtrans[6][i] = c_SPtrans[6][i]; + s_SPtrans[7][i] = c_SPtrans[7][i]; + + s_skb[0][i] = c_skb[0][i]; + s_skb[1][i] = c_skb[1][i]; + s_skb[2][i] = c_skb[2][i]; + s_skb[3][i] = c_skb[3][i]; + s_skb[4][i] = c_skb[4][i]; + s_skb[5][i] = c_skb[5][i]; + s_skb[6][i] = c_skb[6][i]; + s_skb[7][i] = c_skb[7][i]; + } + + barrier (CLK_LOCAL_MEM_FENCE); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = 0; + w[ 7] = 0; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = 0; + + const u32 pw_len = pws[gid].pw_len; + + /** + * main + */ + + m14100m (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset); +} + +__kernel void m14100_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t 
*salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +{ + /** + * base + */ + + const u32 gid = get_global_id (0); + const u32 lid = get_local_id (0); + const u32 lsz = get_local_size (0); + + /** + * shared + */ + + __local u32 s_SPtrans[8][64]; + __local u32 s_skb[8][64]; + + for (u32 i = lid; i < 64; i += lsz) + { + s_SPtrans[0][i] = c_SPtrans[0][i]; + s_SPtrans[1][i] = c_SPtrans[1][i]; + s_SPtrans[2][i] = c_SPtrans[2][i]; + s_SPtrans[3][i] = c_SPtrans[3][i]; + s_SPtrans[4][i] = c_SPtrans[4][i]; + s_SPtrans[5][i] = c_SPtrans[5][i]; + s_SPtrans[6][i] = c_SPtrans[6][i]; + s_SPtrans[7][i] = c_SPtrans[7][i]; + + s_skb[0][i] = c_skb[0][i]; + s_skb[1][i] = c_skb[1][i]; + s_skb[2][i] = c_skb[2][i]; + s_skb[3][i] = c_skb[3][i]; + s_skb[4][i] = c_skb[4][i]; + s_skb[5][i] = c_skb[5][i]; + s_skb[6][i] = c_skb[6][i]; + s_skb[7][i] = c_skb[7][i]; + } + + barrier (CLK_LOCAL_MEM_FENCE); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = 0; + w[ 7] = 0; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = 0; + + const u32 pw_len = pws[gid].pw_len; + + /** + * main + */ + + m14100s (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, 
d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset); +} + +__kernel void m14100_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +{ + /** + * base + */ + + const u32 gid = get_global_id (0); + const u32 lid = get_local_id (0); + const u32 lsz = get_local_size (0); + + /** + * shared + */ + + __local u32 s_SPtrans[8][64]; + __local u32 s_skb[8][64]; + + for (u32 i = lid; i < 64; i += lsz) + { + s_SPtrans[0][i] = c_SPtrans[0][i]; + s_SPtrans[1][i] = c_SPtrans[1][i]; + s_SPtrans[2][i] = c_SPtrans[2][i]; + s_SPtrans[3][i] = c_SPtrans[3][i]; + s_SPtrans[4][i] = c_SPtrans[4][i]; + s_SPtrans[5][i] = c_SPtrans[5][i]; + s_SPtrans[6][i] = c_SPtrans[6][i]; + s_SPtrans[7][i] = c_SPtrans[7][i]; + + s_skb[0][i] = c_skb[0][i]; + s_skb[1][i] = c_skb[1][i]; + s_skb[2][i] = c_skb[2][i]; + s_skb[3][i] = c_skb[3][i]; + s_skb[4][i] = c_skb[4][i]; + s_skb[5][i] = c_skb[5][i]; + s_skb[6][i] = c_skb[6][i]; + s_skb[7][i] = c_skb[7][i]; + } + + barrier (CLK_LOCAL_MEM_FENCE); + + if 
(gid >= gid_max) return; + + /** + * base + */ + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = 0; + w[ 7] = 0; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = 0; + + const u32 pw_len = pws[gid].pw_len; + + /** + * main + */ + + m14100s (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset); +} + +__kernel void m14100_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +{ + /** + * base + */ + + const u32 gid = get_global_id (0); + const u32 lid = get_local_id (0); + const u32 lsz = 
get_local_size (0); + + /** + * shared + */ + + __local u32 s_SPtrans[8][64]; + __local u32 s_skb[8][64]; + + for (u32 i = lid; i < 64; i += lsz) + { + s_SPtrans[0][i] = c_SPtrans[0][i]; + s_SPtrans[1][i] = c_SPtrans[1][i]; + s_SPtrans[2][i] = c_SPtrans[2][i]; + s_SPtrans[3][i] = c_SPtrans[3][i]; + s_SPtrans[4][i] = c_SPtrans[4][i]; + s_SPtrans[5][i] = c_SPtrans[5][i]; + s_SPtrans[6][i] = c_SPtrans[6][i]; + s_SPtrans[7][i] = c_SPtrans[7][i]; + + s_skb[0][i] = c_skb[0][i]; + s_skb[1][i] = c_skb[1][i]; + s_skb[2][i] = c_skb[2][i]; + s_skb[3][i] = c_skb[3][i]; + s_skb[4][i] = c_skb[4][i]; + s_skb[5][i] = c_skb[5][i]; + s_skb[6][i] = c_skb[6][i]; + s_skb[7][i] = c_skb[7][i]; + } + + barrier (CLK_LOCAL_MEM_FENCE); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = 0; + w[ 7] = 0; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = 0; + + const u32 pw_len = pws[gid].pw_len; + + /** + * main + */ + + m14100s (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset); +} diff --git a/docs/changes.txt b/docs/changes.txt index b24af549c..6d4b97ebb 100644 --- a/docs/changes.txt +++ b/docs/changes.txt @@ -1,3 +1,12 @@ +* changes v3.10 -> v3.xx: + +## +## Algorithms +## + +- Added new hash-mode 14000 = DES (PT = $salt, key = $pass) +- Added new hash-mode 14100 = 3DES (PT = $salt, key = $pass) + * changes v3.00 -> v3.10: ## diff --git
a/docs/readme.txt b/docs/readme.txt index e81ddab67..f736df126 100644 --- a/docs/readme.txt +++ b/docs/readme.txt @@ -45,6 +45,8 @@ NVidia users require NVidia drivers 346.59 or later (recommended 367.27 or later - SipHash - RipeMD160 - Whirlpool +- DES (PT = $salt, key = $pass) +- 3DES (PT = $salt, key = $pass) - GOST R 34.11-94 - GOST R 34.11-2012 (Streebog) 256-bit - GOST R 34.11-2012 (Streebog) 512-bit diff --git a/include/shared.h b/include/shared.h index 0808d2a05..303eb4726 100644 --- a/include/shared.h +++ b/include/shared.h @@ -370,6 +370,8 @@ extern hc_thread_mutex_t mux_display; #define HT_13600 "WinZip" #define HT_13800 "Windows 8+ phone PIN/Password" #define HT_13900 "OpenCart" +#define HT_14000 "DES (PT = $salt, key = $pass)" +#define HT_14100 "3DES (PT = $salt, key = $pass)" #define HT_00011 "Joomla < 2.5.18" #define HT_00012 "PostgreSQL" @@ -736,6 +738,10 @@ extern hc_thread_mutex_t mux_display; #define DISPLAY_LEN_MAX_13800 64 + 1 + 256 #define DISPLAY_LEN_MIN_13900 40 + 1 + 9 #define DISPLAY_LEN_MAX_13900 40 + 1 + 9 +#define DISPLAY_LEN_MIN_14000 16 + 1 + 16 +#define DISPLAY_LEN_MAX_14000 16 + 1 + 16 +#define DISPLAY_LEN_MIN_14100 16 + 1 + 16 +#define DISPLAY_LEN_MAX_14100 16 + 1 + 16 #define DISPLAY_LEN_MIN_11 32 + 1 + 16 #define DISPLAY_LEN_MAX_11 32 + 1 + 32 @@ -853,6 +859,7 @@ extern hc_thread_mutex_t mux_display; #define HASH_TYPE_RAR3HP 49 #define HASH_TYPE_KRB5TGS 50 #define HASH_TYPE_STDOUT 51 +#define HASH_TYPE_DES 52 #define KERN_TYPE_MD5 0 #define KERN_TYPE_MD5_PWSLT 10 @@ -1009,6 +1016,8 @@ extern hc_thread_mutex_t mux_display; #define KERN_TYPE_ZIP2 13600 #define KERN_TYPE_WIN8PHONE 13800 #define KERN_TYPE_OPENCART 13900 +#define KERN_TYPE_DES 14000 +#define KERN_TYPE_3DES 14100 /** * signatures @@ -1522,6 +1531,7 @@ int cisco4_parse_hash (char *input_buf, uint input_len, hash_t *hash int dcc_parse_hash (char *input_buf, uint input_len, hash_t *hash_buf); int dcc2_parse_hash (char *input_buf, uint input_len, hash_t *hash_buf); int 
descrypt_parse_hash (char *input_buf, uint input_len, hash_t *hash_buf); +int des_parse_hash (char *input_buf, uint input_len, hash_t *hash_buf); int episerver_parse_hash (char *input_buf, uint input_len, hash_t *hash_buf); int ipb2_parse_hash (char *input_buf, uint input_len, hash_t *hash_buf); int joomla_parse_hash (char *input_buf, uint input_len, hash_t *hash_buf); diff --git a/src/hashcat.c b/src/hashcat.c index 077e18775..dacea7d84 100644 --- a/src/hashcat.c +++ b/src/hashcat.c @@ -154,7 +154,7 @@ double TARGET_MS_PROFILE[4] = { 2, 12, 96, 480 }; #define MAX_DICTSTAT 10000 -#define NUM_DEFAULT_BENCHMARK_ALGORITHMS 144 +#define NUM_DEFAULT_BENCHMARK_ALGORITHMS 146 #define NVIDIA_100PERCENTCPU_WORKAROUND 100 @@ -194,6 +194,8 @@ static uint default_benchmark_algorithms[NUM_DEFAULT_BENCHMARK_ALGORITHMS] = 6900, 11700, 11800, + 14000, + 14100, 400, 8900, 11900, @@ -499,6 +501,8 @@ const char *USAGE_BIG[] = " 1460 | HMAC-SHA256 (key = $salt) | Raw Hash, Authenticated", " 1750 | HMAC-SHA512 (key = $pass) | Raw Hash, Authenticated", " 1760 | HMAC-SHA512 (key = $salt) | Raw Hash, Authenticated", + " 14000 | DES (PT = $salt, key = $pass) | Raw Cipher, Known-Plaintext attack", + " 14100 | 3DES (PT = $salt, key = $pass) | Raw Cipher, Known-Plaintext attack", " 400 | phpass | Generic KDF", " 8900 | scrypt | Generic KDF", " 11900 | PBKDF2-HMAC-MD5 | Generic KDF", @@ -6999,7 +7003,7 @@ int main (int argc, char **argv) return -1; } - if (hash_mode_chgd && hash_mode > 13900) // just added to remove compiler warnings for hash_mode_chgd + if (hash_mode_chgd && hash_mode > 14100) // just added to remove compiler warnings for hash_mode_chgd { log_error ("ERROR: Invalid hash-type specified"); @@ -11794,6 +11798,43 @@ int main (int argc, char **argv) dgst_pos3 = 1; break; + case 14000: hash_type = HASH_TYPE_DES; + salt_type = SALT_TYPE_EMBEDDED; + attack_exec = ATTACK_EXEC_INSIDE_KERNEL; + opts_type = OPTS_TYPE_PT_GENERATE_LE + | OPTS_TYPE_PT_BITSLICE + | OPTS_TYPE_ST_GENERATE_LE + 
| OPTS_TYPE_ST_HEX; + kern_type = KERN_TYPE_DES; + dgst_size = DGST_SIZE_4_4; // originally DGST_SIZE_4_2 + parse_func = des_parse_hash; + sort_by_digest = sort_by_digest_4_4; // originally sort_by_digest_4_2 + opti_type = OPTI_TYPE_ZERO_BYTE + | OPTI_TYPE_PRECOMPUTE_PERMUT; + dgst_pos0 = 0; + dgst_pos1 = 1; + dgst_pos2 = 2; + dgst_pos3 = 3; + break; + + case 14100: hash_type = HASH_TYPE_DES; + salt_type = SALT_TYPE_EMBEDDED; + attack_exec = ATTACK_EXEC_INSIDE_KERNEL; + opts_type = OPTS_TYPE_PT_GENERATE_LE + | OPTS_TYPE_ST_GENERATE_LE + | OPTS_TYPE_ST_HEX; + kern_type = KERN_TYPE_3DES; + dgst_size = DGST_SIZE_4_4; // originally DGST_SIZE_4_2 + parse_func = des_parse_hash; + sort_by_digest = sort_by_digest_4_4; // originally sort_by_digest_4_2 + opti_type = OPTI_TYPE_ZERO_BYTE + | OPTI_TYPE_PRECOMPUTE_PERMUT; + dgst_pos0 = 0; + dgst_pos1 = 1; + dgst_pos2 = 2; + dgst_pos3 = 3; + break; + default: usage_mini_print (PROGNAME); return -1; } @@ -12286,6 +12327,12 @@ int main (int argc, char **argv) break; case 12800: if (pw_max > 24) pw_max = 24; break; + case 14000: if (pw_min < 8) pw_min = 8; + if (pw_max > 8) pw_max = 8; + break; + case 14100: if (pw_min < 24) pw_min = 24; + if (pw_max > 24) pw_max = 24; + break; } if (attack_exec == ATTACK_EXEC_INSIDE_KERNEL) @@ -12978,6 +13025,11 @@ int main (int argc, char **argv) break; case 12600: hashes_buf[0].salt->salt_len = 64; break; + case 14000: hashes_buf[0].salt->salt_len = 8; + break; + case 14100: hashes_buf[0].salt->salt_len = 8; + break; + } // special esalt handling @@ -15871,8 +15923,10 @@ int main (int argc, char **argv) if (hash_mode == 1500) kernel_threads = 64; // DES if (hash_mode == 3000) kernel_threads = 64; // DES + if (hash_mode == 3100) kernel_threads = 64; // DES if (hash_mode == 3200) kernel_threads = 8; // Blowfish if (hash_mode == 7500) kernel_threads = 64; // RC4 + if (hash_mode == 8500) kernel_threads = 64; // DES if (hash_mode == 9000) kernel_threads = 8; // Blowfish if (hash_mode == 9700) 
kernel_threads = 64; // RC4 if (hash_mode == 9710) kernel_threads = 64; // RC4 @@ -15882,6 +15936,8 @@ int main (int argc, char **argv) if (hash_mode == 10410) kernel_threads = 64; // RC4 if (hash_mode == 10500) kernel_threads = 64; // RC4 if (hash_mode == 13100) kernel_threads = 64; // RC4 + if (hash_mode == 14000) kernel_threads = 64; // DES + if (hash_mode == 14100) kernel_threads = 64; // DES device_param->kernel_threads = kernel_threads; @@ -16070,6 +16126,22 @@ int main (int argc, char **argv) device_param->kernel_loops_max = kernel_loops_fixed; } + if (hash_mode == 14000 && attack_mode == ATTACK_MODE_BF) + { + const u32 kernel_loops_fixed = 1024; + + device_param->kernel_loops_min = kernel_loops_fixed; + device_param->kernel_loops_max = kernel_loops_fixed; + } + + if (hash_mode == 14100 && attack_mode == ATTACK_MODE_BF) + { + const u32 kernel_loops_fixed = 1024; + + device_param->kernel_loops_min = kernel_loops_fixed; + device_param->kernel_loops_max = kernel_loops_fixed; + } + /** * some algorithms have a maximum kernel-loops count */ diff --git a/src/shared.c b/src/shared.c index b8742c527..ee912c65a 100644 --- a/src/shared.c +++ b/src/shared.c @@ -6148,6 +6148,8 @@ char *strhashtype (const uint hash_mode) case 13763: return ((char *) HT_13763); case 13800: return ((char *) HT_13800); case 13900: return ((char *) HT_13900); + case 14000: return ((char *) HT_14000); + case 14100: return ((char *) HT_14100); } return ((char *) "Unknown"); @@ -6201,6 +6203,10 @@ void ascii_digest (char *out_buf, uint salt_pos, uint digest_pos) switch (hash_type) { + case HASH_TYPE_DES: + FP (digest_buf[1], digest_buf[0], tt); + break; + case HASH_TYPE_DESCRYPT: FP (digest_buf[1], digest_buf[0], tt); break; @@ -8879,6 +8885,14 @@ void ascii_digest (char *out_buf, uint salt_pos, uint digest_pos) digest_buf[7], buf); } + else if (hash_mode == 14000) + { + snprintf (out_buf, len - 1, "%08x%08x:%s", digest_buf[0], digest_buf[1], (char *) salt.salt_buf); + } + else if (hash_mode == 
14100) + { + snprintf (out_buf, len - 1, "%08x%08x:%s", digest_buf[0], digest_buf[1], (char *) salt.salt_buf); + } else { if (hash_type == HASH_TYPE_MD4) @@ -15098,6 +15112,61 @@ int racf_parse_hash (char *input_buf, uint input_len, hash_t *hash_buf) return (PARSER_OK); }
+// Parses "<16 hex digest><separator><16 hex salt>" for hash-modes 14000 (DES) and 14100 (3DES);
+// the salt is the known plaintext, the digest its ciphertext. Returns PARSER_OK or a PARSER_* error.
+int des_parse_hash (char *input_buf, uint input_len, hash_t *hash_buf)
+{
+  if ((input_len < DISPLAY_LEN_MIN_14000) || (input_len > DISPLAY_LEN_MAX_14000)) return (PARSER_GLOBAL_LENGTH);
+
+  u32 *digest = (u32 *) hash_buf->digest;
+
+  salt_t *salt = hash_buf->salt;
+
+  char *digest_pos = input_buf;
+
+  // honor the configured separator (data.separator) rather than a hard-coded ':'
+  char *salt_pos = strchr (digest_pos, data.separator);
+
+  if (salt_pos == NULL) return (PARSER_SEPARATOR_UNMATCHED);
+
+  uint hash_len = salt_pos - digest_pos;
+  if (hash_len != 16) return (PARSER_HASH_LENGTH);
+
+  uint salt_len = input_len - 1 - hash_len;
+  if (salt_len != 16) return (PARSER_SALT_LENGTH);
+
+  salt_pos++;
+
+  char *salt_buf_ptr = (char *) salt->salt_buf;
+
+  salt_len = parse_and_store_salt (salt_buf_ptr, salt_pos, salt_len);
+
+  if (salt_len == UINT_MAX) return (PARSER_SALT_LENGTH);
+
+  salt->salt_len = salt_len;
+
+  uint tt;
+
+  // salt and digest are both kept IP-permuted; ascii_digest () applies FP to HASH_TYPE_DES digests
+  salt->salt_buf_pc[0] = byte_swap_32 (salt->salt_buf[0]);
+  salt->salt_buf_pc[1] = byte_swap_32 (salt->salt_buf[1]);
+
+  IP (salt->salt_buf_pc[0], salt->salt_buf_pc[1], tt);
+
+  digest[0] = hex_to_u32 ((const u8 *) &digest_pos[ 0]);
+  digest[1] = hex_to_u32 ((const u8 *) &digest_pos[ 8]);
+
+  digest[0] = byte_swap_32 (digest[0]);
+  digest[1] = byte_swap_32 (digest[1]);
+
+  IP (digest[0], digest[1], tt);
+
+  digest[2] = 0;
+  digest[3] = 0;
+
+  return (PARSER_OK);
+}
+
 int lotus5_parse_hash (char *input_buf, uint input_len, hash_t *hash_buf) { if ((input_len < DISPLAY_LEN_MIN_8600) || (input_len > DISPLAY_LEN_MAX_8600)) return (PARSER_GLOBAL_LENGTH); diff --git a/tools/test.pl b/tools/test.pl index bc7ce72d3..8d27b6214 100755 --- a/tools/test.pl +++ b/tools/test.pl @@ -45,7 +45,7 @@ my $hashcat =
"./hashcat"; my $MAX_LEN = 55; -my @modes = (0, 10, 11, 12, 20, 21, 22, 23, 30, 40, 50, 60, 100, 101, 110, 111, 112, 120, 121, 122, 125, 130, 131, 132, 133, 140, 141, 150, 160, 200, 300, 400, 500, 900, 1000, 1100, 1400, 1410, 1420, 1430, 1440, 1441, 1450, 1460, 1500, 1600, 1700, 1710, 1711, 1720, 1730, 1740, 1722, 1731, 1750, 1760, 1800, 2100, 2400, 2410, 2500, 2600, 2611, 2612, 2711, 2811, 3000, 3100, 3200, 3710, 3711, 3300, 3500, 3610, 3720, 3800, 3910, 4010, 4110, 4210, 4300, 4400, 4500, 4600, 4700, 4800, 4900, 5000, 5100, 5300, 5400, 5500, 5600, 5700, 5800, 6000, 6100, 6300, 6400, 6500, 6600, 6700, 6800, 6900, 7100, 7200, 7300, 7400, 7500, 7600, 7700, 7800, 7900, 8000, 8100, 8200, 8300, 8400, 8500, 8600, 8700, 8900, 9100, 9200, 9300, 9400, 9500, 9600, 9700, 9800, 9900, 10000, 10100, 10200, 10300, 10400, 10500, 10600, 10700, 10800, 10900, 11000, 11100, 11200, 11300, 11400, 11500, 11600, 11900, 12000, 12100, 12200, 12300, 12400, 12600, 12700, 12800, 12900, 13000, 13100, 13200, 13300, 13400, 13500, 13600, 13800, 13900); +my @modes = (0, 10, 11, 12, 20, 21, 22, 23, 30, 40, 50, 60, 100, 101, 110, 111, 112, 120, 121, 122, 125, 130, 131, 132, 133, 140, 141, 150, 160, 200, 300, 400, 500, 900, 1000, 1100, 1400, 1410, 1420, 1430, 1440, 1441, 1450, 1460, 1500, 1600, 1700, 1710, 1711, 1720, 1730, 1740, 1722, 1731, 1750, 1760, 1800, 2100, 2400, 2410, 2500, 2600, 2611, 2612, 2711, 2811, 3000, 3100, 3200, 3710, 3711, 3300, 3500, 3610, 3720, 3800, 3910, 4010, 4110, 4210, 4300, 4400, 4500, 4600, 4700, 4800, 4900, 5000, 5100, 5300, 5400, 5500, 5600, 5700, 5800, 6000, 6100, 6300, 6400, 6500, 6600, 6700, 6800, 6900, 7100, 7200, 7300, 7400, 7500, 7600, 7700, 7800, 7900, 8000, 8100, 8200, 8300, 8400, 8500, 8600, 8700, 8900, 9100, 9200, 9300, 9400, 9500, 9600, 9700, 9800, 9900, 10000, 10100, 10200, 10300, 10400, 10500, 10600, 10700, 10800, 10900, 11000, 11100, 11200, 11300, 11400, 11500, 11600, 11900, 12000, 12100, 12200, 12300, 12400, 12600, 12700, 12800, 12900, 13000, 13100, 13200, 
13300, 13400, 13500, 13600, 13800, 13900, 14000, 14100); my %is_unicode = map { $_ => 1 } qw(30 40 130 131 132 133 140 141 1000 1100 1430 1440 1441 1730 1740 1731 5500 5600 8000 9400 9500 9600 9700 9800 11600 13500 13800); my %less_fifteen = map { $_ => 1 } qw(500 1600 1800 2400 2410 3200 6300 7400 10500 10700); @@ -226,7 +226,7 @@ sub verify $word = substr ($line, $index + 1); } # hash:salt - elsif ($mode == 10 || $mode == 11 || $mode == 12 || $mode == 20 || $mode == 21 || $mode == 22 || $mode == 23 || $mode == 30 || $mode == 40 || $mode == 50 || $mode == 60 || $mode == 110 || $mode == 112 || $mode == 120 || $mode == 121 || $mode == 130 || $mode == 140 || $mode == 150 || $mode == 160 || $mode == 1100 || $mode == 1410 || $mode == 1420 || $mode == 1430 || $mode == 1440 || $mode == 1450 || $mode == 1460 || $mode == 1710 || $mode == 1720 || $mode == 1730 || $mode == 1740 || $mode == 1750 || $mode == 1760 || $mode == 2410 || $mode == 2611 || $mode == 2711 || $mode == 2811 || $mode == 3100 || $mode == 3610 || $mode == 3710 || $mode == 3720 || $mode == 3800 || $mode == 3910 || $mode == 4010 || $mode == 4110 || $mode == 4210 || $mode == 4900 || $mode == 5800 || $mode == 7600 || $mode == 8400 || $mode == 11000 || $mode == 12600 || $mode == 13500 || $mode == 13800 || $mode == 13900) + elsif ($mode == 10 || $mode == 11 || $mode == 12 || $mode == 20 || $mode == 21 || $mode == 22 || $mode == 23 || $mode == 30 || $mode == 40 || $mode == 50 || $mode == 60 || $mode == 110 || $mode == 112 || $mode == 120 || $mode == 121 || $mode == 130 || $mode == 140 || $mode == 150 || $mode == 160 || $mode == 1100 || $mode == 1410 || $mode == 1420 || $mode == 1430 || $mode == 1440 || $mode == 1450 || $mode == 1460 || $mode == 1710 || $mode == 1720 || $mode == 1730 || $mode == 1740 || $mode == 1750 || $mode == 1760 || $mode == 2410 || $mode == 2611 || $mode == 2711 || $mode == 2811 || $mode == 3100 || $mode == 3610 || $mode == 3710 || $mode == 3720 || $mode == 3800 || $mode == 3910 || $mode == 
4010 || $mode == 4110 || $mode == 4210 || $mode == 4900 || $mode == 5800 || $mode == 7600 || $mode == 8400 || $mode == 11000 || $mode == 12600 || $mode == 13500 || $mode == 13800 || $mode == 13900 || $mode == 14000 || $mode == 14100) { # get hash my $index1 = index ($line, ":"); @@ -3167,6 +3167,18 @@ sub passthrough { $tmp_hash = gen_hash ($mode, $word_buf, substr ($salt_buf, 0, 9)); } + elsif ($mode == 14000) + { + next if length ($word_buf) != 8; + + $tmp_hash = gen_hash ($mode, $word_buf, substr ($salt_buf, 0, 16)); + } + elsif ($mode == 14100) + { + next if length ($word_buf) != 24; + + $tmp_hash = gen_hash ($mode, $word_buf, substr ($salt_buf, 0, 16)); + } else { print "ERROR: Unsupported hash type\n"; @@ -3992,6 +4004,14 @@ sub single } } } + elsif ($mode == 14000) + { + rnd ($mode, 8, 16); + } + elsif ($mode == 14100) + { + rnd ($mode, 24, 16); + } } } @@ -7417,6 +7437,38 @@ END_CODE $tmp_hash = sprintf ("%s:%s", $hash_buf, $salt_buf); } + elsif ($mode == 14000) + { + my $salt_buf_bin = pack ("H*", $salt_buf); + + my $cipher = new Crypt::DES $word_buf; + + my $hash_buf = $cipher->encrypt ($salt_buf_bin); + + $tmp_hash = sprintf ("%s:%s", unpack ("H*", $hash_buf), $salt_buf); + } + elsif ($mode == 14100) + { + my $word_buf1 = substr ($word_buf, 0, 8); + my $word_buf2 = substr ($word_buf, 8, 8); + my $word_buf3 = substr ($word_buf, 16, 8); + + my $salt_buf_bin = pack ("H*", $salt_buf); + + my $cipher1 = new Crypt::DES $word_buf1; + + my $hash_buf1 = $cipher1->encrypt ($salt_buf_bin); + + my $cipher2 = new Crypt::DES $word_buf2; + + my $hash_buf2 = $cipher2->decrypt ($hash_buf1); + + my $cipher3 = new Crypt::DES $word_buf3; + + my $hash_buf3 = $cipher3->encrypt ($hash_buf2); + + $tmp_hash = sprintf ("%s:%s", unpack ("H*", $hash_buf3), $salt_buf); + } return ($tmp_hash); } diff --git a/tools/test.sh b/tools/test.sh index d00e8caf7..6717a2c1b 100755 --- a/tools/test.sh +++ b/tools/test.sh @@ -10,7 +10,7 @@ # missing hash types: 
5200,6211,6221,6231,6241,6251,6261,6271,6281 -HASH_TYPES="0 10 11 12 20 21 22 23 30 40 50 60 100 101 110 111 112 120 121 122 125 130 131 132 133 140 141 150 160 200 300 400 500 900 1000 1100 1400 1410 1420 1430 1440 1441 1450 1460 1500 1600 1700 1710 1711 1720 1722 1730 1731 1740 1750 1760 1800 2100 2400 2410 2500 2600 2611 2612 2711 2811 3000 3100 3200 3710 3711 3800 4300 4400 4500 4700 4800 4900 5000 5100 5300 5400 5500 5600 5700 5800 6000 6100 6300 6400 6500 6600 6700 6800 6900 7100 7200 7300 7400 7500 7600 7700 7800 7900 8000 8100 8200 8300 8400 8500 8600 8700 8900 9100 9200 9300 9400 9500 9600 9700 9800 9900 10000 10100 10200 10300 10400 10500 10600 10700 10800 10900 11000 11100 11200 11300 11400 11500 11600 11900 12000 12100 12200 12300 12400 12600 12800 12900 13000 13100 13200 13300 13400 13500 13600 13800" +HASH_TYPES="0 10 11 12 20 21 22 23 30 40 50 60 100 101 110 111 112 120 121 122 125 130 131 132 133 140 141 150 160 200 300 400 500 900 1000 1100 1400 1410 1420 1430 1440 1441 1450 1460 1500 1600 1700 1710 1711 1720 1722 1730 1731 1740 1750 1760 1800 2100 2400 2410 2500 2600 2611 2612 2711 2811 3000 3100 3200 3710 3711 3800 4300 4400 4500 4700 4800 4900 5000 5100 5300 5400 5500 5600 5700 5800 6000 6100 6300 6400 6500 6600 6700 6800 6900 7100 7200 7300 7400 7500 7600 7700 7800 7900 8000 8100 8200 8300 8400 8500 8600 8700 8900 9100 9200 9300 9400 9500 9600 9700 9800 9900 10000 10100 10200 10300 10400 10500 10600 10700 10800 10900 11000 11100 11200 11300 11400 11500 11600 11900 12000 12100 12200 12300 12400 12600 12800 12900 13000 13100 13200 13300 13400 13500 13600 13800 14000 14100" #ATTACK_MODES="0 1 3 6 7" ATTACK_MODES="0 1 3 7" @@ -186,6 +186,18 @@ function init() fi + if [ "${hash_type}" -eq 14000 ]; then + + min_len=7 + + fi + + if [ "${hash_type}" -eq 14100 ]; then + + min_len=23 + + fi + while read -u 9 pass; do if [ ${i} -gt 1 ]; then @@ -230,6 +242,18 @@ function init() fi + if [ "${hash_type}" -eq 14000 ]; then + + min_len=7 + + fi + + if [ 
"${hash_type}" -eq 14100 ]; then + + min_len=23 + + fi + # generate multiple pass/hash foreach len (2 to 8) if [ ${MODE} -ge 1 ]; then @@ -612,8 +636,12 @@ function attack_1() offset=7 elif [ ${hash_type} -eq 8500 ]; then offset=7 + elif [ ${hash_type} -eq 14000 ]; then + offset=7 + elif [ ${hash_type} -eq 14100 ]; then + offset=23 fi - + hash_file=${OUTD}/${hash_type}_multihash_combi.txt tail -n ${offset} ${OUTD}/${hash_type}_hashes.txt > ${hash_file} @@ -729,6 +757,20 @@ function attack_3() fi + if [ "${hash_type}" -eq 14000 ]; then + + mask_offset=7 + max=7 + + fi + + if [ "${hash_type}" -eq 14100 ]; then + + mask_offset=23 + max=23 + + fi + i=1 while read -u 9 hash; do @@ -856,6 +898,20 @@ function attack_3() fi + if [ "${hash_type}" -eq 14000 ]; then + + increment_min=8 + increment_max=8 + + fi + + if [ "${hash_type}" -eq 14100 ]; then + + increment_min=24 + increment_max=24 + + fi + hash_file=${OUTD}/${hash_type}_multihash_bruteforce.txt head -n $((increment_max - ${increment_min} + 1)) ${OUTD}/${hash_type}_hashes.txt > ${hash_file} @@ -1056,6 +1112,18 @@ function attack_6() fi + if [ "${hash_type}" -eq 14000 ]; then + + max=6 + + fi + + if [ "${hash_type}" -eq 14100 ]; then + + max=6 + + fi + while read -u 9 hash; do if [ "${i}" -gt 6 ]; then @@ -1155,6 +1223,10 @@ function attack_6() max=8 elif [ ${hash_type} -eq 8500 ]; then max=8 + elif [ ${hash_type} -eq 14000 ]; then + max=5 + elif [ ${hash_type} -eq 14100 ]; then + max=5 fi if ! contains ${hash_type} ${TIMEOUT_ALGOS}; then @@ -1280,6 +1352,18 @@ function attack_7() fi + if [ "${hash_type}" -eq 14000 ]; then + + max=5 + + fi + + if [ "${hash_type}" -eq 14100 ]; then + + max=5 + + fi + i=1 while read -u 9 hash; do @@ -1397,6 +1481,10 @@ function attack_7() max=8 elif [ ${hash_type} -eq 8500 ]; then max=8 + elif [ ${hash_type} -eq 14000 ]; then + max=5 + elif [ ${hash_type} -eq 14100 ]; then + max=5 fi if ! contains ${hash_type} ${TIMEOUT_ALGOS}; then