diff --git a/OpenCL/m03000_a0.cl b/OpenCL/m03000_a0.cl index 7fe973a38..9d36ea665 100644 --- a/OpenCL/m03000_a0.cl +++ b/OpenCL/m03000_a0.cl @@ -44,287 +44,299 @@ __constant u32 c_SPtrans[8][64] = { - /* nibble 0 */ - 0x02080800, 0x00080000, 0x02000002, 0x02080802, - 0x02000000, 0x00080802, 0x00080002, 0x02000002, - 0x00080802, 0x02080800, 0x02080000, 0x00000802, - 0x02000802, 0x02000000, 0x00000000, 0x00080002, - 0x00080000, 0x00000002, 0x02000800, 0x00080800, - 0x02080802, 0x02080000, 0x00000802, 0x02000800, - 0x00000002, 0x00000800, 0x00080800, 0x02080002, - 0x00000800, 0x02000802, 0x02080002, 0x00000000, - 0x00000000, 0x02080802, 0x02000800, 0x00080002, - 0x02080800, 0x00080000, 0x00000802, 0x02000800, - 0x02080002, 0x00000800, 0x00080800, 0x02000002, - 0x00080802, 0x00000002, 0x02000002, 0x02080000, - 0x02080802, 0x00080800, 0x02080000, 0x02000802, - 0x02000000, 0x00000802, 0x00080002, 0x00000000, - 0x00080000, 0x02000000, 0x02000802, 0x02080800, - 0x00000002, 0x02080002, 0x00000800, 0x00080802, - /* nibble 1 */ - 0x40108010, 0x00000000, 0x00108000, 0x40100000, - 0x40000010, 0x00008010, 0x40008000, 0x00108000, - 0x00008000, 0x40100010, 0x00000010, 0x40008000, - 0x00100010, 0x40108000, 0x40100000, 0x00000010, - 0x00100000, 0x40008010, 0x40100010, 0x00008000, - 0x00108010, 0x40000000, 0x00000000, 0x00100010, - 0x40008010, 0x00108010, 0x40108000, 0x40000010, - 0x40000000, 0x00100000, 0x00008010, 0x40108010, - 0x00100010, 0x40108000, 0x40008000, 0x00108010, - 0x40108010, 0x00100010, 0x40000010, 0x00000000, - 0x40000000, 0x00008010, 0x00100000, 0x40100010, - 0x00008000, 0x40000000, 0x00108010, 0x40008010, - 0x40108000, 0x00008000, 0x00000000, 0x40000010, - 0x00000010, 0x40108010, 0x00108000, 0x40100000, - 0x40100010, 0x00100000, 0x00008010, 0x40008000, - 0x40008010, 0x00000010, 0x40100000, 0x00108000, - /* nibble 2 */ - 0x04000001, 0x04040100, 0x00000100, 0x04000101, - 0x00040001, 0x04000000, 0x04000101, 0x00040100, - 0x04000100, 0x00040000, 0x04040000, 0x00000001, - 0x04040101, 0x00000101, 0x00000001, 0x04040001, - 0x00000000, 0x00040001, 0x04040100, 0x00000100, - 0x00000101, 0x04040101, 0x00040000, 0x04000001, - 0x04040001, 0x04000100, 0x00040101, 0x04040000, - 0x00040100, 0x00000000, 0x04000000, 0x00040101, - 0x04040100, 0x00000100, 0x00000001, 0x00040000, - 0x00000101, 0x00040001, 0x04040000, 0x04000101, - 0x00000000, 0x04040100, 0x00040100, 0x04040001, - 0x00040001, 0x04000000, 0x04040101, 0x00000001, - 0x00040101, 0x04000001, 0x04000000, 0x04040101, - 0x00040000, 0x04000100, 0x04000101, 0x00040100, - 0x04000100, 0x00000000, 0x04040001, 0x00000101, - 0x04000001, 0x00040101, 0x00000100, 0x04040000, - /* nibble 3 */ - 0x00401008, 0x10001000, 0x00000008, 0x10401008, - 0x00000000, 0x10400000, 0x10001008, 0x00400008, - 0x10401000, 0x10000008, 0x10000000, 0x00001008, - 0x10000008, 0x00401008, 0x00400000, 0x10000000, - 0x10400008, 0x00401000, 0x00001000, 0x00000008, - 0x00401000, 0x10001008, 0x10400000, 0x00001000, - 0x00001008, 0x00000000, 0x00400008, 0x10401000, - 0x10001000, 0x10400008, 0x10401008, 0x00400000, - 0x10400008, 0x00001008, 0x00400000, 0x10000008, - 0x00401000, 0x10001000, 0x00000008, 0x10400000, - 0x10001008, 0x00000000, 0x00001000, 0x00400008, - 0x00000000, 0x10400008, 0x10401000, 0x00001000, - 0x10000000, 0x10401008, 0x00401008, 0x00400000, - 0x10401008, 0x00000008, 0x10001000, 0x00401008, - 0x00400008, 0x00401000, 0x10400000, 0x10001008, - 0x00001008, 0x10000000, 0x10000008, 0x10401000, - /* nibble 4 */ - 0x08000000, 0x00010000, 0x00000400, 0x08010420, - 0x08010020, 0x08000400, 0x00010420, 0x08010000, - 0x00010000, 0x00000020, 0x08000020, 0x00010400, - 0x08000420, 0x08010020, 0x08010400, 0x00000000, - 0x00010400, 0x08000000, 0x00010020, 0x00000420, - 0x08000400, 0x00010420, 0x00000000, 0x08000020, - 0x00000020, 0x08000420, 0x08010420, 0x00010020, - 0x08010000, 0x00000400, 0x00000420, 0x08010400, - 0x08010400, 0x08000420, 0x00010020, 0x08010000, - 0x00010000, 0x00000020, 0x08000020, 0x08000400, - 0x08000000, 0x00010400, 0x08010420, 0x00000000, - 0x00010420, 0x08000000, 0x00000400, 0x00010020, - 0x08000420, 0x00000400, 0x00000000, 0x08010420, - 0x08010020, 0x08010400, 0x00000420, 0x00010000, - 0x00010400, 0x08010020, 0x08000400, 0x00000420, - 0x00000020, 0x00010420, 0x08010000, 0x08000020, - /* nibble 5 */ - 0x80000040, 0x00200040, 0x00000000, 0x80202000, - 0x00200040, 0x00002000, 0x80002040, 0x00200000, - 0x00002040, 0x80202040, 0x00202000, 0x80000000, - 0x80002000, 0x80000040, 0x80200000, 0x00202040, - 0x00200000, 0x80002040, 0x80200040, 0x00000000, - 0x00002000, 0x00000040, 0x80202000, 0x80200040, - 0x80202040, 0x80200000, 0x80000000, 0x00002040, - 0x00000040, 0x00202000, 0x00202040, 0x80002000, - 0x00002040, 0x80000000, 0x80002000, 0x00202040, - 0x80202000, 0x00200040, 0x00000000, 0x80002000, - 0x80000000, 0x00002000, 0x80200040, 0x00200000, - 0x00200040, 0x80202040, 0x00202000, 0x00000040, - 0x80202040, 0x00202000, 0x00200000, 0x80002040, - 0x80000040, 0x80200000, 0x00202040, 0x00000000, - 0x00002000, 0x80000040, 0x80002040, 0x80202000, - 0x80200000, 0x00002040, 0x00000040, 0x80200040, - /* nibble 6 */ - 0x00004000, 0x00000200, 0x01000200, 0x01000004, - 0x01004204, 0x00004004, 0x00004200, 0x00000000, - 0x01000000, 0x01000204, 0x00000204, 0x01004000, - 0x00000004, 0x01004200, 0x01004000, 0x00000204, - 0x01000204, 0x00004000, 0x00004004, 0x01004204, - 0x00000000, 0x01000200, 0x01000004, 0x00004200, - 0x01004004, 0x00004204, 0x01004200, 0x00000004, - 0x00004204, 0x01004004, 0x00000200, 0x01000000, - 0x00004204, 0x01004000, 0x01004004, 0x00000204, - 0x00004000, 0x00000200, 0x01000000, 0x01004004, - 0x01000204, 0x00004204, 0x00004200, 0x00000000, - 0x00000200, 0x01000004, 0x00000004, 0x01000200, - 0x00000000, 0x01000204, 0x01000200, 0x00004200, - 0x00000204, 0x00004000, 0x01004204, 0x01000000, - 0x01004200, 0x00000004, 0x00004004, 0x01004204, - 0x01000004, 0x01004200, 0x01004000, 0x00004004, - /* nibble 7 */ - 0x20800080, 0x20820000, 0x00020080, 0x00000000, - 0x20020000, 0x00800080, 0x20800000, 0x20820080, - 0x00000080, 0x20000000, 0x00820000, 0x00020080, - 0x00820080, 0x20020080, 0x20000080, 0x20800000, - 0x00020000, 0x00820080, 0x00800080, 0x20020000, - 0x20820080, 0x20000080, 0x00000000, 0x00820000, - 0x20000000, 0x00800000, 0x20020080, 0x20800080, - 0x00800000, 0x00020000, 0x20820000, 0x00000080, - 0x00800000, 0x00020000, 0x20000080, 0x20820080, - 0x00020080, 0x20000000, 0x00000000, 0x00820000, - 0x20800080, 0x20020080, 0x20020000, 0x00800080, - 0x20820000, 0x00000080, 0x00800080, 0x20020000, - 0x20820080, 0x00800000, 0x20800000, 0x20000080, - 0x00820000, 0x00020080, 0x20020080, 0x20800000, - 0x00000080, 0x20820000, 0x00820080, 0x00000000, - 0x20000000, 0x20800080, 0x00020000, 0x00820080, + { + 0x02080800, 0x00080000, 0x02000002, 0x02080802, + 0x02000000, 0x00080802, 0x00080002, 0x02000002, + 0x00080802, 0x02080800, 0x02080000, 0x00000802, + 0x02000802, 0x02000000, 0x00000000, 0x00080002, + 0x00080000, 0x00000002, 0x02000800, 0x00080800, + 0x02080802, 0x02080000, 0x00000802, 0x02000800, + 0x00000002, 0x00000800, 0x00080800, 0x02080002, + 0x00000800, 0x02000802, 0x02080002, 0x00000000, + 0x00000000, 0x02080802, 0x02000800, 0x00080002, + 0x02080800, 0x00080000, 0x00000802, 0x02000800, + 0x02080002, 0x00000800, 0x00080800, 0x02000002, + 0x00080802, 0x00000002, 0x02000002, 0x02080000, + 0x02080802, 0x00080800, 0x02080000, 0x02000802, + 0x02000000, 0x00000802, 0x00080002, 0x00000000, + 0x00080000, 0x02000000, 0x02000802, 0x02080800, + 0x00000002, 0x02080002, 0x00000800, 0x00080802, + }, + { + 0x40108010, 0x00000000, 0x00108000, 0x40100000, + 0x40000010, 0x00008010, 0x40008000, 0x00108000, + 0x00008000, 0x40100010, 0x00000010, 0x40008000, + 0x00100010, 0x40108000, 0x40100000, 0x00000010, + 0x00100000, 0x40008010, 0x40100010, 0x00008000, + 0x00108010, 0x40000000, 0x00000000, 0x00100010, + 0x40008010, 0x00108010, 0x40108000, 0x40000010, + 0x40000000, 0x00100000, 0x00008010, 0x40108010, + 0x00100010, 0x40108000, 0x40008000, 0x00108010, + 0x40108010, 0x00100010, 0x40000010, 0x00000000, + 0x40000000, 0x00008010, 0x00100000, 0x40100010, + 0x00008000, 0x40000000, 0x00108010, 0x40008010, + 0x40108000, 0x00008000, 0x00000000, 0x40000010, + 0x00000010, 0x40108010, 0x00108000, 0x40100000, + 0x40100010, 0x00100000, 0x00008010, 0x40008000, + 0x40008010, 0x00000010, 0x40100000, 0x00108000, + }, + { + 0x04000001, 0x04040100, 0x00000100, 0x04000101, + 0x00040001, 0x04000000, 0x04000101, 0x00040100, + 0x04000100, 0x00040000, 0x04040000, 0x00000001, + 0x04040101, 0x00000101, 0x00000001, 0x04040001, + 0x00000000, 0x00040001, 0x04040100, 0x00000100, + 0x00000101, 0x04040101, 0x00040000, 0x04000001, + 0x04040001, 0x04000100, 0x00040101, 0x04040000, + 0x00040100, 0x00000000, 0x04000000, 0x00040101, + 0x04040100, 0x00000100, 0x00000001, 0x00040000, + 0x00000101, 0x00040001, 0x04040000, 0x04000101, + 0x00000000, 0x04040100, 0x00040100, 0x04040001, + 0x00040001, 0x04000000, 0x04040101, 0x00000001, + 0x00040101, 0x04000001, 0x04000000, 0x04040101, + 0x00040000, 0x04000100, 0x04000101, 0x00040100, + 0x04000100, 0x00000000, 0x04040001, 0x00000101, + 0x04000001, 0x00040101, 0x00000100, 0x04040000, + }, + { + 0x00401008, 0x10001000, 0x00000008, 0x10401008, + 0x00000000, 0x10400000, 0x10001008, 0x00400008, + 0x10401000, 0x10000008, 0x10000000, 0x00001008, + 0x10000008, 0x00401008, 0x00400000, 0x10000000, + 0x10400008, 0x00401000, 0x00001000, 0x00000008, + 0x00401000, 0x10001008, 0x10400000, 0x00001000, + 0x00001008, 0x00000000, 0x00400008, 0x10401000, + 0x10001000, 0x10400008, 0x10401008, 0x00400000, + 0x10400008, 0x00001008, 0x00400000, 0x10000008, + 0x00401000, 0x10001000, 0x00000008, 0x10400000, + 0x10001008, 0x00000000, 0x00001000, 0x00400008, + 0x00000000, 0x10400008, 0x10401000, 0x00001000, + 0x10000000, 0x10401008, 0x00401008, 0x00400000, + 0x10401008, 0x00000008, 0x10001000, 0x00401008, + 0x00400008, 0x00401000, 0x10400000, 0x10001008, + 0x00001008, 0x10000000, 0x10000008, 0x10401000, + }, + { + 0x08000000, 0x00010000, 0x00000400, 0x08010420, + 0x08010020, 0x08000400, 0x00010420, 0x08010000, + 0x00010000, 0x00000020, 0x08000020, 0x00010400, + 0x08000420, 0x08010020, 0x08010400, 0x00000000, + 0x00010400, 0x08000000, 0x00010020, 0x00000420, + 0x08000400, 0x00010420, 0x00000000, 0x08000020, + 0x00000020, 0x08000420, 0x08010420, 0x00010020, + 0x08010000, 0x00000400, 0x00000420, 0x08010400, + 0x08010400, 0x08000420, 0x00010020, 0x08010000, + 0x00010000, 0x00000020, 0x08000020, 0x08000400, + 0x08000000, 0x00010400, 0x08010420, 0x00000000, + 0x00010420, 0x08000000, 0x00000400, 0x00010020, + 0x08000420, 0x00000400, 0x00000000, 0x08010420, + 0x08010020, 0x08010400, 0x00000420, 0x00010000, + 0x00010400, 0x08010020, 0x08000400, 0x00000420, + 0x00000020, 0x00010420, 0x08010000, 0x08000020, + }, + { + 0x80000040, 0x00200040, 0x00000000, 0x80202000, + 0x00200040, 0x00002000, 0x80002040, 0x00200000, + 0x00002040, 0x80202040, 0x00202000, 0x80000000, + 0x80002000, 0x80000040, 0x80200000, 0x00202040, + 0x00200000, 0x80002040, 0x80200040, 0x00000000, + 0x00002000, 0x00000040, 0x80202000, 0x80200040, + 0x80202040, 0x80200000, 0x80000000, 0x00002040, + 0x00000040, 0x00202000, 0x00202040, 0x80002000, + 0x00002040, 0x80000000, 0x80002000, 0x00202040, + 0x80202000, 0x00200040, 0x00000000, 0x80002000, + 0x80000000, 0x00002000, 0x80200040, 0x00200000, + 0x00200040, 0x80202040, 0x00202000, 0x00000040, + 0x80202040, 0x00202000, 0x00200000, 0x80002040, + 0x80000040, 0x80200000, 0x00202040, 0x00000000, + 0x00002000, 0x80000040, 0x80002040, 0x80202000, + 0x80200000, 0x00002040, 0x00000040, 0x80200040, + }, + { + 0x00004000, 0x00000200, 0x01000200, 0x01000004, + 0x01004204, 0x00004004, 0x00004200, 0x00000000, + 0x01000000, 0x01000204, 0x00000204, 0x01004000, + 0x00000004, 0x01004200, 0x01004000, 0x00000204, + 0x01000204, 0x00004000, 0x00004004, 0x01004204, + 0x00000000, 0x01000200, 0x01000004, 0x00004200, + 0x01004004, 0x00004204, 0x01004200, 0x00000004, + 0x00004204, 0x01004004, 0x00000200, 0x01000000, + 0x00004204, 0x01004000, 0x01004004, 0x00000204, + 0x00004000, 0x00000200, 0x01000000, 0x01004004, + 0x01000204, 0x00004204, 0x00004200, 0x00000000, + 0x00000200, 0x01000004, 0x00000004, 0x01000200, + 0x00000000, 0x01000204, 0x01000200, 0x00004200, + 0x00000204, 0x00004000, 0x01004204, 0x01000000, + 0x01004200, 0x00000004, 0x00004004, 0x01004204, + 0x01000004, 0x01004200, 0x01004000, 0x00004004, + }, + { + 0x20800080, 0x20820000, 0x00020080, 0x00000000, + 0x20020000, 0x00800080, 0x20800000, 0x20820080, + 0x00000080, 0x20000000, 0x00820000, 0x00020080, + 0x00820080, 0x20020080, 0x20000080, 0x20800000, + 0x00020000, 0x00820080, 0x00800080, 0x20020000, + 0x20820080, 0x20000080, 0x00000000, 0x00820000, + 0x20000000, 0x00800000, 0x20020080, 0x20800080, + 0x00800000, 0x00020000, 0x20820000, 0x00000080, + 0x00800000, 0x00020000, 0x20000080, 0x20820080, + 0x00020080, 0x20000000, 0x00000000, 0x00820000, + 0x20800080, 0x20020080, 0x20020000, 0x00800080, + 0x20820000, 0x00000080, 0x00800080, 0x20020000, + 0x20820080, 0x00800000, 0x20800000, 0x20000080, + 0x00820000, 0x00020080, 0x20020080, 0x20800000, + 0x00000080, 0x20820000, 0x00820080, 0x00000000, + 0x20000000, 0x20800080, 0x00020000, 0x00820080, + }, }; __constant u32 c_skb[8][64] = { - /* for C bits (numbered as per FIPS 46) 1 2 3 4 5 6 */ - 0x00000000, 0x00000010, 0x20000000, 0x20000010, - 0x00010000, 0x00010010, 0x20010000, 0x20010010, - 0x00000800, 0x00000810, 0x20000800, 0x20000810, - 0x00010800, 0x00010810, 0x20010800, 0x20010810, - 0x00000020, 0x00000030, 0x20000020, 0x20000030, - 0x00010020, 0x00010030, 0x20010020, 0x20010030, - 0x00000820, 0x00000830, 0x20000820, 0x20000830, - 0x00010820, 0x00010830, 0x20010820, 0x20010830, - 0x00080000, 0x00080010, 0x20080000, 0x20080010, - 0x00090000, 0x00090010, 0x20090000, 0x20090010, - 0x00080800, 0x00080810, 0x20080800, 0x20080810, - 0x00090800, 0x00090810, 0x20090800, 0x20090810, - 0x00080020, 0x00080030, 0x20080020, 0x20080030, - 0x00090020, 0x00090030, 0x20090020, 0x20090030, - 0x00080820, 0x00080830, 0x20080820, 0x20080830, - 0x00090820, 0x00090830, 0x20090820, 0x20090830, - /* for C bits (numbered as per FIPS 46) 7 8 10 11 12 13 */ - 0x00000000, 0x02000000, 0x00002000, 0x02002000, - 0x00200000, 0x02200000, 0x00202000, 0x02202000, - 0x00000004, 0x02000004, 0x00002004, 0x02002004, - 0x00200004, 0x02200004, 0x00202004, 0x02202004, - 0x00000400, 0x02000400, 0x00002400, 0x02002400, - 0x00200400, 0x02200400, 0x00202400, 0x02202400, - 0x00000404, 0x02000404, 0x00002404, 0x02002404, - 0x00200404, 0x02200404, 0x00202404, 0x02202404, - 0x10000000, 0x12000000, 0x10002000, 0x12002000, - 0x10200000, 0x12200000, 0x10202000, 0x12202000, - 0x10000004, 0x12000004, 0x10002004, 0x12002004, - 0x10200004, 0x12200004, 0x10202004, 0x12202004, - 0x10000400, 0x12000400, 0x10002400, 0x12002400, - 0x10200400, 0x12200400, 0x10202400, 0x12202400, - 0x10000404, 0x12000404, 0x10002404, 0x12002404, - 0x10200404, 0x12200404, 0x10202404, 0x12202404, - /* for C bits (numbered as per FIPS 46) 14 15 16 17 19 20 */ - 0x00000000, 0x00000001, 0x00040000, 0x00040001, - 0x01000000, 0x01000001, 0x01040000, 0x01040001, - 0x00000002, 0x00000003, 0x00040002, 0x00040003, - 0x01000002, 0x01000003, 0x01040002, 0x01040003, - 0x00000200, 0x00000201, 0x00040200, 0x00040201, - 0x01000200, 0x01000201, 0x01040200, 0x01040201, - 0x00000202, 0x00000203, 0x00040202, 0x00040203, - 0x01000202, 0x01000203, 0x01040202, 0x01040203, - 0x08000000, 0x08000001, 0x08040000, 0x08040001, - 0x09000000, 0x09000001, 0x09040000, 0x09040001, - 0x08000002, 0x08000003, 0x08040002, 0x08040003, - 0x09000002, 0x09000003, 0x09040002, 0x09040003, - 0x08000200, 0x08000201, 0x08040200, 0x08040201, - 0x09000200, 0x09000201, 0x09040200, 0x09040201, - 0x08000202, 0x08000203, 0x08040202, 0x08040203, - 0x09000202, 0x09000203, 0x09040202, 0x09040203, - /* for C bits (numbered as per FIPS 46) 21 23 24 26 27 28 */ - 0x00000000, 0x00100000, 0x00000100, 0x00100100, - 0x00000008, 0x00100008, 0x00000108, 0x00100108, - 0x00001000, 0x00101000, 0x00001100, 0x00101100, - 0x00001008, 0x00101008, 0x00001108, 0x00101108, - 0x04000000, 0x04100000, 0x04000100, 0x04100100, - 0x04000008, 0x04100008, 0x04000108, 0x04100108, - 0x04001000, 0x04101000, 0x04001100, 0x04101100, - 0x04001008, 0x04101008, 0x04001108, 0x04101108, - 0x00020000, 0x00120000, 0x00020100, 0x00120100, - 0x00020008, 0x00120008, 0x00020108, 0x00120108, - 0x00021000, 0x00121000, 0x00021100, 0x00121100, - 0x00021008, 0x00121008, 0x00021108, 0x00121108, - 0x04020000, 0x04120000, 0x04020100, 0x04120100, - 0x04020008, 0x04120008, 0x04020108, 0x04120108, - 0x04021000, 0x04121000, 0x04021100, 0x04121100, - 0x04021008, 0x04121008, 0x04021108, 0x04121108, - /* for D bits (numbered as per FIPS 46) 1 2 3 4 5 6 */ - 0x00000000, 0x10000000, 0x00010000, 0x10010000, - 0x00000004, 0x10000004, 0x00010004, 0x10010004, - 0x20000000, 0x30000000, 0x20010000, 0x30010000, - 0x20000004, 0x30000004, 0x20010004, 0x30010004, - 0x00100000, 0x10100000, 0x00110000, 0x10110000, - 0x00100004, 0x10100004, 0x00110004, 0x10110004, - 0x20100000, 0x30100000, 0x20110000, 0x30110000, - 0x20100004, 0x30100004, 0x20110004, 0x30110004, - 0x00001000, 0x10001000, 0x00011000, 0x10011000, - 0x00001004, 0x10001004, 0x00011004, 0x10011004, - 0x20001000, 0x30001000, 0x20011000, 0x30011000, - 0x20001004, 0x30001004, 0x20011004, 0x30011004, - 0x00101000, 0x10101000, 0x00111000, 0x10111000, - 0x00101004, 0x10101004, 0x00111004, 0x10111004, - 0x20101000, 0x30101000, 0x20111000, 0x30111000, - 0x20101004, 0x30101004, 0x20111004, 0x30111004, - /* for D bits (numbered as per FIPS 46) 8 9 11 12 13 14 */ - 0x00000000, 0x08000000, 0x00000008, 0x08000008, - 0x00000400, 0x08000400, 0x00000408, 0x08000408, - 0x00020000, 0x08020000, 0x00020008, 0x08020008, - 0x00020400, 0x08020400, 0x00020408, 0x08020408, - 0x00000001, 0x08000001, 0x00000009, 0x08000009, - 0x00000401, 0x08000401, 0x00000409, 0x08000409, - 0x00020001, 0x08020001, 0x00020009, 0x08020009, - 0x00020401, 0x08020401, 0x00020409, 0x08020409, - 0x02000000, 0x0A000000, 0x02000008, 0x0A000008, - 0x02000400, 0x0A000400, 0x02000408, 0x0A000408, - 0x02020000, 0x0A020000, 0x02020008, 0x0A020008, - 0x02020400, 0x0A020400, 0x02020408, 0x0A020408, - 0x02000001, 0x0A000001, 0x02000009, 0x0A000009, - 0x02000401, 0x0A000401, 0x02000409, 0x0A000409, - 0x02020001, 0x0A020001, 0x02020009, 0x0A020009, - 0x02020401, 0x0A020401, 0x02020409, 0x0A020409, - /* for D bits (numbered as per FIPS 46) 16 17 18 19 20 21 */ - 0x00000000, 0x00000100, 0x00080000, 0x00080100, - 0x01000000, 0x01000100, 0x01080000, 0x01080100, - 0x00000010, 0x00000110, 0x00080010, 0x00080110, - 0x01000010, 0x01000110, 0x01080010, 0x01080110, - 0x00200000, 0x00200100, 0x00280000, 0x00280100, - 0x01200000, 0x01200100, 0x01280000, 0x01280100, - 0x00200010, 0x00200110, 0x00280010, 0x00280110, - 0x01200010, 0x01200110, 0x01280010, 0x01280110, - 0x00000200, 0x00000300, 0x00080200, 0x00080300, - 0x01000200, 0x01000300, 0x01080200, 0x01080300, - 0x00000210, 0x00000310, 0x00080210, 0x00080310, - 0x01000210, 0x01000310, 0x01080210, 0x01080310, - 0x00200200, 0x00200300, 0x00280200, 0x00280300, - 0x01200200, 0x01200300, 0x01280200, 0x01280300, - 0x00200210, 0x00200310, 0x00280210, 0x00280310, - 0x01200210, 0x01200310, 0x01280210, 0x01280310, - /* for D bits (numbered as per FIPS 46) 22 23 24 25 27 28 */ - 0x00000000, 0x04000000, 0x00040000, 0x04040000, - 0x00000002, 0x04000002, 0x00040002, 0x04040002, - 0x00002000, 0x04002000, 0x00042000, 0x04042000, - 0x00002002, 0x04002002, 0x00042002, 0x04042002, - 0x00000020, 0x04000020, 0x00040020, 0x04040020, - 0x00000022, 0x04000022, 0x00040022, 0x04040022, - 0x00002020, 0x04002020, 0x00042020, 0x04042020, - 0x00002022, 0x04002022, 0x00042022, 0x04042022, - 0x00000800, 0x04000800, 0x00040800, 0x04040800, - 0x00000802, 0x04000802, 0x00040802, 0x04040802, - 0x00002800, 0x04002800, 0x00042800, 0x04042800, - 0x00002802, 0x04002802, 0x00042802, 0x04042802, - 0x00000820, 0x04000820, 0x00040820, 0x04040820, - 0x00000822, 0x04000822, 0x00040822, 0x04040822, - 0x00002820, 0x04002820, 0x00042820, 0x04042820, - 0x00002822, 0x04002822, 0x00042822, 0x04042822 + { + 0x00000000, 0x00000010, 0x20000000, 0x20000010, + 0x00010000, 0x00010010, 0x20010000, 0x20010010, + 0x00000800, 0x00000810, 0x20000800, 0x20000810, + 0x00010800, 0x00010810, 0x20010800, 0x20010810, + 0x00000020, 0x00000030, 0x20000020, 0x20000030, + 0x00010020, 0x00010030, 0x20010020, 0x20010030, + 0x00000820, 0x00000830, 0x20000820, 0x20000830, + 0x00010820, 0x00010830, 0x20010820, 0x20010830, + 0x00080000, 0x00080010, 0x20080000, 0x20080010, + 0x00090000, 0x00090010, 0x20090000, 0x20090010, + 0x00080800, 0x00080810, 0x20080800, 0x20080810, + 0x00090800, 0x00090810, 0x20090800, 0x20090810, + 0x00080020, 0x00080030, 0x20080020, 0x20080030, + 0x00090020, 0x00090030, 0x20090020, 0x20090030, + 0x00080820, 0x00080830, 0x20080820, 0x20080830, + }, + { + 0x00000000, 0x02000000, 0x00002000, 0x02002000, + 0x00200000, 0x02200000, 0x00202000, 0x02202000, + 0x00000004, 0x02000004, 0x00002004, 0x02002004, + 0x00200004, 0x02200004, 0x00202004, 0x02202004, + 0x00000400, 0x02000400, 0x00002400, 0x02002400, + 0x00200400, 0x02200400, 0x00202400, 0x02202400, + 0x00000404, 0x02000404, 0x00002404, 0x02002404, + 0x00200404, 0x02200404, 0x00202404, 0x02202404, + 0x10000000, 0x12000000, 0x10002000, 0x12002000, + 0x10200000, 0x12200000, 0x10202000, 0x12202000, + 0x10000004, 0x12000004, 0x10002004, 0x12002004, + 0x10200004, 0x12200004, 0x10202004, 0x12202004, + 0x10000400, 0x12000400, 0x10002400, 0x12002400, + 0x10200400, 0x12200400, 0x10202400, 0x12202400, + 0x10000404, 0x12000404, 0x10002404, 0x12002404, + 0x10200404, 0x12200404, 0x10202404, 0x12202404, + }, + { + 0x00000000, 0x00000001, 0x00040000, 0x00040001, + 0x01000000, 0x01000001, 0x01040000, 0x01040001, + 0x00000002, 0x00000003, 0x00040002, 0x00040003, + 0x01000002, 0x01000003, 0x01040002, 0x01040003, + 0x00000200, 0x00000201, 0x00040200, 0x00040201, + 0x01000200, 0x01000201, 0x01040200, 0x01040201, + 0x00000202, 0x00000203, 0x00040202, 0x00040203, + 0x01000202, 0x01000203, 0x01040202, 0x01040203, + 0x08000000, 0x08000001, 0x08040000, 0x08040001, + 0x09000000, 0x09000001, 0x09040000, 0x09040001, + 0x08000002, 0x08000003, 0x08040002, 0x08040003, + 0x09000002, 0x09000003, 0x09040002, 0x09040003, + 0x08000200, 0x08000201, 0x08040200, 0x08040201, + 0x09000200, 0x09000201, 0x09040200, 0x09040201, + 0x08000202, 0x08000203, 0x08040202, 0x08040203, + 0x09000202, 0x09000203, 0x09040202, 0x09040203, + }, + { + 0x00000000, 0x00100000, 0x00000100, 0x00100100, + 0x00000008, 0x00100008, 0x00000108, 0x00100108, + 0x00001000, 0x00101000, 0x00001100, 0x00101100, + 0x00001008, 0x00101008, 0x00001108, 0x00101108, + 0x04000000, 0x04100000, 0x04000100, 0x04100100, + 0x04000008, 0x04100008, 0x04000108, 0x04100108, + 0x04001000, 0x04101000, 0x04001100, 0x04101100, + 0x04001008, 0x04101008, 0x04001108, 0x04101108, + 0x00020000, 0x00120000, 0x00020100, 0x00120100, + 0x00020008, 0x00120008, 0x00020108, 0x00120108, + 0x00021000, 0x00121000, 0x00021100, 0x00121100, + 0x00021008, 0x00121008, 0x00021108, 0x00121108, + 0x04020000, 0x04120000, 0x04020100, 0x04120100, + 0x04020008, 0x04120008, 0x04020108, 0x04120108, + 0x04021000, 0x04121000, 0x04021100, 0x04121100, + 0x04021008, 0x04121008, 0x04021108, 0x04121108, + }, + { + 0x00000000, 0x10000000, 0x00010000, 0x10010000, + 0x00000004, 0x10000004, 0x00010004, 0x10010004, + 0x20000000, 0x30000000, 0x20010000, 0x30010000, + 0x20000004, 0x30000004, 0x20010004, 0x30010004, + 0x00100000, 0x10100000, 0x00110000, 0x10110000, + 0x00100004, 0x10100004, 0x00110004, 0x10110004, + 0x20100000, 0x30100000, 0x20110000, 0x30110000, + 0x20100004, 0x30100004, 0x20110004, 0x30110004, + 0x00001000, 0x10001000, 0x00011000, 0x10011000, + 0x00001004, 0x10001004, 0x00011004, 0x10011004, + 0x20001000, 0x30001000, 0x20011000, 0x30011000, + 0x20001004, 0x30001004, 0x20011004, 0x30011004, + 0x00101000, 0x10101000, 0x00111000, 0x10111000, + 0x00101004, 0x10101004, 0x00111004, 0x10111004, + 0x20101000, 0x30101000, 0x20111000, 0x30111000, + 0x20101004, 0x30101004, 0x20111004, 0x30111004, + }, + { + 0x00000000, 0x08000000, 0x00000008, 0x08000008, + 0x00000400, 0x08000400, 0x00000408, 0x08000408, + 0x00020000, 0x08020000, 0x00020008, 0x08020008, + 0x00020400, 0x08020400, 0x00020408, 0x08020408, + 0x00000001, 0x08000001, 0x00000009, 0x08000009, + 0x00000401, 0x08000401, 0x00000409, 0x08000409, + 0x00020001, 0x08020001, 0x00020009, 0x08020009, + 0x00020401, 0x08020401, 0x00020409, 0x08020409, + 0x02000000, 0x0A000000, 0x02000008, 0x0A000008, + 0x02000400, 0x0A000400, 0x02000408, 0x0A000408, + 0x02020000, 0x0A020000, 0x02020008, 0x0A020008, + 0x02020400, 0x0A020400, 0x02020408, 0x0A020408, + 0x02000001, 0x0A000001, 0x02000009, 0x0A000009, + 0x02000401, 0x0A000401, 0x02000409, 0x0A000409, + 0x02020001, 0x0A020001, 0x02020009, 0x0A020009, + 0x02020401, 0x0A020401, 0x02020409, 0x0A020409, + }, + { + 0x00000000, 0x00000100, 0x00080000, 0x00080100, + 0x01000000, 0x01000100, 0x01080000, 0x01080100, + 0x00000010, 0x00000110, 0x00080010, 0x00080110, + 0x01000010, 0x01000110, 0x01080010, 0x01080110, + 0x00200000, 0x00200100, 0x00280000, 0x00280100, + 0x01200000, 0x01200100, 0x01280000, 0x01280100, + 0x00200010, 0x00200110, 0x00280010, 0x00280110, + 0x01200010, 0x01200110, 0x01280010, 0x01280110, + 0x00000200, 0x00000300, 0x00080200, 0x00080300, + 0x01000200, 0x01000300, 0x01080200, 0x01080300, + 0x00000210, 0x00000310, 0x00080210, 0x00080310, + 0x01000210, 0x01000310, 0x01080210, 0x01080310, + 0x00200200, 0x00200300, 0x00280200, 0x00280300, + 0x01200200, 0x01200300, 0x01280200, 0x01280300, + 0x00200210, 0x00200310, 0x00280210, 0x00280310, + 0x01200210, 0x01200310, 0x01280210, 0x01280310, + }, + { + 0x00000000, 0x04000000, 0x00040000, 0x04040000, + 0x00000002, 0x04000002, 0x00040002, 0x04040002, + 0x00002000, 0x04002000, 0x00042000, 0x04042000, + 0x00002002, 0x04002002, 0x00042002, 0x04042002, + 0x00000020, 0x04000020, 0x00040020, 0x04040020, + 0x00000022, 0x04000022, 0x00040022, 0x04040022, + 0x00002020, 0x04002020, 0x00042020, 0x04042020, + 0x00002022, 0x04002022, 0x00042022, 0x04042022, + 0x00000800, 0x04000800, 0x00040800, 0x04040800, + 0x00000802, 0x04000802, 0x00040802, 0x04040802, + 0x00002800, 0x04002800, 0x00042800, 0x04042800, + 0x00002802, 0x04002802, 0x00042802, 0x04042802, + 0x00000820, 0x04000820, 0x00040820, 0x04040820, + 0x00000822, 0x04000822, 0x00040822, 0x04040822, + 0x00002820, 0x04002820, 0x00042820, 0x04042820, + 0x00002822, 0x04002822, 0x00042822, 0x04042822 + }, }; -__constant u32 shifts3s0[16] = { 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 }; -__constant u32 shifts3s1[16] = { 27, 27, 26, 26, 26, 26, 26, 26, 27, 26, 26, 26, 26, 26, 26, 27 }; - #define LM_IV_0_IP_RR3 0x2400b807 #define LM_IV_1_IP_RR3 0xaa190747 @@ -344,28 +356,26 @@ static void _des_crypt_encrypt (u32 iv[2], u32 data[2], u32 Kc[16], u32 Kd[16], u = Kc[i + 0] ^ rotl32 (r, 30u); t = Kd[i + 0] ^ rotl32 (r, 26u); - l = l - ^ BOX (amd_bfe (u, 0, 6), 0, s_SPtrans) - ^ BOX (amd_bfe (u, 8, 6), 2, s_SPtrans) - ^ BOX (amd_bfe (u, 16, 6), 4, s_SPtrans) - ^ BOX (amd_bfe (u, 24, 6), 6, s_SPtrans) - ^ BOX (amd_bfe (t, 0, 6), 1, s_SPtrans) - ^ BOX (amd_bfe (t, 8, 6), 3, s_SPtrans) - ^ BOX (amd_bfe (t, 16, 6), 5, s_SPtrans) - ^ BOX (amd_bfe (t, 24, 6), 7, s_SPtrans); + l ^= BOX (((u >> 0) & 0x3f), 0, s_SPtrans) + | BOX (((u >> 8) & 0x3f), 2, s_SPtrans) + | BOX (((u >> 16) & 0x3f), 4, s_SPtrans) + | BOX (((u >> 24) & 0x3f), 6, s_SPtrans) + | BOX (((t >> 0) & 0x3f), 1, s_SPtrans) + | BOX (((t >> 8) & 0x3f), 3, s_SPtrans) + | BOX (((t >> 16) & 0x3f), 5, s_SPtrans) + | BOX (((t >> 24) & 0x3f), 7, s_SPtrans); u = Kc[i + 1] ^ rotl32 (l, 30u); t = Kd[i + 1] ^ rotl32 (l, 26u); - r = r - ^ BOX (amd_bfe (u, 0, 6), 0, s_SPtrans) - ^ BOX (amd_bfe (u, 8, 6), 2, s_SPtrans) - ^ BOX (amd_bfe (u, 16, 6), 4, s_SPtrans) - ^ BOX (amd_bfe (u, 24, 6), 6, s_SPtrans) - ^ BOX (amd_bfe (t, 0, 6), 1, s_SPtrans) - ^ BOX (amd_bfe (t, 8, 6), 3, s_SPtrans) - ^ BOX (amd_bfe (t, 16, 6), 5, s_SPtrans) - ^ BOX (amd_bfe (t, 24, 6), 7, s_SPtrans); + r ^= BOX (((u >> 0) & 0x3f), 0, s_SPtrans) + | BOX (((u >> 8) & 0x3f), 2, s_SPtrans) + | BOX (((u >> 16) & 0x3f), 4, s_SPtrans) + | BOX (((u >> 24) & 0x3f), 6, s_SPtrans) + | BOX (((t >> 0) & 0x3f), 1, s_SPtrans) + | BOX (((t >> 8) & 0x3f), 3, s_SPtrans) + | BOX (((t >> 16) & 0x3f), 5, s_SPtrans) + | BOX (((t >> 24) & 0x3f), 7, s_SPtrans); } iv[0] = rotl32 (l, 29); @@ -393,8 +403,16 @@ static void _des_crypt_keysetup (u32 c, u32 d, u32 Kc[16], u32 Kd[16], __local u #pragma unroll 16 for (u32 i = 0; i < 16; i++) { - c = c >> shifts3s0[i] | c << shifts3s1[i]; - d = d >> shifts3s0[i] | d << shifts3s1[i]; + if ((i < 2) || (i == 8) || (i == 15)) + { + c = ((c >> 1) | (c << 27)); + d = ((d >> 1) | (d << 27)); + } + else + { + c = ((c >> 2) | (c << 26)); + d = ((d >> 2) | (d << 26)); + } c = c & 0x0fffffff; d = d & 0x0fffffff; @@ -406,13 +424,13 @@ static void _des_crypt_keysetup (u32 c, u32 d, u32 Kc[16], u32 Kd[16], __local u const u32 c20 = (c >> 20) & 0x00000001; u32 s = BOX (((c00 >> 0) & 0xff), 0, s_skb) - | BOX (((c06 >> 0) & 0xff) - |((c07 >> 0) & 0xff), 1, s_skb) - | BOX (((c13 >> 0) & 0xff) - |((c06 >> 8) & 0xff), 2, s_skb) - | BOX (((c20 >> 0) & 0xff) - |((c13 >> 8) & 0xff) - |((c06 >> 16) & 0xff), 3, s_skb); + | BOX (((c06 >> 0) & 0xff) + |((c07 >> 0) & 0xff), 1, s_skb) + | BOX (((c13 >> 0) & 0xff) + |((c06 >> 8) & 0xff), 2, s_skb) + | BOX (((c20 >> 0) & 0xff) + |((c13 >> 8) & 0xff) + |((c06 >> 16) & 0xff), 3, s_skb); const u32 d00 = (d >> 0) & 0x00003c3f; const u32 d07 = (d >> 7) & 0x00003f03; @@ -420,11 +438,11 @@ static void _des_crypt_keysetup (u32 c, u32 d, u32 Kc[16], u32 Kd[16], __local u const u32 d22 = (d >> 22) & 0x00000030; u32 t = BOX (((d00 >> 0) & 0xff), 4, s_skb) - | BOX (((d07 >> 0) & 0xff) - |((d00 >> 8) & 0xff), 5, s_skb) - | BOX (((d07 >> 8) & 0xff), 6, s_skb) - | BOX (((d21 >> 0) & 0xff) - |((d22 >> 0) & 0xff), 7, s_skb); + | BOX (((d07 >> 0) & 0xff) + |((d00 >> 8) & 0xff), 5, s_skb) + | BOX (((d07 >> 8) & 0xff), 6, s_skb) + | BOX (((d21 >> 0) & 0xff) + |((d22 >> 0) & 0xff), 7, s_skb); Kc[i] = ((t << 16) | (s & 0x0000ffff)); Kd[i] = ((s >> 16) | (t & 0xffff0000)); diff --git a/OpenCL/m03000_a1.cl b/OpenCL/m03000_a1.cl index c4da6ed8c..e5aa9ce16 100644 --- a/OpenCL/m03000_a1.cl +++ b/OpenCL/m03000_a1.cl @@ -42,287 +42,299 @@ __constant u32 c_SPtrans[8][64] = { - /* nibble 0 */ - 0x02080800, 0x00080000, 0x02000002, 0x02080802, - 0x02000000, 0x00080802, 0x00080002, 0x02000002, - 0x00080802, 0x02080800, 0x02080000, 0x00000802, - 0x02000802, 0x02000000, 0x00000000, 0x00080002, - 0x00080000, 0x00000002, 0x02000800, 0x00080800, - 0x02080802, 0x02080000, 0x00000802, 0x02000800, - 0x00000002, 0x00000800, 0x00080800, 0x02080002, - 0x00000800, 0x02000802, 0x02080002, 0x00000000, - 0x00000000, 0x02080802, 0x02000800, 0x00080002, - 0x02080800, 0x00080000, 0x00000802, 0x02000800, - 0x02080002, 0x00000800, 0x00080800, 0x02000002, - 0x00080802, 0x00000002, 0x02000002, 0x02080000, - 0x02080802, 0x00080800, 0x02080000, 0x02000802, - 0x02000000, 0x00000802, 0x00080002, 0x00000000, - 0x00080000, 0x02000000, 0x02000802, 0x02080800, - 0x00000002, 0x02080002, 0x00000800, 0x00080802, - /* nibble 1 */ - 0x40108010, 0x00000000, 0x00108000, 0x40100000, - 0x40000010, 0x00008010, 0x40008000, 0x00108000, - 0x00008000, 0x40100010, 0x00000010, 0x40008000, - 0x00100010, 0x40108000, 0x40100000, 0x00000010, - 0x00100000, 0x40008010, 0x40100010, 0x00008000, - 0x00108010, 0x40000000, 0x00000000, 0x00100010, - 0x40008010, 0x00108010, 0x40108000, 0x40000010, - 0x40000000, 0x00100000, 0x00008010, 0x40108010, - 0x00100010, 0x40108000, 0x40008000, 0x00108010, - 0x40108010, 0x00100010, 0x40000010, 0x00000000, - 0x40000000, 0x00008010, 0x00100000, 0x40100010, - 0x00008000, 0x40000000, 0x00108010, 0x40008010, - 0x40108000, 0x00008000, 0x00000000, 0x40000010, - 0x00000010, 0x40108010, 0x00108000, 0x40100000, - 0x40100010, 0x00100000, 0x00008010, 0x40008000, - 0x40008010, 0x00000010, 0x40100000, 0x00108000, - /* nibble 2 */ - 0x04000001, 0x04040100, 0x00000100, 0x04000101, - 0x00040001, 0x04000000, 0x04000101, 0x00040100, - 0x04000100, 0x00040000, 0x04040000, 0x00000001, - 0x04040101, 0x00000101, 0x00000001, 0x04040001, - 0x00000000, 0x00040001, 0x04040100, 0x00000100, - 0x00000101, 0x04040101, 0x00040000, 0x04000001, - 0x04040001, 0x04000100, 0x00040101, 0x04040000, - 0x00040100, 0x00000000, 0x04000000, 0x00040101, - 0x04040100, 0x00000100, 0x00000001, 0x00040000, - 0x00000101, 0x00040001, 0x04040000, 0x04000101, - 0x00000000, 0x04040100, 0x00040100, 0x04040001, - 0x00040001, 0x04000000, 0x04040101, 0x00000001, - 0x00040101, 0x04000001, 0x04000000, 0x04040101, - 0x00040000, 0x04000100, 0x04000101, 0x00040100, - 0x04000100, 0x00000000, 0x04040001, 0x00000101, - 0x04000001, 0x00040101, 0x00000100, 0x04040000, - /* nibble 3 */ - 0x00401008, 0x10001000, 0x00000008, 0x10401008, - 0x00000000, 0x10400000, 0x10001008, 0x00400008, - 0x10401000, 0x10000008, 0x10000000, 0x00001008, - 0x10000008, 0x00401008, 0x00400000, 0x10000000, - 0x10400008, 0x00401000, 0x00001000, 0x00000008, - 0x00401000, 0x10001008, 0x10400000, 0x00001000, - 0x00001008, 0x00000000, 0x00400008, 0x10401000, - 0x10001000, 0x10400008, 0x10401008, 0x00400000, - 0x10400008, 0x00001008, 0x00400000, 0x10000008, - 0x00401000, 0x10001000, 0x00000008, 0x10400000, - 0x10001008, 0x00000000, 0x00001000, 0x00400008, - 0x00000000, 0x10400008, 0x10401000, 0x00001000, - 0x10000000, 0x10401008, 0x00401008, 0x00400000, - 0x10401008, 0x00000008, 0x10001000, 0x00401008, - 0x00400008, 0x00401000, 0x10400000, 0x10001008, - 0x00001008, 0x10000000, 0x10000008, 0x10401000, - /* nibble 4 */ - 0x08000000, 0x00010000, 0x00000400, 0x08010420, - 0x08010020, 0x08000400, 0x00010420, 0x08010000, - 0x00010000, 0x00000020, 0x08000020, 0x00010400, - 0x08000420, 0x08010020, 0x08010400, 0x00000000, - 0x00010400, 0x08000000, 0x00010020, 0x00000420, - 0x08000400, 0x00010420, 0x00000000, 0x08000020, - 0x00000020, 0x08000420, 0x08010420, 0x00010020, - 0x08010000, 0x00000400, 0x00000420, 0x08010400, - 0x08010400, 0x08000420, 0x00010020, 0x08010000, - 0x00010000, 0x00000020, 0x08000020, 0x08000400, - 0x08000000, 0x00010400, 0x08010420, 0x00000000, - 0x00010420, 0x08000000, 0x00000400, 0x00010020, - 0x08000420, 0x00000400, 0x00000000, 0x08010420, - 0x08010020, 0x08010400, 0x00000420, 0x00010000, - 0x00010400, 0x08010020, 0x08000400, 0x00000420, - 0x00000020, 0x00010420, 0x08010000, 0x08000020, - /* nibble 5 */ - 0x80000040, 0x00200040, 0x00000000, 0x80202000, - 0x00200040, 0x00002000, 0x80002040, 0x00200000, - 0x00002040, 0x80202040, 0x00202000, 0x80000000, - 0x80002000, 0x80000040, 0x80200000, 0x00202040, - 0x00200000, 0x80002040, 0x80200040, 0x00000000, - 0x00002000, 0x00000040, 0x80202000, 0x80200040, - 0x80202040, 0x80200000, 0x80000000, 0x00002040, - 0x00000040, 0x00202000, 0x00202040, 0x80002000, - 0x00002040, 0x80000000, 0x80002000, 0x00202040, - 0x80202000, 0x00200040, 0x00000000, 0x80002000, - 0x80000000, 0x00002000, 0x80200040, 0x00200000, - 0x00200040, 0x80202040, 0x00202000, 0x00000040, - 0x80202040, 0x00202000, 0x00200000, 0x80002040, - 0x80000040, 0x80200000, 0x00202040, 0x00000000, - 0x00002000, 0x80000040, 0x80002040, 0x80202000, - 0x80200000, 0x00002040, 0x00000040, 0x80200040, - /* nibble 6 */ - 0x00004000, 0x00000200, 0x01000200, 0x01000004, - 0x01004204, 0x00004004, 0x00004200, 0x00000000, - 0x01000000, 0x01000204, 0x00000204, 0x01004000, - 0x00000004, 0x01004200, 0x01004000, 0x00000204, - 0x01000204, 0x00004000, 0x00004004, 0x01004204, - 0x00000000, 0x01000200, 0x01000004, 0x00004200, - 0x01004004, 0x00004204, 0x01004200, 0x00000004, - 0x00004204, 0x01004004, 0x00000200, 0x01000000, - 0x00004204, 0x01004000, 0x01004004, 0x00000204, - 0x00004000, 0x00000200, 0x01000000, 0x01004004, - 0x01000204, 0x00004204, 0x00004200, 0x00000000, - 0x00000200, 0x01000004, 0x00000004, 0x01000200, - 0x00000000, 0x01000204, 0x01000200, 0x00004200, - 0x00000204, 0x00004000, 0x01004204, 0x01000000, - 0x01004200, 0x00000004, 0x00004004, 0x01004204, - 0x01000004, 0x01004200, 0x01004000, 0x00004004, - /* nibble 7 */ - 0x20800080, 0x20820000, 0x00020080, 0x00000000, - 0x20020000, 0x00800080, 0x20800000, 0x20820080, - 0x00000080, 0x20000000, 0x00820000, 0x00020080, - 0x00820080, 0x20020080, 0x20000080, 0x20800000, - 0x00020000, 0x00820080, 0x00800080, 0x20020000, - 0x20820080, 0x20000080, 0x00000000, 0x00820000, - 0x20000000, 0x00800000, 0x20020080, 0x20800080, - 0x00800000, 0x00020000, 0x20820000, 0x00000080, - 0x00800000, 0x00020000, 0x20000080, 0x20820080, - 0x00020080, 0x20000000, 0x00000000, 0x00820000, - 0x20800080, 0x20020080, 0x20020000, 0x00800080, - 0x20820000, 0x00000080, 0x00800080, 0x20020000, - 0x20820080, 0x00800000, 0x20800000, 0x20000080, - 0x00820000, 0x00020080, 0x20020080, 0x20800000, - 0x00000080, 0x20820000, 0x00820080, 0x00000000, - 0x20000000, 0x20800080, 0x00020000, 0x00820080, + { + 0x02080800, 0x00080000, 0x02000002, 0x02080802, + 0x02000000, 0x00080802, 0x00080002, 0x02000002, + 0x00080802, 0x02080800, 0x02080000, 0x00000802, + 0x02000802, 0x02000000, 0x00000000, 0x00080002, + 0x00080000, 0x00000002, 0x02000800, 0x00080800, + 0x02080802, 0x02080000, 0x00000802, 0x02000800, + 0x00000002, 0x00000800, 0x00080800, 0x02080002, + 0x00000800, 0x02000802, 0x02080002, 0x00000000, + 0x00000000, 0x02080802, 0x02000800, 0x00080002, + 0x02080800, 0x00080000, 0x00000802, 0x02000800, + 0x02080002, 0x00000800, 0x00080800, 0x02000002, + 0x00080802, 0x00000002, 0x02000002, 0x02080000, + 0x02080802, 0x00080800, 0x02080000, 0x02000802, + 0x02000000, 0x00000802, 0x00080002, 0x00000000, + 0x00080000, 0x02000000, 0x02000802, 0x02080800, + 0x00000002, 0x02080002, 0x00000800, 0x00080802, + }, + { + 0x40108010, 0x00000000, 0x00108000, 0x40100000, + 0x40000010, 0x00008010, 0x40008000, 0x00108000, + 0x00008000, 0x40100010, 0x00000010, 0x40008000, + 0x00100010, 0x40108000, 0x40100000, 0x00000010, + 0x00100000, 0x40008010, 0x40100010, 0x00008000, + 0x00108010, 0x40000000, 0x00000000, 0x00100010, + 0x40008010, 0x00108010, 0x40108000, 0x40000010, + 0x40000000, 0x00100000, 0x00008010, 0x40108010, + 0x00100010, 0x40108000, 0x40008000, 0x00108010, + 0x40108010, 0x00100010, 0x40000010, 0x00000000, + 0x40000000, 0x00008010, 0x00100000, 0x40100010, + 0x00008000, 0x40000000, 0x00108010, 0x40008010, + 0x40108000, 0x00008000, 0x00000000, 0x40000010, + 0x00000010, 0x40108010, 0x00108000, 0x40100000, + 0x40100010, 0x00100000, 0x00008010, 0x40008000, + 0x40008010, 0x00000010, 0x40100000, 0x00108000, + }, + { + 0x04000001, 0x04040100, 0x00000100, 0x04000101, + 0x00040001, 0x04000000, 0x04000101, 0x00040100, + 0x04000100, 0x00040000, 0x04040000, 0x00000001, + 0x04040101, 0x00000101, 0x00000001, 0x04040001, + 0x00000000, 0x00040001, 0x04040100, 0x00000100, + 0x00000101, 0x04040101, 0x00040000, 0x04000001, + 0x04040001, 0x04000100, 0x00040101, 0x04040000, + 0x00040100, 0x00000000, 0x04000000, 0x00040101, + 0x04040100, 0x00000100, 0x00000001, 0x00040000, + 0x00000101, 0x00040001, 0x04040000, 0x04000101, + 0x00000000, 0x04040100, 0x00040100, 0x04040001, + 0x00040001, 0x04000000, 0x04040101, 0x00000001, + 0x00040101, 0x04000001, 0x04000000, 0x04040101, + 0x00040000, 0x04000100, 0x04000101, 0x00040100, + 0x04000100, 0x00000000, 0x04040001, 0x00000101, + 0x04000001, 0x00040101, 0x00000100, 0x04040000, + }, + { + 0x00401008, 0x10001000, 0x00000008, 0x10401008, + 0x00000000, 0x10400000, 0x10001008, 0x00400008, + 0x10401000, 0x10000008, 0x10000000, 0x00001008, + 0x10000008, 0x00401008, 0x00400000, 0x10000000, + 0x10400008, 0x00401000, 0x00001000, 0x00000008, + 0x00401000, 0x10001008, 0x10400000, 0x00001000, + 0x00001008, 0x00000000, 0x00400008, 0x10401000, + 0x10001000, 0x10400008, 0x10401008, 0x00400000, + 0x10400008, 0x00001008, 0x00400000, 0x10000008, + 0x00401000, 0x10001000, 0x00000008, 0x10400000, + 0x10001008, 0x00000000, 0x00001000, 0x00400008, + 0x00000000, 0x10400008, 0x10401000, 0x00001000, + 0x10000000, 0x10401008, 0x00401008, 0x00400000, + 0x10401008, 0x00000008, 0x10001000, 0x00401008, + 0x00400008, 0x00401000, 0x10400000, 0x10001008, + 0x00001008, 0x10000000, 0x10000008, 0x10401000, + }, + { + 0x08000000, 0x00010000, 0x00000400, 0x08010420, + 0x08010020, 0x08000400, 0x00010420, 0x08010000, + 0x00010000, 0x00000020, 0x08000020, 0x00010400, + 0x08000420, 0x08010020, 0x08010400, 0x00000000, + 0x00010400, 0x08000000, 0x00010020, 0x00000420, + 0x08000400, 0x00010420, 0x00000000, 0x08000020, + 0x00000020, 0x08000420, 0x08010420, 0x00010020, + 0x08010000, 0x00000400, 0x00000420, 0x08010400, + 0x08010400, 0x08000420, 0x00010020, 0x08010000, + 0x00010000, 0x00000020, 0x08000020, 0x08000400, + 0x08000000, 0x00010400, 0x08010420, 0x00000000, + 0x00010420, 0x08000000, 0x00000400, 0x00010020, + 0x08000420, 0x00000400, 0x00000000, 0x08010420, + 0x08010020, 0x08010400, 0x00000420, 0x00010000, + 0x00010400, 0x08010020, 0x08000400, 0x00000420, + 0x00000020, 0x00010420, 0x08010000, 0x08000020, + }, + { + 0x80000040, 0x00200040, 0x00000000, 0x80202000, + 0x00200040, 0x00002000, 0x80002040, 0x00200000, + 0x00002040, 0x80202040, 0x00202000, 0x80000000, + 0x80002000, 0x80000040, 0x80200000, 0x00202040, + 0x00200000, 0x80002040, 0x80200040, 0x00000000, + 0x00002000, 0x00000040, 0x80202000, 0x80200040, + 0x80202040, 0x80200000, 0x80000000, 0x00002040, + 0x00000040, 0x00202000, 0x00202040, 0x80002000, + 0x00002040, 0x80000000, 0x80002000, 0x00202040, + 0x80202000, 0x00200040, 0x00000000, 0x80002000, + 0x80000000, 0x00002000, 0x80200040, 0x00200000, + 0x00200040, 0x80202040, 0x00202000, 0x00000040, + 0x80202040, 0x00202000, 0x00200000, 0x80002040, + 0x80000040, 0x80200000, 0x00202040, 0x00000000, + 0x00002000, 0x80000040, 0x80002040, 0x80202000, + 0x80200000, 0x00002040, 0x00000040, 0x80200040, + }, + { + 0x00004000, 0x00000200, 0x01000200, 0x01000004, + 0x01004204, 0x00004004, 0x00004200, 0x00000000, + 0x01000000, 0x01000204, 0x00000204, 0x01004000, + 0x00000004, 0x01004200, 0x01004000, 0x00000204, + 0x01000204, 0x00004000, 0x00004004, 0x01004204, + 0x00000000, 0x01000200, 0x01000004, 0x00004200, + 0x01004004, 0x00004204, 0x01004200, 0x00000004, + 0x00004204, 0x01004004, 0x00000200, 0x01000000, + 0x00004204, 0x01004000, 0x01004004, 0x00000204, + 0x00004000, 0x00000200, 0x01000000, 0x01004004, + 0x01000204, 0x00004204, 0x00004200, 0x00000000, + 0x00000200, 0x01000004, 0x00000004, 0x01000200, + 0x00000000, 0x01000204, 0x01000200, 0x00004200, + 0x00000204, 0x00004000, 0x01004204, 0x01000000, + 0x01004200, 0x00000004, 0x00004004, 0x01004204, + 0x01000004, 0x01004200, 0x01004000, 0x00004004, + }, + { + 0x20800080, 0x20820000, 0x00020080, 0x00000000, + 0x20020000, 0x00800080, 0x20800000, 0x20820080, + 0x00000080, 0x20000000, 0x00820000, 0x00020080, + 0x00820080, 0x20020080, 0x20000080, 0x20800000, + 0x00020000, 0x00820080, 0x00800080, 0x20020000, + 0x20820080, 0x20000080, 0x00000000, 0x00820000, + 0x20000000, 0x00800000, 0x20020080, 0x20800080, + 0x00800000, 0x00020000, 0x20820000, 0x00000080, + 0x00800000, 0x00020000, 0x20000080, 0x20820080, + 0x00020080, 0x20000000, 0x00000000, 0x00820000, + 0x20800080, 0x20020080, 0x20020000, 0x00800080, + 0x20820000, 0x00000080, 0x00800080, 0x20020000, + 0x20820080, 0x00800000, 0x20800000, 0x20000080, + 0x00820000, 0x00020080, 0x20020080, 0x20800000, + 0x00000080, 0x20820000, 0x00820080, 0x00000000, + 0x20000000, 0x20800080, 0x00020000, 0x00820080, + }, }; __constant u32 c_skb[8][64] = { - /* for C bits (numbered as per FIPS 46) 1 2 3 4 5 6 */ - 0x00000000, 0x00000010, 0x20000000, 0x20000010, - 0x00010000, 0x00010010, 0x20010000, 0x20010010, - 0x00000800, 0x00000810, 0x20000800, 0x20000810, - 0x00010800, 0x00010810, 0x20010800, 0x20010810, - 0x00000020, 0x00000030, 0x20000020, 0x20000030, - 0x00010020, 0x00010030, 0x20010020, 0x20010030, - 0x00000820, 0x00000830, 0x20000820, 0x20000830, - 0x00010820, 0x00010830, 0x20010820, 0x20010830, - 0x00080000, 0x00080010, 0x20080000, 0x20080010, - 0x00090000, 0x00090010, 0x20090000, 0x20090010, - 0x00080800, 0x00080810, 0x20080800, 0x20080810, - 0x00090800, 0x00090810, 0x20090800, 0x20090810, - 0x00080020, 0x00080030, 0x20080020, 0x20080030, - 0x00090020, 0x00090030, 0x20090020, 0x20090030, - 0x00080820, 0x00080830, 0x20080820, 0x20080830, - 0x00090820, 0x00090830, 0x20090820, 0x20090830, - /* for C bits (numbered as per FIPS 46) 7 8 10 11 12 13 */ - 0x00000000, 0x02000000, 0x00002000, 0x02002000, - 0x00200000, 0x02200000, 0x00202000, 0x02202000, - 0x00000004, 0x02000004, 0x00002004, 0x02002004, - 0x00200004, 0x02200004, 0x00202004, 0x02202004, - 0x00000400, 0x02000400, 0x00002400, 0x02002400, - 0x00200400, 0x02200400, 0x00202400, 0x02202400, - 0x00000404, 0x02000404, 0x00002404, 0x02002404, - 0x00200404, 0x02200404, 0x00202404, 0x02202404, - 0x10000000, 0x12000000, 0x10002000, 0x12002000, - 0x10200000, 0x12200000, 0x10202000, 0x12202000, - 0x10000004, 0x12000004, 0x10002004, 0x12002004, - 0x10200004, 0x12200004, 0x10202004, 0x12202004, - 0x10000400, 0x12000400, 0x10002400, 0x12002400, - 0x10200400, 0x12200400, 0x10202400, 0x12202400, - 0x10000404, 0x12000404, 0x10002404, 0x12002404, - 0x10200404, 0x12200404, 0x10202404, 0x12202404, - /* for C bits (numbered as per FIPS 46) 14 15 16 17 19 20 */ - 0x00000000, 0x00000001, 0x00040000, 0x00040001, - 0x01000000, 0x01000001, 0x01040000, 0x01040001, - 0x00000002, 0x00000003, 0x00040002, 0x00040003, - 0x01000002, 0x01000003, 0x01040002, 0x01040003, - 0x00000200, 0x00000201, 0x00040200, 0x00040201, - 0x01000200, 0x01000201, 0x01040200, 0x01040201, - 0x00000202, 0x00000203, 0x00040202, 0x00040203, - 0x01000202, 0x01000203, 0x01040202, 0x01040203, - 0x08000000, 0x08000001, 0x08040000, 0x08040001, - 0x09000000, 0x09000001, 0x09040000, 0x09040001, - 0x08000002, 0x08000003, 0x08040002, 0x08040003, - 0x09000002, 0x09000003, 0x09040002, 0x09040003, - 0x08000200, 0x08000201, 0x08040200, 0x08040201, - 0x09000200, 0x09000201, 0x09040200, 0x09040201, - 0x08000202, 0x08000203, 0x08040202, 0x08040203, - 0x09000202, 0x09000203, 0x09040202, 0x09040203, - /* for C bits (numbered as per FIPS 46) 21 23 24 26 27 28 */ - 0x00000000, 0x00100000, 0x00000100, 0x00100100, - 0x00000008, 0x00100008, 0x00000108, 0x00100108, - 0x00001000, 0x00101000, 0x00001100, 0x00101100, - 0x00001008, 0x00101008, 0x00001108, 0x00101108, - 0x04000000, 0x04100000, 0x04000100, 0x04100100, - 0x04000008, 0x04100008, 0x04000108, 0x04100108, - 0x04001000, 0x04101000, 0x04001100, 0x04101100, - 0x04001008, 0x04101008, 0x04001108, 0x04101108, - 0x00020000, 0x00120000, 0x00020100, 0x00120100, - 0x00020008, 0x00120008, 0x00020108, 0x00120108, - 0x00021000, 0x00121000, 0x00021100, 0x00121100, - 0x00021008, 0x00121008, 0x00021108, 0x00121108, - 0x04020000, 0x04120000, 0x04020100, 0x04120100, - 0x04020008, 0x04120008, 0x04020108, 0x04120108, - 0x04021000, 0x04121000, 0x04021100, 0x04121100, - 0x04021008, 0x04121008, 0x04021108, 0x04121108, - /* for D bits (numbered as per FIPS 46) 1 2 3 4 5 6 */ - 0x00000000, 0x10000000, 0x00010000, 0x10010000, - 0x00000004, 0x10000004, 0x00010004, 0x10010004, - 0x20000000, 0x30000000, 0x20010000, 0x30010000, - 0x20000004, 0x30000004, 0x20010004, 0x30010004, - 0x00100000, 0x10100000, 0x00110000, 0x10110000, - 0x00100004, 0x10100004, 0x00110004, 0x10110004, - 0x20100000, 0x30100000, 0x20110000, 0x30110000, - 0x20100004, 0x30100004, 0x20110004, 0x30110004, - 0x00001000, 0x10001000, 0x00011000, 0x10011000, - 0x00001004, 0x10001004, 0x00011004, 0x10011004, - 0x20001000, 0x30001000, 0x20011000, 0x30011000, - 0x20001004, 0x30001004, 0x20011004, 0x30011004, - 0x00101000, 0x10101000, 0x00111000, 0x10111000, - 0x00101004, 0x10101004, 0x00111004, 0x10111004, - 0x20101000, 0x30101000, 0x20111000, 0x30111000, - 0x20101004, 0x30101004, 0x20111004, 0x30111004, - /* for D bits (numbered as per FIPS 46) 8 9 11 12 13 14 */ - 0x00000000, 0x08000000, 0x00000008, 0x08000008, - 0x00000400, 0x08000400, 0x00000408, 0x08000408, - 0x00020000, 0x08020000, 0x00020008, 0x08020008, - 0x00020400, 0x08020400, 0x00020408, 0x08020408, - 0x00000001, 0x08000001, 0x00000009, 0x08000009, - 0x00000401, 0x08000401, 0x00000409, 0x08000409, - 0x00020001, 0x08020001, 0x00020009, 0x08020009, - 0x00020401, 0x08020401, 0x00020409, 0x08020409, - 0x02000000, 0x0A000000, 0x02000008, 0x0A000008, - 0x02000400, 0x0A000400, 0x02000408, 0x0A000408, - 0x02020000, 0x0A020000, 0x02020008, 0x0A020008, - 0x02020400, 0x0A020400, 0x02020408, 0x0A020408, - 0x02000001, 0x0A000001, 0x02000009, 0x0A000009, - 0x02000401, 0x0A000401, 0x02000409, 0x0A000409, - 0x02020001, 0x0A020001, 0x02020009, 0x0A020009, - 0x02020401, 0x0A020401, 0x02020409, 0x0A020409, - /* for D bits (numbered as per FIPS 46) 16 17 18 19 20 21 */ - 0x00000000, 0x00000100, 0x00080000, 0x00080100, - 0x01000000, 0x01000100, 0x01080000, 0x01080100, - 0x00000010, 0x00000110, 0x00080010, 0x00080110, - 0x01000010, 0x01000110, 0x01080010, 0x01080110, - 0x00200000, 0x00200100, 0x00280000, 0x00280100, - 0x01200000, 0x01200100, 0x01280000, 0x01280100, - 0x00200010, 0x00200110, 0x00280010, 0x00280110, - 0x01200010, 0x01200110, 0x01280010, 0x01280110, - 0x00000200, 0x00000300, 0x00080200, 0x00080300, - 0x01000200, 0x01000300, 0x01080200, 0x01080300, - 0x00000210, 0x00000310, 0x00080210, 0x00080310, - 0x01000210, 0x01000310, 0x01080210, 0x01080310, - 0x00200200, 0x00200300, 0x00280200, 0x00280300, - 0x01200200, 0x01200300, 0x01280200, 0x01280300, - 0x00200210, 0x00200310, 0x00280210, 0x00280310, - 0x01200210, 0x01200310, 0x01280210, 0x01280310, - /* for D bits (numbered as per FIPS 46) 22 23 24 25 27 28 */ - 0x00000000, 0x04000000, 0x00040000, 0x04040000, - 0x00000002, 0x04000002, 0x00040002, 0x04040002, - 0x00002000, 0x04002000, 0x00042000, 0x04042000, - 0x00002002, 0x04002002, 0x00042002, 0x04042002, - 0x00000020, 0x04000020, 0x00040020, 0x04040020, - 0x00000022, 0x04000022, 0x00040022, 0x04040022, - 0x00002020, 0x04002020, 0x00042020, 0x04042020, - 0x00002022, 0x04002022, 0x00042022, 0x04042022, - 0x00000800, 0x04000800, 0x00040800, 0x04040800, - 0x00000802, 0x04000802, 0x00040802, 0x04040802, - 0x00002800, 0x04002800, 0x00042800, 0x04042800, - 0x00002802, 0x04002802, 0x00042802, 0x04042802, - 0x00000820, 0x04000820, 0x00040820, 0x04040820, - 0x00000822, 0x04000822, 0x00040822, 0x04040822, - 0x00002820, 0x04002820, 0x00042820, 0x04042820, - 0x00002822, 0x04002822, 0x00042822, 0x04042822 + { + 0x00000000, 0x00000010, 0x20000000, 0x20000010, + 0x00010000, 0x00010010, 0x20010000, 0x20010010, + 0x00000800, 0x00000810, 0x20000800, 0x20000810, + 0x00010800, 0x00010810, 0x20010800, 0x20010810, + 0x00000020, 0x00000030, 0x20000020, 0x20000030, + 0x00010020, 0x00010030, 0x20010020, 0x20010030, + 0x00000820, 0x00000830, 0x20000820, 0x20000830, + 0x00010820, 0x00010830, 0x20010820, 0x20010830, + 0x00080000, 0x00080010, 0x20080000, 0x20080010, + 0x00090000, 0x00090010, 0x20090000, 0x20090010, + 0x00080800, 0x00080810, 0x20080800, 0x20080810, + 0x00090800, 0x00090810, 0x20090800, 0x20090810, + 0x00080020, 0x00080030, 0x20080020, 0x20080030, + 0x00090020, 0x00090030, 0x20090020, 0x20090030, + 0x00080820, 0x00080830, 0x20080820, 0x20080830, + }, + { + 0x00000000, 0x02000000, 0x00002000, 0x02002000, + 0x00200000, 0x02200000, 0x00202000, 0x02202000, + 0x00000004, 0x02000004, 0x00002004, 0x02002004, + 0x00200004, 0x02200004, 0x00202004, 0x02202004, + 0x00000400, 0x02000400, 0x00002400, 0x02002400, + 0x00200400, 0x02200400, 0x00202400, 0x02202400, + 0x00000404, 0x02000404, 0x00002404, 0x02002404, + 0x00200404, 0x02200404, 0x00202404, 0x02202404, + 0x10000000, 0x12000000, 0x10002000, 0x12002000, + 0x10200000, 0x12200000, 0x10202000, 0x12202000, + 0x10000004, 0x12000004, 0x10002004, 0x12002004, + 0x10200004, 0x12200004, 0x10202004, 0x12202004, + 0x10000400, 0x12000400, 0x10002400, 0x12002400, + 0x10200400, 0x12200400, 0x10202400, 0x12202400, + 0x10000404, 0x12000404, 0x10002404, 0x12002404, + 0x10200404, 0x12200404, 0x10202404, 0x12202404, + }, + { + 0x00000000, 0x00000001, 0x00040000, 0x00040001, + 0x01000000, 0x01000001, 0x01040000, 0x01040001, + 0x00000002, 0x00000003, 0x00040002, 0x00040003, + 0x01000002, 0x01000003, 0x01040002, 0x01040003, + 0x00000200, 0x00000201, 0x00040200, 0x00040201, + 0x01000200, 0x01000201, 0x01040200, 0x01040201, + 0x00000202, 0x00000203, 0x00040202, 0x00040203, + 0x01000202, 0x01000203, 0x01040202, 0x01040203, + 0x08000000, 0x08000001, 0x08040000, 0x08040001, + 0x09000000, 0x09000001, 0x09040000, 0x09040001, + 0x08000002, 0x08000003, 0x08040002, 0x08040003, + 0x09000002, 0x09000003, 0x09040002, 0x09040003, + 0x08000200, 0x08000201, 0x08040200, 0x08040201, + 0x09000200, 0x09000201, 0x09040200, 0x09040201, + 0x08000202, 0x08000203, 0x08040202, 0x08040203, + 0x09000202, 0x09000203, 0x09040202, 0x09040203, + }, + { + 0x00000000, 0x00100000, 0x00000100, 0x00100100, + 0x00000008, 0x00100008, 0x00000108, 0x00100108, + 0x00001000, 0x00101000, 0x00001100, 0x00101100, + 0x00001008, 0x00101008, 0x00001108, 0x00101108, + 0x04000000, 0x04100000, 0x04000100, 0x04100100, + 0x04000008, 0x04100008, 0x04000108, 0x04100108, + 0x04001000, 0x04101000, 0x04001100, 0x04101100, + 0x04001008, 0x04101008, 0x04001108, 0x04101108, + 0x00020000, 0x00120000, 0x00020100, 0x00120100, + 0x00020008, 0x00120008, 0x00020108, 0x00120108, + 0x00021000, 0x00121000, 0x00021100, 0x00121100, + 0x00021008, 0x00121008, 0x00021108, 0x00121108, + 0x04020000, 0x04120000, 0x04020100, 0x04120100, + 0x04020008, 0x04120008, 0x04020108, 0x04120108, + 0x04021000, 0x04121000, 0x04021100, 0x04121100, + 0x04021008, 0x04121008, 0x04021108, 0x04121108, + }, + { + 0x00000000, 0x10000000, 0x00010000, 0x10010000, + 0x00000004, 0x10000004, 0x00010004, 0x10010004, + 0x20000000, 0x30000000, 0x20010000, 0x30010000, + 0x20000004, 0x30000004, 0x20010004, 0x30010004, + 0x00100000, 0x10100000, 0x00110000, 0x10110000, + 0x00100004, 0x10100004, 0x00110004, 0x10110004, + 0x20100000, 0x30100000, 0x20110000, 0x30110000, + 0x20100004, 0x30100004, 0x20110004, 0x30110004, + 0x00001000, 0x10001000, 0x00011000, 0x10011000, + 0x00001004, 0x10001004, 0x00011004, 0x10011004, + 0x20001000, 0x30001000, 0x20011000, 0x30011000, + 0x20001004, 0x30001004, 0x20011004, 0x30011004, + 0x00101000, 0x10101000, 0x00111000, 0x10111000, + 0x00101004, 0x10101004, 0x00111004, 0x10111004, + 0x20101000, 0x30101000, 0x20111000, 0x30111000, + 0x20101004, 0x30101004, 0x20111004, 0x30111004, + }, + { + 0x00000000, 0x08000000, 0x00000008, 0x08000008, + 0x00000400, 0x08000400, 0x00000408, 0x08000408, + 0x00020000, 0x08020000, 0x00020008, 0x08020008, + 0x00020400, 0x08020400, 0x00020408, 0x08020408, + 0x00000001, 0x08000001, 0x00000009, 0x08000009, + 0x00000401, 0x08000401, 0x00000409, 0x08000409, + 0x00020001, 0x08020001, 0x00020009, 0x08020009, + 0x00020401, 0x08020401, 0x00020409, 0x08020409, + 0x02000000, 0x0A000000, 0x02000008, 0x0A000008, + 0x02000400, 0x0A000400, 0x02000408, 0x0A000408, + 0x02020000, 0x0A020000, 0x02020008, 0x0A020008, + 0x02020400, 0x0A020400, 0x02020408, 0x0A020408, + 0x02000001, 0x0A000001, 0x02000009, 0x0A000009, + 0x02000401, 0x0A000401, 0x02000409, 0x0A000409, + 0x02020001, 0x0A020001, 0x02020009, 0x0A020009, + 0x02020401, 0x0A020401, 0x02020409, 0x0A020409, + }, + { + 0x00000000, 0x00000100, 0x00080000, 0x00080100, + 0x01000000, 0x01000100, 0x01080000, 0x01080100, + 0x00000010, 0x00000110, 0x00080010, 0x00080110, + 0x01000010, 0x01000110, 0x01080010, 0x01080110, + 0x00200000, 0x00200100, 0x00280000, 0x00280100, + 0x01200000, 0x01200100, 0x01280000, 0x01280100, + 0x00200010, 0x00200110, 0x00280010, 0x00280110, + 0x01200010, 0x01200110, 0x01280010, 0x01280110, + 0x00000200, 0x00000300, 0x00080200, 0x00080300, + 0x01000200, 0x01000300, 0x01080200, 0x01080300, + 0x00000210, 0x00000310, 0x00080210, 0x00080310, + 0x01000210, 0x01000310, 0x01080210, 0x01080310, + 0x00200200, 0x00200300, 0x00280200, 0x00280300, + 0x01200200, 0x01200300, 0x01280200, 0x01280300, + 0x00200210, 0x00200310, 0x00280210, 0x00280310, + 0x01200210, 0x01200310, 0x01280210, 0x01280310, + }, + { + 0x00000000, 0x04000000, 0x00040000, 0x04040000, + 0x00000002, 0x04000002, 0x00040002, 0x04040002, + 0x00002000, 0x04002000, 0x00042000, 0x04042000, + 0x00002002, 0x04002002, 0x00042002, 0x04042002, + 0x00000020, 0x04000020, 0x00040020, 0x04040020, + 0x00000022, 0x04000022, 0x00040022, 0x04040022, + 0x00002020, 0x04002020, 0x00042020, 0x04042020, + 0x00002022, 0x04002022, 0x00042022, 0x04042022, + 0x00000800, 0x04000800, 0x00040800, 0x04040800, + 0x00000802, 0x04000802, 0x00040802, 0x04040802, + 0x00002800, 0x04002800, 0x00042800, 0x04042800, + 0x00002802, 0x04002802, 0x00042802, 0x04042802, + 0x00000820, 0x04000820, 0x00040820, 0x04040820, + 0x00000822, 0x04000822, 0x00040822, 0x04040822, + 0x00002820, 0x04002820, 0x00042820, 0x04042820, + 0x00002822, 0x04002822, 0x00042822, 0x04042822 + }, }; -__constant u32 shifts3s0[16] = { 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 }; -__constant u32 shifts3s1[16] = { 27, 27, 26, 26, 26, 26, 26, 26, 27, 26, 26, 26, 26, 26, 26, 27 }; - #define LM_IV_0_IP_RR3 0x2400b807 #define LM_IV_1_IP_RR3 0xaa190747 @@ -342,28 +354,26 @@ static void _des_crypt_encrypt (u32 iv[2], u32 data[2], u32 Kc[16], u32 Kd[16], u = Kc[i + 0] ^ rotl32 (r, 30u); t = Kd[i + 0] ^ rotl32 (r, 26u); - l = l - ^ BOX (amd_bfe (u, 0, 6), 0, s_SPtrans) - ^ BOX (amd_bfe (u, 8, 6), 2, s_SPtrans) - ^ BOX (amd_bfe (u, 16, 6), 4, s_SPtrans) - ^ BOX (amd_bfe (u, 24, 6), 6, s_SPtrans) - ^ BOX (amd_bfe (t, 0, 6), 1, s_SPtrans) - ^ BOX (amd_bfe (t, 8, 6), 3, s_SPtrans) - ^ BOX (amd_bfe (t, 16, 6), 5, s_SPtrans) - ^ BOX (amd_bfe (t, 24, 6), 7, s_SPtrans); + l ^= BOX (((u >> 0) & 0x3f), 0, s_SPtrans) + | BOX (((u >> 8) & 0x3f), 2, s_SPtrans) + | BOX (((u >> 16) & 0x3f), 4, s_SPtrans) + | BOX (((u >> 24) & 0x3f), 6, s_SPtrans) + | BOX (((t >> 0) & 0x3f), 1, s_SPtrans) + | BOX (((t >> 8) & 0x3f), 3, s_SPtrans) + | BOX (((t >> 16) & 0x3f), 5, s_SPtrans) + | BOX (((t >> 24) & 0x3f), 7, s_SPtrans); u = Kc[i + 1] ^ rotl32 (l, 30u); t = Kd[i + 1] ^ rotl32 (l, 26u); - r = r - ^ BOX (amd_bfe (u, 0, 6), 0, s_SPtrans) - ^ BOX (amd_bfe (u, 8, 6), 2, s_SPtrans) - ^ BOX (amd_bfe (u, 16, 6), 4, s_SPtrans) - ^ BOX (amd_bfe (u, 24, 6), 6, s_SPtrans) - ^ BOX (amd_bfe (t, 0, 6), 1, s_SPtrans) - ^ BOX (amd_bfe (t, 8, 6), 3, s_SPtrans) - ^ BOX (amd_bfe (t, 16, 6), 5, s_SPtrans) - ^ BOX (amd_bfe (t, 24, 6), 7, s_SPtrans); + r ^= BOX (((u >> 0) & 0x3f), 0, s_SPtrans) + | BOX (((u >> 8) & 0x3f), 2, s_SPtrans) + | BOX (((u >> 16) & 0x3f), 4, s_SPtrans) + | BOX (((u >> 24) & 0x3f), 6, s_SPtrans) + | BOX (((t >> 0) & 0x3f), 1, s_SPtrans) + | BOX (((t >> 8) & 0x3f), 3, s_SPtrans) + | BOX (((t >> 16) & 0x3f), 5, s_SPtrans) + | BOX (((t >> 24) & 0x3f), 7, s_SPtrans); } iv[0] = rotl32 (l, 29); @@ -391,8 +401,16 @@ static void _des_crypt_keysetup (u32 c, u32 d, u32 Kc[16], u32 Kd[16], __local u #pragma unroll 16 for (u32 i = 0; i < 16; i++) { - c = c >> shifts3s0[i] | c << shifts3s1[i]; - d = d >> shifts3s0[i] | d << shifts3s1[i]; + if ((i < 2) || (i == 8) || (i == 15)) + { + c = ((c >> 1) | (c << 27)); + d = ((d >> 1) | (d << 27)); + } + else + { + c = ((c >> 2) | (c << 26)); + d = ((d >> 2) | (d << 26)); + } c = c & 0x0fffffff; d = d & 0x0fffffff; @@ -404,13 +422,13 @@ static void _des_crypt_keysetup (u32 c, u32 d, u32 Kc[16], u32 Kd[16], __local u const u32 c20 = (c >> 20) & 0x00000001; u32 s = BOX (((c00 >> 0) & 0xff), 0, s_skb) - | BOX (((c06 >> 0) & 0xff) - |((c07 >> 0) & 0xff), 1, s_skb) - | BOX (((c13 >> 0) & 0xff) - |((c06 >> 8) & 0xff), 2, s_skb) - | BOX (((c20 >> 0) & 0xff) - |((c13 >> 8) & 0xff) - |((c06 >> 16) & 0xff), 3, s_skb); + | BOX (((c06 >> 0) & 0xff) + |((c07 >> 0) & 0xff), 1, s_skb) + | BOX (((c13 >> 0) & 0xff) + |((c06 >> 8) & 0xff), 2, s_skb) + | BOX (((c20 >> 0) & 0xff) + |((c13 >> 8) & 0xff) + |((c06 >> 16) & 0xff), 3, s_skb); const u32 d00 = (d >> 0) & 0x00003c3f; const u32 d07 = (d >> 7) & 0x00003f03; @@ -418,11 +436,11 @@ static void _des_crypt_keysetup (u32 c, u32 d, u32 Kc[16], u32 Kd[16], __local u const u32 d22 = (d >> 22) & 0x00000030; u32 t = BOX (((d00 >> 0) & 0xff), 4, s_skb) - | BOX (((d07 >> 0) & 0xff) - |((d00 >> 8) & 0xff), 5, s_skb) - | BOX (((d07 >> 8) & 0xff), 6, s_skb) - | BOX (((d21 >> 0) & 0xff) - |((d22 >> 0) & 0xff), 7, s_skb); + | BOX (((d07 >> 0) & 0xff) + |((d00 >> 8) & 0xff), 5, s_skb) + | BOX (((d07 >> 8) & 0xff), 6, s_skb) + | BOX (((d21 >> 0) & 0xff) + |((d22 >> 0) & 0xff), 7, s_skb); Kc[i] = ((t << 16) | (s & 0x0000ffff)); Kd[i] = ((s >> 16) | (t & 0xffff0000)); diff --git a/OpenCL/m03000_a3.cl b/OpenCL/m03000_a3.cl index 5556a6c37..1d61ef473 100644 --- a/OpenCL/m03000_a3.cl +++ b/OpenCL/m03000_a3.cl @@ -21,7 +21,884 @@ #define COMPARE_S "check_single_comp4_bs.c" #define COMPARE_M "check_multi_comp4_bs.c" +#ifdef IS_NV #define KXX_DECL +#endif + +#ifdef IS_AMD +#define KXX_DECL volatile +#endif + +#ifdef IS_NV + +#if CUDA_ARCH >= 500 + +// +// Bitslice DES S-boxes with LOP3.LUT instructions +// For NVIDIA Maxwell architecture and CUDA 7.5 RC +// by DeepLearningJohnDoe, version 0.1.6, 2015/07/19 +// +// Gate counts: 25 24 25 18 25 24 24 23 +// Average: 23.5 +// Depth: 8 7 7 6 8 10 10 8 +// Average: 8 +// +// Note that same S-box function with a lower gate count isn't necessarily faster. +// +// These Boolean expressions corresponding to DES S-boxes were +// discovered by +// +// This file itself is Copyright (c) 2015 by +// Redistribution and use in source and binary forms, with or without +// modification, are permitted. +// +// The underlying mathematical formulas are NOT copyrighted. +// + +#define LUT(a,b,c,d,e) u32 a; asm ("lop3.b32 %0, %1, %2, %3, "#e";" : "=r"(a): "r"(b), "r"(c), "r"(d)); + +static void s1 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) +{ + LUT(xAA55AA5500550055, a1, a4, a6, 0xC1) + LUT(xA55AA55AF0F5F0F5, a3, a6, xAA55AA5500550055, 0x9E) + LUT(x5F5F5F5FA5A5A5A5, a1, a3, a6, 0xD6) + LUT(xF5A0F5A0A55AA55A, a4, xAA55AA5500550055, x5F5F5F5FA5A5A5A5, 0x56) + LUT(x947A947AD1E7D1E7, a2, xA55AA55AF0F5F0F5, xF5A0F5A0A55AA55A, 0x6C) + LUT(x5FFF5FFFFFFAFFFA, a6, xAA55AA5500550055, x5F5F5F5FA5A5A5A5, 0x7B) + LUT(xB96CB96C69936993, a2, xF5A0F5A0A55AA55A, x5FFF5FFFFFFAFFFA, 0xD6) + LUT(x3, a5, x947A947AD1E7D1E7, xB96CB96C69936993, 0x6A) + LUT(x55EE55EE55EE55EE, a1, a2, a4, 0x7A) + LUT(x084C084CB77BB77B, a2, a6, xF5A0F5A0A55AA55A, 0xC9) + LUT(x9C329C32E295E295, x947A947AD1E7D1E7, x55EE55EE55EE55EE, x084C084CB77BB77B, 0x72) + LUT(xA51EA51E50E050E0, a3, a6, x55EE55EE55EE55EE, 0x29) + LUT(x4AD34AD3BE3CBE3C, a2, x947A947AD1E7D1E7, xA51EA51E50E050E0, 0x95) + LUT(x2, a5, x9C329C32E295E295, x4AD34AD3BE3CBE3C, 0xC6) + LUT(xD955D95595D195D1, a1, a2, x9C329C32E295E295, 0xD2) + LUT(x8058805811621162, x947A947AD1E7D1E7, x55EE55EE55EE55EE, x084C084CB77BB77B, 0x90) + LUT(x7D0F7D0FC4B3C4B3, xA51EA51E50E050E0, xD955D95595D195D1, x8058805811621162, 0x76) + LUT(x0805080500010001, a3, xAA55AA5500550055, xD955D95595D195D1, 0x80) + LUT(x4A964A96962D962D, xB96CB96C69936993, x4AD34AD3BE3CBE3C, x0805080500010001, 0xA6) + LUT(x4, a5, x7D0F7D0FC4B3C4B3, x4A964A96962D962D, 0xA6) + LUT(x148014807B087B08, a1, xAA55AA5500550055, x947A947AD1E7D1E7, 0x21) + LUT(x94D894D86B686B68, xA55AA55AF0F5F0F5, x8058805811621162, x148014807B087B08, 0x6A) + LUT(x5555555540044004, a1, a6, x084C084CB77BB77B, 0x70) + LUT(xAFB4AFB4BF5BBF5B, x5F5F5F5FA5A5A5A5, xA51EA51E50E050E0, x5555555540044004, 0x97) + LUT(x1, a5, x94D894D86B686B68, xAFB4AFB4BF5BBF5B, 0x6C) + + *out1 ^= x1; + *out2 ^= x2; + *out3 ^= x3; + *out4 ^= x4; +} + +static void s2 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) +{ + LUT(xEEEEEEEE99999999, a1, a2, a6, 0x97) + LUT(xFFFFEEEE66666666, a5, a6, xEEEEEEEE99999999, 0x67) + LUT(x5555FFFFFFFF0000, a1, a5, a6, 0x76) + LUT(x6666DDDD5555AAAA, a2, xFFFFEEEE66666666, x5555FFFFFFFF0000, 0x69) + LUT(x6969D3D35353ACAC, a3, xFFFFEEEE66666666, x6666DDDD5555AAAA, 0x6A) + LUT(xCFCF3030CFCF3030, a2, a3, a5, 0x65) + LUT(xE4E4EEEE9999F0F0, a3, xEEEEEEEE99999999, x5555FFFFFFFF0000, 0x8D) + LUT(xE5E5BABACDCDB0B0, a1, xCFCF3030CFCF3030, xE4E4EEEE9999F0F0, 0xCA) + LUT(x3, a4, x6969D3D35353ACAC, xE5E5BABACDCDB0B0, 0xC6) + LUT(x3333CCCC00000000, a2, a5, a6, 0x14) + LUT(xCCCCDDDDFFFF0F0F, a5, xE4E4EEEE9999F0F0, x3333CCCC00000000, 0xB5) + LUT(x00000101F0F0F0F0, a3, a6, xFFFFEEEE66666666, 0x1C) + LUT(x9A9A64646A6A9595, a1, xCFCF3030CFCF3030, x00000101F0F0F0F0, 0x96) + LUT(x2, a4, xCCCCDDDDFFFF0F0F, x9A9A64646A6A9595, 0x6A) + LUT(x3333BBBB3333FFFF, a1, a2, x6666DDDD5555AAAA, 0xDE) + LUT(x1414141441410000, a1, a3, xE4E4EEEE9999F0F0, 0x90) + LUT(x7F7FF3F3F5F53939, x6969D3D35353ACAC, x9A9A64646A6A9595, x3333BBBB3333FFFF, 0x79) + LUT(x9494E3E34B4B3939, a5, x1414141441410000, x7F7FF3F3F5F53939, 0x29) + LUT(x1, a4, x3333BBBB3333FFFF, x9494E3E34B4B3939, 0xA6) + LUT(xB1B1BBBBCCCCA5A5, a1, a1, xE4E4EEEE9999F0F0, 0x4A) + LUT(xFFFFECECEEEEDDDD, a2, x3333CCCC00000000, x9A9A64646A6A9595, 0xEF) + LUT(xB1B1A9A9DCDC8787, xE5E5BABACDCDB0B0, xB1B1BBBBCCCCA5A5, xFFFFECECEEEEDDDD, 0x8D) + LUT(xFFFFCCCCEEEE4444, a2, a5, xFFFFEEEE66666666, 0x2B) + LUT(x4, a4, xB1B1A9A9DCDC8787, xFFFFCCCCEEEE4444, 0x6C) + + *out1 ^= x1; + *out2 ^= x2; + *out3 ^= x3; + *out4 ^= x4; +} + +static void s3 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) +{ + LUT(xA50FA50FA50FA50F, a1, a3, a4, 0xC9) + LUT(xF0F00F0FF0F0F0F0, a3, a5, a6, 0x4B) + LUT(xAF0FA0AAAF0FAF0F, a1, xA50FA50FA50FA50F, xF0F00F0FF0F0F0F0, 0x4D) + LUT(x5AA5A55A5AA55AA5, a1, a4, xF0F00F0FF0F0F0F0, 0x69) + LUT(xAA005FFFAA005FFF, a3, a5, xA50FA50FA50FA50F, 0xD6) + LUT(x5AA5A55A0F5AFAA5, a6, x5AA5A55A5AA55AA5, xAA005FFFAA005FFF, 0x9C) + LUT(x1, a2, xAF0FA0AAAF0FAF0F, x5AA5A55A0F5AFAA5, 0xA6) + LUT(xAA55AA5500AA00AA, a1, a4, a6, 0x49) + LUT(xFAFAA50FFAFAA50F, a1, a5, xA50FA50FA50FA50F, 0x9B) + LUT(x50AF0F5AFA50A5A5, a1, xAA55AA5500AA00AA, xFAFAA50FFAFAA50F, 0x66) + LUT(xAFAFAFAFFAFAFAFA, a1, a3, a6, 0x6F) + LUT(xAFAFFFFFFFFAFAFF, a4, x50AF0F5AFA50A5A5, xAFAFAFAFFAFAFAFA, 0xEB) + LUT(x4, a2, x50AF0F5AFA50A5A5, xAFAFFFFFFFFAFAFF, 0x6C) + LUT(x500F500F500F500F, a1, a3, a4, 0x98) + LUT(xF0505A0505A5050F, x5AA5A55A0F5AFAA5, xAA55AA5500AA00AA, xAFAFAFAFFAFAFAFA, 0x1D) + LUT(xF0505A05AA55AAFF, a6, x500F500F500F500F, xF0505A0505A5050F, 0x9A) + LUT(xFF005F55FF005F55, a1, a4, xAA005FFFAA005FFF, 0xB2) + LUT(xA55F5AF0A55F5AF0, a5, xA50FA50FA50FA50F, x5AA5A55A5AA55AA5, 0x3D) + LUT(x5A5F05A5A55F5AF0, a6, xFF005F55FF005F55, xA55F5AF0A55F5AF0, 0xA6) + LUT(x3, a2, xF0505A05AA55AAFF, x5A5F05A5A55F5AF0, 0xA6) + LUT(x0F0F0F0FA5A5A5A5, a1, a3, a6, 0xC6) + LUT(x5FFFFF5FFFA0FFA0, x5AA5A55A5AA55AA5, xAFAFAFAFFAFAFAFA, x0F0F0F0FA5A5A5A5, 0xDB) + LUT(xF5555AF500A05FFF, a5, xFAFAA50FFAFAA50F, xF0505A0505A5050F, 0xB9) + LUT(x05A5AAF55AFA55A5, xF0505A05AA55AAFF, x0F0F0F0FA5A5A5A5, xF5555AF500A05FFF, 0x9B) + LUT(x2, a2, x5FFFFF5FFFA0FFA0, x05A5AAF55AFA55A5, 0xA6) + + *out1 ^= x1; + *out2 ^= x2; + *out3 ^= x3; + *out4 ^= x4; +} + +static void s4 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) +{ + LUT(x55F055F055F055F0, a1, a3, a4, 0x72) + LUT(xA500F5F0A500F5F0, a3, a5, x55F055F055F055F0, 0xAD) + LUT(xF50AF50AF50AF50A, a1, a3, a4, 0x59) + LUT(xF5FA0FFFF5FA0FFF, a3, a5, xF50AF50AF50AF50A, 0xE7) + LUT(x61C8F93C61C8F93C, a2, xA500F5F0A500F5F0, xF5FA0FFFF5FA0FFF, 0xC6) + LUT(x9999666699996666, a1, a2, a5, 0x69) + LUT(x22C022C022C022C0, a2, a4, x55F055F055F055F0, 0x18) + LUT(xB35C94A6B35C94A6, xF5FA0FFFF5FA0FFF, x9999666699996666, x22C022C022C022C0, 0x63) + LUT(x4, a6, x61C8F93C61C8F93C, xB35C94A6B35C94A6, 0x6A) + LUT(x4848484848484848, a1, a2, a3, 0x12) + LUT(x55500AAA55500AAA, a1, a5, xF5FA0FFFF5FA0FFF, 0x28) + LUT(x3C90B3D63C90B3D6, x61C8F93C61C8F93C, x4848484848484848, x55500AAA55500AAA, 0x1E) + LUT(x8484333384843333, a1, x9999666699996666, x4848484848484848, 0x14) + LUT(x4452F1AC4452F1AC, xF50AF50AF50AF50A, xF5FA0FFFF5FA0FFF, xB35C94A6B35C94A6, 0x78) + LUT(x9586CA379586CA37, x55500AAA55500AAA, x8484333384843333, x4452F1AC4452F1AC, 0xD6) + LUT(x2, a6, x3C90B3D63C90B3D6, x9586CA379586CA37, 0x6A) + LUT(x1, a6, x3C90B3D63C90B3D6, x9586CA379586CA37, 0xA9) + LUT(x3, a6, x61C8F93C61C8F93C, xB35C94A6B35C94A6, 0x56) + + *out1 ^= x1; + *out2 ^= x2; + *out3 ^= x3; + *out4 ^= x4; +} + +static void s5 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) +{ + LUT(xA0A0A0A0FFFFFFFF, a1, a3, a6, 0xAB) + LUT(xFFFF00005555FFFF, a1, a5, a6, 0xB9) + LUT(xB3B320207777FFFF, a2, xA0A0A0A0FFFFFFFF, xFFFF00005555FFFF, 0xE8) + LUT(x50505A5A5A5A5050, a1, a3, xFFFF00005555FFFF, 0x34) + LUT(xA2A2FFFF2222FFFF, a1, a5, xB3B320207777FFFF, 0xCE) + LUT(x2E2E6969A4A46363, a2, x50505A5A5A5A5050, xA2A2FFFF2222FFFF, 0x29) + LUT(x3, a4, xB3B320207777FFFF, x2E2E6969A4A46363, 0xA6) + LUT(xA5A50A0AA5A50A0A, a1, a3, a5, 0x49) + LUT(x969639396969C6C6, a2, a6, xA5A50A0AA5A50A0A, 0x96) + LUT(x1B1B1B1B1B1B1B1B, a1, a2, a3, 0xCA) + LUT(xBFBFBFBFF6F6F9F9, a3, xA0A0A0A0FFFFFFFF, x969639396969C6C6, 0x7E) + LUT(x5B5BA4A4B8B81D1D, xFFFF00005555FFFF, x1B1B1B1B1B1B1B1B, xBFBFBFBFF6F6F9F9, 0x96) + LUT(x2, a4, x969639396969C6C6, x5B5BA4A4B8B81D1D, 0xCA) + LUT(x5555BBBBFFFF5555, a1, a2, xFFFF00005555FFFF, 0xE5) + LUT(x6D6D9C9C95956969, x50505A5A5A5A5050, xA2A2FFFF2222FFFF, x969639396969C6C6, 0x97) + LUT(x1A1A67676A6AB4B4, xA5A50A0AA5A50A0A, x5555BBBBFFFF5555, x6D6D9C9C95956969, 0x47) + LUT(xA0A0FFFFAAAA0000, a3, xFFFF00005555FFFF, xA5A50A0AA5A50A0A, 0x3B) + LUT(x36369C9CC1C1D6D6, x969639396969C6C6, x6D6D9C9C95956969, xA0A0FFFFAAAA0000, 0xD9) + LUT(x1, a4, x1A1A67676A6AB4B4, x36369C9CC1C1D6D6, 0xCA) + LUT(x5555F0F0F5F55555, a1, a3, xFFFF00005555FFFF, 0xB1) + LUT(x79790202DCDC0808, xA2A2FFFF2222FFFF, xA5A50A0AA5A50A0A, x969639396969C6C6, 0x47) + LUT(x6C6CF2F229295D5D, xBFBFBFBFF6F6F9F9, x5555F0F0F5F55555, x79790202DCDC0808, 0x6E) + LUT(xA3A3505010101A1A, a2, xA2A2FFFF2222FFFF, x36369C9CC1C1D6D6, 0x94) + LUT(x7676C7C74F4FC7C7, a1, x2E2E6969A4A46363, xA3A3505010101A1A, 0xD9) + LUT(x4, a4, x6C6CF2F229295D5D, x7676C7C74F4FC7C7, 0xC6) + + *out1 ^= x1; + *out2 ^= x2; + *out3 ^= x3; + *out4 ^= x4; +} + +static void s6 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) +{ + LUT(x5050F5F55050F5F5, a1, a3, a5, 0xB2) + LUT(x6363C6C66363C6C6, a1, a2, x5050F5F55050F5F5, 0x66) + LUT(xAAAA5555AAAA5555, a1, a1, a5, 0xA9) + LUT(x3A3A65653A3A6565, a3, x6363C6C66363C6C6, xAAAA5555AAAA5555, 0xA9) + LUT(x5963A3C65963A3C6, a4, x6363C6C66363C6C6, x3A3A65653A3A6565, 0xC6) + LUT(xE7E76565E7E76565, a5, x6363C6C66363C6C6, x3A3A65653A3A6565, 0xAD) + LUT(x455D45DF455D45DF, a1, a4, xE7E76565E7E76565, 0xE4) + LUT(x4, a6, x5963A3C65963A3C6, x455D45DF455D45DF, 0x6C) + LUT(x1101220211012202, a2, xAAAA5555AAAA5555, x5963A3C65963A3C6, 0x20) + LUT(xF00F0FF0F00F0FF0, a3, a4, a5, 0x69) + LUT(x16E94A9716E94A97, xE7E76565E7E76565, x1101220211012202, xF00F0FF0F00F0FF0, 0x9E) + LUT(x2992922929929229, a1, a2, xF00F0FF0F00F0FF0, 0x49) + LUT(xAFAF9823AFAF9823, a5, x5050F5F55050F5F5, x2992922929929229, 0x93) + LUT(x3, a6, x16E94A9716E94A97, xAFAF9823AFAF9823, 0x6C) + LUT(x4801810248018102, a4, x5963A3C65963A3C6, x1101220211012202, 0xA4) + LUT(x5EE8FFFD5EE8FFFD, a5, x16E94A9716E94A97, x4801810248018102, 0x76) + LUT(xF0FF00FFF0FF00FF, a3, a4, a5, 0xCD) + LUT(x942D9A67942D9A67, x3A3A65653A3A6565, x5EE8FFFD5EE8FFFD, xF0FF00FFF0FF00FF, 0x86) + LUT(x1, a6, x5EE8FFFD5EE8FFFD, x942D9A67942D9A67, 0xA6) + LUT(x6A40D4ED6F4DD4EE, a2, x4, xAFAF9823AFAF9823, 0x2D) + LUT(x6CA89C7869A49C79, x1101220211012202, x16E94A9716E94A97, x6A40D4ED6F4DD4EE, 0x26) + LUT(xD6DE73F9D6DE73F9, a3, x6363C6C66363C6C6, x455D45DF455D45DF, 0x6B) + LUT(x925E63E1965A63E1, x3A3A65653A3A6565, x6CA89C7869A49C79, xD6DE73F9D6DE73F9, 0xA2) + LUT(x2, a6, x6CA89C7869A49C79, x925E63E1965A63E1, 0xCA) + + *out1 ^= x1; + *out2 ^= x2; + *out3 ^= x3; + *out4 ^= x4; +} + +static void s7 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) +{ + LUT(x88AA88AA88AA88AA, a1, a2, a4, 0x0B) + LUT(xAAAAFF00AAAAFF00, a1, a4, a5, 0x27) + LUT(xADAFF8A5ADAFF8A5, a3, x88AA88AA88AA88AA, xAAAAFF00AAAAFF00, 0x9E) + LUT(x0A0AF5F50A0AF5F5, a1, a3, a5, 0xA6) + LUT(x6B69C5DC6B69C5DC, a2, xADAFF8A5ADAFF8A5, x0A0AF5F50A0AF5F5, 0x6B) + LUT(x1C69B2DC1C69B2DC, a4, x88AA88AA88AA88AA, x6B69C5DC6B69C5DC, 0xA9) + LUT(x1, a6, xADAFF8A5ADAFF8A5, x1C69B2DC1C69B2DC, 0x6A) + LUT(x9C9C9C9C9C9C9C9C, a1, a2, a3, 0x63) + LUT(xE6E63BFDE6E63BFD, a2, xAAAAFF00AAAAFF00, x0A0AF5F50A0AF5F5, 0xE7) + LUT(x6385639E6385639E, a4, x9C9C9C9C9C9C9C9C, xE6E63BFDE6E63BFD, 0x93) + LUT(x5959C4CE5959C4CE, a2, x6B69C5DC6B69C5DC, xE6E63BFDE6E63BFD, 0x5D) + LUT(x5B53F53B5B53F53B, a4, x0A0AF5F50A0AF5F5, x5959C4CE5959C4CE, 0x6E) + LUT(x3, a6, x6385639E6385639E, x5B53F53B5B53F53B, 0xC6) + LUT(xFAF505FAFAF505FA, a3, a4, x0A0AF5F50A0AF5F5, 0x6D) + LUT(x6A65956A6A65956A, a3, x9C9C9C9C9C9C9C9C, xFAF505FAFAF505FA, 0xA6) + LUT(x8888CCCC8888CCCC, a1, a2, a5, 0x23) + LUT(x94E97A9494E97A94, x1C69B2DC1C69B2DC, x6A65956A6A65956A, x8888CCCC8888CCCC, 0x72) + LUT(x4, a6, x6A65956A6A65956A, x94E97A9494E97A94, 0xAC) + LUT(xA050A050A050A050, a1, a3, a4, 0x21) + LUT(xC1B87A2BC1B87A2B, xAAAAFF00AAAAFF00, x5B53F53B5B53F53B, x94E97A9494E97A94, 0xA4) + LUT(xE96016B7E96016B7, x8888CCCC8888CCCC, xA050A050A050A050, xC1B87A2BC1B87A2B, 0x96) + LUT(xE3CF1FD5E3CF1FD5, x88AA88AA88AA88AA, x6A65956A6A65956A, xE96016B7E96016B7, 0x3E) + LUT(x6776675B6776675B, xADAFF8A5ADAFF8A5, x94E97A9494E97A94, xE3CF1FD5E3CF1FD5, 0x6B) + LUT(x2, a6, xE96016B7E96016B7, x6776675B6776675B, 0xC6) + + *out1 ^= x1; + *out2 ^= x2; + *out3 ^= x3; + *out4 ^= x4; +} + +static void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) +{ + LUT(xEEEE3333EEEE3333, a1, a2, a5, 0x9D) + LUT(xBBBBBBBBBBBBBBBB, a1, a1, a2, 0x83) + LUT(xDDDDAAAADDDDAAAA, a1, a2, a5, 0x5B) + LUT(x29295A5A29295A5A, a3, xBBBBBBBBBBBBBBBB, xDDDDAAAADDDDAAAA, 0x85) + LUT(xC729695AC729695A, a4, xEEEE3333EEEE3333, x29295A5A29295A5A, 0xA6) + LUT(x3BF77B7B3BF77B7B, a2, a5, xC729695AC729695A, 0xF9) + LUT(x2900FF002900FF00, a4, a5, x29295A5A29295A5A, 0x0E) + LUT(x56B3803F56B3803F, xBBBBBBBBBBBBBBBB, x3BF77B7B3BF77B7B, x2900FF002900FF00, 0x61) + LUT(x4, a6, xC729695AC729695A, x56B3803F56B3803F, 0x6C) + LUT(xFBFBFBFBFBFBFBFB, a1, a2, a3, 0xDF) + LUT(x3012B7B73012B7B7, a2, a5, xC729695AC729695A, 0xD4) + LUT(x34E9B34C34E9B34C, a4, xFBFBFBFBFBFBFBFB, x3012B7B73012B7B7, 0x69) + LUT(xBFEAEBBEBFEAEBBE, a1, x29295A5A29295A5A, x34E9B34C34E9B34C, 0x6F) + LUT(xFFAEAFFEFFAEAFFE, a3, xBBBBBBBBBBBBBBBB, xBFEAEBBEBFEAEBBE, 0xB9) + LUT(x2, a6, x34E9B34C34E9B34C, xFFAEAFFEFFAEAFFE, 0xC6) + LUT(xCFDE88BBCFDE88BB, a2, xDDDDAAAADDDDAAAA, x34E9B34C34E9B34C, 0x5C) + LUT(x3055574530555745, a1, xC729695AC729695A, xCFDE88BBCFDE88BB, 0x71) + LUT(x99DDEEEE99DDEEEE, a4, xBBBBBBBBBBBBBBBB, xDDDDAAAADDDDAAAA, 0xB9) + LUT(x693CD926693CD926, x3BF77B7B3BF77B7B, x34E9B34C34E9B34C, x99DDEEEE99DDEEEE, 0x69) + LUT(x3, a6, x3055574530555745, x693CD926693CD926, 0x6A) + LUT(x9955EE559955EE55, a1, a4, x99DDEEEE99DDEEEE, 0xE2) + LUT(x9D48FA949D48FA94, x3BF77B7B3BF77B7B, xBFEAEBBEBFEAEBBE, x9955EE559955EE55, 0x9C) + LUT(x1, a6, xC729695AC729695A, x9D48FA949D48FA94, 0x39) + + *out1 ^= x1; + *out2 ^= x2; + *out3 ^= x3; + *out4 ^= x4; +} + +#else + +/* + * Bitslice DES S-boxes for x86 with MMX/SSE2/AVX and for typical RISC + * architectures. These use AND, OR, XOR, NOT, and AND-NOT gates. + * + * Gate counts: 49 44 46 33 48 46 46 41 + * Average: 44.125 + * + * Several same-gate-count expressions for each S-box are included (for use on + * different CPUs/GPUs). + * + * These Boolean expressions corresponding to DES S-boxes have been generated + * by Roman Rusakov for use in Openwall's + * John the Ripper password cracker: http://www.openwall.com/john/ + * Being mathematical formulas, they are not copyrighted and are free for reuse + * by anyone. + * + * This file (a specific representation of the S-box expressions, surrounding + * logic) is Copyright (c) 2011 by Solar Designer . + * Redistribution and use in source and binary forms, with or without + * modification, are permitted. (This is a heavily cut-down "BSD license".) + * + * The effort has been sponsored by Rapid7: http://www.rapid7.com + */ + +static void s1 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) +{ + u32 x55005500, x5A0F5A0F, x3333FFFF, x66666666, x22226666, x2D2D6969, + x25202160; + u32 x00FFFF00, x33CCCC33, x4803120C, x2222FFFF, x6A21EDF3, x4A01CC93; + u32 x5555FFFF, x7F75FFFF, x00D20096, x7FA7FF69; + u32 x0A0A0000, x0AD80096, x00999900, x0AD99996; + u32 x22332233, x257AA5F0, x054885C0, xFAB77A3F, x2221EDF3, xD89697CC; + u32 x05B77AC0, x05F77AD6, x36C48529, x6391D07C, xBB0747B0; + u32 x4C460000, x4EDF9996, x2D4E49EA, xBBFFFFB0, x96B1B65A; + u32 x5AFF5AFF, x52B11215, x4201C010, x10B0D205; + u32 x00, x01, x10, x11, x20, x21, x30, x31; + + x55005500 = a1 & ~a5; + x5A0F5A0F = a4 ^ x55005500; + x3333FFFF = a3 | a6; + x66666666 = a1 ^ a3; + x22226666 = x3333FFFF & x66666666; + x2D2D6969 = a4 ^ x22226666; + x25202160 = x2D2D6969 & ~x5A0F5A0F; + + x00FFFF00 = a5 ^ a6; + x33CCCC33 = a3 ^ x00FFFF00; + x4803120C = x5A0F5A0F & ~x33CCCC33; + x2222FFFF = a6 | x22226666; + x6A21EDF3 = x4803120C ^ x2222FFFF; + x4A01CC93 = x6A21EDF3 & ~x25202160; + + x5555FFFF = a1 | a6; + x7F75FFFF = x6A21EDF3 | x5555FFFF; + x00D20096 = a5 & ~x2D2D6969; + x7FA7FF69 = x7F75FFFF ^ x00D20096; + + x0A0A0000 = a4 & ~x5555FFFF; + x0AD80096 = x00D20096 ^ x0A0A0000; + x00999900 = x00FFFF00 & ~x66666666; + x0AD99996 = x0AD80096 | x00999900; + + x22332233 = a3 & ~x55005500; + x257AA5F0 = x5A0F5A0F ^ x7F75FFFF; + x054885C0 = x257AA5F0 & ~x22332233; + xFAB77A3F = ~x054885C0; + x2221EDF3 = x3333FFFF & x6A21EDF3; + xD89697CC = xFAB77A3F ^ x2221EDF3; + x20 = x7FA7FF69 & ~a2; + x21 = x20 ^ xD89697CC; + *out3 ^= x21; + + x05B77AC0 = x00FFFF00 ^ x054885C0; + x05F77AD6 = x00D20096 | x05B77AC0; + x36C48529 = x3333FFFF ^ x05F77AD6; + x6391D07C = a1 ^ x36C48529; + xBB0747B0 = xD89697CC ^ x6391D07C; + x00 = x25202160 | a2; + x01 = x00 ^ xBB0747B0; + *out1 ^= x01; + + x4C460000 = x3333FFFF ^ x7F75FFFF; + x4EDF9996 = x0AD99996 | x4C460000; + x2D4E49EA = x6391D07C ^ x4EDF9996; + xBBFFFFB0 = x00FFFF00 | xBB0747B0; + x96B1B65A = x2D4E49EA ^ xBBFFFFB0; + x10 = x4A01CC93 | a2; + x11 = x10 ^ x96B1B65A; + *out2 ^= x11; + + x5AFF5AFF = a5 | x5A0F5A0F; + x52B11215 = x5AFF5AFF & ~x2D4E49EA; + x4201C010 = x4A01CC93 & x6391D07C; + x10B0D205 = x52B11215 ^ x4201C010; + x30 = x10B0D205 | a2; + x31 = x30 ^ x0AD99996; + *out4 ^= x31; +} + +static void s2 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) +{ + u32 x33CC33CC; + u32 x55550000, x00AA00FF, x33BB33FF; + u32 x33CC0000, x11441144, x11BB11BB, x003311BB; + u32 x00000F0F, x336600FF, x332200FF, x332200F0; + u32 x0302000F, xAAAAAAAA, xA9A8AAA5, x33CCCC33, x33CCC030, x9A646A95; + u32 x00333303, x118822B8, xA8208805, x3CC3C33C, x94E34B39; + u32 x0331330C, x3FF3F33C, xA9DF596A, xA9DF5F6F, x962CAC53; + u32 xA9466A6A, x3DA52153, x29850143, x33C0330C, x1A45324F; + u32 x0A451047, xBBDFDD7B, xB19ACD3C; + u32 x00, x01, x10, x11, x20, x21, x30, x31; + + x33CC33CC = a2 ^ a5; + + x55550000 = a1 & ~a6; + x00AA00FF = a5 & ~x55550000; + x33BB33FF = a2 | x00AA00FF; + + x33CC0000 = x33CC33CC & ~a6; + x11441144 = a1 & x33CC33CC; + x11BB11BB = a5 ^ x11441144; + x003311BB = x11BB11BB & ~x33CC0000; + + x00000F0F = a3 & a6; + x336600FF = x00AA00FF ^ x33CC0000; + x332200FF = x33BB33FF & x336600FF; + x332200F0 = x332200FF & ~x00000F0F; + + x0302000F = a3 & x332200FF; + xAAAAAAAA = ~a1; + xA9A8AAA5 = x0302000F ^ xAAAAAAAA; + x33CCCC33 = a6 ^ x33CC33CC; + x33CCC030 = x33CCCC33 & ~x00000F0F; + x9A646A95 = xA9A8AAA5 ^ x33CCC030; + x10 = a4 & ~x332200F0; + x11 = x10 ^ x9A646A95; + *out2 ^= x11; + + x00333303 = a2 & ~x33CCC030; + x118822B8 = x11BB11BB ^ x00333303; + xA8208805 = xA9A8AAA5 & ~x118822B8; + x3CC3C33C = a3 ^ x33CCCC33; + x94E34B39 = xA8208805 ^ x3CC3C33C; + x00 = x33BB33FF & ~a4; + x01 = x00 ^ x94E34B39; + *out1 ^= x01; + + x0331330C = x0302000F ^ x00333303; + x3FF3F33C = x3CC3C33C | x0331330C; + xA9DF596A = x33BB33FF ^ x9A646A95; + xA9DF5F6F = x00000F0F | xA9DF596A; + x962CAC53 = x3FF3F33C ^ xA9DF5F6F; + + xA9466A6A = x332200FF ^ x9A646A95; + x3DA52153 = x94E34B39 ^ xA9466A6A; + x29850143 = xA9DF5F6F & x3DA52153; + x33C0330C = x33CC33CC & x3FF3F33C; + x1A45324F = x29850143 ^ x33C0330C; + x20 = x1A45324F | a4; + x21 = x20 ^ x962CAC53; + *out3 ^= x21; + + x0A451047 = x1A45324F & ~x118822B8; + xBBDFDD7B = x33CCCC33 | xA9DF596A; + xB19ACD3C = x0A451047 ^ xBBDFDD7B; + x30 = x003311BB | a4; + x31 = x30 ^ xB19ACD3C; + *out4 ^= x31; +} + +static void s3 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) +{ + u32 x44444444, x0F0FF0F0, x4F4FF4F4, x00FFFF00, x00AAAA00, x4FE55EF4; + u32 x3C3CC3C3, x3C3C0000, x7373F4F4, x0C840A00; + u32 x00005EF4, x00FF5EFF, x00555455, x3C699796; + u32 x000FF000, x55AA55AA, x26D9A15E, x2FDFAF5F, x2FD00F5F; + u32 x55AAFFAA, x28410014, x000000FF, x000000CC, x284100D8; + u32 x204100D0, x3C3CC3FF, x1C3CC32F, x4969967A; + u32 x4CC44CC4, x40C040C0, xC3C33C3C, x9669C396, xD6A98356; + u32 xD6E9C3D6, x4CEEEEC4, x9A072D12, x001A000B, x9A1F2D1B; + u32 x00, x01, x10, x11, x20, x21, x30, x31; + + x44444444 = a1 & ~a2; + x0F0FF0F0 = a3 ^ a6; + x4F4FF4F4 = x44444444 | x0F0FF0F0; + x00FFFF00 = a4 ^ a6; + x00AAAA00 = x00FFFF00 & ~a1; + x4FE55EF4 = x4F4FF4F4 ^ x00AAAA00; + + x3C3CC3C3 = a2 ^ x0F0FF0F0; + x3C3C0000 = x3C3CC3C3 & ~a6; + x7373F4F4 = x4F4FF4F4 ^ x3C3C0000; + x0C840A00 = x4FE55EF4 & ~x7373F4F4; + + x00005EF4 = a6 & x4FE55EF4; + x00FF5EFF = a4 | x00005EF4; + x00555455 = a1 & x00FF5EFF; + x3C699796 = x3C3CC3C3 ^ x00555455; + x30 = x4FE55EF4 & ~a5; + x31 = x30 ^ x3C699796; + *out4 ^= x31; + + x000FF000 = x0F0FF0F0 & x00FFFF00; + x55AA55AA = a1 ^ a4; + x26D9A15E = x7373F4F4 ^ x55AA55AA; + x2FDFAF5F = a3 | x26D9A15E; + x2FD00F5F = x2FDFAF5F & ~x000FF000; + + x55AAFFAA = x00AAAA00 | x55AA55AA; + x28410014 = x3C699796 & ~x55AAFFAA; + x000000FF = a4 & a6; + x000000CC = x000000FF & ~a2; + x284100D8 = x28410014 ^ x000000CC; + + x204100D0 = x7373F4F4 & x284100D8; + x3C3CC3FF = x3C3CC3C3 | x000000FF; + x1C3CC32F = x3C3CC3FF & ~x204100D0; + x4969967A = a1 ^ x1C3CC32F; + x10 = x2FD00F5F & a5; + x11 = x10 ^ x4969967A; + *out2 ^= x11; + + x4CC44CC4 = x4FE55EF4 & ~a2; + x40C040C0 = x4CC44CC4 & ~a3; + xC3C33C3C = ~x3C3CC3C3; + x9669C396 = x55AAFFAA ^ xC3C33C3C; + xD6A98356 = x40C040C0 ^ x9669C396; + x00 = a5 & ~x0C840A00; + x01 = x00 ^ xD6A98356; + *out1 ^= x01; + + xD6E9C3D6 = x40C040C0 | x9669C396; + x4CEEEEC4 = x00AAAA00 | x4CC44CC4; + x9A072D12 = xD6E9C3D6 ^ x4CEEEEC4; + x001A000B = a4 & ~x4FE55EF4; + x9A1F2D1B = x9A072D12 | x001A000B; + x20 = a5 & ~x284100D8; + x21 = x20 ^ x9A1F2D1B; + *out3 ^= x21; +} + +static void s4 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) +{ + u32 x5A5A5A5A, x0F0FF0F0; + u32 x33FF33FF, x33FFCC00, x0C0030F0, x0C0CC0C0, x0CF3C03F, x5EFBDA7F, + x52FBCA0F, x61C8F93C; + u32 x00C0C03C, x0F0F30C0, x3B92A366, x30908326, x3C90B3D6; + u32 x33CC33CC, x0C0CFFFF, x379E5C99, x04124C11, x56E9861E, xA91679E1; + u32 x9586CA37, x8402C833, x84C2C83F, xB35C94A6; + u32 x00, x01, x10, x11, x20, x21, x30, x31; + + x5A5A5A5A = a1 ^ a3; + x0F0FF0F0 = a3 ^ a5; + x33FF33FF = a2 | a4; + x33FFCC00 = a5 ^ x33FF33FF; + x0C0030F0 = x0F0FF0F0 & ~x33FFCC00; + x0C0CC0C0 = x0F0FF0F0 & ~a2; + x0CF3C03F = a4 ^ x0C0CC0C0; + x5EFBDA7F = x5A5A5A5A | x0CF3C03F; + x52FBCA0F = x5EFBDA7F & ~x0C0030F0; + x61C8F93C = a2 ^ x52FBCA0F; + + x00C0C03C = x0CF3C03F & x61C8F93C; + x0F0F30C0 = x0F0FF0F0 & ~x00C0C03C; + x3B92A366 = x5A5A5A5A ^ x61C8F93C; + x30908326 = x3B92A366 & ~x0F0F30C0; + x3C90B3D6 = x0C0030F0 ^ x30908326; + + x33CC33CC = a2 ^ a4; + x0C0CFFFF = a5 | x0C0CC0C0; + x379E5C99 = x3B92A366 ^ x0C0CFFFF; + x04124C11 = x379E5C99 & ~x33CC33CC; + x56E9861E = x52FBCA0F ^ x04124C11; + x00 = a6 & ~x3C90B3D6; + x01 = x00 ^ x56E9861E; + *out1 ^= x01; + + xA91679E1 = ~x56E9861E; + x10 = x3C90B3D6 & ~a6; + x11 = x10 ^ xA91679E1; + *out2 ^= x11; + + x9586CA37 = x3C90B3D6 ^ xA91679E1; + x8402C833 = x9586CA37 & ~x33CC33CC; + x84C2C83F = x00C0C03C | x8402C833; + xB35C94A6 = x379E5C99 ^ x84C2C83F; + x20 = x61C8F93C | a6; + x21 = x20 ^ xB35C94A6; + *out3 ^= x21; + + x30 = a6 & x61C8F93C; + x31 = x30 ^ xB35C94A6; + *out4 ^= x31; +} + +static void s5 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) +{ + u32 x77777777, x77770000, x22225555, x11116666, x1F1F6F6F; + u32 x70700000, x43433333, x00430033, x55557777, x55167744, x5A19784B; + u32 x5A1987B4, x7A3BD7F5, x003B00F5, x221955A0, x05050707, x271C52A7; + u32 x2A2A82A0, x6969B193, x1FE06F90, x16804E00, xE97FB1FF; + u32 x43403302, x35CAED30, x37DEFFB7, x349ECCB5, x0B01234A; + u32 x101884B4, x0FF8EB24, x41413333, x4FF9FB37, x4FC2FBC2; + u32 x22222222, x16BCEE97, x0F080B04, x19B4E593; + u32 x5C5C5C5C, x4448184C, x2DDABE71, x6992A63D; + u32 x00, x01, x10, x11, x20, x21, x30, x31; + + x77777777 = a1 | a3; + x77770000 = x77777777 & ~a6; + x22225555 = a1 ^ x77770000; + x11116666 = a3 ^ x22225555; + x1F1F6F6F = a4 | x11116666; + + x70700000 = x77770000 & ~a4; + x43433333 = a3 ^ x70700000; + x00430033 = a5 & x43433333; + x55557777 = a1 | x11116666; + x55167744 = x00430033 ^ x55557777; + x5A19784B = a4 ^ x55167744; + + x5A1987B4 = a6 ^ x5A19784B; + x7A3BD7F5 = x22225555 | x5A1987B4; + x003B00F5 = a5 & x7A3BD7F5; + x221955A0 = x22225555 ^ x003B00F5; + x05050707 = a4 & x55557777; + x271C52A7 = x221955A0 ^ x05050707; + + x2A2A82A0 = x7A3BD7F5 & ~a1; + x6969B193 = x43433333 ^ x2A2A82A0; + x1FE06F90 = a5 ^ x1F1F6F6F; + x16804E00 = x1FE06F90 & ~x6969B193; + xE97FB1FF = ~x16804E00; + x20 = xE97FB1FF & ~a2; + x21 = x20 ^ x5A19784B; + *out3 ^= x21; + + x43403302 = x43433333 & ~x003B00F5; + x35CAED30 = x2A2A82A0 ^ x1FE06F90; + x37DEFFB7 = x271C52A7 | x35CAED30; + x349ECCB5 = x37DEFFB7 & ~x43403302; + x0B01234A = x1F1F6F6F & ~x349ECCB5; + + x101884B4 = x5A1987B4 & x349ECCB5; + x0FF8EB24 = x1FE06F90 ^ x101884B4; + x41413333 = x43433333 & x55557777; + x4FF9FB37 = x0FF8EB24 | x41413333; + x4FC2FBC2 = x003B00F5 ^ x4FF9FB37; + x30 = x4FC2FBC2 & a2; + x31 = x30 ^ x271C52A7; + *out4 ^= x31; + + x22222222 = a1 ^ x77777777; + x16BCEE97 = x349ECCB5 ^ x22222222; + x0F080B04 = a4 & x0FF8EB24; + x19B4E593 = x16BCEE97 ^ x0F080B04; + x00 = x0B01234A | a2; + x01 = x00 ^ x19B4E593; + *out1 ^= x01; + + x5C5C5C5C = x1F1F6F6F ^ x43433333; + x4448184C = x5C5C5C5C & ~x19B4E593; + x2DDABE71 = x22225555 ^ x0FF8EB24; + x6992A63D = x4448184C ^ x2DDABE71; + x10 = x1F1F6F6F & a2; + x11 = x10 ^ x6992A63D; + *out2 ^= x11; +} + +static void s6 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) +{ + u32 x33CC33CC; + u32 x3333FFFF, x11115555, x22DD6699, x22DD9966, x00220099; + u32 x00551144, x33662277, x5A5A5A5A, x7B7E7A7F, x59A31CE6; + u32 x09030C06, x09030000, x336622FF, x3A6522FF; + u32 x484D494C, x0000B6B3, x0F0FB9BC, x00FC00F9, x0FFFB9FD; + u32 x5DF75DF7, x116600F7, x1E69B94B, x1668B94B; + u32 x7B7B7B7B, x411E5984, x1FFFFDFD, x5EE1A479; + u32 x3CB4DFD2, x004B002D, xB7B2B6B3, xCCC9CDC8, xCC82CDE5; + u32 x0055EEBB, x5A5AECE9, x0050ECA9, xC5CAC1CE, xC59A2D67; + u32 x00, x01, x10, x11, x20, x21, x30, x31; + + x33CC33CC = a2 ^ a5; + + x3333FFFF = a2 | a6; + x11115555 = a1 & x3333FFFF; + x22DD6699 = x33CC33CC ^ x11115555; + x22DD9966 = a6 ^ x22DD6699; + x00220099 = a5 & ~x22DD9966; + + x00551144 = a1 & x22DD9966; + x33662277 = a2 ^ x00551144; + x5A5A5A5A = a1 ^ a3; + x7B7E7A7F = x33662277 | x5A5A5A5A; + x59A31CE6 = x22DD6699 ^ x7B7E7A7F; + + x09030C06 = a3 & x59A31CE6; + x09030000 = x09030C06 & ~a6; + x336622FF = x00220099 | x33662277; + x3A6522FF = x09030000 ^ x336622FF; + x30 = x3A6522FF & a4; + x31 = x30 ^ x59A31CE6; + *out4 ^= x31; + + x484D494C = a2 ^ x7B7E7A7F; + x0000B6B3 = a6 & ~x484D494C; + x0F0FB9BC = a3 ^ x0000B6B3; + x00FC00F9 = a5 & ~x09030C06; + x0FFFB9FD = x0F0FB9BC | x00FC00F9; + + x5DF75DF7 = a1 | x59A31CE6; + x116600F7 = x336622FF & x5DF75DF7; + x1E69B94B = x0F0FB9BC ^ x116600F7; + x1668B94B = x1E69B94B & ~x09030000; + x20 = x00220099 | a4; + x21 = x20 ^ x1668B94B; + *out3 ^= x21; + + x7B7B7B7B = a2 | x5A5A5A5A; + x411E5984 = x3A6522FF ^ x7B7B7B7B; + x1FFFFDFD = x11115555 | x0FFFB9FD; + x5EE1A479 = x411E5984 ^ x1FFFFDFD; + + x3CB4DFD2 = x22DD6699 ^ x1E69B94B; + x004B002D = a5 & ~x3CB4DFD2; + xB7B2B6B3 = ~x484D494C; + xCCC9CDC8 = x7B7B7B7B ^ xB7B2B6B3; + xCC82CDE5 = x004B002D ^ xCCC9CDC8; + x10 = xCC82CDE5 & ~a4; + x11 = x10 ^ x5EE1A479; + *out2 ^= x11; + + x0055EEBB = a6 ^ x00551144; + x5A5AECE9 = a1 ^ x0F0FB9BC; + x0050ECA9 = x0055EEBB & x5A5AECE9; + xC5CAC1CE = x09030C06 ^ xCCC9CDC8; + xC59A2D67 = x0050ECA9 ^ xC5CAC1CE; + x00 = x0FFFB9FD & ~a4; + x01 = x00 ^ xC59A2D67; + *out1 ^= x01; +} + +static void s7 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) +{ + u32 x0FF00FF0, x3CC33CC3, x00003CC3, x0F000F00, x5A555A55, x00001841; + u32 x00000F00, x33333C33, x7B777E77, x0FF0F00F, x74878E78; + u32 x003C003C, x5A7D5A7D, x333300F0, x694E5A8D; + u32 x0FF0CCCC, x000F0303, x5A505854, x33CC000F, x699C585B; + u32 x7F878F78, x21101013, x7F979F7B, x30030CC0, x4F9493BB; + u32 x6F9CDBFB, x0000DBFB, x00005151, x26DAC936, x26DA9867; + u32 x27DA9877, x27DA438C, x2625C9C9, x27FFCBCD; + u32 x27FF1036, x27FF103E, xB06B6C44, x97947C7A; + u32 x00, x01, x10, x11, x20, x21, x30, x31; + + x0FF00FF0 = a4 ^ a5; + x3CC33CC3 = a3 ^ x0FF00FF0; + x00003CC3 = a6 & x3CC33CC3; + x0F000F00 = a4 & x0FF00FF0; + x5A555A55 = a2 ^ x0F000F00; + x00001841 = x00003CC3 & x5A555A55; + + x00000F00 = a6 & x0F000F00; + x33333C33 = a3 ^ x00000F00; + x7B777E77 = x5A555A55 | x33333C33; + x0FF0F00F = a6 ^ x0FF00FF0; + x74878E78 = x7B777E77 ^ x0FF0F00F; + x30 = a1 & ~x00001841; + x31 = x30 ^ x74878E78; + *out4 ^= x31; + + x003C003C = a5 & ~x3CC33CC3; + x5A7D5A7D = x5A555A55 | x003C003C; + x333300F0 = x00003CC3 ^ x33333C33; + x694E5A8D = x5A7D5A7D ^ x333300F0; + + x0FF0CCCC = x00003CC3 ^ x0FF0F00F; + x000F0303 = a4 & ~x0FF0CCCC; + x5A505854 = x5A555A55 & ~x000F0303; + x33CC000F = a5 ^ x333300F0; + x699C585B = x5A505854 ^ x33CC000F; + + x7F878F78 = x0F000F00 | x74878E78; + x21101013 = a3 & x699C585B; + x7F979F7B = x7F878F78 | x21101013; + x30030CC0 = x3CC33CC3 & ~x0FF0F00F; + x4F9493BB = x7F979F7B ^ x30030CC0; + x00 = x4F9493BB & ~a1; + x01 = x00 ^ x694E5A8D; + *out1 ^= x01; + + x6F9CDBFB = x699C585B | x4F9493BB; + x0000DBFB = a6 & x6F9CDBFB; + x00005151 = a2 & x0000DBFB; + x26DAC936 = x694E5A8D ^ x4F9493BB; + x26DA9867 = x00005151 ^ x26DAC936; + + x27DA9877 = x21101013 | x26DA9867; + x27DA438C = x0000DBFB ^ x27DA9877; + x2625C9C9 = a5 ^ x26DAC936; + x27FFCBCD = x27DA438C | x2625C9C9; + x20 = x27FFCBCD & a1; + x21 = x20 ^ x699C585B; + *out3 ^= x21; + + x27FF1036 = x0000DBFB ^ x27FFCBCD; + x27FF103E = x003C003C | x27FF1036; + xB06B6C44 = ~x4F9493BB; + x97947C7A = x27FF103E ^ xB06B6C44; + x10 = x97947C7A & ~a1; + x11 = x10 ^ x26DA9867; + *out2 ^= x11; +} + +static void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) +{ + u32 x0C0C0C0C, x0000F0F0, x00FFF00F, x00555005, x00515001; + u32 x33000330, x77555775, x30303030, x3030CFCF, x30104745, x30555745; + u32 xFF000FF0, xCF1048B5, x080A080A, xC71A40BF, xCB164CB3; + u32 x9E4319E6, x000019E6, xF429738C, xF4296A6A, xC729695A; + u32 xC47C3D2F, xF77F3F3F, x9E43E619, x693CD926; + u32 xF719A695, xF4FF73FF, x03E6D56A, x56B3803F; + u32 xF700A600, x61008000, x03B7856B, x62B7056B; + u32 x00, x01, x10, x11, x20, x21, x30, x31; + + x0C0C0C0C = a3 & ~a2; + x0000F0F0 = a5 & ~a3; + x00FFF00F = a4 ^ x0000F0F0; + x00555005 = a1 & x00FFF00F; + x00515001 = x00555005 & ~x0C0C0C0C; + + x33000330 = a2 & ~x00FFF00F; + x77555775 = a1 | x33000330; + x30303030 = a2 & ~a3; + x3030CFCF = a5 ^ x30303030; + x30104745 = x77555775 & x3030CFCF; + x30555745 = x00555005 | x30104745; + + xFF000FF0 = ~x00FFF00F; + xCF1048B5 = x30104745 ^ xFF000FF0; + x080A080A = a3 & ~x77555775; + xC71A40BF = xCF1048B5 ^ x080A080A; + xCB164CB3 = x0C0C0C0C ^ xC71A40BF; + x10 = x00515001 | a6; + x11 = x10 ^ xCB164CB3; + *out2 ^= x11; + + x9E4319E6 = a1 ^ xCB164CB3; + x000019E6 = a5 & x9E4319E6; + xF429738C = a2 ^ xC71A40BF; + xF4296A6A = x000019E6 ^ xF429738C; + xC729695A = x33000330 ^ xF4296A6A; + + xC47C3D2F = x30555745 ^ xF4296A6A; + xF77F3F3F = a2 | xC47C3D2F; + x9E43E619 = a5 ^ x9E4319E6; + x693CD926 = xF77F3F3F ^ x9E43E619; + x20 = x30555745 & a6; + x21 = x20 ^ x693CD926; + *out3 ^= x21; + + xF719A695 = x3030CFCF ^ xC729695A; + xF4FF73FF = a4 | xF429738C; + x03E6D56A = xF719A695 ^ xF4FF73FF; + x56B3803F = a1 ^ x03E6D56A; + x30 = x56B3803F & a6; + x31 = x30 ^ xC729695A; + *out4 ^= x31; + + xF700A600 = xF719A695 & ~a4; + x61008000 = x693CD926 & xF700A600; + x03B7856B = x00515001 ^ x03E6D56A; + x62B7056B = x61008000 ^ x03B7856B; + x00 = x62B7056B | a6; + x01 = x00 ^ xC729695A; + *out1 ^= x01; +} + +#endif +#endif + +#ifdef IS_AMD /* * Bitslice DES S-boxes making use of a vector conditional select operation @@ -530,8 +1407,9 @@ s8(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6, vsel(x0, xA59E6C31, x38D696A5, a6); vxor(*out1, *out1, x0); } +#endif -#define SWAP(a, b) { u32 tmp=a;a=b;b=tmp; } +#define SWAP(a, b) { u32 tmp=*a;*a=*b;*b=tmp; } #define DATASWAP \ SWAP(D00, D32); \ @@ -584,7 +1462,7 @@ s8(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6, #define KEYSET07 { k00 = K31; k01 = K35; k02 = K52; k03 = K43; k04 = K08; k05 = K37; k06 = K51; k07 = K15; k08 = K49; k09 = K30; k10 = K07; k11 = K02; k12 = K50; k13 = K21; k14 = K45; k15 = K44; k16 = K29; k17 = K16; k18 = K42; k19 = K23; k20 = K22; k21 = K14; k22 = K38; k23 = K01; k24 = K10; k25 = K47; k26 = K53; k27 = K11; k28 = K27; k29 = K26; k30 = K05; k31 = K17; k32 = K54; k33 = K41; k34 = K39; k35 = K20; k36 = K48; k37 = K13; k38 = K24; k39 = K19; k40 = K32; k41 = K40; k42 = K34; k43 = K03; k44 = K06; k45 = K18; k46 = K12; k47 = K46; } #define KEYSET17 { k00 = K15; k01 = K51; k02 = K36; k03 = K02; k04 = K49; k05 = K21; k06 = K35; k07 = K31; k08 = K08; k09 = K14; k10 = K23; k11 = K43; k12 = K09; k13 = K37; k14 = K29; k15 = K28; k16 = K45; k17 = K00; k18 = K01; k19 = K07; k20 = K38; k21 = K30; k22 = K22; k23 = K42; k24 = K26; k25 = K04; k26 = K41; k27 = K54; k28 = K39; k29 = K10; k30 = K48; k31 = K33; k32 = K11; k33 = K53; k34 = K27; k35 = K32; k36 = K05; k37 = K25; k38 = K40; k39 = K03; k40 = K20; k41 = K24; k42 = K46; k43 = K19; k44 = K18; k45 = K06; k46 = K55; k47 = K34; } -static void DES (const u32 K00, const u32 K01, const u32 K02, const u32 K03, const u32 K04, const u32 K05, const u32 K06, const u32 K07, const u32 K08, const u32 K09, const u32 K10, const u32 K11, const u32 K12, const u32 K13, const u32 K14, const u32 K15, const u32 K16, const u32 K17, const u32 K18, const u32 K19, const u32 K20, const u32 K21, const u32 K22, const u32 K23, const u32 K24, const u32 K25, const u32 K26, const u32 K27, const u32 K28, const u32 K29, const u32 K30, const u32 K31, const u32 K32, const u32 K33, const u32 K34, const u32 K35, const u32 K36, const u32 K37, const u32 K38, const u32 K39, const u32 K40, const u32 K41, const u32 K42, const u32 K43, const u32 K44, const u32 K45, const u32 K46, const u32 K47, const u32 K48, const u32 K49, const u32 K50, const u32 K51, const u32 K52, const u32 K53, const u32 K54, const u32 K55, u32 &D00, u32 &D01, u32 &D02, u32 &D03, u32 &D04, u32 &D05, u32 &D06, u32 &D07, u32 &D08, u32 &D09, u32 &D10, u32 &D11, u32 &D12, u32 &D13, u32 &D14, u32 &D15, u32 &D16, u32 &D17, u32 &D18, u32 &D19, u32 &D20, u32 &D21, u32 &D22, u32 &D23, u32 &D24, u32 &D25, u32 &D26, u32 &D27, u32 &D28, u32 &D29, u32 &D30, u32 &D31, u32 &D32, u32 &D33, u32 &D34, u32 &D35, u32 &D36, u32 &D37, u32 &D38, u32 &D39, u32 &D40, u32 &D41, u32 &D42, u32 &D43, u32 &D44, u32 &D45, u32 &D46, u32 &D47, u32 &D48, u32 &D49, u32 &D50, u32 &D51, u32 &D52, u32 &D53, u32 &D54, u32 &D55, u32 &D56, u32 &D57, u32 &D58, u32 &D59, u32 &D60, u32 &D61, u32 &D62, u32 &D63) +static void DES (const u32 K00, const u32 K01, const u32 K02, const u32 K03, const u32 K04, const u32 K05, const u32 K06, const u32 K07, const u32 K08, const u32 K09, const u32 K10, const u32 K11, const u32 K12, const u32 K13, const u32 K14, const u32 K15, const u32 K16, const u32 K17, const u32 K18, const u32 K19, const u32 K20, const u32 K21, const u32 K22, const u32 K23, const u32 K24, const u32 K25, const u32 K26, const u32 K27, const u32 K28, const u32 K29, const u32 K30, const u32 K31, const u32 K32, const u32 K33, const u32 K34, const u32 K35, const u32 K36, const u32 K37, const u32 K38, const u32 K39, const u32 K40, const u32 K41, const u32 K42, const u32 K43, const u32 K44, const u32 K45, const u32 K46, const u32 K47, const u32 K48, const u32 K49, const u32 K50, const u32 K51, const u32 K52, const u32 K53, const u32 K54, const u32 K55, u32 *D00, u32 *D01, u32 *D02, u32 *D03, u32 *D04, u32 *D05, u32 *D06, u32 *D07, u32 *D08, u32 *D09, u32 *D10, u32 *D11, u32 *D12, u32 *D13, u32 *D14, u32 *D15, u32 *D16, u32 *D17, u32 *D18, u32 *D19, u32 *D20, u32 *D21, u32 *D22, u32 *D23, u32 *D24, u32 *D25, u32 *D26, u32 *D27, u32 *D28, u32 *D29, u32 *D30, u32 *D31, u32 *D32, u32 *D33, u32 *D34, u32 *D35, u32 *D36, u32 *D37, u32 *D38, u32 *D39, u32 *D40, u32 *D41, u32 *D42, u32 *D43, u32 *D44, u32 *D45, u32 *D46, u32 *D47, u32 *D48, u32 *D49, u32 *D50, u32 *D51, u32 *D52, u32 *D53, u32 *D54, u32 *D55, u32 *D56, u32 *D57, u32 *D58, u32 *D59, u32 *D60, u32 *D61, u32 *D62, u32 *D63) { KXX_DECL u32 k00, k01, k02, k03, k04, k05; KXX_DECL u32 k06, k07, k08, k09, k10, k11; @@ -595,39 +1473,106 @@ static void DES (const u32 K00, const u32 K01, const u32 K02, const u32 K03, con KXX_DECL u32 k36, k37, k38, k39, k40, k41; KXX_DECL u32 k42, k43, k44, k45, k46, k47; + #ifdef IS_NV + #if CUDA_ARCH >= 500 + #else #pragma unroll - for (u32 i = 0; i < 16; i++) - { - switch (i) - { - case 0: KEYSET00; break; - case 1: KEYSET01; break; - case 2: KEYSET02; break; - case 3: KEYSET03; break; - case 4: KEYSET04; break; - case 5: KEYSET05; break; - case 6: KEYSET06; break; - case 7: KEYSET07; break; - case 8: KEYSET10; break; - case 9: KEYSET11; break; - case 10: KEYSET12; break; - case 11: KEYSET13; break; - case 12: KEYSET14; break; - case 13: KEYSET15; break; - case 14: KEYSET16; break; - case 15: KEYSET17; break; - } + #endif + #endif - s1(D63 ^ k00, D32 ^ k01, D33 ^ k02, D34 ^ k03, D35 ^ k04, D36 ^ k05, &D08, &D16, &D22, &D30); - s2(D35 ^ k06, D36 ^ k07, D37 ^ k08, D38 ^ k09, D39 ^ k10, D40 ^ k11, &D12, &D27, &D01, &D17); - s3(D39 ^ k12, D40 ^ k13, D41 ^ k14, D42 ^ k15, D43 ^ k16, D44 ^ k17, &D23, &D15, &D29, &D05); - s4(D43 ^ k18, D44 ^ k19, D45 ^ k20, D46 ^ k21, D47 ^ k22, D48 ^ k23, &D25, &D19, &D09, &D00); - s5(D47 ^ k24, D48 ^ k25, D49 ^ k26, D50 ^ k27, D51 ^ k28, D52 ^ k29, &D07, &D13, &D24, &D02); - s6(D51 ^ k30, D52 ^ k31, D53 ^ k32, D54 ^ k33, D55 ^ k34, D56 ^ k35, &D03, &D28, &D10, &D18); - s7(D55 ^ k36, D56 ^ k37, D57 ^ k38, D58 ^ k39, D59 ^ k40, D60 ^ k41, &D31, &D11, &D21, &D06); - s8(D59 ^ k42, D60 ^ k43, D61 ^ k44, D62 ^ k45, D63 ^ k46, D32 ^ k47, &D04, &D26, &D14, &D20); + #ifdef IS_AMD + #pragma unroll + #endif - DATASWAP; + for (u32 i = 0; i < 2; i++) + { + if (i) KEYSET10 else KEYSET00 + + s1(*D63 ^ k00, *D32 ^ k01, *D33 ^ k02, *D34 ^ k03, *D35 ^ k04, *D36 ^ k05, D08, D16, D22, D30); + s2(*D35 ^ k06, *D36 ^ k07, *D37 ^ k08, *D38 ^ k09, *D39 ^ k10, *D40 ^ k11, D12, D27, D01, D17); + s3(*D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05); + s4(*D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00); + s5(*D47 ^ k24, *D48 ^ k25, *D49 ^ k26, *D50 ^ k27, *D51 ^ k28, *D52 ^ k29, D07, D13, D24, D02); + s6(*D51 ^ k30, *D52 ^ k31, *D53 ^ k32, *D54 ^ k33, *D55 ^ k34, *D56 ^ k35, D03, D28, D10, D18); + s7(*D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06); + s8(*D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20); + + if (i) KEYSET11 else KEYSET01 + + s1(*D31 ^ k00, *D00 ^ k01, *D01 ^ k02, *D02 ^ k03, *D03 ^ k04, *D04 ^ k05, D40, D48, D54, D62); + s2(*D03 ^ k06, *D04 ^ k07, *D05 ^ k08, *D06 ^ k09, *D07 ^ k10, *D08 ^ k11, D44, D59, D33, D49); + s3(*D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37); + s4(*D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32); + s5(*D15 ^ k24, *D16 ^ k25, *D17 ^ k26, *D18 ^ k27, *D19 ^ k28, *D20 ^ k29, D39, D45, D56, D34); + s6(*D19 ^ k30, *D20 ^ k31, *D21 ^ k32, *D22 ^ k33, *D23 ^ k34, *D24 ^ k35, D35, D60, D42, D50); + s7(*D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38); + s8(*D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52); + + if (i) KEYSET12 else KEYSET02 + + s1(*D63 ^ k00, *D32 ^ k01, *D33 ^ k02, *D34 ^ k03, *D35 ^ k04, *D36 ^ k05, D08, D16, D22, D30); + s2(*D35 ^ k06, *D36 ^ k07, *D37 ^ k08, *D38 ^ k09, *D39 ^ k10, *D40 ^ k11, D12, D27, D01, D17); + s3(*D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05); + s4(*D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00); + s5(*D47 ^ k24, *D48 ^ k25, *D49 ^ k26, *D50 ^ k27, *D51 ^ k28, *D52 ^ k29, D07, D13, D24, D02); + s6(*D51 ^ k30, *D52 ^ k31, *D53 ^ k32, *D54 ^ k33, *D55 ^ k34, *D56 ^ k35, D03, D28, D10, D18); + s7(*D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06); + s8(*D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20); + + if (i) KEYSET13 else KEYSET03 + + s1(*D31 ^ k00, *D00 ^ k01, *D01 ^ k02, *D02 ^ k03, *D03 ^ k04, *D04 ^ k05, D40, D48, D54, D62); + s2(*D03 ^ k06, *D04 ^ k07, *D05 ^ k08, *D06 ^ k09, *D07 ^ k10, *D08 ^ k11, D44, D59, D33, D49); + s3(*D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37); + s4(*D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32); + s5(*D15 ^ k24, *D16 ^ k25, *D17 ^ k26, *D18 ^ k27, *D19 ^ k28, *D20 ^ k29, D39, D45, D56, D34); + s6(*D19 ^ k30, *D20 ^ k31, *D21 ^ k32, *D22 ^ k33, *D23 ^ k34, *D24 ^ k35, D35, D60, D42, D50); + s7(*D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38); + s8(*D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52); + + if (i) KEYSET14 else KEYSET04 + + s1(*D63 ^ k00, *D32 ^ k01, *D33 ^ k02, *D34 ^ k03, *D35 ^ k04, *D36 ^ k05, D08, D16, D22, D30); + s2(*D35 ^ k06, *D36 ^ k07, *D37 ^ k08, *D38 ^ k09, *D39 ^ k10, *D40 ^ k11, D12, D27, D01, D17); + s3(*D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05); + s4(*D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00); + s5(*D47 ^ k24, *D48 ^ k25, *D49 ^ k26, *D50 ^ k27, *D51 ^ k28, *D52 ^ k29, D07, D13, D24, D02); + s6(*D51 ^ k30, *D52 ^ k31, *D53 ^ k32, *D54 ^ k33, *D55 ^ k34, *D56 ^ k35, D03, D28, D10, D18); + s7(*D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06); + s8(*D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20); + + if (i) KEYSET15 else KEYSET05 + + s1(*D31 ^ k00, *D00 ^ k01, *D01 ^ k02, *D02 ^ k03, *D03 ^ k04, *D04 ^ k05, D40, D48, D54, D62); + s2(*D03 ^ k06, *D04 ^ k07, *D05 ^ k08, *D06 ^ k09, *D07 ^ k10, *D08 ^ k11, D44, D59, D33, D49); + s3(*D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37); + s4(*D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32); + s5(*D15 ^ k24, *D16 ^ k25, *D17 ^ k26, *D18 ^ k27, *D19 ^ k28, *D20 ^ k29, D39, D45, D56, D34); + s6(*D19 ^ k30, *D20 ^ k31, *D21 ^ k32, *D22 ^ k33, *D23 ^ k34, *D24 ^ k35, D35, D60, D42, D50); + s7(*D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38); + s8(*D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52); + + if (i) KEYSET16 else KEYSET06 + + s1(*D63 ^ k00, *D32 ^ k01, *D33 ^ k02, *D34 ^ k03, *D35 ^ k04, *D36 ^ k05, D08, D16, D22, D30); + s2(*D35 ^ k06, *D36 ^ k07, *D37 ^ k08, *D38 ^ k09, *D39 ^ k10, *D40 ^ k11, D12, D27, D01, D17); + s3(*D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05); + s4(*D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00); + s5(*D47 ^ k24, *D48 ^ k25, *D49 ^ k26, *D50 ^ k27, *D51 ^ k28, *D52 ^ k29, D07, D13, D24, D02); + s6(*D51 ^ k30, *D52 ^ k31, *D53 ^ k32, *D54 ^ k33, *D55 ^ k34, *D56 ^ k35, D03, D28, D10, D18); + s7(*D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06); + s8(*D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20); + + if (i) KEYSET17 else KEYSET07 + + s1(*D31 ^ k00, *D00 ^ k01, *D01 ^ k02, *D02 ^ k03, *D03 ^ k04, *D04 ^ k05, D40, D48, D54, D62); + s2(*D03 ^ k06, *D04 ^ k07, *D05 ^ k08, *D06 ^ k09, *D07 ^ k10, *D08 ^ k11, D44, D59, D33, D49); + s3(*D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37); + s4(*D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32); + s5(*D15 ^ k24, *D16 ^ k25, *D17 ^ k26, *D18 ^ k27, *D19 ^ k28, *D20 ^ k29, D39, D45, D56, D34); + s6(*D19 ^ k30, *D20 ^ k31, *D21 ^ k32, *D22 ^ k33, *D23 ^ k34, *D24 ^ k35, D35, D60, D42, D50); + s7(*D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38); + s8(*D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52); } } @@ -792,8 +1737,6 @@ static void m03000m (__local u32 *s_S, __global pw_t *pws, __global gpu_rule_t * const u32 K54 = pws[gid].i[54]; const u32 K55 = pws[gid].i[55]; - const u32 bfs_cnt = bfs_cnt; - const u32 pc_pos = get_local_id (1); const u32 il_pos = pc_pos * 32; @@ -937,18 +1880,18 @@ static void m03000m (__local u32 *s_S, __global pw_t *pws, __global gpu_rule_t * k07, k08, k09, k10, k11, k12, k13, k14, k15, k16, k17, k18, k19, k20, k21, k22, k23, k24, k25, k26, k27, - k28, k29, k30, k31, K32, K33, K34, + K28, K29, K30, K31, K32, K33, K34, K35, K36, K37, K38, K39, K40, K41, K42, K43, K44, K45, K46, K47, K48, K49, K50, K51, K52, K53, K54, K55, - D00, D01, D02, D03, D04, D05, D06, D07, - D08, D09, D10, D11, D12, D13, D14, D15, - D16, D17, D18, D19, D20, D21, D22, D23, - D24, D25, D26, D27, D28, D29, D30, D31, - D32, D33, D34, D35, D36, D37, D38, D39, - D40, D41, D42, D43, D44, D45, D46, D47, - D48, D49, D50, D51, D52, D53, D54, D55, - D56, D57, D58, D59, D60, D61, D62, D63 + &D00, &D01, &D02, &D03, &D04, &D05, &D06, &D07, + &D08, &D09, &D10, &D11, &D12, &D13, &D14, &D15, + &D16, &D17, &D18, &D19, &D20, &D21, &D22, &D23, + &D24, &D25, &D26, &D27, &D28, &D29, &D30, &D31, + &D32, &D33, &D34, &D35, &D36, &D37, &D38, &D39, + &D40, &D41, &D42, &D43, &D44, &D45, &D46, &D47, + &D48, &D49, &D50, &D51, &D52, &D53, &D54, &D55, + &D56, &D57, &D58, &D59, &D60, &D61, &D62, &D63 ); u32 out[64]; @@ -1365,18 +2308,18 @@ static void m03000s (__local u32 *s_S, __global pw_t *pws, __global gpu_rule_t * k07, k08, k09, k10, k11, k12, k13, k14, k15, k16, k17, k18, k19, k20, k21, k22, k23, k24, k25, k26, k27, - k28, k29, k30, k31, K32, K33, K34, + K28, K29, K30, K31, K32, K33, K34, K35, K36, K37, K38, K39, K40, K41, K42, K43, K44, K45, K46, K47, K48, K49, K50, K51, K52, K53, K54, K55, - D00, D01, D02, D03, D04, D05, D06, D07, - D08, D09, D10, D11, D12, D13, D14, D15, - D16, D17, D18, D19, D20, D21, D22, D23, - D24, D25, D26, D27, D28, D29, D30, D31, - D32, D33, D34, D35, D36, D37, D38, D39, - D40, D41, D42, D43, D44, D45, D46, D47, - D48, D49, D50, D51, D52, D53, D54, D55, - D56, D57, D58, D59, D60, D61, D62, D63 + &D00, &D01, &D02, &D03, &D04, &D05, &D06, &D07, + &D08, &D09, &D10, &D11, &D12, &D13, &D14, &D15, + &D16, &D17, &D18, &D19, &D20, &D21, &D22, &D23, + &D24, &D25, &D26, &D27, &D28, &D29, &D30, &D31, + &D32, &D33, &D34, &D35, &D36, &D37, &D38, &D39, + &D40, &D41, &D42, &D43, &D44, &D45, &D46, &D47, + &D48, &D49, &D50, &D51, &D52, &D53, &D54, &D55, + &D56, &D57, &D58, &D59, &D60, &D61, &D62, &D63 ); u32 tmpResult = 0;