mirror of
https://github.com/hashcat/hashcat.git
synced 2024-12-22 14:48:12 +00:00
Started optimizing some of the OpenCL kernels for the latest AMD Catalyst 15.12:
- Replaced SBOX for DES: replaced JtR's "Bitslice DES S-boxes making use of a vector conditional select operation (e.g., vsel on PowerPC with AltiVec)" with JtR's "Bitslice DES S-boxes for x86 with MMX/SSE2/AVX and for typical RISC architectures". Performance increased for DEScrypt from 355MH/s to 405MH/s and for LM from 11100MH/s to 12000MH/s. BTW, the same effect can be seen with non-Maxwell GPUs.
- Removed some volatile keywords that are no longer needed thanks to fixed Catalyst bugs.
- Fixed the weak-hash-check parameter for use with tools/test.sh.
This commit is contained in:
parent
24b5aa6226
commit
245301c9b4
@ -896,11 +896,11 @@ static void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u3
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
|
||||
/*
|
||||
* Bitslice DES S-boxes making use of a vector conditional select operation
|
||||
* (e.g., vsel on PowerPC with AltiVec).
|
||||
* Bitslice DES S-boxes for x86 with MMX/SSE2/AVX and for typical RISC
|
||||
* architectures. These use AND, OR, XOR, NOT, and AND-NOT gates.
|
||||
*
|
||||
* Gate counts: 36 33 33 26 35 34 34 32
|
||||
* Average: 32.875
|
||||
* Gate counts: 49 44 46 33 48 46 46 41
|
||||
* Average: 44.125
|
||||
*
|
||||
* Several same-gate-count expressions for each S-box are included (for use on
|
||||
* different CPUs/GPUs).
|
||||
@ -919,489 +919,558 @@ static void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u3
|
||||
* The effort has been sponsored by Rapid7: http://www.rapid7.com
|
||||
*/
|
||||
|
||||
#define vnot(dst, a) (dst) = ~(a)
|
||||
#define vand(dst, a, b) (dst) = (a) & (b)
|
||||
#define vor(dst, a, b) (dst) = (a) | (b)
|
||||
#define vandn(dst, a, b) (dst) = (a) & ~(b)
|
||||
#define vxor(dst, a, b) (dst) = (a) ^ (b)
|
||||
#define vsel(dst, a, b, c) (dst) = bitselect((a),(b),(c))
|
||||
|
||||
static void
|
||||
s1(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
|
||||
u32 * out1, u32 * out2, u32 * out3, u32 * out4)
|
||||
static void s1 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
|
||||
{
|
||||
u32 x0F0F3333, x3C3C3C3C, x55FF55FF, x69C369C3, x0903B73F, x09FCB7C0,
|
||||
x5CA9E295;
|
||||
u32 x55AFD1B7, x3C3C69C3, x6993B874;
|
||||
u32 x5CEDE59F, x09FCE295, x5D91A51E, x529E962D;
|
||||
u32 x29EEADC0, x4B8771A3, x428679F3, x6B68D433;
|
||||
u32 x5BA7E193, x026F12F3, x6B27C493, x94D83B6C;
|
||||
u32 x965E0B0F, x3327A113, x847F0A1F, xD6E19C32;
|
||||
u32 x0DBCE883, x3A25A215, x37994A96;
|
||||
u32 x8A487EA7, x8B480F07, xB96C2D16;
|
||||
u32 x0, x1, x2, x3;
|
||||
u32 x55005500, x5A0F5A0F, x3333FFFF, x66666666, x22226666, x2D2D6969,
|
||||
x25202160;
|
||||
u32 x00FFFF00, x33CCCC33, x4803120C, x2222FFFF, x6A21EDF3, x4A01CC93;
|
||||
u32 x5555FFFF, x7F75FFFF, x00D20096, x7FA7FF69;
|
||||
u32 x0A0A0000, x0AD80096, x00999900, x0AD99996;
|
||||
u32 x22332233, x257AA5F0, x054885C0, xFAB77A3F, x2221EDF3, xD89697CC;
|
||||
u32 x05B77AC0, x05F77AD6, x36C48529, x6391D07C, xBB0747B0;
|
||||
u32 x4C460000, x4EDF9996, x2D4E49EA, xBBFFFFB0, x96B1B65A;
|
||||
u32 x5AFF5AFF, x52B11215, x4201C010, x10B0D205;
|
||||
u32 x00, x01, x10, x11, x20, x21, x30, x31;
|
||||
|
||||
vsel(x0F0F3333, a3, a2, a5);
|
||||
vxor(x3C3C3C3C, a2, a3);
|
||||
vor(x55FF55FF, a1, a4);
|
||||
vxor(x69C369C3, x3C3C3C3C, x55FF55FF);
|
||||
vsel(x0903B73F, a5, x0F0F3333, x69C369C3);
|
||||
vxor(x09FCB7C0, a4, x0903B73F);
|
||||
vxor(x5CA9E295, a1, x09FCB7C0);
|
||||
x55005500 = a1 & ~a5;
|
||||
x5A0F5A0F = a4 ^ x55005500;
|
||||
x3333FFFF = a3 | a6;
|
||||
x66666666 = a1 ^ a3;
|
||||
x22226666 = x3333FFFF & x66666666;
|
||||
x2D2D6969 = a4 ^ x22226666;
|
||||
x25202160 = x2D2D6969 & ~x5A0F5A0F;
|
||||
|
||||
vsel(x55AFD1B7, x5CA9E295, x55FF55FF, x0F0F3333);
|
||||
vsel(x3C3C69C3, x3C3C3C3C, x69C369C3, a5);
|
||||
vxor(x6993B874, x55AFD1B7, x3C3C69C3);
|
||||
x00FFFF00 = a5 ^ a6;
|
||||
x33CCCC33 = a3 ^ x00FFFF00;
|
||||
x4803120C = x5A0F5A0F & ~x33CCCC33;
|
||||
x2222FFFF = a6 | x22226666;
|
||||
x6A21EDF3 = x4803120C ^ x2222FFFF;
|
||||
x4A01CC93 = x6A21EDF3 & ~x25202160;
|
||||
|
||||
vsel(x5CEDE59F, x55FF55FF, x5CA9E295, x6993B874);
|
||||
vsel(x09FCE295, x09FCB7C0, x5CA9E295, a5);
|
||||
vsel(x5D91A51E, x5CEDE59F, x6993B874, x09FCE295);
|
||||
vxor(x529E962D, x0F0F3333, x5D91A51E);
|
||||
x5555FFFF = a1 | a6;
|
||||
x7F75FFFF = x6A21EDF3 | x5555FFFF;
|
||||
x00D20096 = a5 & ~x2D2D6969;
|
||||
x7FA7FF69 = x7F75FFFF ^ x00D20096;
|
||||
|
||||
vsel(x29EEADC0, x69C369C3, x09FCB7C0, x5CEDE59F);
|
||||
vsel(x4B8771A3, x0F0F3333, x69C369C3, x5CA9E295);
|
||||
vsel(x428679F3, a5, x4B8771A3, x529E962D);
|
||||
vxor(x6B68D433, x29EEADC0, x428679F3);
|
||||
x0A0A0000 = a4 & ~x5555FFFF;
|
||||
x0AD80096 = x00D20096 ^ x0A0A0000;
|
||||
x00999900 = x00FFFF00 & ~x66666666;
|
||||
x0AD99996 = x0AD80096 | x00999900;
|
||||
|
||||
vsel(x5BA7E193, x5CA9E295, x4B8771A3, a3);
|
||||
vsel(x026F12F3, a4, x0F0F3333, x529E962D);
|
||||
vsel(x6B27C493, x6B68D433, x5BA7E193, x026F12F3);
|
||||
vnot(x94D83B6C, x6B27C493);
|
||||
vsel(x0, x94D83B6C, x6B68D433, a6);
|
||||
vxor(*out1, *out1, x0);
|
||||
x22332233 = a3 & ~x55005500;
|
||||
x257AA5F0 = x5A0F5A0F ^ x7F75FFFF;
|
||||
x054885C0 = x257AA5F0 & ~x22332233;
|
||||
xFAB77A3F = ~x054885C0;
|
||||
x2221EDF3 = x3333FFFF & x6A21EDF3;
|
||||
xD89697CC = xFAB77A3F ^ x2221EDF3;
|
||||
x20 = x7FA7FF69 & ~a2;
|
||||
x21 = x20 ^ xD89697CC;
|
||||
*out3 ^= x21;
|
||||
|
||||
vsel(x965E0B0F, x94D83B6C, a3, x428679F3);
|
||||
vsel(x3327A113, x5BA7E193, a2, x69C369C3);
|
||||
vsel(x847F0A1F, x965E0B0F, a4, x3327A113);
|
||||
vxor(xD6E19C32, x529E962D, x847F0A1F);
|
||||
vsel(x1, xD6E19C32, x5CA9E295, a6);
|
||||
vxor(*out2, *out2, x1);
|
||||
x05B77AC0 = x00FFFF00 ^ x054885C0;
|
||||
x05F77AD6 = x00D20096 | x05B77AC0;
|
||||
x36C48529 = x3333FFFF ^ x05F77AD6;
|
||||
x6391D07C = a1 ^ x36C48529;
|
||||
xBB0747B0 = xD89697CC ^ x6391D07C;
|
||||
x00 = x25202160 | a2;
|
||||
x01 = x00 ^ xBB0747B0;
|
||||
*out1 ^= x01;
|
||||
|
||||
vsel(x0DBCE883, x09FCE295, x3C3C69C3, x847F0A1F);
|
||||
vsel(x3A25A215, x3327A113, x5CA9E295, x0903B73F);
|
||||
vxor(x37994A96, x0DBCE883, x3A25A215);
|
||||
vsel(x3, x37994A96, x529E962D, a6);
|
||||
vxor(*out4, *out4, x3);
|
||||
x4C460000 = x3333FFFF ^ x7F75FFFF;
|
||||
x4EDF9996 = x0AD99996 | x4C460000;
|
||||
x2D4E49EA = x6391D07C ^ x4EDF9996;
|
||||
xBBFFFFB0 = x00FFFF00 | xBB0747B0;
|
||||
x96B1B65A = x2D4E49EA ^ xBBFFFFB0;
|
||||
x10 = x4A01CC93 | a2;
|
||||
x11 = x10 ^ x96B1B65A;
|
||||
*out2 ^= x11;
|
||||
|
||||
vxor(x8A487EA7, x5CA9E295, xD6E19C32);
|
||||
vsel(x8B480F07, a3, x8A487EA7, x847F0A1F);
|
||||
vsel(xB96C2D16, x8B480F07, x3C3C3C3C, x3A25A215);
|
||||
vsel(x2, xB96C2D16, x6993B874, a6);
|
||||
vxor(*out3, *out3, x2);
|
||||
x5AFF5AFF = a5 | x5A0F5A0F;
|
||||
x52B11215 = x5AFF5AFF & ~x2D4E49EA;
|
||||
x4201C010 = x4A01CC93 & x6391D07C;
|
||||
x10B0D205 = x52B11215 ^ x4201C010;
|
||||
x30 = x10B0D205 | a2;
|
||||
x31 = x30 ^ x0AD99996;
|
||||
*out4 ^= x31;
|
||||
}
|
||||
|
||||
static void
|
||||
s2(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
|
||||
u32 * out1, u32 * out2, u32 * out3, u32 * out4)
|
||||
static void s2 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
|
||||
{
|
||||
u32 x55553333, x0055FF33, x33270F03, x66725A56, x00FFFF00, x668DA556;
|
||||
u32 x0F0F5A56, xF0F0A5A9, xA5A5969A, xA55A699A;
|
||||
u32 x0F5AF03C, x6600FF56, x87A5F09C;
|
||||
u32 xA55A963C, x3C69C30F, xB44BC32D;
|
||||
u32 x66D7CC56, x0F4B0F2D, x699CC37B, x996C66D2;
|
||||
u32 xB46C662D, x278DB412, xB66CB43B;
|
||||
u32 xD2DC4E52, x27993333, xD2994E33;
|
||||
u32 x278D0F2D, x2E0E547B, x09976748;
|
||||
u32 x0, x1, x2, x3;
|
||||
u32 x33CC33CC;
|
||||
u32 x55550000, x00AA00FF, x33BB33FF;
|
||||
u32 x33CC0000, x11441144, x11BB11BB, x003311BB;
|
||||
u32 x00000F0F, x336600FF, x332200FF, x332200F0;
|
||||
u32 x0302000F, xAAAAAAAA, xA9A8AAA5, x33CCCC33, x33CCC030, x9A646A95;
|
||||
u32 x00333303, x118822B8, xA8208805, x3CC3C33C, x94E34B39;
|
||||
u32 x0331330C, x3FF3F33C, xA9DF596A, xA9DF5F6F, x962CAC53;
|
||||
u32 xA9466A6A, x3DA52153, x29850143, x33C0330C, x1A45324F;
|
||||
u32 x0A451047, xBBDFDD7B, xB19ACD3C;
|
||||
u32 x00, x01, x10, x11, x20, x21, x30, x31;
|
||||
|
||||
vsel(x55553333, a1, a3, a6);
|
||||
vsel(x0055FF33, a6, x55553333, a5);
|
||||
vsel(x33270F03, a3, a4, x0055FF33);
|
||||
vxor(x66725A56, a1, x33270F03);
|
||||
vxor(x00FFFF00, a5, a6);
|
||||
vxor(x668DA556, x66725A56, x00FFFF00);
|
||||
x33CC33CC = a2 ^ a5;
|
||||
|
||||
vsel(x0F0F5A56, a4, x66725A56, a6);
|
||||
vnot(xF0F0A5A9, x0F0F5A56);
|
||||
vxor(xA5A5969A, x55553333, xF0F0A5A9);
|
||||
vxor(xA55A699A, x00FFFF00, xA5A5969A);
|
||||
vsel(x1, xA55A699A, x668DA556, a2);
|
||||
vxor(*out2, *out2, x1);
|
||||
x55550000 = a1 & ~a6;
|
||||
x00AA00FF = a5 & ~x55550000;
|
||||
x33BB33FF = a2 | x00AA00FF;
|
||||
|
||||
vxor(x0F5AF03C, a4, x0055FF33);
|
||||
vsel(x6600FF56, x66725A56, a6, x00FFFF00);
|
||||
vsel(x87A5F09C, xA5A5969A, x0F5AF03C, x6600FF56);
|
||||
x33CC0000 = x33CC33CC & ~a6;
|
||||
x11441144 = a1 & x33CC33CC;
|
||||
x11BB11BB = a5 ^ x11441144;
|
||||
x003311BB = x11BB11BB & ~x33CC0000;
|
||||
|
||||
vsel(xA55A963C, xA5A5969A, x0F5AF03C, a5);
|
||||
vxor(x3C69C30F, a3, x0F5AF03C);
|
||||
vsel(xB44BC32D, xA55A963C, x3C69C30F, a1);
|
||||
x00000F0F = a3 & a6;
|
||||
x336600FF = x00AA00FF ^ x33CC0000;
|
||||
x332200FF = x33BB33FF & x336600FF;
|
||||
x332200F0 = x332200FF & ~x00000F0F;
|
||||
|
||||
vsel(x66D7CC56, x66725A56, x668DA556, xA5A5969A);
|
||||
vsel(x0F4B0F2D, a4, xB44BC32D, a5);
|
||||
vxor(x699CC37B, x66D7CC56, x0F4B0F2D);
|
||||
vxor(x996C66D2, xF0F0A5A9, x699CC37B);
|
||||
vsel(x0, x996C66D2, xB44BC32D, a2);
|
||||
vxor(*out1, *out1, x0);
|
||||
x0302000F = a3 & x332200FF;
|
||||
xAAAAAAAA = ~a1;
|
||||
xA9A8AAA5 = x0302000F ^ xAAAAAAAA;
|
||||
x33CCCC33 = a6 ^ x33CC33CC;
|
||||
x33CCC030 = x33CCCC33 & ~x00000F0F;
|
||||
x9A646A95 = xA9A8AAA5 ^ x33CCC030;
|
||||
x10 = a4 & ~x332200F0;
|
||||
x11 = x10 ^ x9A646A95;
|
||||
*out2 ^= x11;
|
||||
|
||||
vsel(xB46C662D, xB44BC32D, x996C66D2, x00FFFF00);
|
||||
vsel(x278DB412, x668DA556, xA5A5969A, a1);
|
||||
vsel(xB66CB43B, xB46C662D, x278DB412, x6600FF56);
|
||||
x00333303 = a2 & ~x33CCC030;
|
||||
x118822B8 = x11BB11BB ^ x00333303;
|
||||
xA8208805 = xA9A8AAA5 & ~x118822B8;
|
||||
x3CC3C33C = a3 ^ x33CCCC33;
|
||||
x94E34B39 = xA8208805 ^ x3CC3C33C;
|
||||
x00 = x33BB33FF & ~a4;
|
||||
x01 = x00 ^ x94E34B39;
|
||||
*out1 ^= x01;
|
||||
|
||||
vsel(xD2DC4E52, x66D7CC56, x996C66D2, xB44BC32D);
|
||||
vsel(x27993333, x278DB412, a3, x0055FF33);
|
||||
vsel(xD2994E33, xD2DC4E52, x27993333, a5);
|
||||
vsel(x3, x87A5F09C, xD2994E33, a2);
|
||||
vxor(*out4, *out4, x3);
|
||||
x0331330C = x0302000F ^ x00333303;
|
||||
x3FF3F33C = x3CC3C33C | x0331330C;
|
||||
xA9DF596A = x33BB33FF ^ x9A646A95;
|
||||
xA9DF5F6F = x00000F0F | xA9DF596A;
|
||||
x962CAC53 = x3FF3F33C ^ xA9DF5F6F;
|
||||
|
||||
vsel(x278D0F2D, x278DB412, x0F4B0F2D, a6);
|
||||
vsel(x2E0E547B, x0F0F5A56, xB66CB43B, x278D0F2D);
|
||||
vxor(x09976748, x27993333, x2E0E547B);
|
||||
vsel(x2, xB66CB43B, x09976748, a2);
|
||||
vxor(*out3, *out3, x2);
|
||||
xA9466A6A = x332200FF ^ x9A646A95;
|
||||
x3DA52153 = x94E34B39 ^ xA9466A6A;
|
||||
x29850143 = xA9DF5F6F & x3DA52153;
|
||||
x33C0330C = x33CC33CC & x3FF3F33C;
|
||||
x1A45324F = x29850143 ^ x33C0330C;
|
||||
x20 = x1A45324F | a4;
|
||||
x21 = x20 ^ x962CAC53;
|
||||
*out3 ^= x21;
|
||||
|
||||
x0A451047 = x1A45324F & ~x118822B8;
|
||||
xBBDFDD7B = x33CCCC33 | xA9DF596A;
|
||||
xB19ACD3C = x0A451047 ^ xBBDFDD7B;
|
||||
x30 = x003311BB | a4;
|
||||
x31 = x30 ^ xB19ACD3C;
|
||||
*out4 ^= x31;
|
||||
}
|
||||
|
||||
static void
|
||||
s3(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
|
||||
u32 * out1, u32 * out2, u32 * out3, u32 * out4)
|
||||
static void s3 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
|
||||
{
|
||||
u32 x0F330F33, x0F33F0CC, x5A66A599;
|
||||
u32 x2111B7BB, x03FF3033, x05BB50EE, x074F201F, x265E97A4;
|
||||
u32 x556BA09E, x665A93AC, x99A56C53;
|
||||
u32 x25A1A797, x5713754C, x66559355, x47B135C6;
|
||||
u32 x9A5A5C60, xD07AF8F8, x87698DB4, xE13C1EE1;
|
||||
u32 x9E48CDE4, x655B905E, x00A55CFF, x9E49915E;
|
||||
u32 xD6599874, x05330022, xD2699876;
|
||||
u32 x665F9364, xD573F0F2, xB32C6396;
|
||||
u32 x0, x1, x2, x3;
|
||||
u32 x44444444, x0F0FF0F0, x4F4FF4F4, x00FFFF00, x00AAAA00, x4FE55EF4;
|
||||
u32 x3C3CC3C3, x3C3C0000, x7373F4F4, x0C840A00;
|
||||
u32 x00005EF4, x00FF5EFF, x00555455, x3C699796;
|
||||
u32 x000FF000, x55AA55AA, x26D9A15E, x2FDFAF5F, x2FD00F5F;
|
||||
u32 x55AAFFAA, x28410014, x000000FF, x000000CC, x284100D8;
|
||||
u32 x204100D0, x3C3CC3FF, x1C3CC32F, x4969967A;
|
||||
u32 x4CC44CC4, x40C040C0, xC3C33C3C, x9669C396, xD6A98356;
|
||||
u32 xD6E9C3D6, x4CEEEEC4, x9A072D12, x001A000B, x9A1F2D1B;
|
||||
u32 x00, x01, x10, x11, x20, x21, x30, x31;
|
||||
|
||||
vsel(x0F330F33, a4, a3, a5);
|
||||
vxor(x0F33F0CC, a6, x0F330F33);
|
||||
vxor(x5A66A599, a2, x0F33F0CC);
|
||||
x44444444 = a1 & ~a2;
|
||||
x0F0FF0F0 = a3 ^ a6;
|
||||
x4F4FF4F4 = x44444444 | x0F0FF0F0;
|
||||
x00FFFF00 = a4 ^ a6;
|
||||
x00AAAA00 = x00FFFF00 & ~a1;
|
||||
x4FE55EF4 = x4F4FF4F4 ^ x00AAAA00;
|
||||
|
||||
vsel(x2111B7BB, a3, a6, x5A66A599);
|
||||
vsel(x03FF3033, a5, a3, x0F33F0CC);
|
||||
vsel(x05BB50EE, a5, x0F33F0CC, a2);
|
||||
vsel(x074F201F, x03FF3033, a4, x05BB50EE);
|
||||
vxor(x265E97A4, x2111B7BB, x074F201F);
|
||||
x3C3CC3C3 = a2 ^ x0F0FF0F0;
|
||||
x3C3C0000 = x3C3CC3C3 & ~a6;
|
||||
x7373F4F4 = x4F4FF4F4 ^ x3C3C0000;
|
||||
x0C840A00 = x4FE55EF4 & ~x7373F4F4;
|
||||
|
||||
vsel(x556BA09E, x5A66A599, x05BB50EE, a4);
|
||||
vsel(x665A93AC, x556BA09E, x265E97A4, a3);
|
||||
vnot(x99A56C53, x665A93AC);
|
||||
vsel(x1, x265E97A4, x99A56C53, a1);
|
||||
vxor(*out2, *out2, x1);
|
||||
x00005EF4 = a6 & x4FE55EF4;
|
||||
x00FF5EFF = a4 | x00005EF4;
|
||||
x00555455 = a1 & x00FF5EFF;
|
||||
x3C699796 = x3C3CC3C3 ^ x00555455;
|
||||
x30 = x4FE55EF4 & ~a5;
|
||||
x31 = x30 ^ x3C699796;
|
||||
*out4 ^= x31;
|
||||
|
||||
vxor(x25A1A797, x03FF3033, x265E97A4);
|
||||
vsel(x5713754C, a2, x0F33F0CC, x074F201F);
|
||||
vsel(x66559355, x665A93AC, a2, a5);
|
||||
vsel(x47B135C6, x25A1A797, x5713754C, x66559355);
|
||||
x000FF000 = x0F0FF0F0 & x00FFFF00;
|
||||
x55AA55AA = a1 ^ a4;
|
||||
x26D9A15E = x7373F4F4 ^ x55AA55AA;
|
||||
x2FDFAF5F = a3 | x26D9A15E;
|
||||
x2FD00F5F = x2FDFAF5F & ~x000FF000;
|
||||
|
||||
vxor(x9A5A5C60, x03FF3033, x99A56C53);
|
||||
vsel(xD07AF8F8, x9A5A5C60, x556BA09E, x5A66A599);
|
||||
vxor(x87698DB4, x5713754C, xD07AF8F8);
|
||||
vxor(xE13C1EE1, x66559355, x87698DB4);
|
||||
x55AAFFAA = x00AAAA00 | x55AA55AA;
|
||||
x28410014 = x3C699796 & ~x55AAFFAA;
|
||||
x000000FF = a4 & a6;
|
||||
x000000CC = x000000FF & ~a2;
|
||||
x284100D8 = x28410014 ^ x000000CC;
|
||||
|
||||
vsel(x9E48CDE4, x9A5A5C60, x87698DB4, x265E97A4);
|
||||
vsel(x655B905E, x66559355, x05BB50EE, a4);
|
||||
vsel(x00A55CFF, a5, a6, x9A5A5C60);
|
||||
vsel(x9E49915E, x9E48CDE4, x655B905E, x00A55CFF);
|
||||
vsel(x0, x9E49915E, xE13C1EE1, a1);
|
||||
vxor(*out1, *out1, x0);
|
||||
x204100D0 = x7373F4F4 & x284100D8;
|
||||
x3C3CC3FF = x3C3CC3C3 | x000000FF;
|
||||
x1C3CC32F = x3C3CC3FF & ~x204100D0;
|
||||
x4969967A = a1 ^ x1C3CC32F;
|
||||
x10 = x2FD00F5F & a5;
|
||||
x11 = x10 ^ x4969967A;
|
||||
*out2 ^= x11;
|
||||
|
||||
vsel(xD6599874, xD07AF8F8, x66559355, x0F33F0CC);
|
||||
vand(x05330022, x0F330F33, x05BB50EE);
|
||||
vsel(xD2699876, xD6599874, x00A55CFF, x05330022);
|
||||
vsel(x3, x5A66A599, xD2699876, a1);
|
||||
vxor(*out4, *out4, x3);
|
||||
x4CC44CC4 = x4FE55EF4 & ~a2;
|
||||
x40C040C0 = x4CC44CC4 & ~a3;
|
||||
xC3C33C3C = ~x3C3CC3C3;
|
||||
x9669C396 = x55AAFFAA ^ xC3C33C3C;
|
||||
xD6A98356 = x40C040C0 ^ x9669C396;
|
||||
x00 = a5 & ~x0C840A00;
|
||||
x01 = x00 ^ xD6A98356;
|
||||
*out1 ^= x01;
|
||||
|
||||
vsel(x665F9364, x265E97A4, x66559355, x47B135C6);
|
||||
vsel(xD573F0F2, xD07AF8F8, x05330022, a4);
|
||||
vxor(xB32C6396, x665F9364, xD573F0F2);
|
||||
vsel(x2, xB32C6396, x47B135C6, a1);
|
||||
vxor(*out3, *out3, x2);
|
||||
xD6E9C3D6 = x40C040C0 | x9669C396;
|
||||
x4CEEEEC4 = x00AAAA00 | x4CC44CC4;
|
||||
x9A072D12 = xD6E9C3D6 ^ x4CEEEEC4;
|
||||
x001A000B = a4 & ~x4FE55EF4;
|
||||
x9A1F2D1B = x9A072D12 | x001A000B;
|
||||
x20 = a5 & ~x284100D8;
|
||||
x21 = x20 ^ x9A1F2D1B;
|
||||
*out3 ^= x21;
|
||||
}
|
||||
|
||||
static void
|
||||
s4(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
|
||||
u32 * out1, u32 * out2, u32 * out3, u32 * out4)
|
||||
static void s4 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
|
||||
{
|
||||
u32 x0505AFAF, x0555AF55, x0A5AA05A, x46566456, x0A0A5F5F, x0AF55FA0,
|
||||
x0AF50F0F, x4CA36B59;
|
||||
u32 xB35C94A6;
|
||||
u32 x01BB23BB, x5050FAFA, xA31C26BE, xA91679E1;
|
||||
u32 x56E9861E;
|
||||
u32 x50E9FA1E, x0AF55F00, x827D9784, xD2946D9A;
|
||||
u32 x31F720B3, x11FB21B3, x4712A7AD, x9586CA37;
|
||||
u32 x0, x1, x2, x3;
|
||||
u32 x5A5A5A5A, x0F0FF0F0;
|
||||
u32 x33FF33FF, x33FFCC00, x0C0030F0, x0C0CC0C0, x0CF3C03F, x5EFBDA7F,
|
||||
x52FBCA0F, x61C8F93C;
|
||||
u32 x00C0C03C, x0F0F30C0, x3B92A366, x30908326, x3C90B3D6;
|
||||
u32 x33CC33CC, x0C0CFFFF, x379E5C99, x04124C11, x56E9861E, xA91679E1;
|
||||
u32 x9586CA37, x8402C833, x84C2C83F, xB35C94A6;
|
||||
u32 x00, x01, x10, x11, x20, x21, x30, x31;
|
||||
|
||||
vsel(x0505AFAF, a5, a3, a1);
|
||||
vsel(x0555AF55, x0505AFAF, a1, a4);
|
||||
vxor(x0A5AA05A, a3, x0555AF55);
|
||||
vsel(x46566456, a1, x0A5AA05A, a2);
|
||||
vsel(x0A0A5F5F, a3, a5, a1);
|
||||
vxor(x0AF55FA0, a4, x0A0A5F5F);
|
||||
vsel(x0AF50F0F, x0AF55FA0, a3, a5);
|
||||
vxor(x4CA36B59, x46566456, x0AF50F0F);
|
||||
x5A5A5A5A = a1 ^ a3;
|
||||
x0F0FF0F0 = a3 ^ a5;
|
||||
x33FF33FF = a2 | a4;
|
||||
x33FFCC00 = a5 ^ x33FF33FF;
|
||||
x0C0030F0 = x0F0FF0F0 & ~x33FFCC00;
|
||||
x0C0CC0C0 = x0F0FF0F0 & ~a2;
|
||||
x0CF3C03F = a4 ^ x0C0CC0C0;
|
||||
x5EFBDA7F = x5A5A5A5A | x0CF3C03F;
|
||||
x52FBCA0F = x5EFBDA7F & ~x0C0030F0;
|
||||
x61C8F93C = a2 ^ x52FBCA0F;
|
||||
|
||||
vnot(xB35C94A6, x4CA36B59);
|
||||
x00C0C03C = x0CF3C03F & x61C8F93C;
|
||||
x0F0F30C0 = x0F0FF0F0 & ~x00C0C03C;
|
||||
x3B92A366 = x5A5A5A5A ^ x61C8F93C;
|
||||
x30908326 = x3B92A366 & ~x0F0F30C0;
|
||||
x3C90B3D6 = x0C0030F0 ^ x30908326;
|
||||
|
||||
vsel(x01BB23BB, a4, a2, x0555AF55);
|
||||
vxor(x5050FAFA, a1, x0505AFAF);
|
||||
vsel(xA31C26BE, xB35C94A6, x01BB23BB, x5050FAFA);
|
||||
vxor(xA91679E1, x0A0A5F5F, xA31C26BE);
|
||||
x33CC33CC = a2 ^ a4;
|
||||
x0C0CFFFF = a5 | x0C0CC0C0;
|
||||
x379E5C99 = x3B92A366 ^ x0C0CFFFF;
|
||||
x04124C11 = x379E5C99 & ~x33CC33CC;
|
||||
x56E9861E = x52FBCA0F ^ x04124C11;
|
||||
x00 = a6 & ~x3C90B3D6;
|
||||
x01 = x00 ^ x56E9861E;
|
||||
*out1 ^= x01;
|
||||
|
||||
vnot(x56E9861E, xA91679E1);
|
||||
xA91679E1 = ~x56E9861E;
|
||||
x10 = x3C90B3D6 & ~a6;
|
||||
x11 = x10 ^ xA91679E1;
|
||||
*out2 ^= x11;
|
||||
|
||||
vsel(x50E9FA1E, x5050FAFA, x56E9861E, a4);
|
||||
vsel(x0AF55F00, x0AF50F0F, x0AF55FA0, x0A0A5F5F);
|
||||
vsel(x827D9784, xB35C94A6, x0AF55F00, a2);
|
||||
vxor(xD2946D9A, x50E9FA1E, x827D9784);
|
||||
vsel(x2, xD2946D9A, x4CA36B59, a6);
|
||||
vxor(*out3, *out3, x2);
|
||||
vsel(x3, xB35C94A6, xD2946D9A, a6);
|
||||
vxor(*out4, *out4, x3);
|
||||
x9586CA37 = x3C90B3D6 ^ xA91679E1;
|
||||
x8402C833 = x9586CA37 & ~x33CC33CC;
|
||||
x84C2C83F = x00C0C03C | x8402C833;
|
||||
xB35C94A6 = x379E5C99 ^ x84C2C83F;
|
||||
x20 = x61C8F93C | a6;
|
||||
x21 = x20 ^ xB35C94A6;
|
||||
*out3 ^= x21;
|
||||
|
||||
vsel(x31F720B3, a2, a4, x0AF55FA0);
|
||||
vsel(x11FB21B3, x01BB23BB, x31F720B3, x5050FAFA);
|
||||
vxor(x4712A7AD, x56E9861E, x11FB21B3);
|
||||
vxor(x9586CA37, xD2946D9A, x4712A7AD);
|
||||
vsel(x0, x56E9861E, x9586CA37, a6);
|
||||
vxor(*out1, *out1, x0);
|
||||
vsel(x1, x9586CA37, xA91679E1, a6);
|
||||
vxor(*out2, *out2, x1);
|
||||
x30 = a6 & x61C8F93C;
|
||||
x31 = x30 ^ xB35C94A6;
|
||||
*out4 ^= x31;
|
||||
}
|
||||
|
||||
static void
|
||||
s5(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
|
||||
u32 * out1, u32 * out2, u32 * out3, u32 * out4)
|
||||
static void s5 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
|
||||
{
|
||||
u32 x550F550F, xAAF0AAF0, xA5F5A5F5, x96C696C6, x00FFFF00, x963969C6;
|
||||
u32 x2E3C2E3C, xB73121F7, x1501DF0F, x00558A5F, x2E69A463;
|
||||
u32 x0679ED42, x045157FD, xB32077FF, x9D49D39C;
|
||||
u32 xAC81CFB2, xF72577AF, x5BA4B81D;
|
||||
u32 x5BA477AF, x4895469F, x3A35273A, x1A35669A;
|
||||
u32 x12E6283D, x9E47D3D4, x1A676AB4;
|
||||
u32 x891556DF, xE5E77F82, x6CF2295D;
|
||||
u32 x2E3CA5F5, x9697C1C6, x369CC1D6;
|
||||
u32 x0, x1, x2, x3;
|
||||
u32 x77777777, x77770000, x22225555, x11116666, x1F1F6F6F;
|
||||
u32 x70700000, x43433333, x00430033, x55557777, x55167744, x5A19784B;
|
||||
u32 x5A1987B4, x7A3BD7F5, x003B00F5, x221955A0, x05050707, x271C52A7;
|
||||
u32 x2A2A82A0, x6969B193, x1FE06F90, x16804E00, xE97FB1FF;
|
||||
u32 x43403302, x35CAED30, x37DEFFB7, x349ECCB5, x0B01234A;
|
||||
u32 x101884B4, x0FF8EB24, x41413333, x4FF9FB37, x4FC2FBC2;
|
||||
u32 x22222222, x16BCEE97, x0F080B04, x19B4E593;
|
||||
u32 x5C5C5C5C, x4448184C, x2DDABE71, x6992A63D;
|
||||
u32 x00, x01, x10, x11, x20, x21, x30, x31;
|
||||
|
||||
vsel(x550F550F, a1, a3, a5);
|
||||
vnot(xAAF0AAF0, x550F550F);
|
||||
vsel(xA5F5A5F5, xAAF0AAF0, a1, a3);
|
||||
vxor(x96C696C6, a2, xA5F5A5F5);
|
||||
vxor(x00FFFF00, a5, a6);
|
||||
vxor(x963969C6, x96C696C6, x00FFFF00);
|
||||
x77777777 = a1 | a3;
|
||||
x77770000 = x77777777 & ~a6;
|
||||
x22225555 = a1 ^ x77770000;
|
||||
x11116666 = a3 ^ x22225555;
|
||||
x1F1F6F6F = a4 | x11116666;
|
||||
|
||||
vsel(x2E3C2E3C, a3, xAAF0AAF0, a2);
|
||||
vsel(xB73121F7, a2, x963969C6, x96C696C6);
|
||||
vsel(x1501DF0F, a6, x550F550F, xB73121F7);
|
||||
vsel(x00558A5F, x1501DF0F, a5, a1);
|
||||
vxor(x2E69A463, x2E3C2E3C, x00558A5F);
|
||||
x70700000 = x77770000 & ~a4;
|
||||
x43433333 = a3 ^ x70700000;
|
||||
x00430033 = a5 & x43433333;
|
||||
x55557777 = a1 | x11116666;
|
||||
x55167744 = x00430033 ^ x55557777;
|
||||
x5A19784B = a4 ^ x55167744;
|
||||
|
||||
vsel(x0679ED42, x00FFFF00, x2E69A463, x96C696C6);
|
||||
vsel(x045157FD, a6, a1, x0679ED42);
|
||||
vsel(xB32077FF, xB73121F7, a6, x045157FD);
|
||||
vxor(x9D49D39C, x2E69A463, xB32077FF);
|
||||
vsel(x2, x9D49D39C, x2E69A463, a4);
|
||||
vxor(*out3, *out3, x2);
|
||||
x5A1987B4 = a6 ^ x5A19784B;
|
||||
x7A3BD7F5 = x22225555 | x5A1987B4;
|
||||
x003B00F5 = a5 & x7A3BD7F5;
|
||||
x221955A0 = x22225555 ^ x003B00F5;
|
||||
x05050707 = a4 & x55557777;
|
||||
x271C52A7 = x221955A0 ^ x05050707;
|
||||
|
||||
vsel(xAC81CFB2, xAAF0AAF0, x1501DF0F, x0679ED42);
|
||||
vsel(xF72577AF, xB32077FF, x550F550F, a1);
|
||||
vxor(x5BA4B81D, xAC81CFB2, xF72577AF);
|
||||
vsel(x1, x5BA4B81D, x963969C6, a4);
|
||||
vxor(*out2, *out2, x1);
|
||||
x2A2A82A0 = x7A3BD7F5 & ~a1;
|
||||
x6969B193 = x43433333 ^ x2A2A82A0;
|
||||
x1FE06F90 = a5 ^ x1F1F6F6F;
|
||||
x16804E00 = x1FE06F90 & ~x6969B193;
|
||||
xE97FB1FF = ~x16804E00;
|
||||
x20 = xE97FB1FF & ~a2;
|
||||
x21 = x20 ^ x5A19784B;
|
||||
*out3 ^= x21;
|
||||
|
||||
vsel(x5BA477AF, x5BA4B81D, xF72577AF, a6);
|
||||
vsel(x4895469F, x5BA477AF, x00558A5F, a2);
|
||||
vsel(x3A35273A, x2E3C2E3C, a2, x963969C6);
|
||||
vsel(x1A35669A, x4895469F, x3A35273A, x5BA4B81D);
|
||||
x43403302 = x43433333 & ~x003B00F5;
|
||||
x35CAED30 = x2A2A82A0 ^ x1FE06F90;
|
||||
x37DEFFB7 = x271C52A7 | x35CAED30;
|
||||
x349ECCB5 = x37DEFFB7 & ~x43403302;
|
||||
x0B01234A = x1F1F6F6F & ~x349ECCB5;
|
||||
|
||||
vsel(x12E6283D, a5, x5BA4B81D, x963969C6);
|
||||
vsel(x9E47D3D4, x96C696C6, x9D49D39C, xAC81CFB2);
|
||||
vsel(x1A676AB4, x12E6283D, x9E47D3D4, x4895469F);
|
||||
x101884B4 = x5A1987B4 & x349ECCB5;
|
||||
x0FF8EB24 = x1FE06F90 ^ x101884B4;
|
||||
x41413333 = x43433333 & x55557777;
|
||||
x4FF9FB37 = x0FF8EB24 | x41413333;
|
||||
x4FC2FBC2 = x003B00F5 ^ x4FF9FB37;
|
||||
x30 = x4FC2FBC2 & a2;
|
||||
x31 = x30 ^ x271C52A7;
|
||||
*out4 ^= x31;
|
||||
|
||||
vsel(x891556DF, xB32077FF, x4895469F, x3A35273A);
|
||||
vsel(xE5E77F82, xF72577AF, x00FFFF00, x12E6283D);
|
||||
vxor(x6CF2295D, x891556DF, xE5E77F82);
|
||||
vsel(x3, x1A35669A, x6CF2295D, a4);
|
||||
vxor(*out4, *out4, x3);
|
||||
x22222222 = a1 ^ x77777777;
|
||||
x16BCEE97 = x349ECCB5 ^ x22222222;
|
||||
x0F080B04 = a4 & x0FF8EB24;
|
||||
x19B4E593 = x16BCEE97 ^ x0F080B04;
|
||||
x00 = x0B01234A | a2;
|
||||
x01 = x00 ^ x19B4E593;
|
||||
*out1 ^= x01;
|
||||
|
||||
vsel(x2E3CA5F5, x2E3C2E3C, xA5F5A5F5, a6);
|
||||
vsel(x9697C1C6, x96C696C6, x963969C6, x045157FD);
|
||||
vsel(x369CC1D6, x2E3CA5F5, x9697C1C6, x5BA477AF);
|
||||
vsel(x0, x369CC1D6, x1A676AB4, a4);
|
||||
vxor(*out1, *out1, x0);
|
||||
x5C5C5C5C = x1F1F6F6F ^ x43433333;
|
||||
x4448184C = x5C5C5C5C & ~x19B4E593;
|
||||
x2DDABE71 = x22225555 ^ x0FF8EB24;
|
||||
x6992A63D = x4448184C ^ x2DDABE71;
|
||||
x10 = x1F1F6F6F & a2;
|
||||
x11 = x10 ^ x6992A63D;
|
||||
*out2 ^= x11;
|
||||
}
|
||||
|
||||
static void
|
||||
s6(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
|
||||
u32 * out1, u32 * out2, u32 * out3, u32 * out4)
|
||||
static void s6 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
|
||||
{
|
||||
u32 x555500FF, x666633CC, x606F30CF, x353A659A, x353A9A65, xCAC5659A;
|
||||
u32 x353A6565, x0A3F0A6F, x6C5939A3, x5963A3C6;
|
||||
u32 x35FF659A, x3AF06A95, x05CF0A9F, x16E94A97;
|
||||
u32 x86CD4C9B, x12E0FFFD, x942D9A67;
|
||||
u32 x142956AB, x455D45DF, x1C3EE619;
|
||||
u32 x2AEA70D5, x20CF7A9F, x3CF19C86, x69A49C79;
|
||||
u32 x840DBB67, x6DA19C1E, x925E63E1;
|
||||
u32 x9C3CA761, x257A75D5, xB946D2B4;
|
||||
u32 x0, x1, x2, x3;
|
||||
u32 x33CC33CC;
|
||||
u32 x3333FFFF, x11115555, x22DD6699, x22DD9966, x00220099;
|
||||
u32 x00551144, x33662277, x5A5A5A5A, x7B7E7A7F, x59A31CE6;
|
||||
u32 x09030C06, x09030000, x336622FF, x3A6522FF;
|
||||
u32 x484D494C, x0000B6B3, x0F0FB9BC, x00FC00F9, x0FFFB9FD;
|
||||
u32 x5DF75DF7, x116600F7, x1E69B94B, x1668B94B;
|
||||
u32 x7B7B7B7B, x411E5984, x1FFFFDFD, x5EE1A479;
|
||||
u32 x3CB4DFD2, x004B002D, xB7B2B6B3, xCCC9CDC8, xCC82CDE5;
|
||||
u32 x0055EEBB, x5A5AECE9, x0050ECA9, xC5CAC1CE, xC59A2D67;
|
||||
u32 x00, x01, x10, x11, x20, x21, x30, x31;
|
||||
|
||||
vsel(x555500FF, a1, a4, a5);
|
||||
vxor(x666633CC, a2, x555500FF);
|
||||
vsel(x606F30CF, x666633CC, a4, a3);
|
||||
vxor(x353A659A, a1, x606F30CF);
|
||||
vxor(x353A9A65, a5, x353A659A);
|
||||
vnot(xCAC5659A, x353A9A65);
|
||||
x33CC33CC = a2 ^ a5;
|
||||
|
||||
vsel(x353A6565, x353A659A, x353A9A65, a4);
|
||||
vsel(x0A3F0A6F, a3, a4, x353A6565);
|
||||
vxor(x6C5939A3, x666633CC, x0A3F0A6F);
|
||||
vxor(x5963A3C6, x353A9A65, x6C5939A3);
|
||||
x3333FFFF = a2 | a6;
|
||||
x11115555 = a1 & x3333FFFF;
|
||||
x22DD6699 = x33CC33CC ^ x11115555;
|
||||
x22DD9966 = a6 ^ x22DD6699;
|
||||
x00220099 = a5 & ~x22DD9966;
|
||||
|
||||
vsel(x35FF659A, a4, x353A659A, x353A6565);
|
||||
vxor(x3AF06A95, a3, x35FF659A);
|
||||
vsel(x05CF0A9F, a4, a3, x353A9A65);
|
||||
vsel(x16E94A97, x3AF06A95, x05CF0A9F, x6C5939A3);
|
||||
x00551144 = a1 & x22DD9966;
|
||||
x33662277 = a2 ^ x00551144;
|
||||
x5A5A5A5A = a1 ^ a3;
|
||||
x7B7E7A7F = x33662277 | x5A5A5A5A;
|
||||
x59A31CE6 = x22DD6699 ^ x7B7E7A7F;
|
||||
|
||||
vsel(x86CD4C9B, xCAC5659A, x05CF0A9F, x6C5939A3);
|
||||
vsel(x12E0FFFD, a5, x3AF06A95, x16E94A97);
|
||||
vsel(x942D9A67, x86CD4C9B, x353A9A65, x12E0FFFD);
|
||||
vsel(x0, xCAC5659A, x942D9A67, a6);
|
||||
vxor(*out1, *out1, x0);
|
||||
x09030C06 = a3 & x59A31CE6;
|
||||
x09030000 = x09030C06 & ~a6;
|
||||
x336622FF = x00220099 | x33662277;
|
||||
x3A6522FF = x09030000 ^ x336622FF;
|
||||
x30 = x3A6522FF & a4;
|
||||
x31 = x30 ^ x59A31CE6;
|
||||
*out4 ^= x31;
|
||||
|
||||
vsel(x142956AB, x353A659A, x942D9A67, a2);
|
||||
vsel(x455D45DF, a1, x86CD4C9B, x142956AB);
|
||||
vxor(x1C3EE619, x5963A3C6, x455D45DF);
|
||||
vsel(x3, x5963A3C6, x1C3EE619, a6);
|
||||
vxor(*out4, *out4, x3);
|
||||
x484D494C = a2 ^ x7B7E7A7F;
|
||||
x0000B6B3 = a6 & ~x484D494C;
|
||||
x0F0FB9BC = a3 ^ x0000B6B3;
|
||||
x00FC00F9 = a5 & ~x09030C06;
|
||||
x0FFFB9FD = x0F0FB9BC | x00FC00F9;
|
||||
|
||||
vsel(x2AEA70D5, x3AF06A95, x606F30CF, x353A9A65);
|
||||
vsel(x20CF7A9F, x2AEA70D5, x05CF0A9F, x0A3F0A6F);
|
||||
vxor(x3CF19C86, x1C3EE619, x20CF7A9F);
|
||||
vxor(x69A49C79, x555500FF, x3CF19C86);
|
||||
x5DF75DF7 = a1 | x59A31CE6;
|
||||
x116600F7 = x336622FF & x5DF75DF7;
|
||||
x1E69B94B = x0F0FB9BC ^ x116600F7;
|
||||
x1668B94B = x1E69B94B & ~x09030000;
|
||||
x20 = x00220099 | a4;
|
||||
x21 = x20 ^ x1668B94B;
|
||||
*out3 ^= x21;
|
||||
|
||||
vsel(x840DBB67, a5, x942D9A67, x86CD4C9B);
|
||||
vsel(x6DA19C1E, x69A49C79, x3CF19C86, x840DBB67);
|
||||
vnot(x925E63E1, x6DA19C1E);
|
||||
vsel(x1, x925E63E1, x69A49C79, a6);
|
||||
vxor(*out2, *out2, x1);
|
||||
x7B7B7B7B = a2 | x5A5A5A5A;
|
||||
x411E5984 = x3A6522FF ^ x7B7B7B7B;
|
||||
x1FFFFDFD = x11115555 | x0FFFB9FD;
|
||||
x5EE1A479 = x411E5984 ^ x1FFFFDFD;
|
||||
|
||||
vsel(x9C3CA761, x840DBB67, x1C3EE619, x3CF19C86);
|
||||
vsel(x257A75D5, x455D45DF, x2AEA70D5, x606F30CF);
|
||||
vxor(xB946D2B4, x9C3CA761, x257A75D5);
|
||||
vsel(x2, x16E94A97, xB946D2B4, a6);
|
||||
vxor(*out3, *out3, x2);
|
||||
x3CB4DFD2 = x22DD6699 ^ x1E69B94B;
|
||||
x004B002D = a5 & ~x3CB4DFD2;
|
||||
xB7B2B6B3 = ~x484D494C;
|
||||
xCCC9CDC8 = x7B7B7B7B ^ xB7B2B6B3;
|
||||
xCC82CDE5 = x004B002D ^ xCCC9CDC8;
|
||||
x10 = xCC82CDE5 & ~a4;
|
||||
x11 = x10 ^ x5EE1A479;
|
||||
*out2 ^= x11;
|
||||
|
||||
x0055EEBB = a6 ^ x00551144;
|
||||
x5A5AECE9 = a1 ^ x0F0FB9BC;
|
||||
x0050ECA9 = x0055EEBB & x5A5AECE9;
|
||||
xC5CAC1CE = x09030C06 ^ xCCC9CDC8;
|
||||
xC59A2D67 = x0050ECA9 ^ xC5CAC1CE;
|
||||
x00 = x0FFFB9FD & ~a4;
|
||||
x01 = x00 ^ xC59A2D67;
|
||||
*out1 ^= x01;
|
||||
}
|
||||
|
||||
/*
 * s7: by its name and the section banner, one of the eight bitslice DES
 * S-box routines. Takes six u32 input words (a1..a6) and XORs its four
 * results into *out1..*out4, so the previous contents of the outputs are
 * combined with the result rather than overwritten.
 *
 * NOTE(review): this span is a rendered diff, so two versions of the routine
 * are interleaved row by row. The two-line `s7(u32 a1, ...)` signature, the
 * first group of declarations and every vsel()/vxor()/vnot() statement appear
 * to be the removed select-based version; the single-line const-qualified
 * signature, the second group of declarations and the statements written with
 * plain &, |, ^ and ~ appear to be the added version. Each version XORs into
 * every output exactly once, so read only one of them at a time.
 */
static void
|
||||
s7(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
|
||||
u32 * out1, u32 * out2, u32 * out3, u32 * out4)
|
||||
static void s7 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
|
||||
{
|
||||
/* Temporaries referenced by the vsel()/vxor()/vnot() statements. */
u32 x44447777, x4B4B7878, x22772277, x0505F5F5, x220522F5, x694E5A8D;
|
||||
u32 x00FFFF00, x66666666, x32353235, x26253636, x26DAC936;
|
||||
u32 x738F9C63, x11EF9867, x26DA9867;
|
||||
u32 x4B4B9C63, x4B666663, x4E639396;
|
||||
u32 x4E4B393C, xFF00FF00, xFF05DD21, xB14EE41D;
|
||||
u32 xD728827B, x6698807B, x699C585B;
|
||||
u32 x738C847B, xA4A71E18, x74878E78;
|
||||
u32 x333D9639, x74879639, x8B7869C6;
|
||||
u32 x0, x1, x2, x3;
|
||||
/* Temporaries referenced by the plain-operator statements. */
u32 x0FF00FF0, x3CC33CC3, x00003CC3, x0F000F00, x5A555A55, x00001841;
|
||||
u32 x00000F00, x33333C33, x7B777E77, x0FF0F00F, x74878E78;
|
||||
u32 x003C003C, x5A7D5A7D, x333300F0, x694E5A8D;
|
||||
u32 x0FF0CCCC, x000F0303, x5A505854, x33CC000F, x699C585B;
|
||||
u32 x7F878F78, x21101013, x7F979F7B, x30030CC0, x4F9493BB;
|
||||
u32 x6F9CDBFB, x0000DBFB, x00005151, x26DAC936, x26DA9867;
|
||||
u32 x27DA9877, x27DA438C, x2625C9C9, x27FFCBCD;
|
||||
u32 x27FF1036, x27FF103E, xB06B6C44, x97947C7A;
|
||||
u32 x00, x01, x10, x11, x20, x21, x30, x31;
|
||||
|
||||
vsel(x44447777, a2, a6, a3);
|
||||
vxor(x4B4B7878, a4, x44447777);
|
||||
vsel(x22772277, a3, a5, a2);
|
||||
vsel(x0505F5F5, a6, a2, a4);
|
||||
vsel(x220522F5, x22772277, x0505F5F5, a5);
|
||||
vxor(x694E5A8D, x4B4B7878, x220522F5);
|
||||
x0FF00FF0 = a4 ^ a5;
|
||||
x3CC33CC3 = a3 ^ x0FF00FF0;
|
||||
x00003CC3 = a6 & x3CC33CC3;
|
||||
x0F000F00 = a4 & x0FF00FF0;
|
||||
x5A555A55 = a2 ^ x0F000F00;
|
||||
x00001841 = x00003CC3 & x5A555A55;
|
||||
|
||||
vxor(x00FFFF00, a5, a6);
|
||||
vxor(x66666666, a2, a3);
|
||||
vsel(x32353235, a3, x220522F5, a4);
|
||||
vsel(x26253636, x66666666, x32353235, x4B4B7878);
|
||||
vxor(x26DAC936, x00FFFF00, x26253636);
|
||||
vsel(x0, x26DAC936, x694E5A8D, a1);
|
||||
vxor(*out1, *out1, x0);
|
||||
x00000F00 = a6 & x0F000F00;
|
||||
x33333C33 = a3 ^ x00000F00;
|
||||
x7B777E77 = x5A555A55 | x33333C33;
|
||||
x0FF0F00F = a6 ^ x0FF00FF0;
|
||||
x74878E78 = x7B777E77 ^ x0FF0F00F;
|
||||
x30 = a1 & ~x00001841;
|
||||
x31 = x30 ^ x74878E78;
|
||||
*out4 ^= x31;
|
||||
|
||||
vxor(x738F9C63, a2, x26DAC936);
|
||||
vsel(x11EF9867, x738F9C63, a5, x66666666);
|
||||
vsel(x26DA9867, x26DAC936, x11EF9867, a6);
|
||||
x003C003C = a5 & ~x3CC33CC3;
|
||||
x5A7D5A7D = x5A555A55 | x003C003C;
|
||||
x333300F0 = x00003CC3 ^ x33333C33;
|
||||
x694E5A8D = x5A7D5A7D ^ x333300F0;
|
||||
|
||||
vsel(x4B4B9C63, x4B4B7878, x738F9C63, a6);
|
||||
vsel(x4B666663, x4B4B9C63, x66666666, x00FFFF00);
|
||||
vxor(x4E639396, x0505F5F5, x4B666663);
|
||||
x0FF0CCCC = x00003CC3 ^ x0FF0F00F;
|
||||
x000F0303 = a4 & ~x0FF0CCCC;
|
||||
x5A505854 = x5A555A55 & ~x000F0303;
|
||||
x33CC000F = a5 ^ x333300F0;
|
||||
x699C585B = x5A505854 ^ x33CC000F;
|
||||
|
||||
vsel(x4E4B393C, x4B4B7878, x4E639396, a2);
|
||||
vnot(xFF00FF00, a5);
|
||||
vsel(xFF05DD21, xFF00FF00, x738F9C63, x32353235);
|
||||
vxor(xB14EE41D, x4E4B393C, xFF05DD21);
|
||||
vsel(x1, xB14EE41D, x26DA9867, a1);
|
||||
vxor(*out2, *out2, x1);
|
||||
x7F878F78 = x0F000F00 | x74878E78;
|
||||
x21101013 = a3 & x699C585B;
|
||||
x7F979F7B = x7F878F78 | x21101013;
|
||||
x30030CC0 = x3CC33CC3 & ~x0FF0F00F;
|
||||
x4F9493BB = x7F979F7B ^ x30030CC0;
|
||||
x00 = x4F9493BB & ~a1;
|
||||
x01 = x00 ^ x694E5A8D;
|
||||
*out1 ^= x01;
|
||||
|
||||
vxor(xD728827B, x66666666, xB14EE41D);
|
||||
vsel(x6698807B, x26DA9867, xD728827B, x4E4B393C);
|
||||
vsel(x699C585B, x6698807B, x694E5A8D, xFF05DD21);
|
||||
vsel(x2, x699C585B, x4E639396, a1);
|
||||
vxor(*out3, *out3, x2);
|
||||
x6F9CDBFB = x699C585B | x4F9493BB;
|
||||
x0000DBFB = a6 & x6F9CDBFB;
|
||||
x00005151 = a2 & x0000DBFB;
|
||||
x26DAC936 = x694E5A8D ^ x4F9493BB;
|
||||
x26DA9867 = x00005151 ^ x26DAC936;
|
||||
|
||||
vsel(x738C847B, x738F9C63, xD728827B, x4B4B7878);
|
||||
vxor(xA4A71E18, x738F9C63, xD728827B);
|
||||
vsel(x74878E78, x738C847B, xA4A71E18, a4);
|
||||
x27DA9877 = x21101013 | x26DA9867;
|
||||
x27DA438C = x0000DBFB ^ x27DA9877;
|
||||
x2625C9C9 = a5 ^ x26DAC936;
|
||||
x27FFCBCD = x27DA438C | x2625C9C9;
|
||||
x20 = x27FFCBCD & a1;
|
||||
x21 = x20 ^ x699C585B;
|
||||
*out3 ^= x21;
|
||||
|
||||
vsel(x333D9639, x32353235, x738C847B, xB14EE41D);
|
||||
vsel(x74879639, x74878E78, x333D9639, a6);
|
||||
vnot(x8B7869C6, x74879639);
|
||||
vsel(x3, x74878E78, x8B7869C6, a1);
|
||||
vxor(*out4, *out4, x3);
|
||||
x27FF1036 = x0000DBFB ^ x27FFCBCD;
|
||||
x27FF103E = x003C003C | x27FF1036;
|
||||
xB06B6C44 = ~x4F9493BB;
|
||||
x97947C7A = x27FF103E ^ xB06B6C44;
|
||||
x10 = x97947C7A & ~a1;
|
||||
x11 = x10 ^ x26DA9867;
|
||||
*out2 ^= x11;
|
||||
}
|
||||
|
||||
/*
 * s8: by its name and the section banner, one of the eight bitslice DES
 * S-box routines. Takes six u32 input words (a1..a6) and XORs its four
 * results into *out1..*out4, so the previous contents of the outputs are
 * combined with the result rather than overwritten.
 *
 * NOTE(review): this span is a rendered diff, so two versions of the routine
 * are interleaved row by row. The two-line `s8(u32 a1, ...)` signature, the
 * first group of declarations and every vsel()/vxor()/vnot()/vor() statement
 * appear to be the removed select-based version; the single-line
 * const-qualified signature, the second group of declarations and the
 * statements written with plain &, |, ^ and ~ appear to be the added version.
 * Each version XORs into every output exactly once, so read only one of them
 * at a time.
 */
static void
|
||||
s8(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
|
||||
u32 * out1, u32 * out2, u32 * out3, u32 * out4)
|
||||
static void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
|
||||
{
|
||||
/* Temporaries referenced by the vsel()/vxor()/vnot()/vor() statements. */
u32 x0505F5F5, x05FAF50A, x0F0F00FF, x22227777, x07DA807F, x34E9B34C;
|
||||
u32 x00FFF00F, x0033FCCF, x5565B15C, x0C0C3F3F, x59698E63;
|
||||
u32 x3001F74E, x30555745, x693CD926;
|
||||
u32 x0C0CD926, x0C3F25E9, x38D696A5;
|
||||
u32 xC729695A;
|
||||
u32 x03D2117B, xC778395B, xCB471CB2;
|
||||
u32 x5425B13F, x56B3803F, x919AE965;
|
||||
u32 x17B3023F, x75555755, x62E6556A, xA59E6C31;
|
||||
u32 x0, x1, x2, x3;
|
||||
/* Temporaries referenced by the plain-operator statements. */
u32 x0C0C0C0C, x0000F0F0, x00FFF00F, x00555005, x00515001;
|
||||
u32 x33000330, x77555775, x30303030, x3030CFCF, x30104745, x30555745;
|
||||
u32 xFF000FF0, xCF1048B5, x080A080A, xC71A40BF, xCB164CB3;
|
||||
u32 x9E4319E6, x000019E6, xF429738C, xF4296A6A, xC729695A;
|
||||
u32 xC47C3D2F, xF77F3F3F, x9E43E619, x693CD926;
|
||||
u32 xF719A695, xF4FF73FF, x03E6D56A, x56B3803F;
|
||||
u32 xF700A600, x61008000, x03B7856B, x62B7056B;
|
||||
u32 x00, x01, x10, x11, x20, x21, x30, x31;
|
||||
|
||||
vsel(x0505F5F5, a5, a1, a3);
|
||||
vxor(x05FAF50A, a4, x0505F5F5);
|
||||
vsel(x0F0F00FF, a3, a4, a5);
|
||||
vsel(x22227777, a2, a5, a1);
|
||||
vsel(x07DA807F, x05FAF50A, x0F0F00FF, x22227777);
|
||||
vxor(x34E9B34C, a2, x07DA807F);
|
||||
x0C0C0C0C = a3 & ~a2;
|
||||
x0000F0F0 = a5 & ~a3;
|
||||
x00FFF00F = a4 ^ x0000F0F0;
|
||||
x00555005 = a1 & x00FFF00F;
|
||||
x00515001 = x00555005 & ~x0C0C0C0C;
|
||||
|
||||
vsel(x00FFF00F, x05FAF50A, a4, a3);
|
||||
vsel(x0033FCCF, a5, x00FFF00F, a2);
|
||||
vsel(x5565B15C, a1, x34E9B34C, x0033FCCF);
|
||||
vsel(x0C0C3F3F, a3, a5, a2);
|
||||
vxor(x59698E63, x5565B15C, x0C0C3F3F);
|
||||
x33000330 = a2 & ~x00FFF00F;
|
||||
x77555775 = a1 | x33000330;
|
||||
x30303030 = a2 & ~a3;
|
||||
x3030CFCF = a5 ^ x30303030;
|
||||
x30104745 = x77555775 & x3030CFCF;
|
||||
x30555745 = x00555005 | x30104745;
|
||||
|
||||
vsel(x3001F74E, x34E9B34C, a5, x05FAF50A);
|
||||
vsel(x30555745, x3001F74E, a1, x00FFF00F);
|
||||
vxor(x693CD926, x59698E63, x30555745);
|
||||
vsel(x2, x693CD926, x59698E63, a6);
|
||||
vxor(*out3, *out3, x2);
|
||||
xFF000FF0 = ~x00FFF00F;
|
||||
xCF1048B5 = x30104745 ^ xFF000FF0;
|
||||
x080A080A = a3 & ~x77555775;
|
||||
xC71A40BF = xCF1048B5 ^ x080A080A;
|
||||
xCB164CB3 = x0C0C0C0C ^ xC71A40BF;
|
||||
x10 = x00515001 | a6;
|
||||
x11 = x10 ^ xCB164CB3;
|
||||
*out2 ^= x11;
|
||||
|
||||
vsel(x0C0CD926, x0C0C3F3F, x693CD926, a5);
|
||||
vxor(x0C3F25E9, x0033FCCF, x0C0CD926);
|
||||
vxor(x38D696A5, x34E9B34C, x0C3F25E9);
|
||||
x9E4319E6 = a1 ^ xCB164CB3;
|
||||
x000019E6 = a5 & x9E4319E6;
|
||||
xF429738C = a2 ^ xC71A40BF;
|
||||
xF4296A6A = x000019E6 ^ xF429738C;
|
||||
xC729695A = x33000330 ^ xF4296A6A;
|
||||
|
||||
vnot(xC729695A, x38D696A5);
|
||||
xC47C3D2F = x30555745 ^ xF4296A6A;
|
||||
xF77F3F3F = a2 | xC47C3D2F;
|
||||
x9E43E619 = a5 ^ x9E4319E6;
|
||||
x693CD926 = xF77F3F3F ^ x9E43E619;
|
||||
x20 = x30555745 & a6;
|
||||
x21 = x20 ^ x693CD926;
|
||||
*out3 ^= x21;
|
||||
|
||||
vsel(x03D2117B, x07DA807F, a2, x0C0CD926);
|
||||
vsel(xC778395B, xC729695A, x03D2117B, x30555745);
|
||||
vxor(xCB471CB2, x0C3F25E9, xC778395B);
|
||||
vsel(x1, xCB471CB2, x34E9B34C, a6);
|
||||
vxor(*out2, *out2, x1);
|
||||
xF719A695 = x3030CFCF ^ xC729695A;
|
||||
xF4FF73FF = a4 | xF429738C;
|
||||
x03E6D56A = xF719A695 ^ xF4FF73FF;
|
||||
x56B3803F = a1 ^ x03E6D56A;
|
||||
x30 = x56B3803F & a6;
|
||||
x31 = x30 ^ xC729695A;
|
||||
*out4 ^= x31;
|
||||
|
||||
vsel(x5425B13F, x5565B15C, x0C0C3F3F, x03D2117B);
|
||||
vsel(x56B3803F, x07DA807F, x5425B13F, x59698E63);
|
||||
vxor(x919AE965, xC729695A, x56B3803F);
|
||||
vsel(x3, xC729695A, x919AE965, a6);
|
||||
vxor(*out4, *out4, x3);
|
||||
|
||||
vsel(x17B3023F, x07DA807F, a2, x59698E63);
|
||||
vor(x75555755, a1, x30555745);
|
||||
vxor(x62E6556A, x17B3023F, x75555755);
|
||||
vxor(xA59E6C31, xC778395B, x62E6556A);
|
||||
vsel(x0, xA59E6C31, x38D696A5, a6);
|
||||
vxor(*out1, *out1, x0);
|
||||
xF700A600 = xF719A695 & ~a4;
|
||||
x61008000 = x693CD926 & xF700A600;
|
||||
x03B7856B = x00515001 ^ x03E6D56A;
|
||||
x62B7056B = x61008000 ^ x03B7856B;
|
||||
x00 = x62B7056B | a6;
|
||||
x01 = x00 ^ xC729695A;
|
||||
*out1 ^= x01;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/*
 * Exchange the two u32 values that the pointers a and b refer to.
 * The arguments are parenthesized so that pointer expressions such as
 * `p + 1` expand correctly. Each argument is evaluated twice, so pass
 * expressions without side effects.
 */
#define SWAP(a, b) { u32 tmp=*(a);*(a)=*(b);*(b)=tmp; }
|
||||
@ -1463,8 +1532,8 @@ s8(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
|
||||
#endif
|
||||
|
||||
/*
 * Declaration-qualifier macros used when IS_AMD is defined.
 * NOTE(review): this span is a rendered diff. The two `volatile` definitions
 * appear to be the removed lines and the two empty definitions their
 * replacements, which matches the commit note that some volatile keywords
 * are no longer needed. Confirm against the repository before relying on it.
 */
#ifdef IS_AMD
|
||||
#define KXX_DECL volatile
|
||||
#define sXXX_DECL volatile
|
||||
#define KXX_DECL
|
||||
#define sXXX_DECL
|
||||
#endif
|
||||
|
||||
#ifdef IS_GENERIC
|
||||
|
1423
OpenCL/m03000_a3.cl
1423
OpenCL/m03000_a3.cl
File diff suppressed because it is too large
Load Diff
@ -673,8 +673,8 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05500_m04 (__glo
|
||||
* DES2
|
||||
*/
|
||||
|
||||
volatile const u32 bc = (b >> 24) | (c << 8);
|
||||
volatile const u32 cd = (c >> 24) | (d << 8);
|
||||
const u32 bc = (b >> 24) | (c << 8);
|
||||
const u32 cd = (c >> 24) | (d << 8);
|
||||
|
||||
transform_netntlmv1_key (bc, cd, key);
|
||||
|
||||
|
@ -725,8 +725,8 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05500_m04 (__glo
|
||||
* DES2
|
||||
*/
|
||||
|
||||
volatile const u32 bc = (b >> 24) | (c << 8);
|
||||
volatile const u32 cd = (c >> 24) | (d << 8);
|
||||
const u32 bc = (b >> 24) | (c << 8);
|
||||
const u32 cd = (c >> 24) | (d << 8);
|
||||
|
||||
transform_netntlmv1_key (bc, cd, key);
|
||||
|
||||
|
@ -602,8 +602,8 @@ static void m05500m (__local u32 s_SPtrans[8][64], __local u32 s_skb[8][64], u32
|
||||
* DES2
|
||||
*/
|
||||
|
||||
volatile const u32 bc = (b >> 24) | (c << 8);
|
||||
volatile const u32 cd = (c >> 24) | (d << 8);
|
||||
const u32 bc = (b >> 24) | (c << 8);
|
||||
const u32 cd = (c >> 24) | (d << 8);
|
||||
|
||||
transform_netntlmv1_key (bc, cd, key);
|
||||
|
||||
|
@ -2,7 +2,7 @@ oclHashcat v2.10
|
||||
================
|
||||
|
||||
NV users require ForceWare 346.59 or later (recommended 358.09 or later)
|
||||
AMD users require Catalyst 14.9 or later (recommended 15.9 or later)
|
||||
AMD users require Catalyst 14.9 or later (recommended 15.12 or later)
|
||||
|
||||
##
|
||||
## Features
|
||||
|
@ -6089,7 +6089,7 @@ int main (int argc, char **argv)
|
||||
|
||||
if (attack_mode != ATTACK_MODE_STRAIGHT)
|
||||
{
|
||||
if (weak_hash_threshold != WEAK_HASH_THRESHOLD)
|
||||
if ((weak_hash_threshold != WEAK_HASH_THRESHOLD) && (weak_hash_threshold != 0))
|
||||
{
|
||||
log_error ("ERROR: setting --weak-hash-threshold allowed only in straight-attack mode");
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user