Merge branch 'master' into DCC2-NT

pull/3657/head
Jens Steube 9 months ago committed by GitHub
commit 1259ea03bd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -8,9 +8,12 @@ root = true
charset = utf-8
end_of_line = lf
insert_final_newline = true
trim_trailing_whitespace = true
indent_style = space
indent_size = 2
trim_trailing_whitespace = true
[*.{pot,rule}]
trim_trailing_whitespace = false
[Makefile]
indent_style = tab

@ -3,7 +3,7 @@ hashcat build documentation
### Revision ###
* 1.5
* 1.6
### Author ###
@ -61,6 +61,10 @@ Otherwise:
- Cached kernels go to: $HOME/.cache/hashcat
- Potfiles go to: $HOME/.local/share/hashcat/
### Building hashcat for Windows (using macOS) ###
Refer to [BUILD_macOS.md](BUILD_macOS.md)
### Building hashcat for Windows (using Windows Subsystem for Linux) ###
Refer to [BUILD_WSL.md](BUILD_WSL.md)

@ -0,0 +1,27 @@
# Compiling hashcat for Windows with macOS.
Tested on macOS 12.6.6 M1.
Make sure Homebrew is up to date.
### Installation ###
```
brew install mingw-w64
git clone https://github.com/hashcat/hashcat
git clone https://github.com/win-iconv/win-iconv
cd win-iconv/
patch < ../hashcat/tools/win-iconv-64.diff
sudo make install
cd ../
```
### Building ###
You've already cloned the latest master revision of the hashcat repository above, so switch to that folder and run `make win` to start compiling hashcat:
```
cd hashcat/
make win
```
The process may take a while; please be patient.

@ -1187,7 +1187,7 @@ DECLSPEC void mul_mod128 (PRIVATE_AS u32 *x, PRIVATE_AS const u32 *y, PRIVATE_AS
t[62] += t[61] < p[125];
t[63] += t[62] < p[126];
// VERY IMPORTANT: if we have an overflow at the highest u32, we need to do the substraction
// VERY IMPORTANT: if we have an overflow at the highest u32, we need to do the subtraction
const u32 c = t[63] < p[127]; // carry / overflow

@ -0,0 +1,732 @@
/*
* CAST OpenCL
*
* Copyright (c) 2018, magnum.
* This software is hereby released to the general public under
* the following terms: Redistribution and use in source and binary
* forms, with or without modification, are permitted.
*
* Based on version in Truecrypt 7.1a sources
* Based on cast.cpp - written and placed in the public domain by
* Wei Dai and Leonard Janke
* Based on Steve Reid's public domain cast.c
*
* atom - changed S boxes from constant mem to shared mem
*/
#ifndef _OPENCL_CAST_H
#define _OPENCL_CAST_H
// #include "opencl_misc.h"
/*
 * Load a big-endian 32-bit value into n from the four bytes b[i] .. b[i + 3];
 * b[i] ends up as the most significant byte.
 * `b` and `i` are expanded four times each, so pass expressions without
 * side effects.
 */
#define GET_UINT32BE(n, b, i)             \
{                                         \
  (n) = ((uint) (b)[(i) + 3]      )       \
      | ((uint) (b)[(i) + 2] <<  8)       \
      | ((uint) (b)[(i) + 1] << 16)       \
      | ((uint) (b)[(i)    ] << 24);      \
}
/*
 * Store the 32-bit value n into b[i] .. b[i + 3] in big-endian order:
 * the most significant byte of n goes to b[i].
 * Every argument is expanded four times, so pass expressions without
 * side effects.
 */
#define PUT_UINT32BE(n, b, i)                    \
{                                                \
  (b)[(i) + 0] = (u8) (((n) >> 24) & 0xff);      \
  (b)[(i) + 1] = (u8) (((n) >> 16) & 0xff);      \
  (b)[(i) + 2] = (u8) (((n) >>  8) & 0xff);      \
  (b)[(i) + 3] = (u8) (((n)      ) & 0xff);      \
}
/*
 * Expanded CAST key schedule.
 *
 * The round macros read K[i] as the masking key and K[i + 16] as the
 * rotation key of round i, so K[0..15] hold the masking keys and
 * K[16..31] hold the rotation keys.
 */
typedef struct
{
  uint K[32];

} CAST_KEY;
/*
 * Extract byte number y of x (y = 0 is the least significant byte) and
 * return it zero-extended as a uint.
 * The whole replacement list is parenthesized so the macro stays a single
 * operand in any surrounding expression (e.g. under sizeof or next to a
 * postfix operator).
 */
#define GETBYTE(x, y) ((uint) (u8) ((x) >> (8 * (y))))
/* Macros to access 8-bit bytes out of a 32-bit word:
 * U8a is the most significant byte, U8d the least significant. */
#define U8a(x) GETBYTE(x, 3)
#define U8b(x) GETBYTE(x, 2)
#define U8c(x) GETBYTE(x, 1)
#define U8d(x) GETBYTE(x, 0)
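/* CAST S-boxes: eight tables of 256 32-bit words each, stored in CONSTANT_AS
 * memory. Cast5Encrypt and Cast5Decrypt do not read S directly; their round
 * macros index the s_S table pointer supplied by the caller. */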
CONSTANT_AS uint S[8][256] = {
{
0x30FB40D4UL, 0x9FA0FF0BUL, 0x6BECCD2FUL, 0x3F258C7AUL,
0x1E213F2FUL, 0x9C004DD3UL, 0x6003E540UL, 0xCF9FC949UL,
0xBFD4AF27UL, 0x88BBBDB5UL, 0xE2034090UL, 0x98D09675UL,
0x6E63A0E0UL, 0x15C361D2UL, 0xC2E7661DUL, 0x22D4FF8EUL,
0x28683B6FUL, 0xC07FD059UL, 0xFF2379C8UL, 0x775F50E2UL,
0x43C340D3UL, 0xDF2F8656UL, 0x887CA41AUL, 0xA2D2BD2DUL,
0xA1C9E0D6UL, 0x346C4819UL, 0x61B76D87UL, 0x22540F2FUL,
0x2ABE32E1UL, 0xAA54166BUL, 0x22568E3AUL, 0xA2D341D0UL,
0x66DB40C8UL, 0xA784392FUL, 0x004DFF2FUL, 0x2DB9D2DEUL,
0x97943FACUL, 0x4A97C1D8UL, 0x527644B7UL, 0xB5F437A7UL,
0xB82CBAEFUL, 0xD751D159UL, 0x6FF7F0EDUL, 0x5A097A1FUL,
0x827B68D0UL, 0x90ECF52EUL, 0x22B0C054UL, 0xBC8E5935UL,
0x4B6D2F7FUL, 0x50BB64A2UL, 0xD2664910UL, 0xBEE5812DUL,
0xB7332290UL, 0xE93B159FUL, 0xB48EE411UL, 0x4BFF345DUL,
0xFD45C240UL, 0xAD31973FUL, 0xC4F6D02EUL, 0x55FC8165UL,
0xD5B1CAADUL, 0xA1AC2DAEUL, 0xA2D4B76DUL, 0xC19B0C50UL,
0x882240F2UL, 0x0C6E4F38UL, 0xA4E4BFD7UL, 0x4F5BA272UL,
0x564C1D2FUL, 0xC59C5319UL, 0xB949E354UL, 0xB04669FEUL,
0xB1B6AB8AUL, 0xC71358DDUL, 0x6385C545UL, 0x110F935DUL,
0x57538AD5UL, 0x6A390493UL, 0xE63D37E0UL, 0x2A54F6B3UL,
0x3A787D5FUL, 0x6276A0B5UL, 0x19A6FCDFUL, 0x7A42206AUL,
0x29F9D4D5UL, 0xF61B1891UL, 0xBB72275EUL, 0xAA508167UL,
0x38901091UL, 0xC6B505EBUL, 0x84C7CB8CUL, 0x2AD75A0FUL,
0x874A1427UL, 0xA2D1936BUL, 0x2AD286AFUL, 0xAA56D291UL,
0xD7894360UL, 0x425C750DUL, 0x93B39E26UL, 0x187184C9UL,
0x6C00B32DUL, 0x73E2BB14UL, 0xA0BEBC3CUL, 0x54623779UL,
0x64459EABUL, 0x3F328B82UL, 0x7718CF82UL, 0x59A2CEA6UL,
0x04EE002EUL, 0x89FE78E6UL, 0x3FAB0950UL, 0x325FF6C2UL,
0x81383F05UL, 0x6963C5C8UL, 0x76CB5AD6UL, 0xD49974C9UL,
0xCA180DCFUL, 0x380782D5UL, 0xC7FA5CF6UL, 0x8AC31511UL,
0x35E79E13UL, 0x47DA91D0UL, 0xF40F9086UL, 0xA7E2419EUL,
0x31366241UL, 0x051EF495UL, 0xAA573B04UL, 0x4A805D8DUL,
0x548300D0UL, 0x00322A3CUL, 0xBF64CDDFUL, 0xBA57A68EUL,
0x75C6372BUL, 0x50AFD341UL, 0xA7C13275UL, 0x915A0BF5UL,
0x6B54BFABUL, 0x2B0B1426UL, 0xAB4CC9D7UL, 0x449CCD82UL,
0xF7FBF265UL, 0xAB85C5F3UL, 0x1B55DB94UL, 0xAAD4E324UL,
0xCFA4BD3FUL, 0x2DEAA3E2UL, 0x9E204D02UL, 0xC8BD25ACUL,
0xEADF55B3UL, 0xD5BD9E98UL, 0xE31231B2UL, 0x2AD5AD6CUL,
0x954329DEUL, 0xADBE4528UL, 0xD8710F69UL, 0xAA51C90FUL,
0xAA786BF6UL, 0x22513F1EUL, 0xAA51A79BUL, 0x2AD344CCUL,
0x7B5A41F0UL, 0xD37CFBADUL, 0x1B069505UL, 0x41ECE491UL,
0xB4C332E6UL, 0x032268D4UL, 0xC9600ACCUL, 0xCE387E6DUL,
0xBF6BB16CUL, 0x6A70FB78UL, 0x0D03D9C9UL, 0xD4DF39DEUL,
0xE01063DAUL, 0x4736F464UL, 0x5AD328D8UL, 0xB347CC96UL,
0x75BB0FC3UL, 0x98511BFBUL, 0x4FFBCC35UL, 0xB58BCF6AUL,
0xE11F0ABCUL, 0xBFC5FE4AUL, 0xA70AEC10UL, 0xAC39570AUL,
0x3F04442FUL, 0x6188B153UL, 0xE0397A2EUL, 0x5727CB79UL,
0x9CEB418FUL, 0x1CACD68DUL, 0x2AD37C96UL, 0x0175CB9DUL,
0xC69DFF09UL, 0xC75B65F0UL, 0xD9DB40D8UL, 0xEC0E7779UL,
0x4744EAD4UL, 0xB11C3274UL, 0xDD24CB9EUL, 0x7E1C54BDUL,
0xF01144F9UL, 0xD2240EB1UL, 0x9675B3FDUL, 0xA3AC3755UL,
0xD47C27AFUL, 0x51C85F4DUL, 0x56907596UL, 0xA5BB15E6UL,
0x580304F0UL, 0xCA042CF1UL, 0x011A37EAUL, 0x8DBFAADBUL,
0x35BA3E4AUL, 0x3526FFA0UL, 0xC37B4D09UL, 0xBC306ED9UL,
0x98A52666UL, 0x5648F725UL, 0xFF5E569DUL, 0x0CED63D0UL,
0x7C63B2CFUL, 0x700B45E1UL, 0xD5EA50F1UL, 0x85A92872UL,
0xAF1FBDA7UL, 0xD4234870UL, 0xA7870BF3UL, 0x2D3B4D79UL,
0x42E04198UL, 0x0CD0EDE7UL, 0x26470DB8UL, 0xF881814CUL,
0x474D6AD7UL, 0x7C0C5E5CUL, 0xD1231959UL, 0x381B7298UL,
0xF5D2F4DBUL, 0xAB838653UL, 0x6E2F1E23UL, 0x83719C9EUL,
0xBD91E046UL, 0x9A56456EUL, 0xDC39200CUL, 0x20C8C571UL,
0x962BDA1CUL, 0xE1E696FFUL, 0xB141AB08UL, 0x7CCA89B9UL,
0x1A69E783UL, 0x02CC4843UL, 0xA2F7C579UL, 0x429EF47DUL,
0x427B169CUL, 0x5AC9F049UL, 0xDD8F0F00UL, 0x5C8165BFUL
},
{
0x1F201094UL, 0xEF0BA75BUL, 0x69E3CF7EUL, 0x393F4380UL,
0xFE61CF7AUL, 0xEEC5207AUL, 0x55889C94UL, 0x72FC0651UL,
0xADA7EF79UL, 0x4E1D7235UL, 0xD55A63CEUL, 0xDE0436BAUL,
0x99C430EFUL, 0x5F0C0794UL, 0x18DCDB7DUL, 0xA1D6EFF3UL,
0xA0B52F7BUL, 0x59E83605UL, 0xEE15B094UL, 0xE9FFD909UL,
0xDC440086UL, 0xEF944459UL, 0xBA83CCB3UL, 0xE0C3CDFBUL,
0xD1DA4181UL, 0x3B092AB1UL, 0xF997F1C1UL, 0xA5E6CF7BUL,
0x01420DDBUL, 0xE4E7EF5BUL, 0x25A1FF41UL, 0xE180F806UL,
0x1FC41080UL, 0x179BEE7AUL, 0xD37AC6A9UL, 0xFE5830A4UL,
0x98DE8B7FUL, 0x77E83F4EUL, 0x79929269UL, 0x24FA9F7BUL,
0xE113C85BUL, 0xACC40083UL, 0xD7503525UL, 0xF7EA615FUL,
0x62143154UL, 0x0D554B63UL, 0x5D681121UL, 0xC866C359UL,
0x3D63CF73UL, 0xCEE234C0UL, 0xD4D87E87UL, 0x5C672B21UL,
0x071F6181UL, 0x39F7627FUL, 0x361E3084UL, 0xE4EB573BUL,
0x602F64A4UL, 0xD63ACD9CUL, 0x1BBC4635UL, 0x9E81032DUL,
0x2701F50CUL, 0x99847AB4UL, 0xA0E3DF79UL, 0xBA6CF38CUL,
0x10843094UL, 0x2537A95EUL, 0xF46F6FFEUL, 0xA1FF3B1FUL,
0x208CFB6AUL, 0x8F458C74UL, 0xD9E0A227UL, 0x4EC73A34UL,
0xFC884F69UL, 0x3E4DE8DFUL, 0xEF0E0088UL, 0x3559648DUL,
0x8A45388CUL, 0x1D804366UL, 0x721D9BFDUL, 0xA58684BBUL,
0xE8256333UL, 0x844E8212UL, 0x128D8098UL, 0xFED33FB4UL,
0xCE280AE1UL, 0x27E19BA5UL, 0xD5A6C252UL, 0xE49754BDUL,
0xC5D655DDUL, 0xEB667064UL, 0x77840B4DUL, 0xA1B6A801UL,
0x84DB26A9UL, 0xE0B56714UL, 0x21F043B7UL, 0xE5D05860UL,
0x54F03084UL, 0x066FF472UL, 0xA31AA153UL, 0xDADC4755UL,
0xB5625DBFUL, 0x68561BE6UL, 0x83CA6B94UL, 0x2D6ED23BUL,
0xECCF01DBUL, 0xA6D3D0BAUL, 0xB6803D5CUL, 0xAF77A709UL,
0x33B4A34CUL, 0x397BC8D6UL, 0x5EE22B95UL, 0x5F0E5304UL,
0x81ED6F61UL, 0x20E74364UL, 0xB45E1378UL, 0xDE18639BUL,
0x881CA122UL, 0xB96726D1UL, 0x8049A7E8UL, 0x22B7DA7BUL,
0x5E552D25UL, 0x5272D237UL, 0x79D2951CUL, 0xC60D894CUL,
0x488CB402UL, 0x1BA4FE5BUL, 0xA4B09F6BUL, 0x1CA815CFUL,
0xA20C3005UL, 0x8871DF63UL, 0xB9DE2FCBUL, 0x0CC6C9E9UL,
0x0BEEFF53UL, 0xE3214517UL, 0xB4542835UL, 0x9F63293CUL,
0xEE41E729UL, 0x6E1D2D7CUL, 0x50045286UL, 0x1E6685F3UL,
0xF33401C6UL, 0x30A22C95UL, 0x31A70850UL, 0x60930F13UL,
0x73F98417UL, 0xA1269859UL, 0xEC645C44UL, 0x52C877A9UL,
0xCDFF33A6UL, 0xA02B1741UL, 0x7CBAD9A2UL, 0x2180036FUL,
0x50D99C08UL, 0xCB3F4861UL, 0xC26BD765UL, 0x64A3F6ABUL,
0x80342676UL, 0x25A75E7BUL, 0xE4E6D1FCUL, 0x20C710E6UL,
0xCDF0B680UL, 0x17844D3BUL, 0x31EEF84DUL, 0x7E0824E4UL,
0x2CCB49EBUL, 0x846A3BAEUL, 0x8FF77888UL, 0xEE5D60F6UL,
0x7AF75673UL, 0x2FDD5CDBUL, 0xA11631C1UL, 0x30F66F43UL,
0xB3FAEC54UL, 0x157FD7FAUL, 0xEF8579CCUL, 0xD152DE58UL,
0xDB2FFD5EUL, 0x8F32CE19UL, 0x306AF97AUL, 0x02F03EF8UL,
0x99319AD5UL, 0xC242FA0FUL, 0xA7E3EBB0UL, 0xC68E4906UL,
0xB8DA230CUL, 0x80823028UL, 0xDCDEF3C8UL, 0xD35FB171UL,
0x088A1BC8UL, 0xBEC0C560UL, 0x61A3C9E8UL, 0xBCA8F54DUL,
0xC72FEFFAUL, 0x22822E99UL, 0x82C570B4UL, 0xD8D94E89UL,
0x8B1C34BCUL, 0x301E16E6UL, 0x273BE979UL, 0xB0FFEAA6UL,
0x61D9B8C6UL, 0x00B24869UL, 0xB7FFCE3FUL, 0x08DC283BUL,
0x43DAF65AUL, 0xF7E19798UL, 0x7619B72FUL, 0x8F1C9BA4UL,
0xDC8637A0UL, 0x16A7D3B1UL, 0x9FC393B7UL, 0xA7136EEBUL,
0xC6BCC63EUL, 0x1A513742UL, 0xEF6828BCUL, 0x520365D6UL,
0x2D6A77ABUL, 0x3527ED4BUL, 0x821FD216UL, 0x095C6E2EUL,
0xDB92F2FBUL, 0x5EEA29CBUL, 0x145892F5UL, 0x91584F7FUL,
0x5483697BUL, 0x2667A8CCUL, 0x85196048UL, 0x8C4BACEAUL,
0x833860D4UL, 0x0D23E0F9UL, 0x6C387E8AUL, 0x0AE6D249UL,
0xB284600CUL, 0xD835731DUL, 0xDCB1C647UL, 0xAC4C56EAUL,
0x3EBD81B3UL, 0x230EABB0UL, 0x6438BC87UL, 0xF0B5B1FAUL,
0x8F5EA2B3UL, 0xFC184642UL, 0x0A036B7AUL, 0x4FB089BDUL,
0x649DA589UL, 0xA345415EUL, 0x5C038323UL, 0x3E5D3BB9UL,
0x43D79572UL, 0x7E6DD07CUL, 0x06DFDF1EUL, 0x6C6CC4EFUL,
0x7160A539UL, 0x73BFBE70UL, 0x83877605UL, 0x4523ECF1UL
},
{
0x8DEFC240UL, 0x25FA5D9FUL, 0xEB903DBFUL, 0xE810C907UL,
0x47607FFFUL, 0x369FE44BUL, 0x8C1FC644UL, 0xAECECA90UL,
0xBEB1F9BFUL, 0xEEFBCAEAUL, 0xE8CF1950UL, 0x51DF07AEUL,
0x920E8806UL, 0xF0AD0548UL, 0xE13C8D83UL, 0x927010D5UL,
0x11107D9FUL, 0x07647DB9UL, 0xB2E3E4D4UL, 0x3D4F285EUL,
0xB9AFA820UL, 0xFADE82E0UL, 0xA067268BUL, 0x8272792EUL,
0x553FB2C0UL, 0x489AE22BUL, 0xD4EF9794UL, 0x125E3FBCUL,
0x21FFFCEEUL, 0x825B1BFDUL, 0x9255C5EDUL, 0x1257A240UL,
0x4E1A8302UL, 0xBAE07FFFUL, 0x528246E7UL, 0x8E57140EUL,
0x3373F7BFUL, 0x8C9F8188UL, 0xA6FC4EE8UL, 0xC982B5A5UL,
0xA8C01DB7UL, 0x579FC264UL, 0x67094F31UL, 0xF2BD3F5FUL,
0x40FFF7C1UL, 0x1FB78DFCUL, 0x8E6BD2C1UL, 0x437BE59BUL,
0x99B03DBFUL, 0xB5DBC64BUL, 0x638DC0E6UL, 0x55819D99UL,
0xA197C81CUL, 0x4A012D6EUL, 0xC5884A28UL, 0xCCC36F71UL,
0xB843C213UL, 0x6C0743F1UL, 0x8309893CUL, 0x0FEDDD5FUL,
0x2F7FE850UL, 0xD7C07F7EUL, 0x02507FBFUL, 0x5AFB9A04UL,
0xA747D2D0UL, 0x1651192EUL, 0xAF70BF3EUL, 0x58C31380UL,
0x5F98302EUL, 0x727CC3C4UL, 0x0A0FB402UL, 0x0F7FEF82UL,
0x8C96FDADUL, 0x5D2C2AAEUL, 0x8EE99A49UL, 0x50DA88B8UL,
0x8427F4A0UL, 0x1EAC5790UL, 0x796FB449UL, 0x8252DC15UL,
0xEFBD7D9BUL, 0xA672597DUL, 0xADA840D8UL, 0x45F54504UL,
0xFA5D7403UL, 0xE83EC305UL, 0x4F91751AUL, 0x925669C2UL,
0x23EFE941UL, 0xA903F12EUL, 0x60270DF2UL, 0x0276E4B6UL,
0x94FD6574UL, 0x927985B2UL, 0x8276DBCBUL, 0x02778176UL,
0xF8AF918DUL, 0x4E48F79EUL, 0x8F616DDFUL, 0xE29D840EUL,
0x842F7D83UL, 0x340CE5C8UL, 0x96BBB682UL, 0x93B4B148UL,
0xEF303CABUL, 0x984FAF28UL, 0x779FAF9BUL, 0x92DC560DUL,
0x224D1E20UL, 0x8437AA88UL, 0x7D29DC96UL, 0x2756D3DCUL,
0x8B907CEEUL, 0xB51FD240UL, 0xE7C07CE3UL, 0xE566B4A1UL,
0xC3E9615EUL, 0x3CF8209DUL, 0x6094D1E3UL, 0xCD9CA341UL,
0x5C76460EUL, 0x00EA983BUL, 0xD4D67881UL, 0xFD47572CUL,
0xF76CEDD9UL, 0xBDA8229CUL, 0x127DADAAUL, 0x438A074EUL,
0x1F97C090UL, 0x081BDB8AUL, 0x93A07EBEUL, 0xB938CA15UL,
0x97B03CFFUL, 0x3DC2C0F8UL, 0x8D1AB2ECUL, 0x64380E51UL,
0x68CC7BFBUL, 0xD90F2788UL, 0x12490181UL, 0x5DE5FFD4UL,
0xDD7EF86AUL, 0x76A2E214UL, 0xB9A40368UL, 0x925D958FUL,
0x4B39FFFAUL, 0xBA39AEE9UL, 0xA4FFD30BUL, 0xFAF7933BUL,
0x6D498623UL, 0x193CBCFAUL, 0x27627545UL, 0x825CF47AUL,
0x61BD8BA0UL, 0xD11E42D1UL, 0xCEAD04F4UL, 0x127EA392UL,
0x10428DB7UL, 0x8272A972UL, 0x9270C4A8UL, 0x127DE50BUL,
0x285BA1C8UL, 0x3C62F44FUL, 0x35C0EAA5UL, 0xE805D231UL,
0x428929FBUL, 0xB4FCDF82UL, 0x4FB66A53UL, 0x0E7DC15BUL,
0x1F081FABUL, 0x108618AEUL, 0xFCFD086DUL, 0xF9FF2889UL,
0x694BCC11UL, 0x236A5CAEUL, 0x12DECA4DUL, 0x2C3F8CC5UL,
0xD2D02DFEUL, 0xF8EF5896UL, 0xE4CF52DAUL, 0x95155B67UL,
0x494A488CUL, 0xB9B6A80CUL, 0x5C8F82BCUL, 0x89D36B45UL,
0x3A609437UL, 0xEC00C9A9UL, 0x44715253UL, 0x0A874B49UL,
0xD773BC40UL, 0x7C34671CUL, 0x02717EF6UL, 0x4FEB5536UL,
0xA2D02FFFUL, 0xD2BF60C4UL, 0xD43F03C0UL, 0x50B4EF6DUL,
0x07478CD1UL, 0x006E1888UL, 0xA2E53F55UL, 0xB9E6D4BCUL,
0xA2048016UL, 0x97573833UL, 0xD7207D67UL, 0xDE0F8F3DUL,
0x72F87B33UL, 0xABCC4F33UL, 0x7688C55DUL, 0x7B00A6B0UL,
0x947B0001UL, 0x570075D2UL, 0xF9BB88F8UL, 0x8942019EUL,
0x4264A5FFUL, 0x856302E0UL, 0x72DBD92BUL, 0xEE971B69UL,
0x6EA22FDEUL, 0x5F08AE2BUL, 0xAF7A616DUL, 0xE5C98767UL,
0xCF1FEBD2UL, 0x61EFC8C2UL, 0xF1AC2571UL, 0xCC8239C2UL,
0x67214CB8UL, 0xB1E583D1UL, 0xB7DC3E62UL, 0x7F10BDCEUL,
0xF90A5C38UL, 0x0FF0443DUL, 0x606E6DC6UL, 0x60543A49UL,
0x5727C148UL, 0x2BE98A1DUL, 0x8AB41738UL, 0x20E1BE24UL,
0xAF96DA0FUL, 0x68458425UL, 0x99833BE5UL, 0x600D457DUL,
0x282F9350UL, 0x8334B362UL, 0xD91D1120UL, 0x2B6D8DA0UL,
0x642B1E31UL, 0x9C305A00UL, 0x52BCE688UL, 0x1B03588AUL,
0xF7BAEFD5UL, 0x4142ED9CUL, 0xA4315C11UL, 0x83323EC5UL,
0xDFEF4636UL, 0xA133C501UL, 0xE9D3531CUL, 0xEE353783UL
},
{
0x9DB30420UL, 0x1FB6E9DEUL, 0xA7BE7BEFUL, 0xD273A298UL,
0x4A4F7BDBUL, 0x64AD8C57UL, 0x85510443UL, 0xFA020ED1UL,
0x7E287AFFUL, 0xE60FB663UL, 0x095F35A1UL, 0x79EBF120UL,
0xFD059D43UL, 0x6497B7B1UL, 0xF3641F63UL, 0x241E4ADFUL,
0x28147F5FUL, 0x4FA2B8CDUL, 0xC9430040UL, 0x0CC32220UL,
0xFDD30B30UL, 0xC0A5374FUL, 0x1D2D00D9UL, 0x24147B15UL,
0xEE4D111AUL, 0x0FCA5167UL, 0x71FF904CUL, 0x2D195FFEUL,
0x1A05645FUL, 0x0C13FEFEUL, 0x081B08CAUL, 0x05170121UL,
0x80530100UL, 0xE83E5EFEUL, 0xAC9AF4F8UL, 0x7FE72701UL,
0xD2B8EE5FUL, 0x06DF4261UL, 0xBB9E9B8AUL, 0x7293EA25UL,
0xCE84FFDFUL, 0xF5718801UL, 0x3DD64B04UL, 0xA26F263BUL,
0x7ED48400UL, 0x547EEBE6UL, 0x446D4CA0UL, 0x6CF3D6F5UL,
0x2649ABDFUL, 0xAEA0C7F5UL, 0x36338CC1UL, 0x503F7E93UL,
0xD3772061UL, 0x11B638E1UL, 0x72500E03UL, 0xF80EB2BBUL,
0xABE0502EUL, 0xEC8D77DEUL, 0x57971E81UL, 0xE14F6746UL,
0xC9335400UL, 0x6920318FUL, 0x081DBB99UL, 0xFFC304A5UL,
0x4D351805UL, 0x7F3D5CE3UL, 0xA6C866C6UL, 0x5D5BCCA9UL,
0xDAEC6FEAUL, 0x9F926F91UL, 0x9F46222FUL, 0x3991467DUL,
0xA5BF6D8EUL, 0x1143C44FUL, 0x43958302UL, 0xD0214EEBUL,
0x022083B8UL, 0x3FB6180CUL, 0x18F8931EUL, 0x281658E6UL,
0x26486E3EUL, 0x8BD78A70UL, 0x7477E4C1UL, 0xB506E07CUL,
0xF32D0A25UL, 0x79098B02UL, 0xE4EABB81UL, 0x28123B23UL,
0x69DEAD38UL, 0x1574CA16UL, 0xDF871B62UL, 0x211C40B7UL,
0xA51A9EF9UL, 0x0014377BUL, 0x041E8AC8UL, 0x09114003UL,
0xBD59E4D2UL, 0xE3D156D5UL, 0x4FE876D5UL, 0x2F91A340UL,
0x557BE8DEUL, 0x00EAE4A7UL, 0x0CE5C2ECUL, 0x4DB4BBA6UL,
0xE756BDFFUL, 0xDD3369ACUL, 0xEC17B035UL, 0x06572327UL,
0x99AFC8B0UL, 0x56C8C391UL, 0x6B65811CUL, 0x5E146119UL,
0x6E85CB75UL, 0xBE07C002UL, 0xC2325577UL, 0x893FF4ECUL,
0x5BBFC92DUL, 0xD0EC3B25UL, 0xB7801AB7UL, 0x8D6D3B24UL,
0x20C763EFUL, 0xC366A5FCUL, 0x9C382880UL, 0x0ACE3205UL,
0xAAC9548AUL, 0xECA1D7C7UL, 0x041AFA32UL, 0x1D16625AUL,
0x6701902CUL, 0x9B757A54UL, 0x31D477F7UL, 0x9126B031UL,
0x36CC6FDBUL, 0xC70B8B46UL, 0xD9E66A48UL, 0x56E55A79UL,
0x026A4CEBUL, 0x52437EFFUL, 0x2F8F76B4UL, 0x0DF980A5UL,
0x8674CDE3UL, 0xEDDA04EBUL, 0x17A9BE04UL, 0x2C18F4DFUL,
0xB7747F9DUL, 0xAB2AF7B4UL, 0xEFC34D20UL, 0x2E096B7CUL,
0x1741A254UL, 0xE5B6A035UL, 0x213D42F6UL, 0x2C1C7C26UL,
0x61C2F50FUL, 0x6552DAF9UL, 0xD2C231F8UL, 0x25130F69UL,
0xD8167FA2UL, 0x0418F2C8UL, 0x001A96A6UL, 0x0D1526ABUL,
0x63315C21UL, 0x5E0A72ECUL, 0x49BAFEFDUL, 0x187908D9UL,
0x8D0DBD86UL, 0x311170A7UL, 0x3E9B640CUL, 0xCC3E10D7UL,
0xD5CAD3B6UL, 0x0CAEC388UL, 0xF73001E1UL, 0x6C728AFFUL,
0x71EAE2A1UL, 0x1F9AF36EUL, 0xCFCBD12FUL, 0xC1DE8417UL,
0xAC07BE6BUL, 0xCB44A1D8UL, 0x8B9B0F56UL, 0x013988C3UL,
0xB1C52FCAUL, 0xB4BE31CDUL, 0xD8782806UL, 0x12A3A4E2UL,
0x6F7DE532UL, 0x58FD7EB6UL, 0xD01EE900UL, 0x24ADFFC2UL,
0xF4990FC5UL, 0x9711AAC5UL, 0x001D7B95UL, 0x82E5E7D2UL,
0x109873F6UL, 0x00613096UL, 0xC32D9521UL, 0xADA121FFUL,
0x29908415UL, 0x7FBB977FUL, 0xAF9EB3DBUL, 0x29C9ED2AUL,
0x5CE2A465UL, 0xA730F32CUL, 0xD0AA3FE8UL, 0x8A5CC091UL,
0xD49E2CE7UL, 0x0CE454A9UL, 0xD60ACD86UL, 0x015F1919UL,
0x77079103UL, 0xDEA03AF6UL, 0x78A8565EUL, 0xDEE356DFUL,
0x21F05CBEUL, 0x8B75E387UL, 0xB3C50651UL, 0xB8A5C3EFUL,
0xD8EEB6D2UL, 0xE523BE77UL, 0xC2154529UL, 0x2F69EFDFUL,
0xAFE67AFBUL, 0xF470C4B2UL, 0xF3E0EB5BUL, 0xD6CC9876UL,
0x39E4460CUL, 0x1FDA8538UL, 0x1987832FUL, 0xCA007367UL,
0xA99144F8UL, 0x296B299EUL, 0x492FC295UL, 0x9266BEABUL,
0xB5676E69UL, 0x9BD3DDDAUL, 0xDF7E052FUL, 0xDB25701CUL,
0x1B5E51EEUL, 0xF65324E6UL, 0x6AFCE36CUL, 0x0316CC04UL,
0x8644213EUL, 0xB7DC59D0UL, 0x7965291FUL, 0xCCD6FD43UL,
0x41823979UL, 0x932BCDF6UL, 0xB657C34DUL, 0x4EDFD282UL,
0x7AE5290CUL, 0x3CB9536BUL, 0x851E20FEUL, 0x9833557EUL,
0x13ECF0B0UL, 0xD3FFB372UL, 0x3F85C5C1UL, 0x0AEF7ED2UL
},
{
0x7EC90C04UL, 0x2C6E74B9UL, 0x9B0E66DFUL, 0xA6337911UL,
0xB86A7FFFUL, 0x1DD358F5UL, 0x44DD9D44UL, 0x1731167FUL,
0x08FBF1FAUL, 0xE7F511CCUL, 0xD2051B00UL, 0x735ABA00UL,
0x2AB722D8UL, 0x386381CBUL, 0xACF6243AUL, 0x69BEFD7AUL,
0xE6A2E77FUL, 0xF0C720CDUL, 0xC4494816UL, 0xCCF5C180UL,
0x38851640UL, 0x15B0A848UL, 0xE68B18CBUL, 0x4CAADEFFUL,
0x5F480A01UL, 0x0412B2AAUL, 0x259814FCUL, 0x41D0EFE2UL,
0x4E40B48DUL, 0x248EB6FBUL, 0x8DBA1CFEUL, 0x41A99B02UL,
0x1A550A04UL, 0xBA8F65CBUL, 0x7251F4E7UL, 0x95A51725UL,
0xC106ECD7UL, 0x97A5980AUL, 0xC539B9AAUL, 0x4D79FE6AUL,
0xF2F3F763UL, 0x68AF8040UL, 0xED0C9E56UL, 0x11B4958BUL,
0xE1EB5A88UL, 0x8709E6B0UL, 0xD7E07156UL, 0x4E29FEA7UL,
0x6366E52DUL, 0x02D1C000UL, 0xC4AC8E05UL, 0x9377F571UL,
0x0C05372AUL, 0x578535F2UL, 0x2261BE02UL, 0xD642A0C9UL,
0xDF13A280UL, 0x74B55BD2UL, 0x682199C0UL, 0xD421E5ECUL,
0x53FB3CE8UL, 0xC8ADEDB3UL, 0x28A87FC9UL, 0x3D959981UL,
0x5C1FF900UL, 0xFE38D399UL, 0x0C4EFF0BUL, 0x062407EAUL,
0xAA2F4FB1UL, 0x4FB96976UL, 0x90C79505UL, 0xB0A8A774UL,
0xEF55A1FFUL, 0xE59CA2C2UL, 0xA6B62D27UL, 0xE66A4263UL,
0xDF65001FUL, 0x0EC50966UL, 0xDFDD55BCUL, 0x29DE0655UL,
0x911E739AUL, 0x17AF8975UL, 0x32C7911CUL, 0x89F89468UL,
0x0D01E980UL, 0x524755F4UL, 0x03B63CC9UL, 0x0CC844B2UL,
0xBCF3F0AAUL, 0x87AC36E9UL, 0xE53A7426UL, 0x01B3D82BUL,
0x1A9E7449UL, 0x64EE2D7EUL, 0xCDDBB1DAUL, 0x01C94910UL,
0xB868BF80UL, 0x0D26F3FDUL, 0x9342EDE7UL, 0x04A5C284UL,
0x636737B6UL, 0x50F5B616UL, 0xF24766E3UL, 0x8ECA36C1UL,
0x136E05DBUL, 0xFEF18391UL, 0xFB887A37UL, 0xD6E7F7D4UL,
0xC7FB7DC9UL, 0x3063FCDFUL, 0xB6F589DEUL, 0xEC2941DAUL,
0x26E46695UL, 0xB7566419UL, 0xF654EFC5UL, 0xD08D58B7UL,
0x48925401UL, 0xC1BACB7FUL, 0xE5FF550FUL, 0xB6083049UL,
0x5BB5D0E8UL, 0x87D72E5AUL, 0xAB6A6EE1UL, 0x223A66CEUL,
0xC62BF3CDUL, 0x9E0885F9UL, 0x68CB3E47UL, 0x086C010FUL,
0xA21DE820UL, 0xD18B69DEUL, 0xF3F65777UL, 0xFA02C3F6UL,
0x407EDAC3UL, 0xCBB3D550UL, 0x1793084DUL, 0xB0D70EBAUL,
0x0AB378D5UL, 0xD951FB0CUL, 0xDED7DA56UL, 0x4124BBE4UL,
0x94CA0B56UL, 0x0F5755D1UL, 0xE0E1E56EUL, 0x6184B5BEUL,
0x580A249FUL, 0x94F74BC0UL, 0xE327888EUL, 0x9F7B5561UL,
0xC3DC0280UL, 0x05687715UL, 0x646C6BD7UL, 0x44904DB3UL,
0x66B4F0A3UL, 0xC0F1648AUL, 0x697ED5AFUL, 0x49E92FF6UL,
0x309E374FUL, 0x2CB6356AUL, 0x85808573UL, 0x4991F840UL,
0x76F0AE02UL, 0x083BE84DUL, 0x28421C9AUL, 0x44489406UL,
0x736E4CB8UL, 0xC1092910UL, 0x8BC95FC6UL, 0x7D869CF4UL,
0x134F616FUL, 0x2E77118DUL, 0xB31B2BE1UL, 0xAA90B472UL,
0x3CA5D717UL, 0x7D161BBAUL, 0x9CAD9010UL, 0xAF462BA2UL,
0x9FE459D2UL, 0x45D34559UL, 0xD9F2DA13UL, 0xDBC65487UL,
0xF3E4F94EUL, 0x176D486FUL, 0x097C13EAUL, 0x631DA5C7UL,
0x445F7382UL, 0x175683F4UL, 0xCDC66A97UL, 0x70BE0288UL,
0xB3CDCF72UL, 0x6E5DD2F3UL, 0x20936079UL, 0x459B80A5UL,
0xBE60E2DBUL, 0xA9C23101UL, 0xEBA5315CUL, 0x224E42F2UL,
0x1C5C1572UL, 0xF6721B2CUL, 0x1AD2FFF3UL, 0x8C25404EUL,
0x324ED72FUL, 0x4067B7FDUL, 0x0523138EUL, 0x5CA3BC78UL,
0xDC0FD66EUL, 0x75922283UL, 0x784D6B17UL, 0x58EBB16EUL,
0x44094F85UL, 0x3F481D87UL, 0xFCFEAE7BUL, 0x77B5FF76UL,
0x8C2302BFUL, 0xAAF47556UL, 0x5F46B02AUL, 0x2B092801UL,
0x3D38F5F7UL, 0x0CA81F36UL, 0x52AF4A8AUL, 0x66D5E7C0UL,
0xDF3B0874UL, 0x95055110UL, 0x1B5AD7A8UL, 0xF61ED5ADUL,
0x6CF6E479UL, 0x20758184UL, 0xD0CEFA65UL, 0x88F7BE58UL,
0x4A046826UL, 0x0FF6F8F3UL, 0xA09C7F70UL, 0x5346ABA0UL,
0x5CE96C28UL, 0xE176EDA3UL, 0x6BAC307FUL, 0x376829D2UL,
0x85360FA9UL, 0x17E3FE2AUL, 0x24B79767UL, 0xF5A96B20UL,
0xD6CD2595UL, 0x68FF1EBFUL, 0x7555442CUL, 0xF19F06BEUL,
0xF9E0659AUL, 0xEEB9491DUL, 0x34010718UL, 0xBB30CAB8UL,
0xE822FE15UL, 0x88570983UL, 0x750E6249UL, 0xDA627E55UL,
0x5E76FFA8UL, 0xB1534546UL, 0x6D47DE08UL, 0xEFE9E7D4UL
},
{
0xF6FA8F9DUL, 0x2CAC6CE1UL, 0x4CA34867UL, 0xE2337F7CUL,
0x95DB08E7UL, 0x016843B4UL, 0xECED5CBCUL, 0x325553ACUL,
0xBF9F0960UL, 0xDFA1E2EDUL, 0x83F0579DUL, 0x63ED86B9UL,
0x1AB6A6B8UL, 0xDE5EBE39UL, 0xF38FF732UL, 0x8989B138UL,
0x33F14961UL, 0xC01937BDUL, 0xF506C6DAUL, 0xE4625E7EUL,
0xA308EA99UL, 0x4E23E33CUL, 0x79CBD7CCUL, 0x48A14367UL,
0xA3149619UL, 0xFEC94BD5UL, 0xA114174AUL, 0xEAA01866UL,
0xA084DB2DUL, 0x09A8486FUL, 0xA888614AUL, 0x2900AF98UL,
0x01665991UL, 0xE1992863UL, 0xC8F30C60UL, 0x2E78EF3CUL,
0xD0D51932UL, 0xCF0FEC14UL, 0xF7CA07D2UL, 0xD0A82072UL,
0xFD41197EUL, 0x9305A6B0UL, 0xE86BE3DAUL, 0x74BED3CDUL,
0x372DA53CUL, 0x4C7F4448UL, 0xDAB5D440UL, 0x6DBA0EC3UL,
0x083919A7UL, 0x9FBAEED9UL, 0x49DBCFB0UL, 0x4E670C53UL,
0x5C3D9C01UL, 0x64BDB941UL, 0x2C0E636AUL, 0xBA7DD9CDUL,
0xEA6F7388UL, 0xE70BC762UL, 0x35F29ADBUL, 0x5C4CDD8DUL,
0xF0D48D8CUL, 0xB88153E2UL, 0x08A19866UL, 0x1AE2EAC8UL,
0x284CAF89UL, 0xAA928223UL, 0x9334BE53UL, 0x3B3A21BFUL,
0x16434BE3UL, 0x9AEA3906UL, 0xEFE8C36EUL, 0xF890CDD9UL,
0x80226DAEUL, 0xC340A4A3UL, 0xDF7E9C09UL, 0xA694A807UL,
0x5B7C5ECCUL, 0x221DB3A6UL, 0x9A69A02FUL, 0x68818A54UL,
0xCEB2296FUL, 0x53C0843AUL, 0xFE893655UL, 0x25BFE68AUL,
0xB4628ABCUL, 0xCF222EBFUL, 0x25AC6F48UL, 0xA9A99387UL,
0x53BDDB65UL, 0xE76FFBE7UL, 0xE967FD78UL, 0x0BA93563UL,
0x8E342BC1UL, 0xE8A11BE9UL, 0x4980740DUL, 0xC8087DFCUL,
0x8DE4BF99UL, 0xA11101A0UL, 0x7FD37975UL, 0xDA5A26C0UL,
0xE81F994FUL, 0x9528CD89UL, 0xFD339FEDUL, 0xB87834BFUL,
0x5F04456DUL, 0x22258698UL, 0xC9C4C83BUL, 0x2DC156BEUL,
0x4F628DAAUL, 0x57F55EC5UL, 0xE2220ABEUL, 0xD2916EBFUL,
0x4EC75B95UL, 0x24F2C3C0UL, 0x42D15D99UL, 0xCD0D7FA0UL,
0x7B6E27FFUL, 0xA8DC8AF0UL, 0x7345C106UL, 0xF41E232FUL,
0x35162386UL, 0xE6EA8926UL, 0x3333B094UL, 0x157EC6F2UL,
0x372B74AFUL, 0x692573E4UL, 0xE9A9D848UL, 0xF3160289UL,
0x3A62EF1DUL, 0xA787E238UL, 0xF3A5F676UL, 0x74364853UL,
0x20951063UL, 0x4576698DUL, 0xB6FAD407UL, 0x592AF950UL,
0x36F73523UL, 0x4CFB6E87UL, 0x7DA4CEC0UL, 0x6C152DAAUL,
0xCB0396A8UL, 0xC50DFE5DUL, 0xFCD707ABUL, 0x0921C42FUL,
0x89DFF0BBUL, 0x5FE2BE78UL, 0x448F4F33UL, 0x754613C9UL,
0x2B05D08DUL, 0x48B9D585UL, 0xDC049441UL, 0xC8098F9BUL,
0x7DEDE786UL, 0xC39A3373UL, 0x42410005UL, 0x6A091751UL,
0x0EF3C8A6UL, 0x890072D6UL, 0x28207682UL, 0xA9A9F7BEUL,
0xBF32679DUL, 0xD45B5B75UL, 0xB353FD00UL, 0xCBB0E358UL,
0x830F220AUL, 0x1F8FB214UL, 0xD372CF08UL, 0xCC3C4A13UL,
0x8CF63166UL, 0x061C87BEUL, 0x88C98F88UL, 0x6062E397UL,
0x47CF8E7AUL, 0xB6C85283UL, 0x3CC2ACFBUL, 0x3FC06976UL,
0x4E8F0252UL, 0x64D8314DUL, 0xDA3870E3UL, 0x1E665459UL,
0xC10908F0UL, 0x513021A5UL, 0x6C5B68B7UL, 0x822F8AA0UL,
0x3007CD3EUL, 0x74719EEFUL, 0xDC872681UL, 0x073340D4UL,
0x7E432FD9UL, 0x0C5EC241UL, 0x8809286CUL, 0xF592D891UL,
0x08A930F6UL, 0x957EF305UL, 0xB7FBFFBDUL, 0xC266E96FUL,
0x6FE4AC98UL, 0xB173ECC0UL, 0xBC60B42AUL, 0x953498DAUL,
0xFBA1AE12UL, 0x2D4BD736UL, 0x0F25FAABUL, 0xA4F3FCEBUL,
0xE2969123UL, 0x257F0C3DUL, 0x9348AF49UL, 0x361400BCUL,
0xE8816F4AUL, 0x3814F200UL, 0xA3F94043UL, 0x9C7A54C2UL,
0xBC704F57UL, 0xDA41E7F9UL, 0xC25AD33AUL, 0x54F4A084UL,
0xB17F5505UL, 0x59357CBEUL, 0xEDBD15C8UL, 0x7F97C5ABUL,
0xBA5AC7B5UL, 0xB6F6DEAFUL, 0x3A479C3AUL, 0x5302DA25UL,
0x653D7E6AUL, 0x54268D49UL, 0x51A477EAUL, 0x5017D55BUL,
0xD7D25D88UL, 0x44136C76UL, 0x0404A8C8UL, 0xB8E5A121UL,
0xB81A928AUL, 0x60ED5869UL, 0x97C55B96UL, 0xEAEC991BUL,
0x29935913UL, 0x01FDB7F1UL, 0x088E8DFAUL, 0x9AB6F6F5UL,
0x3B4CBF9FUL, 0x4A5DE3ABUL, 0xE6051D35UL, 0xA0E1D855UL,
0xD36B4CF1UL, 0xF544EDEBUL, 0xB0E93524UL, 0xBEBB8FBDUL,
0xA2D762CFUL, 0x49C92F54UL, 0x38B5F331UL, 0x7128A454UL,
0x48392905UL, 0xA65B1DB8UL, 0x851C97BDUL, 0xD675CF2FUL
},
{
0x85E04019UL, 0x332BF567UL, 0x662DBFFFUL, 0xCFC65693UL,
0x2A8D7F6FUL, 0xAB9BC912UL, 0xDE6008A1UL, 0x2028DA1FUL,
0x0227BCE7UL, 0x4D642916UL, 0x18FAC300UL, 0x50F18B82UL,
0x2CB2CB11UL, 0xB232E75CUL, 0x4B3695F2UL, 0xB28707DEUL,
0xA05FBCF6UL, 0xCD4181E9UL, 0xE150210CUL, 0xE24EF1BDUL,
0xB168C381UL, 0xFDE4E789UL, 0x5C79B0D8UL, 0x1E8BFD43UL,
0x4D495001UL, 0x38BE4341UL, 0x913CEE1DUL, 0x92A79C3FUL,
0x089766BEUL, 0xBAEEADF4UL, 0x1286BECFUL, 0xB6EACB19UL,
0x2660C200UL, 0x7565BDE4UL, 0x64241F7AUL, 0x8248DCA9UL,
0xC3B3AD66UL, 0x28136086UL, 0x0BD8DFA8UL, 0x356D1CF2UL,
0x107789BEUL, 0xB3B2E9CEUL, 0x0502AA8FUL, 0x0BC0351EUL,
0x166BF52AUL, 0xEB12FF82UL, 0xE3486911UL, 0xD34D7516UL,
0x4E7B3AFFUL, 0x5F43671BUL, 0x9CF6E037UL, 0x4981AC83UL,
0x334266CEUL, 0x8C9341B7UL, 0xD0D854C0UL, 0xCB3A6C88UL,
0x47BC2829UL, 0x4725BA37UL, 0xA66AD22BUL, 0x7AD61F1EUL,
0x0C5CBAFAUL, 0x4437F107UL, 0xB6E79962UL, 0x42D2D816UL,
0x0A961288UL, 0xE1A5C06EUL, 0x13749E67UL, 0x72FC081AUL,
0xB1D139F7UL, 0xF9583745UL, 0xCF19DF58UL, 0xBEC3F756UL,
0xC06EBA30UL, 0x07211B24UL, 0x45C28829UL, 0xC95E317FUL,
0xBC8EC511UL, 0x38BC46E9UL, 0xC6E6FA14UL, 0xBAE8584AUL,
0xAD4EBC46UL, 0x468F508BUL, 0x7829435FUL, 0xF124183BUL,
0x821DBA9FUL, 0xAFF60FF4UL, 0xEA2C4E6DUL, 0x16E39264UL,
0x92544A8BUL, 0x009B4FC3UL, 0xABA68CEDUL, 0x9AC96F78UL,
0x06A5B79AUL, 0xB2856E6EUL, 0x1AEC3CA9UL, 0xBE838688UL,
0x0E0804E9UL, 0x55F1BE56UL, 0xE7E5363BUL, 0xB3A1F25DUL,
0xF7DEBB85UL, 0x61FE033CUL, 0x16746233UL, 0x3C034C28UL,
0xDA6D0C74UL, 0x79AAC56CUL, 0x3CE4E1ADUL, 0x51F0C802UL,
0x98F8F35AUL, 0x1626A49FUL, 0xEED82B29UL, 0x1D382FE3UL,
0x0C4FB99AUL, 0xBB325778UL, 0x3EC6D97BUL, 0x6E77A6A9UL,
0xCB658B5CUL, 0xD45230C7UL, 0x2BD1408BUL, 0x60C03EB7UL,
0xB9068D78UL, 0xA33754F4UL, 0xF430C87DUL, 0xC8A71302UL,
0xB96D8C32UL, 0xEBD4E7BEUL, 0xBE8B9D2DUL, 0x7979FB06UL,
0xE7225308UL, 0x8B75CF77UL, 0x11EF8DA4UL, 0xE083C858UL,
0x8D6B786FUL, 0x5A6317A6UL, 0xFA5CF7A0UL, 0x5DDA0033UL,
0xF28EBFB0UL, 0xF5B9C310UL, 0xA0EAC280UL, 0x08B9767AUL,
0xA3D9D2B0UL, 0x79D34217UL, 0x021A718DUL, 0x9AC6336AUL,
0x2711FD60UL, 0x438050E3UL, 0x069908A8UL, 0x3D7FEDC4UL,
0x826D2BEFUL, 0x4EEB8476UL, 0x488DCF25UL, 0x36C9D566UL,
0x28E74E41UL, 0xC2610ACAUL, 0x3D49A9CFUL, 0xBAE3B9DFUL,
0xB65F8DE6UL, 0x92AEAF64UL, 0x3AC7D5E6UL, 0x9EA80509UL,
0xF22B017DUL, 0xA4173F70UL, 0xDD1E16C3UL, 0x15E0D7F9UL,
0x50B1B887UL, 0x2B9F4FD5UL, 0x625ABA82UL, 0x6A017962UL,
0x2EC01B9CUL, 0x15488AA9UL, 0xD716E740UL, 0x40055A2CUL,
0x93D29A22UL, 0xE32DBF9AUL, 0x058745B9UL, 0x3453DC1EUL,
0xD699296EUL, 0x496CFF6FUL, 0x1C9F4986UL, 0xDFE2ED07UL,
0xB87242D1UL, 0x19DE7EAEUL, 0x053E561AUL, 0x15AD6F8CUL,
0x66626C1CUL, 0x7154C24CUL, 0xEA082B2AUL, 0x93EB2939UL,
0x17DCB0F0UL, 0x58D4F2AEUL, 0x9EA294FBUL, 0x52CF564CUL,
0x9883FE66UL, 0x2EC40581UL, 0x763953C3UL, 0x01D6692EUL,
0xD3A0C108UL, 0xA1E7160EUL, 0xE4F2DFA6UL, 0x693ED285UL,
0x74904698UL, 0x4C2B0EDDUL, 0x4F757656UL, 0x5D393378UL,
0xA132234FUL, 0x3D321C5DUL, 0xC3F5E194UL, 0x4B269301UL,
0xC79F022FUL, 0x3C997E7EUL, 0x5E4F9504UL, 0x3FFAFBBDUL,
0x76F7AD0EUL, 0x296693F4UL, 0x3D1FCE6FUL, 0xC61E45BEUL,
0xD3B5AB34UL, 0xF72BF9B7UL, 0x1B0434C0UL, 0x4E72B567UL,
0x5592A33DUL, 0xB5229301UL, 0xCFD2A87FUL, 0x60AEB767UL,
0x1814386BUL, 0x30BCC33DUL, 0x38A0C07DUL, 0xFD1606F2UL,
0xC363519BUL, 0x589DD390UL, 0x5479F8E6UL, 0x1CB8D647UL,
0x97FD61A9UL, 0xEA7759F4UL, 0x2D57539DUL, 0x569A58CFUL,
0xE84E63ADUL, 0x462E1B78UL, 0x6580F87EUL, 0xF3817914UL,
0x91DA55F4UL, 0x40A230F3UL, 0xD1988F35UL, 0xB6E318D2UL,
0x3FFA50BCUL, 0x3D40F021UL, 0xC3C0BDAEUL, 0x4958C24CUL,
0x518F36B2UL, 0x84B1D370UL, 0x0FEDCE83UL, 0x878DDADAUL,
0xF2A279C7UL, 0x94E01BE8UL, 0x90716F4BUL, 0x954B8AA3UL
},
{
0xE216300DUL, 0xBBDDFFFCUL, 0xA7EBDABDUL, 0x35648095UL,
0x7789F8B7UL, 0xE6C1121BUL, 0x0E241600UL, 0x052CE8B5UL,
0x11A9CFB0UL, 0xE5952F11UL, 0xECE7990AUL, 0x9386D174UL,
0x2A42931CUL, 0x76E38111UL, 0xB12DEF3AUL, 0x37DDDDFCUL,
0xDE9ADEB1UL, 0x0A0CC32CUL, 0xBE197029UL, 0x84A00940UL,
0xBB243A0FUL, 0xB4D137CFUL, 0xB44E79F0UL, 0x049EEDFDUL,
0x0B15A15DUL, 0x480D3168UL, 0x8BBBDE5AUL, 0x669DED42UL,
0xC7ECE831UL, 0x3F8F95E7UL, 0x72DF191BUL, 0x7580330DUL,
0x94074251UL, 0x5C7DCDFAUL, 0xABBE6D63UL, 0xAA402164UL,
0xB301D40AUL, 0x02E7D1CAUL, 0x53571DAEUL, 0x7A3182A2UL,
0x12A8DDECUL, 0xFDAA335DUL, 0x176F43E8UL, 0x71FB46D4UL,
0x38129022UL, 0xCE949AD4UL, 0xB84769ADUL, 0x965BD862UL,
0x82F3D055UL, 0x66FB9767UL, 0x15B80B4EUL, 0x1D5B47A0UL,
0x4CFDE06FUL, 0xC28EC4B8UL, 0x57E8726EUL, 0x647A78FCUL,
0x99865D44UL, 0x608BD593UL, 0x6C200E03UL, 0x39DC5FF6UL,
0x5D0B00A3UL, 0xAE63AFF2UL, 0x7E8BD632UL, 0x70108C0CUL,
0xBBD35049UL, 0x2998DF04UL, 0x980CF42AUL, 0x9B6DF491UL,
0x9E7EDD53UL, 0x06918548UL, 0x58CB7E07UL, 0x3B74EF2EUL,
0x522FFFB1UL, 0xD24708CCUL, 0x1C7E27CDUL, 0xA4EB215BUL,
0x3CF1D2E2UL, 0x19B47A38UL, 0x424F7618UL, 0x35856039UL,
0x9D17DEE7UL, 0x27EB35E6UL, 0xC9AFF67BUL, 0x36BAF5B8UL,
0x09C467CDUL, 0xC18910B1UL, 0xE11DBF7BUL, 0x06CD1AF8UL,
0x7170C608UL, 0x2D5E3354UL, 0xD4DE495AUL, 0x64C6D006UL,
0xBCC0C62CUL, 0x3DD00DB3UL, 0x708F8F34UL, 0x77D51B42UL,
0x264F620FUL, 0x24B8D2BFUL, 0x15C1B79EUL, 0x46A52564UL,
0xF8D7E54EUL, 0x3E378160UL, 0x7895CDA5UL, 0x859C15A5UL,
0xE6459788UL, 0xC37BC75FUL, 0xDB07BA0CUL, 0x0676A3ABUL,
0x7F229B1EUL, 0x31842E7BUL, 0x24259FD7UL, 0xF8BEF472UL,
0x835FFCB8UL, 0x6DF4C1F2UL, 0x96F5B195UL, 0xFD0AF0FCUL,
0xB0FE134CUL, 0xE2506D3DUL, 0x4F9B12EAUL, 0xF215F225UL,
0xA223736FUL, 0x9FB4C428UL, 0x25D04979UL, 0x34C713F8UL,
0xC4618187UL, 0xEA7A6E98UL, 0x7CD16EFCUL, 0x1436876CUL,
0xF1544107UL, 0xBEDEEE14UL, 0x56E9AF27UL, 0xA04AA441UL,
0x3CF7C899UL, 0x92ECBAE6UL, 0xDD67016DUL, 0x151682EBUL,
0xA842EEDFUL, 0xFDBA60B4UL, 0xF1907B75UL, 0x20E3030FUL,
0x24D8C29EUL, 0xE139673BUL, 0xEFA63FB8UL, 0x71873054UL,
0xB6F2CF3BUL, 0x9F326442UL, 0xCB15A4CCUL, 0xB01A4504UL,
0xF1E47D8DUL, 0x844A1BE5UL, 0xBAE7DFDCUL, 0x42CBDA70UL,
0xCD7DAE0AUL, 0x57E85B7AUL, 0xD53F5AF6UL, 0x20CF4D8CUL,
0xCEA4D428UL, 0x79D130A4UL, 0x3486EBFBUL, 0x33D3CDDCUL,
0x77853B53UL, 0x37EFFCB5UL, 0xC5068778UL, 0xE580B3E6UL,
0x4E68B8F4UL, 0xC5C8B37EUL, 0x0D809EA2UL, 0x398FEB7CUL,
0x132A4F94UL, 0x43B7950EUL, 0x2FEE7D1CUL, 0x223613BDUL,
0xDD06CAA2UL, 0x37DF932BUL, 0xC4248289UL, 0xACF3EBC3UL,
0x5715F6B7UL, 0xEF3478DDUL, 0xF267616FUL, 0xC148CBE4UL,
0x9052815EUL, 0x5E410FABUL, 0xB48A2465UL, 0x2EDA7FA4UL,
0xE87B40E4UL, 0xE98EA084UL, 0x5889E9E1UL, 0xEFD390FCUL,
0xDD07D35BUL, 0xDB485694UL, 0x38D7E5B2UL, 0x57720101UL,
0x730EDEBCUL, 0x5B643113UL, 0x94917E4FUL, 0x503C2FBAUL,
0x646F1282UL, 0x7523D24AUL, 0xE0779695UL, 0xF9C17A8FUL,
0x7A5B2121UL, 0xD187B896UL, 0x29263A4DUL, 0xBA510CDFUL,
0x81F47C9FUL, 0xAD1163EDUL, 0xEA7B5965UL, 0x1A00726EUL,
0x11403092UL, 0x00DA6D77UL, 0x4A0CDD61UL, 0xAD1F4603UL,
0x605BDFB0UL, 0x9EEDC364UL, 0x22EBE6A8UL, 0xCEE7D28AUL,
0xA0E736A0UL, 0x5564A6B9UL, 0x10853209UL, 0xC7EB8F37UL,
0x2DE705CAUL, 0x8951570FUL, 0xDF09822BUL, 0xBD691A6CUL,
0xAA12E4F2UL, 0x87451C0FUL, 0xE0F6A27AUL, 0x3ADA4819UL,
0x4CF1764FUL, 0x0D771C2BUL, 0x67CDB156UL, 0x350D8384UL,
0x5938FA0FUL, 0x42399EF3UL, 0x36997B07UL, 0x0E84093DUL,
0x4AA93E61UL, 0x8360D87BUL, 0x1FA98B0CUL, 0x1149382CUL,
0xE97625A5UL, 0x0614D1B7UL, 0x0E25244BUL, 0x0C768347UL,
0x589E8D82UL, 0x0D2059D1UL, 0xA466BB1EUL, 0xF8DA0A82UL,
0x04F19130UL, 0xBA6E4EC0UL, 0x99265164UL, 0x1EE7230DUL,
0x50B2AD80UL, 0xEAEE6801UL, 0x8DB2A283UL, 0xEA8BF59EUL
}
};
/*
 * CAST uses three different round functions.
 *
 * Each _CAST_fN(l, r, km, kr) combines the masking key km with r (add for
 * f1, xor for f2, subtract for f3), passes the result and kr to rotl32_S
 * (defined elsewhere; presumably a 32-bit rotate-left), stores it in t,
 * then mixes four S-box lookups selected by the bytes of t into l.
 *
 * The macros are not self-contained: the expansion site must have in scope
 *   t   - a uint scratch variable (overwritten on every use),
 *   s_S - the S-box tables, indexed as s_S[0..3][byte],
 *   K   - the key schedule (only needed by the _CAST_FN wrappers).
 *
 * Each body is wrapped in do { } while (0) so that one invocation is one
 * statement (safe under an unbraced if/else); callers supply the trailing
 * semicolon. Arguments are parenthesized so that compound expressions can
 * be passed safely.
 */
#define _CAST_f1(l, r, km, kr)                              \
  do                                                        \
  {                                                         \
    t = rotl32_S ((km) + (r), (kr));                        \
    (l) ^= ((s_S[0][U8a (t)] ^ s_S[1][U8b (t)])             \
          - s_S[2][U8c (t)]) + s_S[3][U8d (t)];             \
  } while (0)
#define _CAST_f2(l, r, km, kr)                              \
  do                                                        \
  {                                                         \
    t = rotl32_S ((km) ^ (r), (kr));                        \
    (l) ^= ((s_S[0][U8a (t)] - s_S[1][U8b (t)])             \
          + s_S[2][U8c (t)]) ^ s_S[3][U8d (t)];             \
  } while (0)
#define _CAST_f3(l, r, km, kr)                              \
  do                                                        \
  {                                                         \
    t = rotl32_S ((km) - (r), (kr));                        \
    (l) ^= ((s_S[0][U8a (t)] + s_S[1][U8b (t)])             \
          ^ s_S[2][U8c (t)]) - s_S[3][U8d (t)];             \
  } while (0)
/* Round i of type N: masking key K[i], rotation key K[i + j]. */
#define _CAST_F1(l, r, i, j) _CAST_f1 (l, r, K[(i)], K[(i) + (j)])
#define _CAST_F2(l, r, i, j) _CAST_f2 (l, r, K[(i)], K[(i) + (j)])
#define _CAST_F3(l, r, i, j) _CAST_f3 (l, r, K[(i)], K[(i) + (j)])
/*
 * Cast5Encrypt - run the 16 CAST round steps over one 8-byte block.
 *
 * inBlock   in:  two 32-bit words are read from offsets 0 and 4 with GET_UINT32BE.
 * outBlock  out: two 32-bit words are written to offsets 0 and 4 with PUT_UINT32BE.
 * key       in:  key schedule; key->K[i] and key->K[i + 16] are used by round i.
 * s_S       in:  lookup tables referenced by name inside the round macros.
 */
inline void Cast5Encrypt(PRIVATE_AS const u8 *inBlock, PRIVATE_AS u8 *outBlock, PRIVATE_AS CAST_KEY *key, SHM_TYPE u32 (*s_S)[256])
{
uint l; GET_UINT32BE(l, inBlock, 0);
uint r; GET_UINT32BE(r, inBlock, 4);
/* K and t are referenced by name from the _CAST_F* macros */
PRIVATE_AS uint *K = key->K;
uint t;
/* Do the work */
/* rounds 0..15: the round function cycles F1, F2, F3 and the updated half alternates l, r */
_CAST_F1(l, r, 0, 16);
_CAST_F2(r, l, 1, 16);
_CAST_F3(l, r, 2, 16);
_CAST_F1(r, l, 3, 16);
_CAST_F2(l, r, 4, 16);
_CAST_F3(r, l, 5, 16);
_CAST_F1(l, r, 6, 16);
_CAST_F2(r, l, 7, 16);
_CAST_F3(l, r, 8, 16);
_CAST_F1(r, l, 9, 16);
_CAST_F2(l, r, 10, 16);
_CAST_F3(r, l, 11, 16);
_CAST_F1(l, r, 12, 16);
_CAST_F2(r, l, 13, 16);
_CAST_F3(l, r, 14, 16);
_CAST_F1(r, l, 15, 16);
/* Put l,r into outblock */
/* the halves are written swapped: r goes to offset 0, l to offset 4 */
PUT_UINT32BE(r, outBlock, 0);
PUT_UINT32BE(l, outBlock, 4);
}
/*
 * Cast5Decrypt - undo Cast5Encrypt for one 8-byte block.
 *
 * inBlock   in:  ciphertext; two 32-bit words are read from offsets 0 and 4.
 * outBlock  out: plaintext; two 32-bit words are written to offsets 0 and 4.
 * key       in:  key schedule; key->K[i] and key->K[i + 16] are used by round i.
 * s_S       in:  lookup tables referenced by name inside the round macros.
 *
 * Cast5Encrypt finishes by storing r at offset 0 and l at offset 4. To invert
 * it, the halves must be loaded back into those same variables (r from offset
 * 0, l from offset 4) before the round steps are replayed in reverse order;
 * afterwards l and r hold the words Cast5Encrypt originally read from offsets
 * 0 and 4 respectively.
 */
inline void Cast5Decrypt(PRIVATE_AS const u8 *inBlock, PRIVATE_AS u8 *outBlock, PRIVATE_AS CAST_KEY *key, SHM_TYPE u32 (*s_S)[256])
{
  uint r; GET_UINT32BE(r, inBlock, 0);
  uint l; GET_UINT32BE(l, inBlock, 4);
  /* K and t are referenced by name from the _CAST_F* macros */
  PRIVATE_AS uint *K = key->K;
  uint t;

  /* All 16 rounds are always run, last round first; the key length is not consulted here */
  _CAST_F1(r, l, 15, 16);
  _CAST_F3(l, r, 14, 16);
  _CAST_F2(r, l, 13, 16);
  _CAST_F1(l, r, 12, 16);
  _CAST_F3(r, l, 11, 16);
  _CAST_F2(l, r, 10, 16);
  _CAST_F1(r, l, 9, 16);
  _CAST_F3(l, r, 8, 16);
  _CAST_F2(r, l, 7, 16);
  _CAST_F1(l, r, 6, 16);
  _CAST_F3(r, l, 5, 16);
  _CAST_F2(l, r, 4, 16);
  _CAST_F1(r, l, 3, 16);
  _CAST_F3(l, r, 2, 16);
  _CAST_F2(r, l, 1, 16);
  _CAST_F1(l, r, 0, 16);

  /* Put l,r into outblock */
  PUT_UINT32BE(l, outBlock, 0);
  PUT_UINT32BE(r, outBlock, 4);

  /* Wipe clean (the locals are not read again after this point) */
  t = l = r = 0;
}
/*
 * Cast5SetKey - expand a 16-byte user key into the 32-word CAST key schedule.
 *
 * key        out: key->K[0..15] receive the first 16 schedule words and
 *                 key->K[16..31] the second 16, reduced to their low 5 bits
 *                 (the round macros use K[i + 16] as a rotation count).
 * keylength  unused: exactly 16 bytes are always read from userKey.
 * userKey    in:  key bytes, read as four 32-bit words with GET_UINT32BE.
 * s_S        in:  lookup tables; rows 4..7 are the ones used here.
 */
inline void Cast5SetKey(PRIVATE_AS CAST_KEY *key, uint keylength, PRIVATE_AS const u8 *userKey, SHM_TYPE u32 (*s_S)[256])
{
  uint i;
  PRIVATE_AS uint *K = key->K;
  uint X[4], Z[4];

  GET_UINT32BE(X[0], userKey, 0);
  GET_UINT32BE(X[1], userKey, 4);
  GET_UINT32BE(X[2], userKey, 8);
  GET_UINT32BE(X[3], userKey, 12);

  /* x(i) / z(i): byte number i (0x0..0xF) of the X / Z state, extracted with GETBYTE (defined elsewhere) */
  #define x(i) GETBYTE(X[i/4], 3-i%4)
  #define z(i) GETBYTE(Z[i/4], 3-i%4)

  /* two passes: i == 0 fills K[0..15], i == 16 fills K[16..31]; X carries over between them */
  for (i=0; i<=16; i+=16) {
    // this part is copied directly from RFC 2144 (with some search and replace) by Wei Dai
    // every line depends on the X/Z values written by the lines before it: keep the order
    Z[0] = X[0] ^ s_S[4][x(0xD)] ^ s_S[5][x(0xF)] ^ s_S[6][x(0xC)] ^ s_S[7][x(0xE)] ^ s_S[6][x(0x8)];
    Z[1] = X[2] ^ s_S[4][z(0x0)] ^ s_S[5][z(0x2)] ^ s_S[6][z(0x1)] ^ s_S[7][z(0x3)] ^ s_S[7][x(0xA)];
    Z[2] = X[3] ^ s_S[4][z(0x7)] ^ s_S[5][z(0x6)] ^ s_S[6][z(0x5)] ^ s_S[7][z(0x4)] ^ s_S[4][x(0x9)];
    Z[3] = X[1] ^ s_S[4][z(0xA)] ^ s_S[5][z(0x9)] ^ s_S[6][z(0xB)] ^ s_S[7][z(0x8)] ^ s_S[5][x(0xB)];
    K[i+0] = s_S[4][z(0x8)] ^ s_S[5][z(0x9)] ^ s_S[6][z(0x7)] ^ s_S[7][z(0x6)] ^ s_S[4][z(0x2)];
    K[i+1] = s_S[4][z(0xA)] ^ s_S[5][z(0xB)] ^ s_S[6][z(0x5)] ^ s_S[7][z(0x4)] ^ s_S[5][z(0x6)];
    K[i+2] = s_S[4][z(0xC)] ^ s_S[5][z(0xD)] ^ s_S[6][z(0x3)] ^ s_S[7][z(0x2)] ^ s_S[6][z(0x9)];
    K[i+3] = s_S[4][z(0xE)] ^ s_S[5][z(0xF)] ^ s_S[6][z(0x1)] ^ s_S[7][z(0x0)] ^ s_S[7][z(0xC)];
    X[0] = Z[2] ^ s_S[4][z(0x5)] ^ s_S[5][z(0x7)] ^ s_S[6][z(0x4)] ^ s_S[7][z(0x6)] ^ s_S[6][z(0x0)];
    X[1] = Z[0] ^ s_S[4][x(0x0)] ^ s_S[5][x(0x2)] ^ s_S[6][x(0x1)] ^ s_S[7][x(0x3)] ^ s_S[7][z(0x2)];
    X[2] = Z[1] ^ s_S[4][x(0x7)] ^ s_S[5][x(0x6)] ^ s_S[6][x(0x5)] ^ s_S[7][x(0x4)] ^ s_S[4][z(0x1)];
    X[3] = Z[3] ^ s_S[4][x(0xA)] ^ s_S[5][x(0x9)] ^ s_S[6][x(0xB)] ^ s_S[7][x(0x8)] ^ s_S[5][z(0x3)];
    K[i+4] = s_S[4][x(0x3)] ^ s_S[5][x(0x2)] ^ s_S[6][x(0xC)] ^ s_S[7][x(0xD)] ^ s_S[4][x(0x8)];
    K[i+5] = s_S[4][x(0x1)] ^ s_S[5][x(0x0)] ^ s_S[6][x(0xE)] ^ s_S[7][x(0xF)] ^ s_S[5][x(0xD)];
    K[i+6] = s_S[4][x(0x7)] ^ s_S[5][x(0x6)] ^ s_S[6][x(0x8)] ^ s_S[7][x(0x9)] ^ s_S[6][x(0x3)];
    K[i+7] = s_S[4][x(0x5)] ^ s_S[5][x(0x4)] ^ s_S[6][x(0xA)] ^ s_S[7][x(0xB)] ^ s_S[7][x(0x7)];
    Z[0] = X[0] ^ s_S[4][x(0xD)] ^ s_S[5][x(0xF)] ^ s_S[6][x(0xC)] ^ s_S[7][x(0xE)] ^ s_S[6][x(0x8)];
    Z[1] = X[2] ^ s_S[4][z(0x0)] ^ s_S[5][z(0x2)] ^ s_S[6][z(0x1)] ^ s_S[7][z(0x3)] ^ s_S[7][x(0xA)];
    Z[2] = X[3] ^ s_S[4][z(0x7)] ^ s_S[5][z(0x6)] ^ s_S[6][z(0x5)] ^ s_S[7][z(0x4)] ^ s_S[4][x(0x9)];
    Z[3] = X[1] ^ s_S[4][z(0xA)] ^ s_S[5][z(0x9)] ^ s_S[6][z(0xB)] ^ s_S[7][z(0x8)] ^ s_S[5][x(0xB)];
    K[i+8] = s_S[4][z(0x3)] ^ s_S[5][z(0x2)] ^ s_S[6][z(0xC)] ^ s_S[7][z(0xD)] ^ s_S[4][z(0x9)];
    K[i+9] = s_S[4][z(0x1)] ^ s_S[5][z(0x0)] ^ s_S[6][z(0xE)] ^ s_S[7][z(0xF)] ^ s_S[5][z(0xC)];
    K[i+10] = s_S[4][z(0x7)] ^ s_S[5][z(0x6)] ^ s_S[6][z(0x8)] ^ s_S[7][z(0x9)] ^ s_S[6][z(0x2)];
    K[i+11] = s_S[4][z(0x5)] ^ s_S[5][z(0x4)] ^ s_S[6][z(0xA)] ^ s_S[7][z(0xB)] ^ s_S[7][z(0x6)];
    X[0] = Z[2] ^ s_S[4][z(0x5)] ^ s_S[5][z(0x7)] ^ s_S[6][z(0x4)] ^ s_S[7][z(0x6)] ^ s_S[6][z(0x0)];
    X[1] = Z[0] ^ s_S[4][x(0x0)] ^ s_S[5][x(0x2)] ^ s_S[6][x(0x1)] ^ s_S[7][x(0x3)] ^ s_S[7][z(0x2)];
    X[2] = Z[1] ^ s_S[4][x(0x7)] ^ s_S[5][x(0x6)] ^ s_S[6][x(0x5)] ^ s_S[7][x(0x4)] ^ s_S[4][z(0x1)];
    X[3] = Z[3] ^ s_S[4][x(0xA)] ^ s_S[5][x(0x9)] ^ s_S[6][x(0xB)] ^ s_S[7][x(0x8)] ^ s_S[5][z(0x3)];
    K[i+12] = s_S[4][x(0x8)] ^ s_S[5][x(0x9)] ^ s_S[6][x(0x7)] ^ s_S[7][x(0x6)] ^ s_S[4][x(0x3)];
    K[i+13] = s_S[4][x(0xA)] ^ s_S[5][x(0xB)] ^ s_S[6][x(0x5)] ^ s_S[7][x(0x4)] ^ s_S[5][x(0x7)];
    K[i+14] = s_S[4][x(0xC)] ^ s_S[5][x(0xD)] ^ s_S[6][x(0x3)] ^ s_S[7][x(0x2)] ^ s_S[6][x(0x8)];
    K[i+15] = s_S[4][x(0xE)] ^ s_S[5][x(0xF)] ^ s_S[6][x(0x1)] ^ s_S[7][x(0x0)] ^ s_S[7][x(0xD)];
  }

  /* x()/z() only make sense with X/Z in scope: do not leak them into the including file */
  #undef x
  #undef z

  /* only the low 5 bits of K[16..31] are kept */
  for (i=16; i<32; i++)
    K[i] &= 0x1f;
}
/* OpenSSL API compatibility */
/*
 * NOTE(review): each wrapper below forwards three arguments, while Cast5SetKey,
 * Cast5Encrypt and Cast5Decrypt as defined in this file take a fourth parameter
 * (s_S). As written, expanding any of these macros does not match those
 * signatures - confirm whether the wrappers are still used anywhere.
 */
#define CAST_set_key(ckey, len, key) Cast5SetKey(ckey, len, key)
#define CAST_ecb_encrypt(in, out, ckey) Cast5Encrypt(in, out, ckey)
#define CAST_ecb_decrypt(in, out, ckey) Cast5Decrypt(in, out, ckey)
#endif /* _OPENCL_CAST_H */

@ -1317,6 +1317,16 @@ DECLSPEC u64x hc_swap64 (const u64x v)
asm volatile ("mov.b64 %0, {%1, %2};" : "=l"(r.sf) : "r"(tr.sf), "r"(tl.sf));
#endif
#elif defined IS_METAL
const u32x a0 = h32_from_64 (v);
const u32x a1 = l32_from_64 (v);
u32x t0 = hc_swap32 (a0);
u32x t1 = hc_swap32 (a1);
r = hl32_to_64 (t1, t0);
#else
#if defined USE_BITSELECT && defined USE_ROTATE
@ -1380,7 +1390,19 @@ DECLSPEC u64 hc_swap64_S (const u64 v)
asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tr) : "r"(ir));
asm volatile ("mov.b64 %0, {%1, %2};" : "=l"(r) : "r"(tr), "r"(tl));
#elif defined IS_METAL
const u32 v0 = h32_from_64_S (v);
const u32 v1 = l32_from_64_S (v);
u32 t0 = hc_swap32_S (v0);
u32 t1 = hc_swap32_S (v1);
r = hl32_to_64_S (t1, t0);
#else
#ifdef USE_SWIZZLE
r = as_ulong (as_uchar8 (v).s76543210);
#else

@ -11,7 +11,7 @@
*
* There are four variables where major differences occur:
*
* - P2: Adress space of kernel_rules_t struct.
* - P2: Address space of kernel_rules_t struct.
* If the kernel uses rules_buf, it will be stored in CONSTANT_AS.
* If it does not, cheaper GLOBAL_AS space is used.
*

@ -2024,7 +2024,7 @@ DECLSPEC void point_mul (PRIVATE_AS u32 *r, PRIVATE_AS const u32 *k, SECP256K1_T
* @param r out: x and y coordinates.
* @param x in: x coordinate which should be converted, a pointer to an u32 array with a size of 8.
* @param first_byte in: The parity of the y coordinate, a u32.
* @return Returns 0 if successfull, returns 1 if x is greater than the basepoint.
* @return Returns 0 if successful, returns 1 if x is greater than the basepoint.
*/
DECLSPEC u32 transform_public (PRIVATE_AS secp256k1_t *r, PRIVATE_AS const u32 *x, const u32 first_byte)
{
@ -2084,7 +2084,7 @@ DECLSPEC u32 transform_public (PRIVATE_AS secp256k1_t *r, PRIVATE_AS const u32 *
* Parse a x coordinate with leading parity to secp256k1_t.
* @param r out: x and y coordinates.
* @param k in: x coordinate which should be converted with leading parity, a pointer to an u32 array with a size of 9.
* @return Returns 0 if successfull, returns 1 if x is greater than the basepoint or the parity has an unexpected value.
* @return Returns 0 if successful, returns 1 if x is greater than the basepoint or the parity has an unexpected value.
*/
DECLSPEC u32 parse_public (PRIVATE_AS secp256k1_t *r, PRIVATE_AS const u32 *k)
{

@ -26,7 +26,7 @@ DECLSPEC u32 blake2s_rot16_S (const u32 a)
DECLSPEC u32x blake2s_rot16 (const u32x a)
{
u32x r;
#if VECT_SIZE == 1
r = blake2s_rot16_S (a);
#endif
@ -346,7 +346,7 @@ DECLSPEC void blake2s_update (PRIVATE_AS blake2s_ctx_t *ctx, PRIVATE_AS const u3
w3[1] = w[pos4 + 13];
w3[2] = w[pos4 + 14];
w3[3] = w[pos4 + 15];
blake2s_update_64 (ctx, w0, w1, w2, w3, 64);
}
@ -447,11 +447,11 @@ DECLSPEC void blake2s_transform_vector (PRIVATE_AS u32x *h, PRIVATE_AS const u32
v[ 9] = BLAKE2S_IV_01;
v[10] = BLAKE2S_IV_02;
v[11] = BLAKE2S_IV_03;
v[12] = BLAKE2S_IV_04 ^ t0;
v[12] = make_u32x (BLAKE2S_IV_04) ^ t0;
v[13] = BLAKE2S_IV_05; // ^ t1;
v[14] = BLAKE2S_IV_06 ^ f0;
v[15] = BLAKE2S_IV_07; // ^ f1;
BLAKE2S_ROUND_VECTOR ( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
BLAKE2S_ROUND_VECTOR (14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3);
BLAKE2S_ROUND_VECTOR (11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4);

@ -41,7 +41,7 @@ DECLSPEC void sm3_transform (PRIVATE_AS const u32 *w0, PRIVATE_AS const u32 *w1,
u32 wd_t = w3[1];
u32 we_t = w3[2];
u32 wf_t = w3[3];
// SM3 main loop, composed of 64 rounds (0 to 63).
// The Compression Function (CF) and Message Expansion (ME) are executed step-by-step.
// SM3_ROUND1_S use SM3_FF0 and SM3_GG0 functions for index 0 to 15 and SM3_ROUND2_S use SM3_FF1 and SM3_GG1 functions for index 16 to 63.
@ -93,7 +93,7 @@ DECLSPEC void sm3_transform (PRIVATE_AS const u32 *w0, PRIVATE_AS const u32 *w1,
wc_t = SM3_EXPAND_S(wc_t, w3_t, w9_t, wf_t, w6_t); SM3_ROUND2_S(a, b, c, d, e, f, g, h, SM3_T40, w8_t, w8_t ^ wc_t);
wd_t = SM3_EXPAND_S(wd_t, w4_t, wa_t, w0_t, w7_t); SM3_ROUND2_S(d, a, b, c, h, e, f, g, SM3_T41, w9_t, w9_t ^ wd_t);
we_t = SM3_EXPAND_S(we_t, w5_t, wb_t, w1_t, w8_t); SM3_ROUND2_S(c, d, a, b, g, h, e, f, SM3_T42, wa_t, wa_t ^ we_t);
wf_t = SM3_EXPAND_S(wf_t, w6_t, wc_t, w2_t, w9_t); SM3_ROUND2_S(b, c, d, a, f, g, h, e, SM3_T43, wb_t, wb_t ^ wf_t);
wf_t = SM3_EXPAND_S(wf_t, w6_t, wc_t, w2_t, w9_t); SM3_ROUND2_S(b, c, d, a, f, g, h, e, SM3_T43, wb_t, wb_t ^ wf_t);
w0_t = SM3_EXPAND_S(w0_t, w7_t, wd_t, w3_t, wa_t); SM3_ROUND2_S(a, b, c, d, e, f, g, h, SM3_T44, wc_t, wc_t ^ w0_t);
w1_t = SM3_EXPAND_S(w1_t, w8_t, we_t, w4_t, wb_t); SM3_ROUND2_S(d, a, b, c, h, e, f, g, SM3_T45, wd_t, wd_t ^ w1_t);
w2_t = SM3_EXPAND_S(w2_t, w9_t, wf_t, w5_t, wc_t); SM3_ROUND2_S(c, d, a, b, g, h, e, f, SM3_T46, we_t, we_t ^ w2_t);
@ -928,7 +928,7 @@ DECLSPEC void sm3_final (PRIVATE_AS sm3_ctx_t *ctx)
const int pos = ctx->len & 63;
append_0x80_4x4_S (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos ^ 3);
if (pos >= 56)
{
sm3_transform (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->h);
@ -986,7 +986,7 @@ DECLSPEC void sm3_transform_vector (PRIVATE_AS const u32x *w0, PRIVATE_AS const
u32x wd_t = w3[1];
u32x we_t = w3[2];
u32x wf_t = w3[3];
// SM3 main loop, composed of 64 rounds (0 to 63).
// The Compression Function (CF) and Message Expansion (ME) are executed step-by-step.
// SM3_ROUND1 use SM3_FF0 and SM3_GG0 functions for index 0 to 15 and SM3_ROUND2 use SM3_FF1 and SM3_GG1 functions for index 16 to 63.
@ -1038,7 +1038,7 @@ DECLSPEC void sm3_transform_vector (PRIVATE_AS const u32x *w0, PRIVATE_AS const
wc_t = SM3_EXPAND(wc_t, w3_t, w9_t, wf_t, w6_t); SM3_ROUND2(a, b, c, d, e, f, g, h, SM3_T40, w8_t, w8_t ^ wc_t);
wd_t = SM3_EXPAND(wd_t, w4_t, wa_t, w0_t, w7_t); SM3_ROUND2(d, a, b, c, h, e, f, g, SM3_T41, w9_t, w9_t ^ wd_t);
we_t = SM3_EXPAND(we_t, w5_t, wb_t, w1_t, w8_t); SM3_ROUND2(c, d, a, b, g, h, e, f, SM3_T42, wa_t, wa_t ^ we_t);
wf_t = SM3_EXPAND(wf_t, w6_t, wc_t, w2_t, w9_t); SM3_ROUND2(b, c, d, a, f, g, h, e, SM3_T43, wb_t, wb_t ^ wf_t);
wf_t = SM3_EXPAND(wf_t, w6_t, wc_t, w2_t, w9_t); SM3_ROUND2(b, c, d, a, f, g, h, e, SM3_T43, wb_t, wb_t ^ wf_t);
w0_t = SM3_EXPAND(w0_t, w7_t, wd_t, w3_t, wa_t); SM3_ROUND2(a, b, c, d, e, f, g, h, SM3_T44, wc_t, wc_t ^ w0_t);
w1_t = SM3_EXPAND(w1_t, w8_t, we_t, w4_t, wb_t); SM3_ROUND2(d, a, b, c, h, e, f, g, SM3_T45, wd_t, wd_t ^ w1_t);
w2_t = SM3_EXPAND(w2_t, w9_t, wf_t, w5_t, wc_t); SM3_ROUND2(c, d, a, b, g, h, e, f, SM3_T46, we_t, we_t ^ w2_t);

@ -112,6 +112,15 @@ typedef u64 u64x;
#if defined IS_CUDA || defined IS_HIP
#ifndef __device_builtin__
#define __device_builtin__
#endif
#ifndef __builtin_align__
#define __builtin_align__(x)
#endif
#if VECT_SIZE == 2
struct __device_builtin__ __builtin_align__(2) u8x

@ -0,0 +1,744 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include M2S(INCLUDE_PATH/inc_vendor.h)
#include M2S(INCLUDE_PATH/inc_types.h)
#include M2S(INCLUDE_PATH/inc_platform.cl)
#include M2S(INCLUDE_PATH/inc_common.cl)
#include M2S(INCLUDE_PATH/inc_rp_optimized.h)
#include M2S(INCLUDE_PATH/inc_rp_optimized.cl)
#include M2S(INCLUDE_PATH/inc_simd.cl)
#include M2S(INCLUDE_PATH/inc_hash_md5.cl)
#endif
/**
 * uint_to_hex_lower8 (i): look up every lane of the vector value i in the
 * l_bin2asc table and pack the results into a u32x with make_u32x.
 * One variant per supported VECT_SIZE (1, 2, 4, 8, 16); lanes are addressed
 * with the .s0 ... .sf component selectors.
 * The macro refers to l_bin2asc by name, so a table with that name must be in
 * scope where the macro is expanded. Each lane value is used directly as an
 * index, so callers are expected to pass values that fit the table
 * (the kernels in this file mask with 255 first).
 */
#if VECT_SIZE == 1
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
#elif VECT_SIZE == 2
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
#elif VECT_SIZE == 4
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
#elif VECT_SIZE == 8
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
#elif VECT_SIZE == 16
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
#endif
/**
 * m02630_m04
 *
 * For every rule position, this kernel:
 *   1. applies the rule to the base password (apply_rules_vect_optimized),
 *   2. ORs the salt words, moved by the candidate length, into the same buffer,
 *   3. runs one MD5 block over that buffer,
 *   4. turns the 16 digest bytes into 32 lowercase hex characters,
 *   5. runs a second MD5 block over those 32 characters,
 *   6. passes the resulting state to COMPARE_M_SIMD.
 */
KERNEL_FQ void m02630_m04 (KERN_ATTR_RULES ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
// Each work-item fills the entries lid, lid + lsz, lid + 2 * lsz, ...
// Entry i holds the two lowercase hex digits of byte i as ASCII codes:
// the digit for the high nibble in bits 0..7, the one for the low nibble in bits 8..15.
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
}
// The range check is placed after the table fill and SYNC_THREADS, so work-items
// with gid >= GID_CNT still take part in filling the table before they leave.
SYNC_THREADS ();
if (gid >= GID_CNT) return;
/**
* base
*/
// first 8 words of the base password for this work-item
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
// length is masked into the range 0..63
const u32 pw_len = pws[gid].pw_len & 63;
/**
* salt
*/
// 16 salt words and the salt length of the salt selected by SALT_POS_HOST
u32 salt_buf0[4];
u32 salt_buf1[4];
u32 salt_buf2[4];
u32 salt_buf3[4];
salt_buf0[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 0];
salt_buf0[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 1];
salt_buf0[2] = salt_bufs[SALT_POS_HOST].salt_buf[ 2];
salt_buf0[3] = salt_bufs[SALT_POS_HOST].salt_buf[ 3];
salt_buf1[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 4];
salt_buf1[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 5];
salt_buf1[2] = salt_bufs[SALT_POS_HOST].salt_buf[ 6];
salt_buf1[3] = salt_bufs[SALT_POS_HOST].salt_buf[ 7];
salt_buf2[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 8];
salt_buf2[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 9];
salt_buf2[2] = salt_bufs[SALT_POS_HOST].salt_buf[10];
salt_buf2[3] = salt_bufs[SALT_POS_HOST].salt_buf[11];
salt_buf3[0] = salt_bufs[SALT_POS_HOST].salt_buf[12];
salt_buf3[1] = salt_bufs[SALT_POS_HOST].salt_buf[13];
salt_buf3[2] = salt_bufs[SALT_POS_HOST].salt_buf[14];
salt_buf3[3] = salt_bufs[SALT_POS_HOST].salt_buf[15];
const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;
/**
* loop
*/
// each iteration handles VECT_SIZE rule positions starting at il_pos
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
{
u32x w0[4] = { 0 };
u32x w1[4] = { 0 };
u32x w2[4] = { 0 };
u32x w3[4] = { 0 };
// the helper receives w0/w1 as output buffers; its return value is used below as the candidate length
const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
/**
* append salt
*/
u32x s0[4];
u32x s1[4];
u32x s2[4];
u32x s3[4];
s0[0] = salt_buf0[0];
s0[1] = salt_buf0[1];
s0[2] = salt_buf0[2];
s0[3] = salt_buf0[3];
s1[0] = salt_buf1[0];
s1[1] = salt_buf1[1];
s1[2] = salt_buf1[2];
s1[3] = salt_buf1[3];
s2[0] = salt_buf2[0];
s2[1] = salt_buf2[1];
s2[2] = salt_buf2[2];
s2[3] = salt_buf2[3];
s3[0] = salt_buf3[0];
s3[1] = salt_buf3[1];
s3[2] = salt_buf3[2];
s3[3] = salt_buf3[3];
// NOTE(review): presumably moves the salt words up by out_len bytes so that the OR
// below places them directly behind the candidate - confirm against the helper
switch_buffer_by_offset_le_VV (s0, s1, s2, s3, out_len);
const u32x pw_salt_len = out_len + salt_len;
w0[0] |= s0[0];
w0[1] |= s0[1];
w0[2] |= s0[2];
w0[3] |= s0[3];
w1[0] |= s1[0];
w1[1] |= s1[1];
w1[2] |= s1[2];
w1[3] |= s1[3];
w2[0] |= s2[0];
w2[1] |= s2[1];
w2[2] |= s2[2];
w2[3] |= s2[3];
w3[0] |= s3[0];
w3[1] |= s3[1];
w3[2] |= s3[2];
w3[3] |= s3[3];
/**
* md5
*/
// First MD5 block: words 0..13 come from the combined buffer, word 14 is the
// message length in bits and word 15 is zero (w3[2] and w3[3] are not used).
// NOTE(review): no 0x80 padding byte is added in this kernel for the first block;
// presumably it is already part of the salt buffer - confirm on the host side.
u32x w0_t = w0[0];
u32x w1_t = w0[1];
u32x w2_t = w0[2];
u32x w3_t = w0[3];
u32x w4_t = w1[0];
u32x w5_t = w1[1];
u32x w6_t = w1[2];
u32x w7_t = w1[3];
u32x w8_t = w2[0];
u32x w9_t = w2[1];
u32x wa_t = w2[2];
u32x wb_t = w2[3];
u32x wc_t = w3[0];
u32x wd_t = w3[1];
u32x we_t = pw_salt_len * 8;
u32x wf_t = 0;
u32x a = MD5M_A;
u32x b = MD5M_B;
u32x c = MD5M_C;
u32x d = MD5M_D;
MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03);
MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13);
// NOTE(review): t is not referenced directly in this kernel; presumably it is scratch
// storage used inside the MD5_H1 / MD5_H2 macros (defined elsewhere) - confirm
u32x t;
MD5_STEP (MD5_H1, a, b, c, d, w5_t, MD5C20, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w8_t, MD5C21, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, wb_t, MD5C22, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, we_t, MD5C23, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w1_t, MD5C24, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w4_t, MD5C25, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w7_t, MD5C26, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, wa_t, MD5C27, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, wd_t, MD5C28, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w0_t, MD5C29, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w3_t, MD5C2a, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w6_t, MD5C2b, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w9_t, MD5C2c, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, wc_t, MD5C2d, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, wf_t, MD5C2e, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w2_t, MD5C2f, MD5S23);
MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33);
// add the initial state to finish the first digest
a += make_u32x (MD5M_A);
b += make_u32x (MD5M_B);
c += make_u32x (MD5M_C);
d += make_u32x (MD5M_D);
/**
* md5
*/
// Second MD5 block: every digest byte of a, b, c, d (lowest byte first) is replaced by
// its two hex characters from l_bin2asc, two bytes per message word, filling words 0..7.
w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0
| uint_to_hex_lower8 ((a >> 8) & 255) << 16;
w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0
| uint_to_hex_lower8 ((a >> 24) & 255) << 16;
w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0
| uint_to_hex_lower8 ((b >> 8) & 255) << 16;
w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0
| uint_to_hex_lower8 ((b >> 24) & 255) << 16;
w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0
| uint_to_hex_lower8 ((c >> 8) & 255) << 16;
w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0
| uint_to_hex_lower8 ((c >> 24) & 255) << 16;
w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0
| uint_to_hex_lower8 ((d >> 8) & 255) << 16;
w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0
| uint_to_hex_lower8 ((d >> 24) & 255) << 16;
// padding byte 0x80 right after the 32 characters, then zeros
w8_t = 0x80;
w9_t = 0;
wa_t = 0;
wb_t = 0;
wc_t = 0;
wd_t = 0;
// message length in bits: 32 characters * 8
we_t = 32 * 8;
wf_t = 0;
a = MD5M_A;
b = MD5M_B;
c = MD5M_C;
d = MD5M_D;
MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03);
MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13);
MD5_STEP (MD5_H1, a, b, c, d, w5_t, MD5C20, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w8_t, MD5C21, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, wb_t, MD5C22, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, we_t, MD5C23, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w1_t, MD5C24, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w4_t, MD5C25, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w7_t, MD5C26, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, wa_t, MD5C27, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, wd_t, MD5C28, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w0_t, MD5C29, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w3_t, MD5C2a, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w6_t, MD5C2b, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w9_t, MD5C2c, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, wc_t, MD5C2d, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, wf_t, MD5C2e, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w2_t, MD5C2f, MD5S23);
MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33);
// Unlike after the first block, the initial state is not added back here.
// NOTE(review): presumably the stored digests have it subtracted already - confirm host side.
COMPARE_M_SIMD (a, d, c, b);
}
}
/**
 * m02630_m08: empty body, performs no work.
 * NOTE(review): presumably present only so that the kernel symbol exists - confirm against the host code.
 */
KERNEL_FQ void m02630_m08 (KERN_ATTR_RULES ())
{
}
/**
 * m02630_m16: empty body, performs no work.
 * NOTE(review): presumably present only so that the kernel symbol exists - confirm against the host code.
 */
KERNEL_FQ void m02630_m16 (KERN_ATTR_RULES ())
{
}
KERNEL_FQ void m02630_s04 (KERN_ATTR_RULES ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
}
SYNC_THREADS ();
if (gid >= GID_CNT) return;
/**
* base
*/
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_len = pws[gid].pw_len & 63;
/**
* salt
*/
u32 salt_buf0[4];
u32 salt_buf1[4];
u32 salt_buf2[4];
u32 salt_buf3[4];
salt_buf0[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 0];
salt_buf0[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 1];
salt_buf0[2] = salt_bufs[SALT_POS_HOST].salt_buf[ 2];
salt_buf0[3] = salt_bufs[SALT_POS_HOST].salt_buf[ 3];
salt_buf1[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 4];
salt_buf1[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 5];
salt_buf1[2] = salt_bufs[SALT_POS_HOST].salt_buf[ 6];
salt_buf1[3] = salt_bufs[SALT_POS_HOST].salt_buf[ 7];
salt_buf2[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 8];
salt_buf2[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 9];
salt_buf2[2] = salt_bufs[SALT_POS_HOST].salt_buf[10];
salt_buf2[3] = salt_bufs[SALT_POS_HOST].salt_buf[11];
salt_buf3[0] = salt_bufs[SALT_POS_HOST].salt_buf[12];
salt_buf3[1] = salt_bufs[SALT_POS_HOST].salt_buf[13];
salt_buf3[2] = salt_bufs[SALT_POS_HOST].salt_buf[14];
salt_buf3[3] = salt_bufs[SALT_POS_HOST].salt_buf[15];
const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
};
/**
* loop
*/
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
{
u32x w0[4] = { 0 };
u32x w1[4] = { 0 };
u32x w2[4] = { 0 };
u32x w3[4] = { 0 };
const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
/**
* append salt
*/
u32x s0[4];
u32x s1[4];
u32x s2[4];
u32x s3[4];
s0[0] = salt_buf0[0];
s0[1] = salt_buf0[1];
s0[2] = salt_buf0[2];
s0[3] = salt_buf0[3];
s1[0] = salt_buf1[0];
s1[1] = salt_buf1[1];
s1[2] = salt_buf1[2];
s1[3] = salt_buf1[3];
s2[0] = salt_buf2[0];
s2[1] = salt_buf2[1];
s2[2] = salt_buf2[2];
s2[3] = salt_buf2[3];
s3[0] = salt_buf3[0];
s3[1] = salt_buf3[1];
s3[2] = salt_buf3[2];
s3[3] = salt_buf3[3];
switch_buffer_by_offset_le_VV (s0, s1, s2, s3, out_len);
const u32x pw_salt_len = out_len + salt_len;
w0[0] |= s0[0];
w0[1] |= s0[1];
w0[2] |= s0[2];
w0[3] |= s0[3];
w1[0] |= s1[0];
w1[1] |= s1[1];
w1[2] |= s1[2];
w1[3] |= s1[3];
w2[0] |= s2[0];
w2[1] |= s2[1];
w2[2] |= s2[2];
w2[3] |= s2[3];
w3[0] |= s3[0];
w3[1] |= s3[1];
w3[2] |= s3[2];
w3[3] |= s3[3];
/**
* md5
*/
u32x w0_t = w0[0];
u32x w1_t = w0[1];
u32x w2_t = w0[2];
u32x w3_t = w0[3];
u32x w4_t = w1[0];
u32x w5_t = w1[1];
u32x w6_t = w1[2];
u32x w7_t = w1[3];
u32x w8_t = w2[0];
u32x w9_t = w2[1];
u32x wa_t = w2[2];
u32x wb_t = w2[3];
u32x wc_t = w3[0];
u32x wd_t = w3[1];
u32x we_t = pw_salt_len * 8;
u32x wf_t = 0;
u32x a = MD5M_A;
u32x b = MD5M_B;
u32x c = MD5M_C;
u32x d = MD5M_D;
MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03);
MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13);
u32x t;
MD5_STEP (MD5_H1, a, b, c, d, w5_t, MD5C20, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w8_t, MD5C21, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, wb_t, MD5C22, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, we_t, MD5C23, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w1_t, MD5C24, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w4_t, MD5C25, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w7_t, MD5C26, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, wa_t, MD5C27, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, wd_t, MD5C28, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w0_t, MD5C29, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w3_t, MD5C2a, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w6_t, MD5C2b, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w9_t, MD5C2c, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, wc_t, MD5C2d, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, wf_t, MD5C2e, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w2_t, MD5C2f, MD5S23);
MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33);
a += make_u32x (MD5M_A);
b += make_u32x (MD5M_B);
c += make_u32x (MD5M_C);
d += make_u32x (MD5M_D);
/**
* md5
*/
w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0
| uint_to_hex_lower8 ((a >> 8) & 255) << 16;
w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0
| uint_to_hex_lower8 ((a >> 24) & 255) << 16;
w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0
| uint_to_hex_lower8 ((b >> 8) & 255) << 16;
w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0
| uint_to_hex_lower8 ((b >> 24) & 255) << 16;
w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0
| uint_to_hex_lower8 ((c >> 8) & 255) << 16;
w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0
| uint_to_hex_lower8 ((c >> 24) & 255) << 16;
w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0
| uint_to_hex_lower8 ((d >> 8) & 255) << 16;
w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0
| uint_to_hex_lower8 ((d >> 24) & 255) << 16;
w8_t = 0x80;
w9_t = 0;
wa_t = 0;
wb_t = 0;
wc_t = 0;
wd_t = 0;
we_t = 32 * 8;
wf_t = 0;
a = MD5M_A;
b = MD5M_B;
c = MD5M_C;
d = MD5M_D;
MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03);
MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13);
MD5_STEP (MD5_H1, a, b, c, d, w5_t, MD5C20, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w8_t, MD5C21, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, wb_t, MD5C22, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, we_t, MD5C23, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w1_t, MD5C24, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w4_t, MD5C25, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w7_t, MD5C26, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, wa_t, MD5C27, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, wd_t, MD5C28, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w0_t, MD5C29, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w3_t, MD5C2a, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w6_t, MD5C2b, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w9_t, MD5C2c, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, wc_t, MD5C2d, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, wf_t, MD5C2e, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w2_t, MD5C2f, MD5S23);
MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30);
if (MATCHES_NONE_VS (a, search[0])) continue;
MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33);
COMPARE_S_SIMD (a, d, c, b);
}
}
// Kernel entry point with an empty body: launching it performs no work.
// NOTE(review): presumably a placeholder so that an "s08" entry point exists
// for this mode even though only the "s04" variant carries an implementation
// - confirm against the host-side kernel selection.
KERNEL_FQ void m02630_s08 (KERN_ATTR_RULES ())
{
}
// Kernel entry point with an empty body: launching it performs no work.
// NOTE(review): presumably a placeholder so that an "s16" entry point exists
// for this mode even though only the "s04" variant carries an implementation
// - confirm against the host-side kernel selection.
KERNEL_FQ void m02630_s16 (KERN_ATTR_RULES ())
{
}

@ -0,0 +1,259 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
//#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include M2S(INCLUDE_PATH/inc_vendor.h)
#include M2S(INCLUDE_PATH/inc_types.h)
#include M2S(INCLUDE_PATH/inc_platform.cl)
#include M2S(INCLUDE_PATH/inc_common.cl)
#include M2S(INCLUDE_PATH/inc_rp.h)
#include M2S(INCLUDE_PATH/inc_rp.cl)
#include M2S(INCLUDE_PATH/inc_scalar.cl)
#include M2S(INCLUDE_PATH/inc_hash_md5.cl)
#endif
/**
 * uint_to_hex_lower8 (i): look up every lane of i in the l_bin2asc table and
 * rebuild a u32x from the results with make_u32x.
 *
 * One definition exists per supported VECT_SIZE (1, 2, 4, 8, 16); the vector
 * variants index the table once per component (.s0, .s1, ...). No definition
 * is provided for any other VECT_SIZE.
 *
 * The macro does not declare l_bin2asc itself: a table with that exact name
 * must be in scope wherever the macro is expanded. The argument is used as a
 * table index directly, so callers mask it to 0..255 before passing it in.
 */
#if VECT_SIZE == 1
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
#elif VECT_SIZE == 2
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
#elif VECT_SIZE == 4
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
#elif VECT_SIZE == 8
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
#elif VECT_SIZE == 16
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
#endif
/**
 * Rule-engine kernel for mode 02630 that finishes each candidate with
 * COMPARE_M_SCALAR.
 *
 * For every rule in rules_buf[0 .. IL_CNT) the cached password is mangled with
 * apply_rules and fed, password first and salt second, through the
 * md5_init / md5_update / md5_final helpers. The four resulting state words
 * are rendered as 32 lowercase hex characters, and that 32-byte text is put
 * through one md5_transform call on a hand-padded block. The words of the
 * second state selected by DGST_R0..DGST_R3 are handed to COMPARE_M_SCALAR.
 */
KERNEL_FQ void m02630_mxx (KERN_ATTR_RULES ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*
* Entry i holds the two lowercase hex characters of byte value i: the
* character for the high nibble in bits 0..7 and the character for the low
* nibble in bits 8..15. Each thread starts at its local id and advances by
* the local size, so the 256 entries are split across the work-group.
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
}
// NOTE(review): presumably a work-group barrier so the table is complete
// before any thread reads it - confirm against the SYNC_THREADS definition.
SYNC_THREADS ();
// The range check sits after the table fill and SYNC_THREADS, so threads
// beyond GID_CNT still take part in both before leaving.
if (gid >= GID_CNT) return;
/**
* base
*/
// NOTE(review): presumably declares the locals that PASTE_PW reads inside the
// loop below - confirm against the COPY_PW / PASTE_PW definitions.
COPY_PW (pws[gid]);
const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;
// Private, zero-filled copy of the salt. The loop advances four bytes per
// 32-bit word, so it copies ceil (salt_len / 4) words and leaves the rest 0.
u32 s[64] = { 0 };
for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
{
s[idx] = salt_bufs[SALT_POS_HOST].salt_buf[idx];
}
/**
* loop
*/
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
{
// Fresh copy of the password for every rule; apply_rules rewrites tmp.i and
// returns the new length.
pw_t tmp = PASTE_PW;
tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
// First hash: password bytes followed by salt bytes.
md5_ctx_t ctx0;
md5_init (&ctx0);
md5_update (&ctx0, tmp.i, tmp.pw_len);
md5_update (&ctx0, s, salt_len);
md5_final (&ctx0);
const u32 a = ctx0.h[0];
const u32 b = ctx0.h[1];
const u32 c = ctx0.h[2];
const u32 d = ctx0.h[3];
// Second hash: the message block is written by hand instead of going
// through md5_update / md5_final.
md5_ctx_t ctx;
md5_init (&ctx);
// Each word receives four hex characters: the table entry for one state byte
// in the low half and the entry for the next byte in the high half. Eight
// words therefore hold the 32-character hex form of a, b, c, d.
ctx.w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0
| uint_to_hex_lower8 ((a >> 8) & 255) << 16;
ctx.w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0
| uint_to_hex_lower8 ((a >> 24) & 255) << 16;
ctx.w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0
| uint_to_hex_lower8 ((b >> 8) & 255) << 16;
ctx.w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0
| uint_to_hex_lower8 ((b >> 24) & 255) << 16;
ctx.w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0
| uint_to_hex_lower8 ((c >> 8) & 255) << 16;
ctx.w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0
| uint_to_hex_lower8 ((c >> 24) & 255) << 16;
ctx.w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0
| uint_to_hex_lower8 ((d >> 8) & 255) << 16;
ctx.w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0
| uint_to_hex_lower8 ((d >> 24) & 255) << 16;
// Padding for a 32-byte message: the 0x80 marker directly after the text,
// zeros, and the message length in bits (32 * 8) in word 14 of the block.
ctx.w2[0] = 0x80;
ctx.w2[1] = 0;
ctx.w2[2] = 0;
ctx.w2[3] = 0;
ctx.w3[0] = 0;
ctx.w3[1] = 0;
ctx.w3[2] = 32 * 8;
ctx.w3[3] = 0;
md5_transform (ctx.w0, ctx.w1, ctx.w2, ctx.w3, ctx.h);
const u32 r0 = ctx.h[DGST_R0];
const u32 r1 = ctx.h[DGST_R1];
const u32 r2 = ctx.h[DGST_R2];
const u32 r3 = ctx.h[DGST_R3];
// NOTE(review): COMPARE_M_SCALAR is defined outside this file; presumably it
// also reads gid and il_pos from this scope - confirm before renaming them.
COMPARE_M_SCALAR (r0, r1, r2, r3);
}
}
/**
 * Rule-engine kernel for mode 02630 that finishes each candidate with
 * COMPARE_S_SCALAR.
 *
 * For every rule in rules_buf[0 .. IL_CNT) the cached password is mangled with
 * apply_rules and fed, password first and salt second, through the
 * md5_init / md5_update / md5_final helpers. The four resulting state words
 * are rendered as 32 lowercase hex characters, and that 32-byte text is put
 * through one md5_transform call on a hand-padded block. The words of the
 * second state selected by DGST_R0..DGST_R3 are handed to COMPARE_S_SCALAR.
 */
KERNEL_FQ void m02630_sxx (KERN_ATTR_RULES ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*
* Entry i holds the two lowercase hex characters of byte value i: the
* character for the high nibble in bits 0..7 and the character for the low
* nibble in bits 8..15. Each thread starts at its local id and advances by
* the local size, so the 256 entries are split across the work-group.
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
}
// NOTE(review): presumably a work-group barrier so the table is complete
// before any thread reads it - confirm against the SYNC_THREADS definition.
SYNC_THREADS ();
// The range check sits after the table fill and SYNC_THREADS, so threads
// beyond GID_CNT still take part in both before leaving.
if (gid >= GID_CNT) return;
/**
* digest
*
* The four digest words, in DGST_R0..DGST_R3 order, of the entry at
* DIGESTS_OFFSET_HOST. The array is not referenced by name further down.
* NOTE(review): presumably COMPARE_S_SCALAR reads it - confirm before
* renaming or removing it.
*/
const u32 search[4] =
{
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
};
/**
* base
*/
// NOTE(review): presumably declares the locals that PASTE_PW reads inside the
// loop below - confirm against the COPY_PW / PASTE_PW definitions.
COPY_PW (pws[gid]);
const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;
// Private, zero-filled copy of the salt. The loop advances four bytes per
// 32-bit word, so it copies ceil (salt_len / 4) words and leaves the rest 0.
u32 s[64] = { 0 };
for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
{
s[idx] = salt_bufs[SALT_POS_HOST].salt_buf[idx];
}
/**
* loop
*/
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
{
// Fresh copy of the password for every rule; apply_rules rewrites tmp.i and
// returns the new length.
pw_t tmp = PASTE_PW;
tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
// First hash: password bytes followed by salt bytes.
md5_ctx_t ctx0;
md5_init (&ctx0);
md5_update (&ctx0, tmp.i, tmp.pw_len);
md5_update (&ctx0, s, salt_len);
md5_final (&ctx0);
const u32 a = ctx0.h[0];
const u32 b = ctx0.h[1];
const u32 c = ctx0.h[2];
const u32 d = ctx0.h[3];
// Second hash: the message block is written by hand instead of going
// through md5_update / md5_final.
md5_ctx_t ctx;
md5_init (&ctx);
// Each word receives four hex characters: the table entry for one state byte
// in the low half and the entry for the next byte in the high half. Eight
// words therefore hold the 32-character hex form of a, b, c, d.
ctx.w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0
| uint_to_hex_lower8 ((a >> 8) & 255) << 16;
ctx.w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0
| uint_to_hex_lower8 ((a >> 24) & 255) << 16;
ctx.w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0
| uint_to_hex_lower8 ((b >> 8) & 255) << 16;
ctx.w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0
| uint_to_hex_lower8 ((b >> 24) & 255) << 16;
ctx.w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0
| uint_to_hex_lower8 ((c >> 8) & 255) << 16;
ctx.w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0
| uint_to_hex_lower8 ((c >> 24) & 255) << 16;
ctx.w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0
| uint_to_hex_lower8 ((d >> 8) & 255) << 16;
ctx.w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0
| uint_to_hex_lower8 ((d >> 24) & 255) << 16;
// Padding for a 32-byte message: the 0x80 marker directly after the text,
// zeros, and the message length in bits (32 * 8) in word 14 of the block.
ctx.w2[0] = 0x80;
ctx.w2[1] = 0;
ctx.w2[2] = 0;
ctx.w2[3] = 0;
ctx.w3[0] = 0;
ctx.w3[1] = 0;
ctx.w3[2] = 32 * 8;
ctx.w3[3] = 0;
md5_transform (ctx.w0, ctx.w1, ctx.w2, ctx.w3, ctx.h);
const u32 r0 = ctx.h[DGST_R0];
const u32 r1 = ctx.h[DGST_R1];
const u32 r2 = ctx.h[DGST_R2];
const u32 r3 = ctx.h[DGST_R3];
// NOTE(review): COMPARE_S_SCALAR is defined outside this file; presumably it
// also reads gid and il_pos from this scope - confirm before renaming them.
COMPARE_S_SCALAR (r0, r1, r2, r3);
}
}

@ -0,0 +1,862 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include M2S(INCLUDE_PATH/inc_vendor.h)
#include M2S(INCLUDE_PATH/inc_types.h)
#include M2S(INCLUDE_PATH/inc_platform.cl)
#include M2S(INCLUDE_PATH/inc_common.cl)
#include M2S(INCLUDE_PATH/inc_simd.cl)
#include M2S(INCLUDE_PATH/inc_hash_md5.cl)
#endif
/**
 * uint_to_hex_lower8 (i): look up every lane of i in the l_bin2asc table and
 * rebuild a u32x from the results with make_u32x.
 *
 * One definition exists per supported VECT_SIZE (1, 2, 4, 8, 16); the vector
 * variants index the table once per component (.s0, .s1, ...). No definition
 * is provided for any other VECT_SIZE.
 *
 * The macro does not declare l_bin2asc itself: a table with that exact name
 * must be in scope wherever the macro is expanded. The argument is used as a
 * table index directly, so callers mask it to 0..255 before passing it in.
 */
#if VECT_SIZE == 1
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
#elif VECT_SIZE == 2
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
#elif VECT_SIZE == 4
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
#elif VECT_SIZE == 8
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
#elif VECT_SIZE == 16
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
#endif
/**
 * Combinator kernel for mode 02630 that finishes each candidate with
 * COMPARE_M_SIMD.
 *
 * Per iteration it joins the left word cached from pws[gid] with right words
 * taken from combs_buf, ORs the shifted salt behind the joined password, and
 * runs the 64 MD5 steps inline over that single block. The resulting digest
 * is rendered as 32 lowercase hex characters and hashed with a second inline
 * run of the 64 MD5 steps, whose state is passed to COMPARE_M_SIMD.
 *
 * Only the first eight words of each password half and the first sixteen
 * words of the salt are read, and only message words 0..13 of the combined
 * buffer reach the first hash.
 */
KERNEL_FQ void m02630_m04 (KERN_ATTR_BASIC ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*
* Entry i holds the two lowercase hex characters of byte value i: the
* character for the high nibble in bits 0..7 and the character for the low
* nibble in bits 8..15. Each thread starts at its local id and advances by
* the local size, so the 256 entries are split across the work-group.
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
}
// NOTE(review): presumably a work-group barrier so the table is complete
// before any thread reads it - confirm against the SYNC_THREADS definition.
SYNC_THREADS ();
// The range check sits after the table fill and SYNC_THREADS, so threads
// beyond GID_CNT still take part in both before leaving.
if (gid >= GID_CNT) return;
/**
* base
*
* Left word: the first eight 32-bit words of this thread's password entry.
*/
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
// The mask keeps the length in the range 0..63.
const u32 pw_l_len = pws[gid].pw_len & 63;
/**
* salt
*
* The first sixteen 32-bit words of the salt, cached in private arrays.
*/
u32 salt_buf0[4];
u32 salt_buf1[4];
u32 salt_buf2[4];
u32 salt_buf3[4];
salt_buf0[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 0];
salt_buf0[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 1];
salt_buf0[2] = salt_bufs[SALT_POS_HOST].salt_buf[ 2];
salt_buf0[3] = salt_bufs[SALT_POS_HOST].salt_buf[ 3];
salt_buf1[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 4];
salt_buf1[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 5];
salt_buf1[2] = salt_bufs[SALT_POS_HOST].salt_buf[ 6];
salt_buf1[3] = salt_bufs[SALT_POS_HOST].salt_buf[ 7];
salt_buf2[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 8];
salt_buf2[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 9];
salt_buf2[2] = salt_bufs[SALT_POS_HOST].salt_buf[10];
salt_buf2[3] = salt_bufs[SALT_POS_HOST].salt_buf[11];
salt_buf3[0] = salt_bufs[SALT_POS_HOST].salt_buf[12];
salt_buf3[1] = salt_bufs[SALT_POS_HOST].salt_buf[13];
salt_buf3[2] = salt_bufs[SALT_POS_HOST].salt_buf[14];
salt_buf3[3] = salt_bufs[SALT_POS_HOST].salt_buf[15];
const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;
/**
* loop
*
* il_pos advances by VECT_SIZE, so each pass handles VECT_SIZE right words.
*/
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
{
// Both the right-word length and the combined length are masked to 0..63.
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63;
const u32x pw_len = (pw_l_len + pw_r_len) & 63;
/**
* concat password candidate
*/
u32x wordl0[4] = { 0 };
u32x wordl1[4] = { 0 };
u32x wordl2[4] = { 0 };
u32x wordl3[4] = { 0 };
wordl0[0] = pw_buf0[0];
wordl0[1] = pw_buf0[1];
wordl0[2] = pw_buf0[2];
wordl0[3] = pw_buf0[3];
wordl1[0] = pw_buf1[0];
wordl1[1] = pw_buf1[1];
wordl1[2] = pw_buf1[2];
wordl1[3] = pw_buf1[3];
u32x wordr0[4] = { 0 };
u32x wordr1[4] = { 0 };
u32x wordr2[4] = { 0 };
u32x wordr3[4] = { 0 };
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
// Whichever word comes second is moved by the length of the word that comes
// first; the two buffers are then ORed together below.
// NOTE(review): presumably switch_buffer_by_offset_le_VV shifts the buffer
// contents up by the given number of bytes - confirm against its definition.
if (COMBS_MODE == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
else
{
switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
}
u32x w0[4];
u32x w1[4];
u32x w2[4];
u32x w3[4];
w0[0] = wordl0[0] | wordr0[0];
w0[1] = wordl0[1] | wordr0[1];
w0[2] = wordl0[2] | wordr0[2];
w0[3] = wordl0[3] | wordr0[3];
w1[0] = wordl1[0] | wordr1[0];
w1[1] = wordl1[1] | wordr1[1];
w1[2] = wordl1[2] | wordr1[2];
w1[3] = wordl1[3] | wordr1[3];
w2[0] = wordl2[0] | wordr2[0];
w2[1] = wordl2[1] | wordr2[1];
w2[2] = wordl2[2] | wordr2[2];
w2[3] = wordl2[3] | wordr2[3];
w3[0] = wordl3[0] | wordr3[0];
w3[1] = wordl3[1] | wordr3[1];
w3[2] = wordl3[2] | wordr3[2];
w3[3] = wordl3[3] | wordr3[3];
/**
* append salt
*
* A per-iteration copy of the salt is moved by pw_len and ORed into the
* password buffer.
*/
u32x s0[4];
u32x s1[4];
u32x s2[4];
u32x s3[4];
s0[0] = salt_buf0[0];
s0[1] = salt_buf0[1];
s0[2] = salt_buf0[2];
s0[3] = salt_buf0[3];
s1[0] = salt_buf1[0];
s1[1] = salt_buf1[1];
s1[2] = salt_buf1[2];
s1[3] = salt_buf1[3];
s2[0] = salt_buf2[0];
s2[1] = salt_buf2[1];
s2[2] = salt_buf2[2];
s2[3] = salt_buf2[3];
s3[0] = salt_buf3[0];
s3[1] = salt_buf3[1];
s3[2] = salt_buf3[2];
s3[3] = salt_buf3[3];
switch_buffer_by_offset_le_VV (s0, s1, s2, s3, pw_len);
const u32x pw_salt_len = pw_len + salt_len;
w0[0] |= s0[0];
w0[1] |= s0[1];
w0[2] |= s0[2];
w0[3] |= s0[3];
w1[0] |= s1[0];
w1[1] |= s1[1];
w1[2] |= s1[2];
w1[3] |= s1[3];
w2[0] |= s2[0];
w2[1] |= s2[1];
w2[2] |= s2[2];
w2[3] |= s2[3];
w3[0] |= s3[0];
w3[1] |= s3[1];
w3[2] |= s3[2];
w3[3] |= s3[3];
/**
* md5
*
* First hash, over password + salt. Message words 0..13 come from the
* combined buffer; word 14 is the length in bits and word 15 is 0, so
* w3[2] and w3[3] computed above never reach the hash.
* NOTE(review): no 0x80 marker is written in this function; presumably the
* salt buffer already carries it - confirm on the host side.
*/
u32x w0_t = w0[0];
u32x w1_t = w0[1];
u32x w2_t = w0[2];
u32x w3_t = w0[3];
u32x w4_t = w1[0];
u32x w5_t = w1[1];
u32x w6_t = w1[2];
u32x w7_t = w1[3];
u32x w8_t = w2[0];
u32x w9_t = w2[1];
u32x wa_t = w2[2];
u32x wb_t = w2[3];
u32x wc_t = w3[0];
u32x wd_t = w3[1];
u32x we_t = pw_salt_len * 8;
u32x wf_t = 0;
u32x a = MD5M_A;
u32x b = MD5M_B;
u32x c = MD5M_C;
u32x d = MD5M_D;
// Steps 0..15 (MD5_Fo): message words in order 0..15.
MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03);
// Steps 16..31 (MD5_Go): message words 1, 6, 11, 0, 5, ...
MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13);
// t is never named again in this function.
// NOTE(review): presumably the scratch variable that the MD5_H1 / MD5_H2
// macros expand to - confirm before removing or renaming it.
u32x t;
// Steps 32..47 (MD5_H1 / MD5_H2 alternating): message words 5, 8, 11, 14, ...
MD5_STEP (MD5_H1, a, b, c, d, w5_t, MD5C20, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w8_t, MD5C21, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, wb_t, MD5C22, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, we_t, MD5C23, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w1_t, MD5C24, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w4_t, MD5C25, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w7_t, MD5C26, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, wa_t, MD5C27, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, wd_t, MD5C28, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w0_t, MD5C29, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w3_t, MD5C2a, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w6_t, MD5C2b, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w9_t, MD5C2c, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, wc_t, MD5C2d, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, wf_t, MD5C2e, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w2_t, MD5C2f, MD5S23);
// Steps 48..63 (MD5_I): message words 0, 7, 14, 5, ...
MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33);
// Add the starting values back in to obtain the digest of the first hash.
a += make_u32x (MD5M_A);
b += make_u32x (MD5M_B);
c += make_u32x (MD5M_C);
d += make_u32x (MD5M_D);
/**
* md5
*
* Second hash, over the hex text of the first digest. Each message word
* receives four hex characters: the table entry for one digest byte in the
* low half and the entry for the next byte in the high half.
*/
w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0
| uint_to_hex_lower8 ((a >> 8) & 255) << 16;
w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0
| uint_to_hex_lower8 ((a >> 24) & 255) << 16;
w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0
| uint_to_hex_lower8 ((b >> 8) & 255) << 16;
w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0
| uint_to_hex_lower8 ((b >> 24) & 255) << 16;
w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0
| uint_to_hex_lower8 ((c >> 8) & 255) << 16;
w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0
| uint_to_hex_lower8 ((c >> 24) & 255) << 16;
w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0
| uint_to_hex_lower8 ((d >> 8) & 255) << 16;
w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0
| uint_to_hex_lower8 ((d >> 24) & 255) << 16;
// Padding for a 32-byte message: the 0x80 marker directly after the text,
// zeros, and the message length in bits (32 * 8) in word 14.
w8_t = 0x80;
w9_t = 0;
wa_t = 0;
wb_t = 0;
wc_t = 0;
wd_t = 0;
we_t = 32 * 8;
wf_t = 0;
a = MD5M_A;
b = MD5M_B;
c = MD5M_C;
d = MD5M_D;
// Same 64-step schedule as the first hash.
MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03);
MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13);
MD5_STEP (MD5_H1, a, b, c, d, w5_t, MD5C20, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w8_t, MD5C21, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, wb_t, MD5C22, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, we_t, MD5C23, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w1_t, MD5C24, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w4_t, MD5C25, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w7_t, MD5C26, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, wa_t, MD5C27, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, wd_t, MD5C28, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w0_t, MD5C29, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w3_t, MD5C2a, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w6_t, MD5C2b, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w9_t, MD5C2c, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, wc_t, MD5C2d, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, wf_t, MD5C2e, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w2_t, MD5C2f, MD5S23);
MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33);
// Unlike the first hash, the starting values are not added back before the
// compare, and the state is passed in the order a, d, c, b.
// NOTE(review): presumably the stored target digests already have the
// starting values subtracted - confirm against the host-side module.
COMPARE_M_SIMD (a, d, c, b);
}
}
// Kernel entry point with an empty body: launching it performs no work.
// NOTE(review): presumably a placeholder so that an "m08" entry point exists
// for this mode even though only the "m04" variant carries an implementation
// - confirm against the host-side kernel selection.
KERNEL_FQ void m02630_m08 (KERN_ATTR_BASIC ())
{
}
// Kernel entry point with an empty body: launching it performs no work.
// NOTE(review): presumably a placeholder so that an "m16" entry point exists
// for this mode even though only the "m04" variant carries an implementation
// - confirm against the host-side kernel selection.
KERNEL_FQ void m02630_m16 (KERN_ATTR_BASIC ())
{
}
KERNEL_FQ void m02630_s04 (KERN_ATTR_BASIC ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
}
SYNC_THREADS ();
if (gid >= GID_CNT) return;
/**
* base
*/
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_l_len = pws[gid].pw_len & 63;
/**
* salt
*/
u32 salt_buf0[4];
u32 salt_buf1[4];
u32 salt_buf2[4];
u32 salt_buf3[4];
salt_buf0[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 0];
salt_buf0[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 1];
salt_buf0[2] = salt_bufs[SALT_POS_HOST].salt_buf[ 2];
salt_buf0[3] = salt_bufs[SALT_POS_HOST].salt_buf[ 3];
salt_buf1[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 4];
salt_buf1[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 5];
salt_buf1[2] = salt_bufs[SALT_POS_HOST].salt_buf[ 6];
salt_buf1[3] = salt_bufs[SALT_POS_HOST].salt_buf[ 7];
salt_buf2[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 8];
salt_buf2[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 9];
salt_buf2[2] = salt_bufs[SALT_POS_HOST].salt_buf[10];
salt_buf2[3] = salt_bufs[SALT_POS_HOST].salt_buf[11];
salt_buf3[0] = salt_bufs[SALT_POS_HOST].salt_buf[12];
salt_buf3[1] = salt_bufs[SALT_POS_HOST].salt_buf[13];
salt_buf3[2] = salt_bufs[SALT_POS_HOST].salt_buf[14];
salt_buf3[3] = salt_bufs[SALT_POS_HOST].salt_buf[15];
const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
};
/**
* loop
*/
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
{
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63;
const u32x pw_len = (pw_l_len + pw_r_len) & 63;
/**
* concat password candidate
*/
u32x wordl0[4] = { 0 };
u32x wordl1[4] = { 0 };
u32x wordl2[4] = { 0 };
u32x wordl3[4] = { 0 };
wordl0[0] = pw_buf0[0];
wordl0[1] = pw_buf0[1];
wordl0[2] = pw_buf0[2];
wordl0[3] = pw_buf0[3];
wordl1[0] = pw_buf1[0];
wordl1[1] = pw_buf1[1];
wordl1[2] = pw_buf1[2];
wordl1[3] = pw_buf1[3];
u32x wordr0[4] = { 0 };
u32x wordr1[4] = { 0 };
u32x wordr2[4] = { 0 };
u32x wordr3[4] = { 0 };
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
if (COMBS_MODE == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
else
{
switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
}
u32x w0[4];
u32x w1[4];
u32x w2[4];
u32x w3[4];
w0[0] = wordl0[0] | wordr0[0];
w0[1] = wordl0[1] | wordr0[1];
w0[2] = wordl0[2] | wordr0[2];
w0[3] = wordl0[3] | wordr0[3];
w1[0] = wordl1[0] | wordr1[0];
w1[1] = wordl1[1] | wordr1[1];
w1[2] = wordl1[2] | wordr1[2];
w1[3] = wordl1[3] | wordr1[3];
w2[0] = wordl2[0] | wordr2[0];
w2[1] = wordl2[1] | wordr2[1];
w2[2] = wordl2[2] | wordr2[2];
w2[3] = wordl2[3] | wordr2[3];
w3[0] = wordl3[0] | wordr3[0];
w3[1] = wordl3[1] | wordr3[1];
w3[2] = wordl3[2] | wordr3[2];
w3[3] = wordl3[3] | wordr3[3];
/**
* append salt
*/
u32x s0[4];
u32x s1[4];
u32x s2[4];
u32x s3[4];
s0[0] = salt_buf0[0];
s0[1] = salt_buf0[1];
s0[2] = salt_buf0[2];
s0[3] = salt_buf0[3];
s1[0] = salt_buf1[0];
s1[1] = salt_buf1[1];
s1[2] = salt_buf1[2];
s1[3] = salt_buf1[3];
s2[0] = salt_buf2[0];
s2[1] = salt_buf2[1];
s2[2] = salt_buf2[2];
s2[3] = salt_buf2[3];
s3[0] = salt_buf3[0];
s3[1] = salt_buf3[1];
s3[2] = salt_buf3[2];
s3[3] = salt_buf3[3];
switch_buffer_by_offset_le_VV (s0, s1, s2, s3, pw_len);
const u32x pw_salt_len = pw_len + salt_len;
w0[0] |= s0[0];
w0[1] |= s0[1];
w0[2] |= s0[2];
w0[3] |= s0[3];
w1[0] |= s1[0];
w1[1] |= s1[1];
w1[2] |= s1[2];
w1[3] |= s1[3];
w2[0] |= s2[0];
w2[1] |= s2[1];
w2[2] |= s2[2];
w2[3] |= s2[3];
w3[0] |= s3[0];
w3[1] |= s3[1];
w3[2] |= s3[2];
w3[3] |= s3[3];
/**
* md5
*/
u32x w0_t = w0[0];
u32x w1_t = w0[1];
u32x w2_t = w0[2];
u32x w3_t = w0[3];
u32x w4_t = w1[0];
u32x w5_t = w1[1];
u32x w6_t = w1[2];
u32x w7_t = w1[3];
u32x w8_t = w2[0];
u32x w9_t = w2[1];
u32x wa_t = w2[2];
u32x wb_t = w2[3];
u32x wc_t = w3[0];
u32x wd_t = w3[1];
u32x we_t = pw_salt_len * 8;
u32x wf_t = 0;
u32x a = MD5M_A;
u32x b = MD5M_B;
u32x c = MD5M_C;
u32x d = MD5M_D;
MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03);
MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13);
u32x t;
MD5_STEP (MD5_H1, a, b, c, d, w5_t, MD5C20, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w8_t, MD5C21, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, wb_t, MD5C22, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, we_t, MD5C23, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w1_t, MD5C24, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w4_t, MD5C25, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w7_t, MD5C26, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, wa_t, MD5C27, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, wd_t, MD5C28, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w0_t, MD5C29, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w3_t, MD5C2a, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w6_t, MD5C2b, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w9_t, MD5C2c, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, wc_t, MD5C2d, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, wf_t, MD5C2e, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w2_t, MD5C2f, MD5S23);
MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33);
a += make_u32x (MD5M_A);
b += make_u32x (MD5M_B);
c += make_u32x (MD5M_C);
d += make_u32x (MD5M_D);
/**
* md5
*/
w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0
| uint_to_hex_lower8 ((a >> 8) & 255) << 16;
w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0
| uint_to_hex_lower8 ((a >> 24) & 255) << 16;
w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0
| uint_to_hex_lower8 ((b >> 8) & 255) << 16;
w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0
| uint_to_hex_lower8 ((b >> 24) & 255) << 16;
w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0
| uint_to_hex_lower8 ((c >> 8) & 255) << 16;
w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0
| uint_to_hex_lower8 ((c >> 24) & 255) << 16;
w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0
| uint_to_hex_lower8 ((d >> 8) & 255) << 16;
w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0
| uint_to_hex_lower8 ((d >> 24) & 255) << 16;
w8_t = 0x80;
w9_t = 0;
wa_t = 0;
wb_t = 0;
wc_t = 0;
wd_t = 0;
we_t = 32 * 8;
wf_t = 0;
a = MD5M_A;
b = MD5M_B;
c = MD5M_C;
d = MD5M_D;
MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03);
MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13);
MD5_STEP (MD5_H1, a, b, c, d, w5_t, MD5C20, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w8_t, MD5C21, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, wb_t, MD5C22, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, we_t, MD5C23, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w1_t, MD5C24, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w4_t, MD5C25, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w7_t, MD5C26, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, wa_t, MD5C27, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, wd_t, MD5C28, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w0_t, MD5C29, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w3_t, MD5C2a, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w6_t, MD5C2b, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w9_t, MD5C2c, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, wc_t, MD5C2d, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, wf_t, MD5C2e, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w2_t, MD5C2f, MD5S23);
MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30);
if (MATCHES_NONE_VS (a, search[0])) continue;
MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33);
COMPARE_S_SIMD (a, d, c, b);
}
}
/**
 * Empty kernel: the body contains no statements, so this entry point performs no work.
 * NOTE(review): presumably kept only so that the symbol exists next to the other
 * m02630_s* kernels - confirm against the host side.
 */
KERNEL_FQ void m02630_s08 (KERN_ATTR_BASIC ())
{
}
/**
 * Empty kernel: the body contains no statements, so this entry point performs no work.
 * NOTE(review): presumably kept only so that the symbol exists next to the other
 * m02630_s* kernels - confirm against the host side.
 */
KERNEL_FQ void m02630_s16 (KERN_ATTR_BASIC ())
{
}

@ -0,0 +1,257 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
//#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include M2S(INCLUDE_PATH/inc_vendor.h)
#include M2S(INCLUDE_PATH/inc_types.h)
#include M2S(INCLUDE_PATH/inc_platform.cl)
#include M2S(INCLUDE_PATH/inc_common.cl)
#include M2S(INCLUDE_PATH/inc_scalar.cl)
#include M2S(INCLUDE_PATH/inc_hash_md5.cl)
#endif
/**
 * uint_to_hex_lower8 (i): look up l_bin2asc[] for the value i, once per vector lane
 * (the lane count is selected by VECT_SIZE), and pack the results with make_u32x ().
 *
 * The macro names l_bin2asc directly, so an array of that name must be in scope
 * wherever it is expanded. The kernels in this file fill l_bin2asc[v] with the two
 * lowercase hex characters of v: the character for bits 4-7 of v in the low byte,
 * the character for bits 0-3 of v in the next byte.
 *
 * For VECT_SIZE > 1 the argument is expanded once per lane.
 * No definition is provided for VECT_SIZE values other than 1, 2, 4, 8 and 16.
 */
#if VECT_SIZE == 1
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
#elif VECT_SIZE == 2
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
#elif VECT_SIZE == 4
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
#elif VECT_SIZE == 8
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
#elif VECT_SIZE == 16
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
#endif
/**
 * For every il_pos: md5 over pws[gid], then combs_buf[il_pos], then the salt;
 * the resulting digest is hex-encoded (lowercase) and hashed with md5 again.
 * The second digest is handed to COMPARE_M_SCALAR.
 */
KERNEL_FQ void m02630_mxx (KERN_ATTR_BASIC ())
{
  /**
   * work-item coordinates
   */

  const u64 gid = get_global_id (0);
  const u64 lid = get_local_id (0);
  const u64 lsz = get_local_size (0);

  /**
   * value -> two lowercase hex characters lookup table
   * each work-item fills the entries lid, lid + lsz, lid + 2 * lsz, ...
   */

  LOCAL_VK u32 l_bin2asc[256];

  for (u32 val = lid; val < 256; val += lsz)
  {
    const u32 nib_lo = val & 15;
    const u32 nib_hi = (val >> 4) & 15;

    const u32 chr_lo = (nib_lo < 10) ? ('0' + nib_lo) : ('a' - 10 + nib_lo);
    const u32 chr_hi = (nib_hi < 10) ? ('0' + nib_hi) : ('a' - 10 + nib_hi);

    // character of the upper nibble in the low byte, character of the lower nibble above it
    l_bin2asc[val] = (chr_lo << 8) | chr_hi;
  }

  SYNC_THREADS ();

  // out-of-range work-items leave only after the table fill and SYNC_THREADS ()
  if (gid >= GID_CNT) return;

  /**
   * salt, copied word by word into a zero-initialized buffer
   */

  const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;

  u32 s[64] = { 0 };

  u32 salt_word = 0;

  for (u32 salt_off = 0; salt_off < salt_len; salt_off += 4)
  {
    s[salt_word] = salt_bufs[SALT_POS_HOST].salt_buf[salt_word];

    salt_word++;
  }

  /**
   * md5 context pre-loaded with pws[gid]; copied for every il_pos below
   */

  md5_ctx_t ctx_base;

  md5_init (&ctx_base);

  md5_update_global (&ctx_base, pws[gid].i, pws[gid].pw_len);

  /**
   * loop
   */

  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
  {
    // first md5: pws[gid] + combs_buf[il_pos] + salt

    md5_ctx_t ctx_inner = ctx_base;

    md5_update_global (&ctx_inner, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);

    md5_update (&ctx_inner, s, salt_len);

    md5_final (&ctx_inner);

    const u32 inner_a = ctx_inner.h[0];
    const u32 inner_b = ctx_inner.h[1];
    const u32 inner_c = ctx_inner.h[2];
    const u32 inner_d = ctx_inner.h[3];

    // second md5: the block is assembled by hand from the hex form of the first digest

    md5_ctx_t ctx_outer;

    md5_init (&ctx_outer);

    ctx_outer.len = 32;

    // each table lookup yields 2 characters (16 bit), so one word holds 2 digest bytes

    ctx_outer.w0[0] = (uint_to_hex_lower8 ((inner_a      ) & 255)      )
                    | (uint_to_hex_lower8 ((inner_a >>  8) & 255) << 16);
    ctx_outer.w0[1] = (uint_to_hex_lower8 ((inner_a >> 16) & 255)      )
                    | (uint_to_hex_lower8 ((inner_a >> 24) & 255) << 16);
    ctx_outer.w0[2] = (uint_to_hex_lower8 ((inner_b      ) & 255)      )
                    | (uint_to_hex_lower8 ((inner_b >>  8) & 255) << 16);
    ctx_outer.w0[3] = (uint_to_hex_lower8 ((inner_b >> 16) & 255)      )
                    | (uint_to_hex_lower8 ((inner_b >> 24) & 255) << 16);
    ctx_outer.w1[0] = (uint_to_hex_lower8 ((inner_c      ) & 255)      )
                    | (uint_to_hex_lower8 ((inner_c >>  8) & 255) << 16);
    ctx_outer.w1[1] = (uint_to_hex_lower8 ((inner_c >> 16) & 255)      )
                    | (uint_to_hex_lower8 ((inner_c >> 24) & 255) << 16);
    ctx_outer.w1[2] = (uint_to_hex_lower8 ((inner_d      ) & 255)      )
                    | (uint_to_hex_lower8 ((inner_d >>  8) & 255) << 16);
    ctx_outer.w1[3] = (uint_to_hex_lower8 ((inner_d >> 16) & 255)      )
                    | (uint_to_hex_lower8 ((inner_d >> 24) & 255) << 16);

    // 0x80 marker right after the 32 characters, zero fill, length in bits in w3[2]

    ctx_outer.w2[0] = 0x80;
    ctx_outer.w2[1] = 0;
    ctx_outer.w2[2] = 0;
    ctx_outer.w2[3] = 0;
    ctx_outer.w3[0] = 0;
    ctx_outer.w3[1] = 0;
    ctx_outer.w3[2] = 32 * 8;
    ctx_outer.w3[3] = 0;

    md5_transform (ctx_outer.w0, ctx_outer.w1, ctx_outer.w2, ctx_outer.w3, ctx_outer.h);

    const u32 r0 = ctx_outer.h[DGST_R0];
    const u32 r1 = ctx_outer.h[DGST_R1];
    const u32 r2 = ctx_outer.h[DGST_R2];
    const u32 r3 = ctx_outer.h[DGST_R3];

    COMPARE_M_SCALAR (r0, r1, r2, r3);
  }
}
/**
 * For every il_pos: md5 over pws[gid], then combs_buf[il_pos], then the salt;
 * the resulting digest is hex-encoded (lowercase) and hashed with md5 again.
 * The second digest is handed to COMPARE_S_SCALAR.
 */
KERNEL_FQ void m02630_sxx (KERN_ATTR_BASIC ())
{
  /**
   * work-item coordinates
   */

  const u64 gid = get_global_id (0);
  const u64 lid = get_local_id (0);
  const u64 lsz = get_local_size (0);

  /**
   * value -> two lowercase hex characters lookup table
   * each work-item fills the entries lid, lid + lsz, lid + 2 * lsz, ...
   */

  LOCAL_VK u32 l_bin2asc[256];

  for (u32 val = lid; val < 256; val += lsz)
  {
    const u32 nib_lo = val & 15;
    const u32 nib_hi = (val >> 4) & 15;

    const u32 chr_lo = (nib_lo < 10) ? ('0' + nib_lo) : ('a' - 10 + nib_lo);
    const u32 chr_hi = (nib_hi < 10) ? ('0' + nib_hi) : ('a' - 10 + nib_hi);

    // character of the upper nibble in the low byte, character of the lower nibble above it
    l_bin2asc[val] = (chr_lo << 8) | chr_hi;
  }

  SYNC_THREADS ();

  // out-of-range work-items leave only after the table fill and SYNC_THREADS ()
  if (gid >= GID_CNT) return;

  /**
   * salt, copied word by word into a zero-initialized buffer
   */

  const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;

  u32 s[64] = { 0 };

  u32 salt_word = 0;

  for (u32 salt_off = 0; salt_off < salt_len; salt_off += 4)
  {
    s[salt_word] = salt_bufs[SALT_POS_HOST].salt_buf[salt_word];

    salt_word++;
  }

  /**
   * md5 context pre-loaded with pws[gid]; copied for every il_pos below
   */

  md5_ctx_t ctx_base;

  md5_init (&ctx_base);

  md5_update_global (&ctx_base, pws[gid].i, pws[gid].pw_len);

  /**
   * digest words taken from digests_buf[DIGESTS_OFFSET_HOST]
   * not referenced by name below - presumably consumed by COMPARE_S_SCALAR (confirm)
   */

  const u32 search[4] =
  {
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
  };

  /**
   * loop
   */

  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
  {
    // first md5: pws[gid] + combs_buf[il_pos] + salt

    md5_ctx_t ctx_inner = ctx_base;

    md5_update_global (&ctx_inner, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);

    md5_update (&ctx_inner, s, salt_len);

    md5_final (&ctx_inner);

    const u32 inner_a = ctx_inner.h[0];
    const u32 inner_b = ctx_inner.h[1];
    const u32 inner_c = ctx_inner.h[2];
    const u32 inner_d = ctx_inner.h[3];

    // second md5: the block is assembled by hand from the hex form of the first digest

    md5_ctx_t ctx_outer;

    md5_init (&ctx_outer);

    ctx_outer.len = 32;

    // each table lookup yields 2 characters (16 bit), so one word holds 2 digest bytes

    ctx_outer.w0[0] = (uint_to_hex_lower8 ((inner_a      ) & 255)      )
                    | (uint_to_hex_lower8 ((inner_a >>  8) & 255) << 16);
    ctx_outer.w0[1] = (uint_to_hex_lower8 ((inner_a >> 16) & 255)      )
                    | (uint_to_hex_lower8 ((inner_a >> 24) & 255) << 16);
    ctx_outer.w0[2] = (uint_to_hex_lower8 ((inner_b      ) & 255)      )
                    | (uint_to_hex_lower8 ((inner_b >>  8) & 255) << 16);
    ctx_outer.w0[3] = (uint_to_hex_lower8 ((inner_b >> 16) & 255)      )
                    | (uint_to_hex_lower8 ((inner_b >> 24) & 255) << 16);
    ctx_outer.w1[0] = (uint_to_hex_lower8 ((inner_c      ) & 255)      )
                    | (uint_to_hex_lower8 ((inner_c >>  8) & 255) << 16);
    ctx_outer.w1[1] = (uint_to_hex_lower8 ((inner_c >> 16) & 255)      )
                    | (uint_to_hex_lower8 ((inner_c >> 24) & 255) << 16);
    ctx_outer.w1[2] = (uint_to_hex_lower8 ((inner_d      ) & 255)      )
                    | (uint_to_hex_lower8 ((inner_d >>  8) & 255) << 16);
    ctx_outer.w1[3] = (uint_to_hex_lower8 ((inner_d >> 16) & 255)      )
                    | (uint_to_hex_lower8 ((inner_d >> 24) & 255) << 16);

    // 0x80 marker right after the 32 characters, zero fill, length in bits in w3[2]

    ctx_outer.w2[0] = 0x80;
    ctx_outer.w2[1] = 0;
    ctx_outer.w2[2] = 0;
    ctx_outer.w2[3] = 0;
    ctx_outer.w3[0] = 0;
    ctx_outer.w3[1] = 0;
    ctx_outer.w3[2] = 32 * 8;
    ctx_outer.w3[3] = 0;

    md5_transform (ctx_outer.w0, ctx_outer.w1, ctx_outer.w2, ctx_outer.w3, ctx_outer.h);

    const u32 r0 = ctx_outer.h[DGST_R0];
    const u32 r1 = ctx_outer.h[DGST_R1];
    const u32 r2 = ctx_outer.h[DGST_R2];
    const u32 r3 = ctx_outer.h[DGST_R3];

    COMPARE_S_SCALAR (r0, r1, r2, r3);
  }
}

File diff suppressed because it is too large Load Diff

@ -0,0 +1,283 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include M2S(INCLUDE_PATH/inc_vendor.h)
#include M2S(INCLUDE_PATH/inc_types.h)
#include M2S(INCLUDE_PATH/inc_platform.cl)
#include M2S(INCLUDE_PATH/inc_common.cl)
#include M2S(INCLUDE_PATH/inc_simd.cl)
#include M2S(INCLUDE_PATH/inc_hash_md5.cl)
#endif
/**
 * uint_to_hex_lower8 (i): look up l_bin2asc[] for the value i, once per vector lane
 * (the lane count is selected by VECT_SIZE), and pack the results with make_u32x ().
 *
 * The macro names l_bin2asc directly, so an array of that name must be in scope
 * wherever it is expanded. The kernels in this file fill l_bin2asc[v] with the two
 * lowercase hex characters of v: the character for bits 4-7 of v in the low byte,
 * the character for bits 0-3 of v in the next byte.
 *
 * For VECT_SIZE > 1 the argument is expanded once per lane.
 * No definition is provided for VECT_SIZE values other than 1, 2, 4, 8 and 16.
 */
#if VECT_SIZE == 1
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
#elif VECT_SIZE == 2
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
#elif VECT_SIZE == 4
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
#elif VECT_SIZE == 8
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
#elif VECT_SIZE == 16
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
#endif
/**
 * For every il_pos step: md5 over the candidate words (pws[gid] with word 0 OR-ed
 * with words_buf_r[il_pos / VECT_SIZE]) followed by the salt; the resulting digest
 * is hex-encoded (lowercase) and hashed with md5 again.
 * The second digest is handed to COMPARE_M_SIMD.
 */
KERNEL_FQ void m02630_mxx (KERN_ATTR_VECTOR ())
{
  /**
   * work-item coordinates
   */

  const u64 gid = get_global_id (0);
  const u64 lid = get_local_id (0);
  const u64 lsz = get_local_size (0);

  /**
   * value -> two lowercase hex characters lookup table
   * each work-item fills the entries lid, lid + lsz, lid + 2 * lsz, ...
   */

  LOCAL_VK u32 l_bin2asc[256];

  for (u32 val = lid; val < 256; val += lsz)
  {
    const u32 nib_lo = val & 15;
    const u32 nib_hi = (val >> 4) & 15;

    const u32 chr_lo = (nib_lo < 10) ? ('0' + nib_lo) : ('a' - 10 + nib_lo);
    const u32 chr_hi = (nib_hi < 10) ? ('0' + nib_hi) : ('a' - 10 + nib_hi);

    // character of the upper nibble in the low byte, character of the lower nibble above it
    l_bin2asc[val] = (chr_lo << 8) | chr_hi;
  }

  SYNC_THREADS ();

  // out-of-range work-items leave only after the table fill and SYNC_THREADS ()
  if (gid >= GID_CNT) return;

  /**
   * candidate words, copied word by word into a zero-initialized buffer
   */

  const u32 pw_len = pws[gid].pw_len;

  u32x w[64] = { 0 };

  u32 pw_word = 0;

  for (u32 pw_off = 0; pw_off < pw_len; pw_off += 4)
  {
    w[pw_word] = pws[gid].i[pw_word];

    pw_word++;
  }

  /**
   * salt, copied the same way
   */

  const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;

  u32x s[64] = { 0 };

  u32 salt_word = 0;

  for (u32 salt_off = 0; salt_off < salt_len; salt_off += 4)
  {
    s[salt_word] = salt_bufs[SALT_POS_HOST].salt_buf[salt_word];

    salt_word++;
  }

  /**
   * loop
   */

  // word 0 as loaded from pws[gid]; w[0] itself is overwritten in every iteration
  const u32x w0l = w[0];

  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
  {
    const u32x w0r = words_buf_r[il_pos / VECT_SIZE];

    w[0] = w0l | w0r;

    // first md5: candidate words + salt

    md5_ctx_vector_t ctx_inner;

    md5_init_vector (&ctx_inner);

    md5_update_vector (&ctx_inner, w, pw_len);

    md5_update_vector (&ctx_inner, s, salt_len);

    md5_final_vector (&ctx_inner);

    const u32x inner_a = ctx_inner.h[0];
    const u32x inner_b = ctx_inner.h[1];
    const u32x inner_c = ctx_inner.h[2];
    const u32x inner_d = ctx_inner.h[3];

    // second md5: the block is assembled by hand from the hex form of the first digest

    md5_ctx_vector_t ctx_outer;

    md5_init_vector (&ctx_outer);

    ctx_outer.len = 32;

    // each table lookup yields 2 characters (16 bit), so one word holds 2 digest bytes

    ctx_outer.w0[0] = (uint_to_hex_lower8 ((inner_a      ) & 255)      )
                    | (uint_to_hex_lower8 ((inner_a >>  8) & 255) << 16);
    ctx_outer.w0[1] = (uint_to_hex_lower8 ((inner_a >> 16) & 255)      )
                    | (uint_to_hex_lower8 ((inner_a >> 24) & 255) << 16);
    ctx_outer.w0[2] = (uint_to_hex_lower8 ((inner_b      ) & 255)      )
                    | (uint_to_hex_lower8 ((inner_b >>  8) & 255) << 16);
    ctx_outer.w0[3] = (uint_to_hex_lower8 ((inner_b >> 16) & 255)      )
                    | (uint_to_hex_lower8 ((inner_b >> 24) & 255) << 16);
    ctx_outer.w1[0] = (uint_to_hex_lower8 ((inner_c      ) & 255)      )
                    | (uint_to_hex_lower8 ((inner_c >>  8) & 255) << 16);
    ctx_outer.w1[1] = (uint_to_hex_lower8 ((inner_c >> 16) & 255)      )
                    | (uint_to_hex_lower8 ((inner_c >> 24) & 255) << 16);
    ctx_outer.w1[2] = (uint_to_hex_lower8 ((inner_d      ) & 255)      )
                    | (uint_to_hex_lower8 ((inner_d >>  8) & 255) << 16);
    ctx_outer.w1[3] = (uint_to_hex_lower8 ((inner_d >> 16) & 255)      )
                    | (uint_to_hex_lower8 ((inner_d >> 24) & 255) << 16);

    // 0x80 marker right after the 32 characters, zero fill, length in bits in w3[2]

    ctx_outer.w2[0] = 0x80;
    ctx_outer.w2[1] = 0;
    ctx_outer.w2[2] = 0;
    ctx_outer.w2[3] = 0;
    ctx_outer.w3[0] = 0;
    ctx_outer.w3[1] = 0;
    ctx_outer.w3[2] = 32 * 8;
    ctx_outer.w3[3] = 0;

    md5_transform_vector (ctx_outer.w0, ctx_outer.w1, ctx_outer.w2, ctx_outer.w3, ctx_outer.h);

    const u32x r0 = ctx_outer.h[DGST_R0];
    const u32x r1 = ctx_outer.h[DGST_R1];
    const u32x r2 = ctx_outer.h[DGST_R2];
    const u32x r3 = ctx_outer.h[DGST_R3];

    COMPARE_M_SIMD (r0, r1, r2, r3);
  }
}
/**
 * For every il_pos step: md5 over the candidate words (pws[gid] with word 0 OR-ed
 * with words_buf_r[il_pos / VECT_SIZE]) followed by the salt; the resulting digest
 * is hex-encoded (lowercase) and hashed with md5 again.
 * The second digest is handed to COMPARE_S_SIMD.
 */
KERNEL_FQ void m02630_sxx (KERN_ATTR_VECTOR ())
{
  /**
   * work-item coordinates
   */

  const u64 gid = get_global_id (0);
  const u64 lid = get_local_id (0);
  const u64 lsz = get_local_size (0);

  /**
   * value -> two lowercase hex characters lookup table
   * each work-item fills the entries lid, lid + lsz, lid + 2 * lsz, ...
   */

  LOCAL_VK u32 l_bin2asc[256];

  for (u32 val = lid; val < 256; val += lsz)
  {
    const u32 nib_lo = val & 15;
    const u32 nib_hi = (val >> 4) & 15;

    const u32 chr_lo = (nib_lo < 10) ? ('0' + nib_lo) : ('a' - 10 + nib_lo);
    const u32 chr_hi = (nib_hi < 10) ? ('0' + nib_hi) : ('a' - 10 + nib_hi);

    // character of the upper nibble in the low byte, character of the lower nibble above it
    l_bin2asc[val] = (chr_lo << 8) | chr_hi;
  }

  SYNC_THREADS ();

  // out-of-range work-items leave only after the table fill and SYNC_THREADS ()
  if (gid >= GID_CNT) return;

  /**
   * candidate words, copied word by word into a zero-initialized buffer
   */

  const u32 pw_len = pws[gid].pw_len;

  u32x w[64] = { 0 };

  u32 pw_word = 0;

  for (u32 pw_off = 0; pw_off < pw_len; pw_off += 4)
  {
    w[pw_word] = pws[gid].i[pw_word];

    pw_word++;
  }

  /**
   * salt, copied the same way
   */

  const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;

  u32x s[64] = { 0 };

  u32 salt_word = 0;

  for (u32 salt_off = 0; salt_off < salt_len; salt_off += 4)
  {
    s[salt_word] = salt_bufs[SALT_POS_HOST].salt_buf[salt_word];

    salt_word++;
  }

  /**
   * digest words taken from digests_buf[DIGESTS_OFFSET_HOST]
   * not referenced by name below - presumably consumed by COMPARE_S_SIMD (confirm)
   */

  const u32 search[4] =
  {
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
  };

  /**
   * loop
   */

  // word 0 as loaded from pws[gid]; w[0] itself is overwritten in every iteration
  const u32x w0l = w[0];

  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
  {
    const u32x w0r = words_buf_r[il_pos / VECT_SIZE];

    w[0] = w0l | w0r;

    // first md5: candidate words + salt

    md5_ctx_vector_t ctx_inner;

    md5_init_vector (&ctx_inner);

    md5_update_vector (&ctx_inner, w, pw_len);

    md5_update_vector (&ctx_inner, s, salt_len);

    md5_final_vector (&ctx_inner);

    const u32x inner_a = ctx_inner.h[0];
    const u32x inner_b = ctx_inner.h[1];
    const u32x inner_c = ctx_inner.h[2];
    const u32x inner_d = ctx_inner.h[3];

    // second md5: the block is assembled by hand from the hex form of the first digest

    md5_ctx_vector_t ctx_outer;

    md5_init_vector (&ctx_outer);

    ctx_outer.len = 32;

    // each table lookup yields 2 characters (16 bit), so one word holds 2 digest bytes

    ctx_outer.w0[0] = (uint_to_hex_lower8 ((inner_a      ) & 255)      )
                    | (uint_to_hex_lower8 ((inner_a >>  8) & 255) << 16);
    ctx_outer.w0[1] = (uint_to_hex_lower8 ((inner_a >> 16) & 255)      )
                    | (uint_to_hex_lower8 ((inner_a >> 24) & 255) << 16);
    ctx_outer.w0[2] = (uint_to_hex_lower8 ((inner_b      ) & 255)      )
                    | (uint_to_hex_lower8 ((inner_b >>  8) & 255) << 16);
    ctx_outer.w0[3] = (uint_to_hex_lower8 ((inner_b >> 16) & 255)      )
                    | (uint_to_hex_lower8 ((inner_b >> 24) & 255) << 16);
    ctx_outer.w1[0] = (uint_to_hex_lower8 ((inner_c      ) & 255)      )
                    | (uint_to_hex_lower8 ((inner_c >>  8) & 255) << 16);
    ctx_outer.w1[1] = (uint_to_hex_lower8 ((inner_c >> 16) & 255)      )
                    | (uint_to_hex_lower8 ((inner_c >> 24) & 255) << 16);
    ctx_outer.w1[2] = (uint_to_hex_lower8 ((inner_d      ) & 255)      )
                    | (uint_to_hex_lower8 ((inner_d >>  8) & 255) << 16);
    ctx_outer.w1[3] = (uint_to_hex_lower8 ((inner_d >> 16) & 255)      )
                    | (uint_to_hex_lower8 ((inner_d >> 24) & 255) << 16);

    // 0x80 marker right after the 32 characters, zero fill, length in bits in w3[2]

    ctx_outer.w2[0] = 0x80;
    ctx_outer.w2[1] = 0;
    ctx_outer.w2[2] = 0;
    ctx_outer.w2[3] = 0;
    ctx_outer.w3[0] = 0;
    ctx_outer.w3[1] = 0;
    ctx_outer.w3[2] = 32 * 8;
    ctx_outer.w3[3] = 0;

    md5_transform_vector (ctx_outer.w0, ctx_outer.w1, ctx_outer.w2, ctx_outer.w3, ctx_outer.h);

    const u32x r0 = ctx_outer.h[DGST_R0];
    const u32x r1 = ctx_outer.h[DGST_R1];
    const u32x r2 = ctx_outer.h[DGST_R2];
    const u32x r3 = ctx_outer.h[DGST_R3];

    COMPARE_S_SIMD (r0, r1, r2, r3);
  }
}

@ -339,7 +339,7 @@ DECLSPEC void SET_KEY32 (LOCAL_AS u32 *S, const u64 key, const u32 val)
#else
// access pattern: linear access with S offset already set to right offset based on thread ID saving it from compuation
// access pattern: linear access with S offset already set to right offset based on thread ID saving it from computation
// makes sense if there are no thread IDs (for instance on CPU)
DECLSPEC inline u32 GET_KEY32 (LOCAL_AS u32 *S, const u64 key)

@ -0,0 +1,820 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include M2S(INCLUDE_PATH/inc_vendor.h)
#include M2S(INCLUDE_PATH/inc_types.h)
#include M2S(INCLUDE_PATH/inc_platform.cl)
#include M2S(INCLUDE_PATH/inc_common.cl)
#include M2S(INCLUDE_PATH/inc_rp_optimized.h)
#include M2S(INCLUDE_PATH/inc_rp_optimized.cl)
#include M2S(INCLUDE_PATH/inc_simd.cl)
#include M2S(INCLUDE_PATH/inc_hash_md5.cl)
#endif
/**
 * uint_to_hex_lower8 (i): look up l_bin2asc[] for the value i, once per vector lane
 * (the lane count is selected by VECT_SIZE), and pack the results with make_u32x ().
 *
 * The macro names l_bin2asc directly, so an array of that name must be in scope
 * wherever it is expanded. The kernel below fills l_bin2asc[v] with the two
 * lowercase hex characters of v: the character for bits 4-7 of v in the low byte,
 * the character for bits 0-3 of v in the next byte.
 *
 * For VECT_SIZE > 1 the argument is expanded once per lane.
 * No definition is provided for VECT_SIZE values other than 1, 2, 4, 8 and 16.
 */
#if VECT_SIZE == 1
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
#elif VECT_SIZE == 2
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
#elif VECT_SIZE == 4
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
#elif VECT_SIZE == 8
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
#elif VECT_SIZE == 16
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
#endif
/**
 * m03610_m04: rule-attack kernel that, for every rule-mangled candidate, runs
 * three chained single-block MD5 computations and hands the last state to
 * COMPARE_M_SIMD.
 *
 * Stages per candidate, as written below:
 *   1. MD5 over the mangled password (w0/w1, 0x80 appended, bit length in w3[2]).
 *   2. MD5 over the 32 lowercase hex characters of the stage-1 digest.
 *   3. MD5 over the 32 lowercase hex characters of the stage-2 digest followed
 *      by the first six salt words; the length field is (32 + salt_len) bytes.
 *
 * NOTE(review): stage 3 never writes a 0x80 terminator itself, so the salt
 * buffer presumably already carries the padding byte - confirm on the host side.
 */
KERNEL_FQ void m03610_m04 (KERN_ATTR_RULES ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
// 256-entry lookup: entry i packs two ASCII lowercase hex digits of byte i,
// the digit of the high nibble in bits 0..7 and the digit of the low nibble in bits 8..15.
// NOTE(review): LOCAL_VK presumably places the table in work-group local memory - confirm.
LOCAL_VK u32 l_bin2asc[256];
// The table is filled cooperatively: each work-item starts at its local id and strides by the local size.
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
}
// Placed before the gid bounds check, so every work-item reaches it, including those that return right after.
// NOTE(review): presumably a work-group barrier that publishes the table - confirm.
SYNC_THREADS ();
if (gid >= GID_CNT) return;
/**
* base
*/
// First eight 32-bit words (32 bytes) of this work-item's base password.
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
// The mask keeps the length in the range 0..63.
const u32 pw_len = pws[gid].pw_len & 63;
/**
* salt
*/
// Only the first six salt words are read; the remaining slots are zeroed.
// salt_buf1[2..3], salt_buf2 and salt_buf3 are not read anywhere later in this kernel.
u32 salt_buf0[4];
u32 salt_buf1[4];
u32 salt_buf2[4];
u32 salt_buf3[4];
salt_buf0[0] = salt_bufs[SALT_POS_HOST].salt_buf[0];
salt_buf0[1] = salt_bufs[SALT_POS_HOST].salt_buf[1];
salt_buf0[2] = salt_bufs[SALT_POS_HOST].salt_buf[2];
salt_buf0[3] = salt_bufs[SALT_POS_HOST].salt_buf[3];
salt_buf1[0] = salt_bufs[SALT_POS_HOST].salt_buf[4];
salt_buf1[1] = salt_bufs[SALT_POS_HOST].salt_buf[5];
salt_buf1[2] = 0;
salt_buf1[3] = 0;
salt_buf2[0] = 0;
salt_buf2[1] = 0;
salt_buf2[2] = 0;
salt_buf2[3] = 0;
salt_buf3[0] = 0;
salt_buf3[1] = 0;
salt_buf3[2] = 0;
salt_buf3[3] = 0;
const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;
/**
* loop
*/
// Each iteration handles VECT_SIZE rules starting at il_pos.
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
{
u32x w0[4] = { 0 };
u32x w1[4] = { 0 };
u32x w2[4] = { 0 };
u32x w3[4] = { 0 };
// Apply the rules to the base password; the mangled result lands in w0/w1 and its length is returned.
const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
append_0x80_2x4_VV (w0, w1, out_len);
// Message length in bits goes into word 14 of the block.
w3[2] = out_len * 8;
w3[3] = 0;
// ---- stage 1: MD5 of the mangled password ----
u32x a = MD5M_A;
u32x b = MD5M_B;
u32x c = MD5M_C;
u32x d = MD5M_D;
// steps 0-15 (MD5_Fo)
MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03);
// steps 16-31 (MD5_Go)
MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13);
// t is never referenced directly in this function.
// NOTE(review): presumably scratch storage used inside the MD5_H1/MD5_H2 macros - confirm before removing.
u32x t;
// steps 32-47 (MD5_H1 / MD5_H2 alternating)
MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23);
// steps 48-63 (MD5_I)
MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33);
// Add the initial values back to obtain the stage-1 digest words.
a += make_u32x (MD5M_A);
b += make_u32x (MD5M_B);
c += make_u32x (MD5M_C);
d += make_u32x (MD5M_D);
// Hex-encode the digest: every output word holds four hex characters,
// built from two table lookups (two digest bytes), the second shifted up by 16 bits.
w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0
| uint_to_hex_lower8 ((a >> 8) & 255) << 16;
w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0
| uint_to_hex_lower8 ((a >> 24) & 255) << 16;
w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0
| uint_to_hex_lower8 ((b >> 8) & 255) << 16;
w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0
| uint_to_hex_lower8 ((b >> 24) & 255) << 16;
w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0
| uint_to_hex_lower8 ((c >> 8) & 255) << 16;
w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0
| uint_to_hex_lower8 ((c >> 24) & 255) << 16;
w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0
| uint_to_hex_lower8 ((d >> 8) & 255) << 16;
w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0
| uint_to_hex_lower8 ((d >> 24) & 255) << 16;
// The 32 hex characters fill w0/w1; 0x80 padding follows directly and the length is 32 bytes in bits.
w2[0] = 0x80;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 32 * 8;
w3[3] = 0;
// ---- stage 2: MD5 of the 32-character hex string ----
a = MD5M_A;
b = MD5M_B;
c = MD5M_C;
d = MD5M_D;
// steps 0-15 (MD5_Fo)
MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03);
// steps 16-31 (MD5_Go)
MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13);
// steps 32-47 (MD5_H1 / MD5_H2 alternating)
MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23);
// steps 48-63 (MD5_I)
MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33);
// Add the initial values back to obtain the stage-2 digest words.
a += make_u32x (MD5M_A);
b += make_u32x (MD5M_B);
c += make_u32x (MD5M_C);
d += make_u32x (MD5M_D);
// Hex-encode the stage-2 digest into w0/w1, same packing as after stage 1.
w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0
| uint_to_hex_lower8 ((a >> 8) & 255) << 16;
w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0
| uint_to_hex_lower8 ((a >> 24) & 255) << 16;
w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0
| uint_to_hex_lower8 ((b >> 8) & 255) << 16;
w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0
| uint_to_hex_lower8 ((b >> 24) & 255) << 16;
w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0
| uint_to_hex_lower8 ((c >> 8) & 255) << 16;
w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0
| uint_to_hex_lower8 ((c >> 24) & 255) << 16;
w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0
| uint_to_hex_lower8 ((d >> 8) & 255) << 16;
w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0
| uint_to_hex_lower8 ((d >> 24) & 255) << 16;
// The six salt words follow the 32 hex characters (block bytes 32..55); no 0x80 is written here.
w2[0] = salt_buf0[0];
w2[1] = salt_buf0[1];
w2[2] = salt_buf0[2];
w2[3] = salt_buf0[3];
w3[0] = salt_buf1[0];
w3[1] = salt_buf1[1];
// Length field: 32 hex characters plus the salt, in bits.
w3[2] = (32 + salt_len) * 8;
w3[3] = 0;
// ---- stage 3: MD5 of hex string followed by salt ----
a = MD5M_A;
b = MD5M_B;
c = MD5M_C;
d = MD5M_D;
// steps 0-15 (MD5_Fo)
MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03);
// steps 16-31 (MD5_Go)
MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13);
// steps 32-47 (MD5_H1 / MD5_H2 alternating)
MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23);
// steps 48-63 (MD5_I)
MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33);
// The state is compared in the order a, d, c, b and without adding the MD5M_* initial values back.
// NOTE(review): presumably the stored target digests are pre-adjusted to match - confirm on the host side.
COMPARE_M_SIMD (a, d, c, b);
}
}
/**
 * m03610_m08: kernel with an empty body; launching it performs no work.
 * NOTE(review): presumably a placeholder entry point next to m03610_m04 - confirm how the host selects kernels.
 */
KERNEL_FQ void m03610_m08 (KERN_ATTR_RULES ())
{
}
/**
 * m03610_m16: kernel with an empty body; launching it performs no work.
 * NOTE(review): presumably a placeholder entry point next to m03610_m04 - confirm how the host selects kernels.
 */
KERNEL_FQ void m03610_m16 (KERN_ATTR_RULES ())
{
}
/**
 * m03610_s04: rule-attack kernel that, for every rule-mangled candidate, runs
 * three chained single-block MD5 computations and checks the result against
 * the one digest loaded into search[] via COMPARE_S_SIMD.
 *
 * Stages per candidate, as written below:
 *   1. MD5 over the mangled password (w0/w1, 0x80 appended, bit length in w3[2]).
 *   2. MD5 over the 32 lowercase hex characters of the stage-1 digest.
 *   3. MD5 over the 32 lowercase hex characters of the stage-2 digest followed
 *      by the first six salt words; the length field is (32 + salt_len) bytes.
 *
 * Stage 3 bails out three steps before the end when `a` cannot match search[0].
 *
 * NOTE(review): stage 3 never writes a 0x80 terminator itself, so the salt
 * buffer presumably already carries the padding byte - confirm on the host side.
 */
KERNEL_FQ void m03610_s04 (KERN_ATTR_RULES ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
// 256-entry lookup: entry i packs two ASCII lowercase hex digits of byte i,
// the digit of the high nibble in bits 0..7 and the digit of the low nibble in bits 8..15.
// NOTE(review): LOCAL_VK presumably places the table in work-group local memory - confirm.
LOCAL_VK u32 l_bin2asc[256];
// The table is filled cooperatively: each work-item starts at its local id and strides by the local size.
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
}
// Placed before the gid bounds check, so every work-item reaches it, including those that return right after.
// NOTE(review): presumably a work-group barrier that publishes the table - confirm.
SYNC_THREADS ();
if (gid >= GID_CNT) return;
/**
* base
*/
// First eight 32-bit words (32 bytes) of this work-item's base password.
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
// The mask keeps the length in the range 0..63.
const u32 pw_len = pws[gid].pw_len & 63;
/**
* salt
*/
// Only the first six salt words are read; the remaining slots are zeroed.
// salt_buf1[2..3], salt_buf2 and salt_buf3 are not read anywhere later in this kernel.
u32 salt_buf0[4];
u32 salt_buf1[4];
u32 salt_buf2[4];
u32 salt_buf3[4];
salt_buf0[0] = salt_bufs[SALT_POS_HOST].salt_buf[0];
salt_buf0[1] = salt_bufs[SALT_POS_HOST].salt_buf[1];
salt_buf0[2] = salt_bufs[SALT_POS_HOST].salt_buf[2];
salt_buf0[3] = salt_bufs[SALT_POS_HOST].salt_buf[3];
salt_buf1[0] = salt_bufs[SALT_POS_HOST].salt_buf[4];
salt_buf1[1] = salt_bufs[SALT_POS_HOST].salt_buf[5];
salt_buf1[2] = 0;
salt_buf1[3] = 0;
salt_buf2[0] = 0;
salt_buf2[1] = 0;
salt_buf2[2] = 0;
salt_buf2[3] = 0;
salt_buf3[0] = 0;
salt_buf3[1] = 0;
salt_buf3[2] = 0;
salt_buf3[3] = 0;
const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;
/**
* digest
*/
// The four target digest words, taken in DGST_R0..DGST_R3 order.
// Only search[0] is referenced directly below.
// NOTE(review): the other entries are presumably read inside COMPARE_S_SIMD - confirm.
const u32 search[4] =
{
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
};
/**
* loop
*/
// Each iteration handles VECT_SIZE rules starting at il_pos.
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
{
u32x w0[4] = { 0 };
u32x w1[4] = { 0 };
u32x w2[4] = { 0 };
u32x w3[4] = { 0 };
// Apply the rules to the base password; the mangled result lands in w0/w1 and its length is returned.
const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
append_0x80_2x4_VV (w0, w1, out_len);
// Message length in bits goes into word 14 of the block.
w3[2] = out_len * 8;
w3[3] = 0;
// ---- stage 1: MD5 of the mangled password ----
u32x a = MD5M_A;
u32x b = MD5M_B;
u32x c = MD5M_C;
u32x d = MD5M_D;
// steps 0-15 (MD5_Fo)
MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03);
// steps 16-31 (MD5_Go)
MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13);
// t is never referenced directly in this function.
// NOTE(review): presumably scratch storage used inside the MD5_H1/MD5_H2 macros - confirm before removing.
u32x t;
// steps 32-47 (MD5_H1 / MD5_H2 alternating)
MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23);
// steps 48-63 (MD5_I)
MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33);
// Add the initial values back to obtain the stage-1 digest words.
a += make_u32x (MD5M_A);
b += make_u32x (MD5M_B);
c += make_u32x (MD5M_C);
d += make_u32x (MD5M_D);
// Hex-encode the digest: every output word holds four hex characters,
// built from two table lookups (two digest bytes), the second shifted up by 16 bits.
w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0
| uint_to_hex_lower8 ((a >> 8) & 255) << 16;
w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0
| uint_to_hex_lower8 ((a >> 24) & 255) << 16;
w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0
| uint_to_hex_lower8 ((b >> 8) & 255) << 16;
w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0
| uint_to_hex_lower8 ((b >> 24) & 255) << 16;
w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0
| uint_to_hex_lower8 ((c >> 8) & 255) << 16;
w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0
| uint_to_hex_lower8 ((c >> 24) & 255) << 16;
w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0
| uint_to_hex_lower8 ((d >> 8) & 255) << 16;
w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0
| uint_to_hex_lower8 ((d >> 24) & 255) << 16;
// The 32 hex characters fill w0/w1; 0x80 padding follows directly and the length is 32 bytes in bits.
w2[0] = 0x80;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 32 * 8;
w3[3] = 0;
// ---- stage 2: MD5 of the 32-character hex string ----
a = MD5M_A;
b = MD5M_B;
c = MD5M_C;
d = MD5M_D;
// steps 0-15 (MD5_Fo)
MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03);
// steps 16-31 (MD5_Go)
MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13);
// steps 32-47 (MD5_H1 / MD5_H2 alternating)
MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23);
// steps 48-63 (MD5_I)
MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33);
// Add the initial values back to obtain the stage-2 digest words.
a += make_u32x (MD5M_A);
b += make_u32x (MD5M_B);
c += make_u32x (MD5M_C);
d += make_u32x (MD5M_D);
// Hex-encode the stage-2 digest into w0/w1, same packing as after stage 1.
w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0
| uint_to_hex_lower8 ((a >> 8) & 255) << 16;
w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0
| uint_to_hex_lower8 ((a >> 24) & 255) << 16;
w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0
| uint_to_hex_lower8 ((b >> 8) & 255) << 16;
w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0
| uint_to_hex_lower8 ((b >> 24) & 255) << 16;
w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0
| uint_to_hex_lower8 ((c >> 8) & 255) << 16;
w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0
| uint_to_hex_lower8 ((c >> 24) & 255) << 16;
w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0
| uint_to_hex_lower8 ((d >> 8) & 255) << 16;
w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0
| uint_to_hex_lower8 ((d >> 24) & 255) << 16;
// The six salt words follow the 32 hex characters (block bytes 32..55); no 0x80 is written here.
w2[0] = salt_buf0[0];
w2[1] = salt_buf0[1];
w2[2] = salt_buf0[2];
w2[3] = salt_buf0[3];
w3[0] = salt_buf1[0];
w3[1] = salt_buf1[1];
// Length field: 32 hex characters plus the salt, in bits.
w3[2] = (32 + salt_len) * 8;
w3[3] = 0;
// ---- stage 3: MD5 of hex string followed by salt ----
a = MD5M_A;
b = MD5M_B;
c = MD5M_C;
d = MD5M_D;
// steps 0-15 (MD5_Fo)
MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03);
// steps 16-31 (MD5_Go)
MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13);
// steps 32-47 (MD5_H1 / MD5_H2 alternating)
MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23);
// steps 48-63 (MD5_I), with an early exit after step 60
MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30);
// `a` is final here: the three remaining steps update only d, c and b.
// NOTE(review): MATCHES_NONE_VS presumably is true when no vector lane of `a` equals search[0] - confirm.
if (MATCHES_NONE_VS (a, search[0])) continue;
MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33);
// The state is compared in the order a, d, c, b and without adding the MD5M_* initial values back.
// NOTE(review): presumably the stored target digest is pre-adjusted to match - confirm on the host side.
COMPARE_S_SIMD (a, d, c, b);
}
}
/**
 * m03610_s08: kernel with an empty body; launching it performs no work.
 * NOTE(review): presumably a placeholder entry point next to m03610_s04 - confirm how the host selects kernels.
 */
KERNEL_FQ void m03610_s08 (KERN_ATTR_RULES ())
{
}
/**
 * m03610_s16: kernel with an empty body; launching it performs no work.
 * NOTE(review): presumably a placeholder entry point next to m03610_s04 - confirm how the host selects kernels.
 */
KERNEL_FQ void m03610_s16 (KERN_ATTR_RULES ())
{
}

@ -0,0 +1,307 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
//#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include M2S(INCLUDE_PATH/inc_vendor.h)
#include M2S(INCLUDE_PATH/inc_types.h)
#include M2S(INCLUDE_PATH/inc_platform.cl)
#include M2S(INCLUDE_PATH/inc_common.cl)
#include M2S(INCLUDE_PATH/inc_rp.h)
#include M2S(INCLUDE_PATH/inc_rp.cl)
#include M2S(INCLUDE_PATH/inc_scalar.cl)
#include M2S(INCLUDE_PATH/inc_hash_md5.cl)
#endif
/**
 * uint_to_hex_lower8 (i): looks up l_bin2asc[] for the value i (per vector lane
 * when VECT_SIZE > 1) and assembles the results with make_u32x.
 *
 * The macro refers to l_bin2asc by name, so a table with exactly that name must
 * be in scope wherever it is expanded; the kernels in this file declare it locally.
 * The table has 256 entries, so callers mask i to a single byte before the lookup.
 * One variant is selected per supported VECT_SIZE (1, 2, 4, 8, 16); no fallback
 * is defined for other values.
 */
#if VECT_SIZE == 1
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
#elif VECT_SIZE == 2
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
#elif VECT_SIZE == 4
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
#elif VECT_SIZE == 8
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
#elif VECT_SIZE == 16
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
#endif
/**
 * m03610_mxx: rule-attack kernel built on the md5_ctx_t helpers. For every rule
 * it computes three chained MD5 digests and passes the last one to
 * COMPARE_M_SCALAR.
 *
 * Stages per candidate, as written below:
 *   1. ctx0: MD5 of the rule-mangled password.
 *   2. ctx1: MD5 of the 32 lowercase hex characters of the ctx0 digest.
 *   3. ctx2: MD5 of the 32 lowercase hex characters of the ctx1 digest followed
 *      by salt_len bytes of salt.
 */
KERNEL_FQ void m03610_mxx (KERN_ATTR_RULES ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
// 256-entry lookup: entry i packs two ASCII lowercase hex digits of byte i,
// the digit of the high nibble in bits 0..7 and the digit of the low nibble in bits 8..15.
// NOTE(review): LOCAL_VK presumably places the table in work-group local memory - confirm.
LOCAL_VK u32 l_bin2asc[256];
// The table is filled cooperatively: each work-item starts at its local id and strides by the local size.
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
}
// Placed before the gid bounds check, so every work-item reaches it, including those that return right after.
// NOTE(review): presumably a work-group barrier that publishes the table - confirm.
SYNC_THREADS ();
if (gid >= GID_CNT) return;
/**
* base
*/
// NOTE(review): COPY_PW presumably snapshots this work-item's password for PASTE_PW below - confirm in the macro definition.
COPY_PW (pws[gid]);
const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;
// Private copy of the salt, copied one 32-bit word per four bytes of salt_len; unused words stay zero.
// NOTE(review): assumes salt_len <= 256 so idx stays inside s[64] - TODO confirm the host-side limit.
u32 s[64] = { 0 };
for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
{
s[idx] = salt_bufs[SALT_POS_HOST].salt_buf[idx];
}
/**
* loop
*/
// One rule per iteration.
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
{
// Fresh copy of the base password for this rule; apply_rules returns the new length.
pw_t tmp = PASTE_PW;
tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
// ---- stage 1: MD5 of the mangled password ----
md5_ctx_t ctx0;
md5_init (&ctx0);
md5_update (&ctx0, tmp.i, tmp.pw_len);
md5_final (&ctx0);
u32 a = ctx0.h[0];
u32 b = ctx0.h[1];
u32 c = ctx0.h[2];
u32 d = ctx0.h[3];
// ---- stage 2: MD5 of the 32-character hex string ----
// The hex text is written straight into the context's w0/w1 words and len is set by hand,
// instead of going through md5_update.
md5_ctx_t ctx1;
md5_init (&ctx1);
// Every word holds four hex characters: two table lookups (two digest bytes), the second shifted up by 16 bits.
ctx1.w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0
| uint_to_hex_lower8 ((a >> 8) & 255) << 16;
ctx1.w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0
| uint_to_hex_lower8 ((a >> 24) & 255) << 16;
ctx1.w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0
| uint_to_hex_lower8 ((b >> 8) & 255) << 16;
ctx1.w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0
| uint_to_hex_lower8 ((b >> 24) & 255) << 16;
ctx1.w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0
| uint_to_hex_lower8 ((c >> 8) & 255) << 16;
ctx1.w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0
| uint_to_hex_lower8 ((c >> 24) & 255) << 16;
ctx1.w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0
| uint_to_hex_lower8 ((d >> 8) & 255) << 16;
ctx1.w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0
| uint_to_hex_lower8 ((d >> 24) & 255) << 16;
ctx1.len = 32;
md5_final (&ctx1);
a = ctx1.h[0];
b = ctx1.h[1];
c = ctx1.h[2];
d = ctx1.h[3];
// ---- stage 3: MD5 of hex string followed by salt ----
// Same direct fill as stage 2; the salt is then appended through md5_update.
md5_ctx_t ctx2;
md5_init (&ctx2);
ctx2.w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0
| uint_to_hex_lower8 ((a >> 8) & 255) << 16;
ctx2.w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0
| uint_to_hex_lower8 ((a >> 24) & 255) << 16;
ctx2.w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0
| uint_to_hex_lower8 ((b >> 8) & 255) << 16;
ctx2.w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0
| uint_to_hex_lower8 ((b >> 24) & 255) << 16;
ctx2.w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0
| uint_to_hex_lower8 ((c >> 8) & 255) << 16;
ctx2.w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0
| uint_to_hex_lower8 ((c >> 24) & 255) << 16;
ctx2.w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0
| uint_to_hex_lower8 ((d >> 8) & 255) << 16;
ctx2.w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0
| uint_to_hex_lower8 ((d >> 24) & 255) << 16;
ctx2.len = 32;
md5_update (&ctx2, s, salt_len);
md5_final (&ctx2);
// Pick the digest words in DGST_R0..DGST_R3 order for the comparison macro.
const u32 r0 = ctx2.h[DGST_R0];
const u32 r1 = ctx2.h[DGST_R1];
const u32 r2 = ctx2.h[DGST_R2];
const u32 r3 = ctx2.h[DGST_R3];
COMPARE_M_SCALAR (r0, r1, r2, r3);
}
}
/**
 * m03610_sxx: rule-based kernel, single-digest compare (COMPARE_S_SCALAR).
 *
 * For every rule in rules_buf the base word pws[gid] is mutated and run
 * through a chain of three md5 computations:
 *   1. md5 over the mutated candidate
 *   2. md5 over the 32 lowercase hex characters of digest 1
 *   3. md5 over the 32 lowercase hex characters of digest 2 followed by the salt
 * The words DGST_R0..DGST_R3 of the last digest are compared with search[].
 */
KERNEL_FQ void m03610_sxx (KERN_ATTR_RULES ())
{
  // gid picks the base word; lid/lsz drive the strided table fill below
  const u64 gid = get_global_id (0);
  const u64 lid = get_local_id (0);
  const u64 lsz = get_local_size (0);

  // l_bin2asc[v] holds the two lowercase hex digits of byte v:
  // high-nibble digit in bits 0-7, low-nibble digit in bits 8-15
  LOCAL_VK u32 l_bin2asc[256];

  for (u32 idx = lid; idx < 256; idx += lsz)
  {
    const u32 lo_nibble = idx & 15;
    const u32 hi_nibble = (idx >> 4) & 15;

    const u32 lo_char = (lo_nibble < 10) ? ('0' + lo_nibble) : ('a' - 10 + lo_nibble);
    const u32 hi_char = (hi_nibble < 10) ? ('0' + hi_nibble) : ('a' - 10 + hi_nibble);

    l_bin2asc[idx] = (lo_char << 8) | hi_char;
  }

  // the table must be complete before anyone leaves or starts reading it
  SYNC_THREADS ();

  if (gid >= GID_CNT)
  {
    return;
  }

  // base word for this work-item
  COPY_PW (pws[gid]);

  // private copy of the salt, one u32 per started group of four bytes
  u32 salt_words[64] = { 0 };

  const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;

  for (u32 off = 0; off < salt_len; off += 4)
  {
    salt_words[off / 4] = salt_bufs[SALT_POS_HOST].salt_buf[off / 4];
  }

  // digest words to look for (name is kept: the compare macro is handed no
  // explicit reference to it)
  const u32 search[4] =
  {
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
  };

  // one candidate per rule
  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
  {
    pw_t cand = PASTE_PW;

    cand.pw_len = apply_rules (rules_buf[il_pos].cmds, cand.i, cand.pw_len);

    // stage 1: md5 (candidate)
    md5_ctx_t ctx_pw;

    md5_init (&ctx_pw);
    md5_update (&ctx_pw, cand.i, cand.pw_len);
    md5_final (&ctx_pw);

    const u32 p0 = ctx_pw.h[0];
    const u32 p1 = ctx_pw.h[1];
    const u32 p2 = ctx_pw.h[2];
    const u32 p3 = ctx_pw.h[3];

    // stage 2: md5 (hex (stage 1)); each digest byte becomes two characters,
    // so every 32-bit state word fills two message words
    md5_ctx_t ctx_hex;

    md5_init (&ctx_hex);

    ctx_hex.w0[0] = uint_to_hex_lower8 (p0 & 0xff)         | (uint_to_hex_lower8 ((p0 >>  8) & 0xff) << 16);
    ctx_hex.w0[1] = uint_to_hex_lower8 ((p0 >> 16) & 0xff) | (uint_to_hex_lower8 ((p0 >> 24) & 0xff) << 16);
    ctx_hex.w0[2] = uint_to_hex_lower8 (p1 & 0xff)         | (uint_to_hex_lower8 ((p1 >>  8) & 0xff) << 16);
    ctx_hex.w0[3] = uint_to_hex_lower8 ((p1 >> 16) & 0xff) | (uint_to_hex_lower8 ((p1 >> 24) & 0xff) << 16);
    ctx_hex.w1[0] = uint_to_hex_lower8 (p2 & 0xff)         | (uint_to_hex_lower8 ((p2 >>  8) & 0xff) << 16);
    ctx_hex.w1[1] = uint_to_hex_lower8 ((p2 >> 16) & 0xff) | (uint_to_hex_lower8 ((p2 >> 24) & 0xff) << 16);
    ctx_hex.w1[2] = uint_to_hex_lower8 (p3 & 0xff)         | (uint_to_hex_lower8 ((p3 >>  8) & 0xff) << 16);
    ctx_hex.w1[3] = uint_to_hex_lower8 ((p3 >> 16) & 0xff) | (uint_to_hex_lower8 ((p3 >> 24) & 0xff) << 16);

    ctx_hex.len = 32;

    md5_final (&ctx_hex);

    const u32 q0 = ctx_hex.h[0];
    const u32 q1 = ctx_hex.h[1];
    const u32 q2 = ctx_hex.h[2];
    const u32 q3 = ctx_hex.h[3];

    // stage 3: md5 (hex (stage 2) . salt)
    md5_ctx_t ctx_salted;

    md5_init (&ctx_salted);

    ctx_salted.w0[0] = uint_to_hex_lower8 (q0 & 0xff)         | (uint_to_hex_lower8 ((q0 >>  8) & 0xff) << 16);
    ctx_salted.w0[1] = uint_to_hex_lower8 ((q0 >> 16) & 0xff) | (uint_to_hex_lower8 ((q0 >> 24) & 0xff) << 16);
    ctx_salted.w0[2] = uint_to_hex_lower8 (q1 & 0xff)         | (uint_to_hex_lower8 ((q1 >>  8) & 0xff) << 16);
    ctx_salted.w0[3] = uint_to_hex_lower8 ((q1 >> 16) & 0xff) | (uint_to_hex_lower8 ((q1 >> 24) & 0xff) << 16);
    ctx_salted.w1[0] = uint_to_hex_lower8 (q2 & 0xff)         | (uint_to_hex_lower8 ((q2 >>  8) & 0xff) << 16);
    ctx_salted.w1[1] = uint_to_hex_lower8 ((q2 >> 16) & 0xff) | (uint_to_hex_lower8 ((q2 >> 24) & 0xff) << 16);
    ctx_salted.w1[2] = uint_to_hex_lower8 (q3 & 0xff)         | (uint_to_hex_lower8 ((q3 >>  8) & 0xff) << 16);
    ctx_salted.w1[3] = uint_to_hex_lower8 ((q3 >> 16) & 0xff) | (uint_to_hex_lower8 ((q3 >> 24) & 0xff) << 16);

    ctx_salted.len = 32;

    md5_update (&ctx_salted, salt_words, salt_len);
    md5_final (&ctx_salted);

    const u32 r0 = ctx_salted.h[DGST_R0];
    const u32 r1 = ctx_salted.h[DGST_R1];
    const u32 r2 = ctx_salted.h[DGST_R2];
    const u32 r3 = ctx_salted.h[DGST_R3];

    COMPARE_S_SCALAR (r0, r1, r2, r3);
  }
}

@ -0,0 +1,934 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include M2S(INCLUDE_PATH/inc_vendor.h)
#include M2S(INCLUDE_PATH/inc_types.h)
#include M2S(INCLUDE_PATH/inc_platform.cl)
#include M2S(INCLUDE_PATH/inc_common.cl)
#include M2S(INCLUDE_PATH/inc_simd.cl)
#include M2S(INCLUDE_PATH/inc_hash_md5.cl)
#endif
// uint_to_hex_lower8 (i): table lookup into l_bin2asc[] for the value i.
// For VECT_SIZE == 1 the single value indexes the table directly; for wider
// vectors every lane (.s0, .s1, ...) is looked up separately and the results
// are recombined with make_u32x ().
// The macro refers to l_bin2asc by name, so a table with exactly that name has
// to be in scope at every place the macro is expanded.
// NOTE(review): there is no fallback branch - for any VECT_SIZE other than
// 1, 2, 4, 8 or 16 the macro stays undefined.
#if VECT_SIZE == 1
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
#elif VECT_SIZE == 2
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
#elif VECT_SIZE == 4
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
#elif VECT_SIZE == 8
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
#elif VECT_SIZE == 16
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
#endif
/**
* m03610_m04: combinator kernel, multi-digest compare (COMPARE_M_SIMD).
*
* For every entry of combs_buf (VECT_SIZE entries per loop pass) the base word
* pws[gid] and the combinator word are merged into one message block, then
* three fully unrolled md5 transforms are chained:
*   1. md5 over the merged candidate             (w3[2] = pw_len * 8)
*   2. md5 over the 32 lowercase hex characters of digest 1
*                                                (w2[0] = 0x80, w3[2] = 32 * 8)
*   3. md5 over the 32 lowercase hex characters of digest 2 followed by the
*      first six salt words                      (w3[2] = (32 + salt_len) * 8)
* The state after the third transform is handed to COMPARE_M_SIMD (a, d, c, b).
*/
KERNEL_FQ void m03610_m04 (KERN_ATTR_BASIC ())
{
/**
* modifier
*/
// gid selects the base word; lid and lsz are only used for the table fill below
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
// l_bin2asc[v] holds the two lowercase hex digits of byte v packed into the low
// 16 bits: high-nibble digit in bits 0-7, low-nibble digit in bits 8-15.
// Each work-item fills the entries lid, lid + lsz, lid + 2 * lsz, ...
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
}
// the early return comes only after the sync, so out-of-range work-items
// still take part in filling the table
SYNC_THREADS ();
if (gid >= GID_CNT) return;
/**
* base
*/
// first eight u32 words of the base word; its length is masked to 0..63
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_l_len = pws[gid].pw_len & 63;
/**
* salt
*/
// only the first six salt words are loaded; everything else is zeroed.
// salt_buf2 and salt_buf3 are initialised here but never read again in this kernel.
u32 salt_buf0[4];
u32 salt_buf1[4];
u32 salt_buf2[4];
u32 salt_buf3[4];
salt_buf0[0] = salt_bufs[SALT_POS_HOST].salt_buf[0];
salt_buf0[1] = salt_bufs[SALT_POS_HOST].salt_buf[1];
salt_buf0[2] = salt_bufs[SALT_POS_HOST].salt_buf[2];
salt_buf0[3] = salt_bufs[SALT_POS_HOST].salt_buf[3];
salt_buf1[0] = salt_bufs[SALT_POS_HOST].salt_buf[4];
salt_buf1[1] = salt_bufs[SALT_POS_HOST].salt_buf[5];
salt_buf1[2] = 0;
salt_buf1[3] = 0;
salt_buf2[0] = 0;
salt_buf2[1] = 0;
salt_buf2[2] = 0;
salt_buf2[3] = 0;
salt_buf3[0] = 0;
salt_buf3[1] = 0;
salt_buf3[2] = 0;
salt_buf3[3] = 0;
const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;
/**
* loop
*/
// each pass handles VECT_SIZE combinator entries at once
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
{
// combinator word length and combined length, both masked to 0..63
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63;
const u32x pw_len = (pw_l_len + pw_r_len) & 63;
/**
* concat password candidate
*/
// left operand: the base word from pws[gid]
u32x wordl0[4] = { 0 };
u32x wordl1[4] = { 0 };
u32x wordl2[4] = { 0 };
u32x wordl3[4] = { 0 };
wordl0[0] = pw_buf0[0];
wordl0[1] = pw_buf0[1];
wordl0[2] = pw_buf0[2];
wordl0[3] = pw_buf0[3];
wordl1[0] = pw_buf1[0];
wordl1[1] = pw_buf1[1];
wordl1[2] = pw_buf1[2];
wordl1[3] = pw_buf1[3];
// right operand: first eight words of the combinator entry
u32x wordr0[4] = { 0 };
u32x wordr1[4] = { 0 };
u32x wordr2[4] = { 0 };
u32x wordr3[4] = { 0 };
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
// whichever operand comes second is moved by the length of the other one,
// so that the two can simply be OR-ed together afterwards
if (COMBS_MODE == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
else
{
switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
}
// merged message block; w3[2] takes the combined length times 8 and w3[3] is zero
// NOTE(review): no 0x80 marker is written for the candidate here - presumably
// the word buffers already contain it; confirm against the host side
u32x w0[4];
u32x w1[4];
u32x w2[4];
u32x w3[4];
w0[0] = wordl0[0] | wordr0[0];
w0[1] = wordl0[1] | wordr0[1];
w0[2] = wordl0[2] | wordr0[2];
w0[3] = wordl0[3] | wordr0[3];
w1[0] = wordl1[0] | wordr1[0];
w1[1] = wordl1[1] | wordr1[1];
w1[2] = wordl1[2] | wordr1[2];
w1[3] = wordl1[3] | wordr1[3];
w2[0] = wordl2[0] | wordr2[0];
w2[1] = wordl2[1] | wordr2[1];
w2[2] = wordl2[2] | wordr2[2];
w2[3] = wordl2[3] | wordr2[3];
w3[0] = wordl3[0] | wordr3[0];
w3[1] = wordl3[1] | wordr3[1];
w3[2] = pw_len * 8;
w3[3] = 0;
/**
* md5
*/
// transform 1: candidate block, starting from the MD5M_* initial state
u32x a = MD5M_A;
u32x b = MD5M_B;
u32x c = MD5M_C;
u32x d = MD5M_D;
MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03);
MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13);
// NOTE(review): t is never referenced directly in this function; presumably
// the MD5_H1 / MD5_H2 macros use it as scratch - confirm before removing
u32x t;
MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23);
MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33);
// add the initial state back in to obtain digest 1
a += make_u32x (MD5M_A);
b += make_u32x (MD5M_B);
c += make_u32x (MD5M_C);
d += make_u32x (MD5M_D);
// hex-encode digest 1: every digest byte becomes two characters, so each
// 32-bit state word fills two message words (eight words in total)
w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0
| uint_to_hex_lower8 ((a >> 8) & 255) << 16;
w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0
| uint_to_hex_lower8 ((a >> 24) & 255) << 16;
w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0
| uint_to_hex_lower8 ((b >> 8) & 255) << 16;
w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0
| uint_to_hex_lower8 ((b >> 24) & 255) << 16;
w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0
| uint_to_hex_lower8 ((c >> 8) & 255) << 16;
w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0
| uint_to_hex_lower8 ((c >> 24) & 255) << 16;
w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0
| uint_to_hex_lower8 ((d >> 8) & 255) << 16;
w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0
| uint_to_hex_lower8 ((d >> 24) & 255) << 16;
// 0x80 marker directly behind the 32 hex characters, length word = 32 * 8
w2[0] = 0x80;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 32 * 8;
w3[3] = 0;
// transform 2: hex (digest 1), again starting from the MD5M_* initial state
a = MD5M_A;
b = MD5M_B;
c = MD5M_C;
d = MD5M_D;
MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03);
MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13);
MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23);
MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33);
// add the initial state back in to obtain digest 2
a += make_u32x (MD5M_A);
b += make_u32x (MD5M_B);
c += make_u32x (MD5M_C);
d += make_u32x (MD5M_D);
// hex-encode digest 2 into the first eight message words
w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0
| uint_to_hex_lower8 ((a >> 8) & 255) << 16;
w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0
| uint_to_hex_lower8 ((a >> 24) & 255) << 16;
w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0
| uint_to_hex_lower8 ((b >> 8) & 255) << 16;
w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0
| uint_to_hex_lower8 ((b >> 24) & 255) << 16;
w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0
| uint_to_hex_lower8 ((c >> 8) & 255) << 16;
w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0
| uint_to_hex_lower8 ((c >> 24) & 255) << 16;
w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0
| uint_to_hex_lower8 ((d >> 8) & 255) << 16;
w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0
| uint_to_hex_lower8 ((d >> 24) & 255) << 16;
// the six salt words follow the hex characters; length word = (32 + salt_len) * 8
// NOTE(review): no 0x80 marker is written in this stage - presumably the salt
// buffer already carries it from the host side; confirm
w2[0] = salt_buf0[0];
w2[1] = salt_buf0[1];
w2[2] = salt_buf0[2];
w2[3] = salt_buf0[3];
w3[0] = salt_buf1[0];
w3[1] = salt_buf1[1];
w3[2] = (32 + salt_len) * 8;
w3[3] = 0;
// transform 3: hex (digest 2) . salt, starting from the MD5M_* initial state
a = MD5M_A;
b = MD5M_B;
c = MD5M_C;
d = MD5M_D;
MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03);
MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13);
MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23);
MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33);
// unlike the two transforms above, the initial state is NOT added back in
// before comparing, and the state is passed in the order a, d, c, b
// NOTE(review): presumably the stored target digests are pre-adjusted to
// match this - confirm against the host-side module
COMPARE_M_SIMD (a, d, c, b);
}
}
// m03610_m08: kernel entry point with an empty body - it performs no work.
// NOTE(review): presumably only present so the _m08 symbol exists next to
// m03610_m04 - confirm against the host-side kernel lookup.
KERNEL_FQ void m03610_m08 (KERN_ATTR_BASIC ())
{
}
// m03610_m16: kernel entry point with an empty body - it performs no work.
// NOTE(review): presumably only present so the _m16 symbol exists next to
// m03610_m04 - confirm against the host-side kernel lookup.
KERNEL_FQ void m03610_m16 (KERN_ATTR_BASIC ())
{
}
KERNEL_FQ void m03610_s04 (KERN_ATTR_BASIC ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
}
SYNC_THREADS ();
if (gid >= GID_CNT) return;
/**
* base
*/
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_l_len = pws[gid].pw_len & 63;
/**
* salt
*/
u32 salt_buf0[4];
u32 salt_buf1[4];
u32 salt_buf2[4];
u32 salt_buf3[4];
salt_buf0[0] = salt_bufs[SALT_POS_HOST].salt_buf[0];
salt_buf0[1] = salt_bufs[SALT_POS_HOST].salt_buf[1];
salt_buf0[2] = salt_bufs[SALT_POS_HOST].salt_buf[2];
salt_buf0[3] = salt_bufs[SALT_POS_HOST].salt_buf[3];
salt_buf1[0] = salt_bufs[SALT_POS_HOST].salt_buf[4];
salt_buf1[1] = salt_bufs[SALT_POS_HOST].salt_buf[5];
salt_buf1[2] = 0;
salt_buf1[3] = 0;
salt_buf2[0] = 0;
salt_buf2[1] = 0;
salt_buf2[2] = 0;
salt_buf2[3] = 0;
salt_buf3[0] = 0;
salt_buf3[1] = 0;
salt_buf3[2] = 0;
salt_buf3[3] = 0;
const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
};
/**
* loop
*/
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
{
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63;
const u32x pw_len = (pw_l_len + pw_r_len) & 63;
/**
* concat password candidate
*/
u32x wordl0[4] = { 0 };
u32x wordl1[4] = { 0 };
u32x wordl2[4] = { 0 };
u32x wordl3[4] = { 0 };
wordl0[0] = pw_buf0[0];
wordl0[1] = pw_buf0[1];
wordl0[2] = pw_buf0[2];
wordl0[3] = pw_buf0[3];
wordl1[0] = pw_buf1[0];
wordl1[1] = pw_buf1[1];
wordl1[2] = pw_buf1[2];
wordl1[3] = pw_buf1[3];
u32x wordr0[4] = { 0 };
u32x wordr1[4] = { 0 };
u32x wordr2[4] = { 0 };
u32x wordr3[4] = { 0 };
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
if (COMBS_MODE == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
else
{
switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
}
u32x w0[4];
u32x w1[4];
u32x w2[4];
u32x w3[4];
w0[0] = wordl0[0] | wordr0[0];
w0[1] = wordl0[1] | wordr0[1];
w0[2] = wordl0[2] | wordr0[2];
w0[3] = wordl0[3] | wordr0[3];
w1[0] = wordl1[0] | wordr1[0];
w1[1] = wordl1[1] | wordr1[1];
w1[2] = wordl1[2] | wordr1[2];
w1[3] = wordl1[3] | wordr1[3];
w2[0] = wordl2[0] | wordr2[0];
w2[1] = wordl2[1] | wordr2[1];
w2[2] = wordl2[2] | wordr2[2];
w2[3] = wordl2[3] | wordr2[3];
w3[0] = wordl3[0] | wordr3[0];
w3[1] = wordl3[1] | wordr3[1];
w3[2] = pw_len * 8;
w3[3] = 0;
/**
* md5
*/
u32x a = MD5M_A;
u32x b = MD5M_B;
u32x c = MD5M_C;
u32x d = MD5M_D;
MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03);
MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13);
u32x t;
MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23);
MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33);
a += make_u32x (MD5M_A);
b += make_u32x (MD5M_B);
c += make_u32x (MD5M_C);
d += make_u32x (MD5M_D);
w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0
| uint_to_hex_lower8 ((a >> 8) & 255) << 16;
w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0
| uint_to_hex_lower8 ((a >> 24) & 255) << 16;
w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0
| uint_to_hex_lower8 ((b >> 8) & 255) << 16;
w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0
| uint_to_hex_lower8 ((b >> 24) & 255) << 16;
w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0
| uint_to_hex_lower8 ((c >> 8) & 255) << 16;
w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0
| uint_to_hex_lower8 ((c >> 24) & 255) << 16;
w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0
| uint_to_hex_lower8 ((d >> 8) & 255) << 16;
w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0
| uint_to_hex_lower8 ((d >> 24) & 255) << 16;
w2[0] = 0x80;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 32 * 8;
w3[3] = 0;
a = MD5M_A;
b = MD5M_B;
c = MD5M_C;
d = MD5M_D;
MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03);
MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13);
MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23);
MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33);
a += make_u32x (MD5M_A);
b += make_u32x (MD5M_B);
c += make_u32x (MD5M_C);
d += make_u32x (MD5M_D);
w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0
| uint_to_hex_lower8 ((a >> 8) & 255) << 16;
w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0
| uint_to_hex_lower8 ((a >> 24) & 255) << 16;
w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0
| uint_to_hex_lower8 ((b >> 8) & 255) << 16;
w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0
| uint_to_hex_lower8 ((b >> 24) & 255) << 16;
w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0
| uint_to_hex_lower8 ((c >> 8) & 255) << 16;
w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0
| uint_to_hex_lower8 ((c >> 24) & 255) << 16;
w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0
| uint_to_hex_lower8 ((d >> 8) & 255) << 16;
w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0
| uint_to_hex_lower8 ((d >> 24) & 255) << 16;
w2[0] = salt_buf0[0];
w2[1] = salt_buf0[1];
w2[2] = salt_buf0[2];
w2[3] = salt_buf0[3];
w3[0] = salt_buf1[0];
w3[1] = salt_buf1[1];
w3[2] = (32 + salt_len) * 8;
w3[3] = 0;
a = MD5M_A;
b = MD5M_B;
c = MD5M_C;
d = MD5M_D;
MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03);
MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13);
MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23);
MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30);
if (MATCHES_NONE_VS (a, search[0])) continue;
MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33);
COMPARE_S_SIMD (a, d, c, b);
}
}
/**
 * Kernel entry point with an empty body: invoking it performs no work.
 * NOTE(review): presumably kept only so that the full set of entry-point
 * names exists for the host side - confirm against the kernel loader.
 */
KERNEL_FQ void m03610_s08 (KERN_ATTR_BASIC ())
{
}
/**
 * Kernel entry point with an empty body: invoking it performs no work.
 * NOTE(review): presumably kept only so that the full set of entry-point
 * names exists for the host side - confirm against the kernel loader.
 */
KERNEL_FQ void m03610_s16 (KERN_ATTR_BASIC ())
{
}

@ -0,0 +1,301 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
//#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include M2S(INCLUDE_PATH/inc_vendor.h)
#include M2S(INCLUDE_PATH/inc_types.h)
#include M2S(INCLUDE_PATH/inc_platform.cl)
#include M2S(INCLUDE_PATH/inc_common.cl)
#include M2S(INCLUDE_PATH/inc_scalar.cl)
#include M2S(INCLUDE_PATH/inc_hash_md5.cl)
#endif
/**
 * uint_to_hex_lower8 (v): look up every lane of v in l_bin2asc and gather the
 * results with make_u32x. One variant exists per supported VECT_SIZE
 * (1, 2, 4, 8, 16); for any other value the macro is left undefined.
 *
 * The macro refers to l_bin2asc by name, so a table with that name has to be
 * visible wherever the macro is expanded. The kernels in this file always
 * pass a value that was masked with "& 255".
 */
#if VECT_SIZE == 1
#define uint_to_hex_lower8(v) make_u32x (l_bin2asc[(v)])
#elif VECT_SIZE == 2
#define uint_to_hex_lower8(v) \
  make_u32x (l_bin2asc[(v).s0], l_bin2asc[(v).s1])
#elif VECT_SIZE == 4
#define uint_to_hex_lower8(v) \
  make_u32x (l_bin2asc[(v).s0], l_bin2asc[(v).s1], l_bin2asc[(v).s2], l_bin2asc[(v).s3])
#elif VECT_SIZE == 8
#define uint_to_hex_lower8(v) \
  make_u32x (l_bin2asc[(v).s0], l_bin2asc[(v).s1], l_bin2asc[(v).s2], l_bin2asc[(v).s3], \
             l_bin2asc[(v).s4], l_bin2asc[(v).s5], l_bin2asc[(v).s6], l_bin2asc[(v).s7])
#elif VECT_SIZE == 16
#define uint_to_hex_lower8(v) \
  make_u32x (l_bin2asc[(v).s0], l_bin2asc[(v).s1], l_bin2asc[(v).s2], l_bin2asc[(v).s3], \
             l_bin2asc[(v).s4], l_bin2asc[(v).s5], l_bin2asc[(v).s6], l_bin2asc[(v).s7], \
             l_bin2asc[(v).s8], l_bin2asc[(v).s9], l_bin2asc[(v).sa], l_bin2asc[(v).sb], \
             l_bin2asc[(v).sc], l_bin2asc[(v).sd], l_bin2asc[(v).se], l_bin2asc[(v).sf])
#endif
/**
 * M03610_HEX2 (x, sh): hex-encode two adjacent bytes of x (bits sh..sh+7 and
 * sh+8..sh+15) through uint_to_hex_lower8 and pack the two table entries
 * into one 32-bit word (first entry in bits 0-15, second in bits 16-31).
 */
#define M03610_HEX2(x,sh)                                      \
  ((uint_to_hex_lower8 (((x) >> (sh))       & 255) <<  0)      \
 | (uint_to_hex_lower8 (((x) >> ((sh) + 8)) & 255) << 16))

/**
 * m03610_mxx: for the left word pws[gid] combined with every right word in
 * combs_buf[0 .. IL_CNT-1], compute
 *
 *   md5 (hex (md5 (hex (md5 (left . right)))) . salt)
 *
 * where hex () is the 32-character lowercase hex form produced via
 * l_bin2asc, and pass the selected digest words to COMPARE_M_SCALAR.
 *
 * NOTE(review): gid and il_pos are presumably referenced by name inside
 * COMPARE_M_SCALAR (defined outside this file section) - do not rename them.
 */
KERNEL_FQ void m03610_mxx (KERN_ATTR_BASIC ())
{
  /**
   * modifier
   */

  const u64 gid = get_global_id (0);
  const u64 lid = get_local_id (0);
  const u64 lsz = get_local_size (0);

  /**
   * bin2asc table
   *
   * l_bin2asc[n] holds the two lowercase hex characters of byte n:
   * the high-nibble character in bits 0-7, the low-nibble one in bits 8-15.
   * Each work-item fills the entries lid, lid + lsz, lid + 2 * lsz, ...
   */

  LOCAL_VK u32 l_bin2asc[256];

  for (u32 n = lid; n < 256; n += lsz)
  {
    const u32 nib_lo = n & 15;
    const u32 nib_hi = (n >> 4) & 15;

    const u32 chr_lo = (nib_lo < 10) ? '0' + nib_lo : 'a' - 10 + nib_lo;
    const u32 chr_hi = (nib_hi < 10) ? '0' + nib_hi : 'a' - 10 + nib_hi;

    l_bin2asc[n] = (chr_lo << 8) | chr_hi;
  }

  SYNC_THREADS ();

  // the bounds check comes after SYNC_THREADS so every work-item reaches it
  if (gid >= GID_CNT) return;

  /**
   * base
   */

  const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;

  // private, zero-padded copy of the salt (one 32-bit word per 4 salt bytes)
  u32 salt_words[64] = { 0 };

  for (u32 off = 0, pos = 0; off < salt_len; off += 4, pos += 1)
  {
    salt_words[pos] = salt_bufs[SALT_POS_HOST].salt_buf[pos];
  }

  // MD5 state after absorbing the left word; reused for every right word
  md5_ctx_t ctx_left;

  md5_init (&ctx_left);

  md5_update_global (&ctx_left, pws[gid].i, pws[gid].pw_len);

  /**
   * loop
   */

  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
  {
    // round 1: md5 (left . right)
    md5_ctx_t ctx_pw = ctx_left;

    md5_update_global (&ctx_pw, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);

    md5_final (&ctx_pw);

    const u32 p0 = ctx_pw.h[0];
    const u32 p1 = ctx_pw.h[1];
    const u32 p2 = ctx_pw.h[2];
    const u32 p3 = ctx_pw.h[3];

    // round 2: md5 over the 32 hex characters of the round-1 digest
    md5_ctx_t ctx_mid;

    md5_init (&ctx_mid);

    ctx_mid.w0[0] = M03610_HEX2 (p0,  0);
    ctx_mid.w0[1] = M03610_HEX2 (p0, 16);
    ctx_mid.w0[2] = M03610_HEX2 (p1,  0);
    ctx_mid.w0[3] = M03610_HEX2 (p1, 16);
    ctx_mid.w1[0] = M03610_HEX2 (p2,  0);
    ctx_mid.w1[1] = M03610_HEX2 (p2, 16);
    ctx_mid.w1[2] = M03610_HEX2 (p3,  0);
    ctx_mid.w1[3] = M03610_HEX2 (p3, 16);

    ctx_mid.len = 32;

    md5_final (&ctx_mid);

    const u32 m0 = ctx_mid.h[0];
    const u32 m1 = ctx_mid.h[1];
    const u32 m2 = ctx_mid.h[2];
    const u32 m3 = ctx_mid.h[3];

    // round 3: md5 over the 32 hex characters of the round-2 digest + salt
    md5_ctx_t ctx_out;

    md5_init (&ctx_out);

    ctx_out.w0[0] = M03610_HEX2 (m0,  0);
    ctx_out.w0[1] = M03610_HEX2 (m0, 16);
    ctx_out.w0[2] = M03610_HEX2 (m1,  0);
    ctx_out.w0[3] = M03610_HEX2 (m1, 16);
    ctx_out.w1[0] = M03610_HEX2 (m2,  0);
    ctx_out.w1[1] = M03610_HEX2 (m2, 16);
    ctx_out.w1[2] = M03610_HEX2 (m3,  0);
    ctx_out.w1[3] = M03610_HEX2 (m3, 16);

    ctx_out.len = 32;

    md5_update (&ctx_out, salt_words, salt_len);

    md5_final (&ctx_out);

    const u32 r0 = ctx_out.h[DGST_R0];
    const u32 r1 = ctx_out.h[DGST_R1];
    const u32 r2 = ctx_out.h[DGST_R2];
    const u32 r3 = ctx_out.h[DGST_R3];

    COMPARE_M_SCALAR (r0, r1, r2, r3);
  }
}

#undef M03610_HEX2
/**
 * M03610_HEX2 (x, sh): hex-encode two adjacent bytes of x (bits sh..sh+7 and
 * sh+8..sh+15) through uint_to_hex_lower8 and pack the two table entries
 * into one 32-bit word (first entry in bits 0-15, second in bits 16-31).
 */
#define M03610_HEX2(x,sh)                                      \
  ((uint_to_hex_lower8 (((x) >> (sh))       & 255) <<  0)      \
 | (uint_to_hex_lower8 (((x) >> ((sh) + 8)) & 255) << 16))

/**
 * m03610_sxx: for the left word pws[gid] combined with every right word in
 * combs_buf[0 .. IL_CNT-1], compute
 *
 *   md5 (hex (md5 (hex (md5 (left . right)))) . salt)
 *
 * where hex () is the 32-character lowercase hex form produced via
 * l_bin2asc, and pass the selected digest words to COMPARE_S_SCALAR.
 *
 * NOTE(review): gid, il_pos and search are presumably referenced by name
 * inside COMPARE_S_SCALAR (defined outside this file section) - do not
 * rename them.
 */
KERNEL_FQ void m03610_sxx (KERN_ATTR_BASIC ())
{
  /**
   * modifier
   */

  const u64 gid = get_global_id (0);
  const u64 lid = get_local_id (0);
  const u64 lsz = get_local_size (0);

  /**
   * bin2asc table
   *
   * l_bin2asc[n] holds the two lowercase hex characters of byte n:
   * the high-nibble character in bits 0-7, the low-nibble one in bits 8-15.
   * Each work-item fills the entries lid, lid + lsz, lid + 2 * lsz, ...
   */

  LOCAL_VK u32 l_bin2asc[256];

  for (u32 n = lid; n < 256; n += lsz)
  {
    const u32 nib_lo = n & 15;
    const u32 nib_hi = (n >> 4) & 15;

    const u32 chr_lo = (nib_lo < 10) ? '0' + nib_lo : 'a' - 10 + nib_lo;
    const u32 chr_hi = (nib_hi < 10) ? '0' + nib_hi : 'a' - 10 + nib_hi;

    l_bin2asc[n] = (chr_lo << 8) | chr_hi;
  }

  SYNC_THREADS ();

  // the bounds check comes after SYNC_THREADS so every work-item reaches it
  if (gid >= GID_CNT) return;

  /**
   * digest
   *
   * The four words (in DGST_R0..DGST_R3 order) of the digest stored at
   * DIGESTS_OFFSET_HOST.
   */

  const u32 search[4] =
  {
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
  };

  /**
   * base
   */

  const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;

  // private, zero-padded copy of the salt (one 32-bit word per 4 salt bytes)
  u32 salt_words[64] = { 0 };

  for (u32 off = 0, pos = 0; off < salt_len; off += 4, pos += 1)
  {
    salt_words[pos] = salt_bufs[SALT_POS_HOST].salt_buf[pos];
  }

  // MD5 state after absorbing the left word; reused for every right word
  md5_ctx_t ctx_left;

  md5_init (&ctx_left);

  md5_update_global (&ctx_left, pws[gid].i, pws[gid].pw_len);

  /**
   * loop
   */

  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
  {
    // round 1: md5 (left . right)
    md5_ctx_t ctx_pw = ctx_left;

    md5_update_global (&ctx_pw, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);

    md5_final (&ctx_pw);

    const u32 p0 = ctx_pw.h[0];
    const u32 p1 = ctx_pw.h[1];
    const u32 p2 = ctx_pw.h[2];
    const u32 p3 = ctx_pw.h[3];

    // round 2: md5 over the 32 hex characters of the round-1 digest
    md5_ctx_t ctx_mid;

    md5_init (&ctx_mid);

    ctx_mid.w0[0] = M03610_HEX2 (p0,  0);
    ctx_mid.w0[1] = M03610_HEX2 (p0, 16);
    ctx_mid.w0[2] = M03610_HEX2 (p1,  0);
    ctx_mid.w0[3] = M03610_HEX2 (p1, 16);
    ctx_mid.w1[0] = M03610_HEX2 (p2,  0);
    ctx_mid.w1[1] = M03610_HEX2 (p2, 16);
    ctx_mid.w1[2] = M03610_HEX2 (p3,  0);
    ctx_mid.w1[3] = M03610_HEX2 (p3, 16);

    ctx_mid.len = 32;

    md5_final (&ctx_mid);

    const u32 m0 = ctx_mid.h[0];
    const u32 m1 = ctx_mid.h[1];
    const u32 m2 = ctx_mid.h[2];
    const u32 m3 = ctx_mid.h[3];

    // round 3: md5 over the 32 hex characters of the round-2 digest + salt
    md5_ctx_t ctx_out;

    md5_init (&ctx_out);

    ctx_out.w0[0] = M03610_HEX2 (m0,  0);
    ctx_out.w0[1] = M03610_HEX2 (m0, 16);
    ctx_out.w0[2] = M03610_HEX2 (m1,  0);
    ctx_out.w0[3] = M03610_HEX2 (m1, 16);
    ctx_out.w1[0] = M03610_HEX2 (m2,  0);
    ctx_out.w1[1] = M03610_HEX2 (m2, 16);
    ctx_out.w1[2] = M03610_HEX2 (m3,  0);
    ctx_out.w1[3] = M03610_HEX2 (m3, 16);

    ctx_out.len = 32;

    md5_update (&ctx_out, salt_words, salt_len);

    md5_final (&ctx_out);

    const u32 r0 = ctx_out.h[DGST_R0];
    const u32 r1 = ctx_out.h[DGST_R1];
    const u32 r2 = ctx_out.h[DGST_R2];
    const u32 r3 = ctx_out.h[DGST_R3];

    COMPARE_S_SCALAR (r0, r1, r2, r3);
  }
}

#undef M03610_HEX2

File diff suppressed because it is too large Load Diff

@ -0,0 +1,327 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include M2S(INCLUDE_PATH/inc_vendor.h)
#include M2S(INCLUDE_PATH/inc_types.h)
#include M2S(INCLUDE_PATH/inc_platform.cl)
#include M2S(INCLUDE_PATH/inc_common.cl)
#include M2S(INCLUDE_PATH/inc_simd.cl)
#include M2S(INCLUDE_PATH/inc_hash_md5.cl)
#endif
/**
 * uint_to_hex_lower8 (v): look up every lane of v in l_bin2asc and gather the
 * results with make_u32x. One variant exists per supported VECT_SIZE
 * (1, 2, 4, 8, 16); for any other value the macro is left undefined.
 *
 * The macro refers to l_bin2asc by name, so a table with that name has to be
 * visible wherever the macro is expanded. The kernels in this file always
 * pass a value that was masked with "& 255".
 */
#if VECT_SIZE == 1
#define uint_to_hex_lower8(v) make_u32x (l_bin2asc[(v)])
#elif VECT_SIZE == 2
#define uint_to_hex_lower8(v) \
  make_u32x (l_bin2asc[(v).s0], l_bin2asc[(v).s1])
#elif VECT_SIZE == 4
#define uint_to_hex_lower8(v) \
  make_u32x (l_bin2asc[(v).s0], l_bin2asc[(v).s1], l_bin2asc[(v).s2], l_bin2asc[(v).s3])
#elif VECT_SIZE == 8
#define uint_to_hex_lower8(v) \
  make_u32x (l_bin2asc[(v).s0], l_bin2asc[(v).s1], l_bin2asc[(v).s2], l_bin2asc[(v).s3], \
             l_bin2asc[(v).s4], l_bin2asc[(v).s5], l_bin2asc[(v).s6], l_bin2asc[(v).s7])
#elif VECT_SIZE == 16
#define uint_to_hex_lower8(v) \
  make_u32x (l_bin2asc[(v).s0], l_bin2asc[(v).s1], l_bin2asc[(v).s2], l_bin2asc[(v).s3], \
             l_bin2asc[(v).s4], l_bin2asc[(v).s5], l_bin2asc[(v).s6], l_bin2asc[(v).s7], \
             l_bin2asc[(v).s8], l_bin2asc[(v).s9], l_bin2asc[(v).sa], l_bin2asc[(v).sb], \
             l_bin2asc[(v).sc], l_bin2asc[(v).sd], l_bin2asc[(v).se], l_bin2asc[(v).sf])
#endif
/**
 * M03610_HEX2 (x, sh): hex-encode two adjacent bytes of every lane of x
 * (bits sh..sh+7 and sh+8..sh+15) through uint_to_hex_lower8 and pack the
 * two table entries into one 32-bit word per lane (first entry in bits
 * 0-15, second in bits 16-31).
 */
#define M03610_HEX2(x,sh)                                      \
  ((uint_to_hex_lower8 (((x) >> (sh))       & 255) <<  0)      \
 | (uint_to_hex_lower8 (((x) >> ((sh) + 8)) & 255) << 16))

/**
 * m03610_mxx: take the candidate pws[gid], OR the entries of words_buf_r
 * into its first 32-bit word (VECT_SIZE candidates per iteration) and
 * compute
 *
 *   md5 (hex (md5 (hex (md5 (candidate)))) . salt)
 *
 * where hex () is the 32-character lowercase hex form produced via
 * l_bin2asc, then pass the selected digest words to COMPARE_M_SIMD.
 *
 * NOTE(review): gid and il_pos are presumably referenced by name inside
 * COMPARE_M_SIMD (defined outside this file section) - do not rename them.
 */
KERNEL_FQ void m03610_mxx (KERN_ATTR_VECTOR ())
{
  /**
   * modifier
   */

  const u64 gid = get_global_id (0);
  const u64 lid = get_local_id (0);
  const u64 lsz = get_local_size (0);

  /**
   * bin2asc table
   *
   * l_bin2asc[n] holds the two lowercase hex characters of byte n:
   * the high-nibble character in bits 0-7, the low-nibble one in bits 8-15.
   * Each work-item fills the entries lid, lid + lsz, lid + 2 * lsz, ...
   */

  LOCAL_VK u32 l_bin2asc[256];

  for (u32 n = lid; n < 256; n += lsz)
  {
    const u32 nib_lo = n & 15;
    const u32 nib_hi = (n >> 4) & 15;

    const u32 chr_lo = (nib_lo < 10) ? '0' + nib_lo : 'a' - 10 + nib_lo;
    const u32 chr_hi = (nib_hi < 10) ? '0' + nib_hi : 'a' - 10 + nib_hi;

    l_bin2asc[n] = (chr_lo << 8) | chr_hi;
  }

  SYNC_THREADS ();

  // the bounds check comes after SYNC_THREADS so every work-item reaches it
  if (gid >= GID_CNT) return;

  /**
   * base
   */

  const u32 pw_len = pws[gid].pw_len;

  // private, zero-padded copy of the candidate (one word per 4 bytes)
  u32x pw_words[64] = { 0 };

  for (u32 off = 0, pos = 0; off < pw_len; off += 4, pos += 1)
  {
    pw_words[pos] = pws[gid].i[pos];
  }

  const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;

  // private, zero-padded copy of the salt (one word per 4 bytes)
  u32x salt_words[64] = { 0 };

  for (u32 off = 0, pos = 0; off < salt_len; off += 4, pos += 1)
  {
    salt_words[pos] = salt_bufs[SALT_POS_HOST].salt_buf[pos];
  }

  /**
   * loop
   */

  // first candidate word as loaded from pws; the per-iteration part is OR-ed in
  const u32x w0_left = pw_words[0];

  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
  {
    const u32x w0_right = words_buf_r[il_pos / VECT_SIZE];

    pw_words[0] = w0_left | w0_right;

    // round 1: md5 (candidate)
    md5_ctx_vector_t ctx_pw;

    md5_init_vector (&ctx_pw);

    md5_update_vector (&ctx_pw, pw_words, pw_len);

    md5_final_vector (&ctx_pw);

    const u32x p0 = ctx_pw.h[0];
    const u32x p1 = ctx_pw.h[1];
    const u32x p2 = ctx_pw.h[2];
    const u32x p3 = ctx_pw.h[3];

    // round 2: md5 over the 32 hex characters of the round-1 digest
    md5_ctx_vector_t ctx_mid;

    md5_init_vector (&ctx_mid);

    ctx_mid.w0[0] = M03610_HEX2 (p0,  0);
    ctx_mid.w0[1] = M03610_HEX2 (p0, 16);
    ctx_mid.w0[2] = M03610_HEX2 (p1,  0);
    ctx_mid.w0[3] = M03610_HEX2 (p1, 16);
    ctx_mid.w1[0] = M03610_HEX2 (p2,  0);
    ctx_mid.w1[1] = M03610_HEX2 (p2, 16);
    ctx_mid.w1[2] = M03610_HEX2 (p3,  0);
    ctx_mid.w1[3] = M03610_HEX2 (p3, 16);

    ctx_mid.len = 32;

    md5_final_vector (&ctx_mid);

    const u32x m0 = ctx_mid.h[0];
    const u32x m1 = ctx_mid.h[1];
    const u32x m2 = ctx_mid.h[2];
    const u32x m3 = ctx_mid.h[3];

    // round 3: md5 over the 32 hex characters of the round-2 digest + salt
    md5_ctx_vector_t ctx_out;

    md5_init_vector (&ctx_out);

    ctx_out.w0[0] = M03610_HEX2 (m0,  0);
    ctx_out.w0[1] = M03610_HEX2 (m0, 16);
    ctx_out.w0[2] = M03610_HEX2 (m1,  0);
    ctx_out.w0[3] = M03610_HEX2 (m1, 16);
    ctx_out.w1[0] = M03610_HEX2 (m2,  0);
    ctx_out.w1[1] = M03610_HEX2 (m2, 16);
    ctx_out.w1[2] = M03610_HEX2 (m3,  0);
    ctx_out.w1[3] = M03610_HEX2 (m3, 16);

    ctx_out.len = 32;

    md5_update_vector (&ctx_out, salt_words, salt_len);

    md5_final_vector (&ctx_out);

    const u32x r0 = ctx_out.h[DGST_R0];
    const u32x r1 = ctx_out.h[DGST_R1];
    const u32x r2 = ctx_out.h[DGST_R2];
    const u32x r3 = ctx_out.h[DGST_R3];

    COMPARE_M_SIMD (r0, r1, r2, r3);
  }
}

#undef M03610_HEX2
/**
 * M03610_HEX2 (x, sh): hex-encode two adjacent bytes of every lane of x
 * (bits sh..sh+7 and sh+8..sh+15) through uint_to_hex_lower8 and pack the
 * two table entries into one 32-bit word per lane (first entry in bits
 * 0-15, second in bits 16-31).
 */
#define M03610_HEX2(x,sh)                                      \
  ((uint_to_hex_lower8 (((x) >> (sh))       & 255) <<  0)      \
 | (uint_to_hex_lower8 (((x) >> ((sh) + 8)) & 255) << 16))

/**
 * m03610_sxx: take the candidate pws[gid], OR the entries of words_buf_r
 * into its first 32-bit word (VECT_SIZE candidates per iteration) and
 * compute
 *
 *   md5 (hex (md5 (hex (md5 (candidate)))) . salt)
 *
 * where hex () is the 32-character lowercase hex form produced via
 * l_bin2asc, then pass the selected digest words to COMPARE_S_SIMD.
 *
 * NOTE(review): gid, il_pos and search are presumably referenced by name
 * inside COMPARE_S_SIMD (defined outside this file section) - do not rename
 * them.
 */
KERNEL_FQ void m03610_sxx (KERN_ATTR_VECTOR ())
{
  /**
   * modifier
   */

  const u64 gid = get_global_id (0);
  const u64 lid = get_local_id (0);
  const u64 lsz = get_local_size (0);

  /**
   * bin2asc table
   *
   * l_bin2asc[n] holds the two lowercase hex characters of byte n:
   * the high-nibble character in bits 0-7, the low-nibble one in bits 8-15.
   * Each work-item fills the entries lid, lid + lsz, lid + 2 * lsz, ...
   */

  LOCAL_VK u32 l_bin2asc[256];

  for (u32 n = lid; n < 256; n += lsz)
  {
    const u32 nib_lo = n & 15;
    const u32 nib_hi = (n >> 4) & 15;

    const u32 chr_lo = (nib_lo < 10) ? '0' + nib_lo : 'a' - 10 + nib_lo;
    const u32 chr_hi = (nib_hi < 10) ? '0' + nib_hi : 'a' - 10 + nib_hi;

    l_bin2asc[n] = (chr_lo << 8) | chr_hi;
  }

  SYNC_THREADS ();

  // the bounds check comes after SYNC_THREADS so every work-item reaches it
  if (gid >= GID_CNT) return;

  /**
   * digest
   *
   * The four words (in DGST_R0..DGST_R3 order) of the digest stored at
   * DIGESTS_OFFSET_HOST.
   */

  const u32 search[4] =
  {
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
  };

  /**
   * base
   */

  const u32 pw_len = pws[gid].pw_len;

  // private, zero-padded copy of the candidate (one word per 4 bytes)
  u32x pw_words[64] = { 0 };

  for (u32 off = 0, pos = 0; off < pw_len; off += 4, pos += 1)
  {
    pw_words[pos] = pws[gid].i[pos];
  }

  const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;

  // private, zero-padded copy of the salt (one word per 4 bytes)
  u32x salt_words[64] = { 0 };

  for (u32 off = 0, pos = 0; off < salt_len; off += 4, pos += 1)
  {
    salt_words[pos] = salt_bufs[SALT_POS_HOST].salt_buf[pos];
  }

  /**
   * loop
   */

  // first candidate word as loaded from pws; the per-iteration part is OR-ed in
  const u32x w0_left = pw_words[0];

  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
  {
    const u32x w0_right = words_buf_r[il_pos / VECT_SIZE];

    pw_words[0] = w0_left | w0_right;

    // round 1: md5 (candidate)
    md5_ctx_vector_t ctx_pw;

    md5_init_vector (&ctx_pw);

    md5_update_vector (&ctx_pw, pw_words, pw_len);

    md5_final_vector (&ctx_pw);

    const u32x p0 = ctx_pw.h[0];
    const u32x p1 = ctx_pw.h[1];
    const u32x p2 = ctx_pw.h[2];
    const u32x p3 = ctx_pw.h[3];

    // round 2: md5 over the 32 hex characters of the round-1 digest
    md5_ctx_vector_t ctx_mid;

    md5_init_vector (&ctx_mid);

    ctx_mid.w0[0] = M03610_HEX2 (p0,  0);
    ctx_mid.w0[1] = M03610_HEX2 (p0, 16);
    ctx_mid.w0[2] = M03610_HEX2 (p1,  0);
    ctx_mid.w0[3] = M03610_HEX2 (p1, 16);
    ctx_mid.w1[0] = M03610_HEX2 (p2,  0);
    ctx_mid.w1[1] = M03610_HEX2 (p2, 16);
    ctx_mid.w1[2] = M03610_HEX2 (p3,  0);
    ctx_mid.w1[3] = M03610_HEX2 (p3, 16);

    ctx_mid.len = 32;

    md5_final_vector (&ctx_mid);

    const u32x m0 = ctx_mid.h[0];
    const u32x m1 = ctx_mid.h[1];
    const u32x m2 = ctx_mid.h[2];
    const u32x m3 = ctx_mid.h[3];

    // round 3: md5 over the 32 hex characters of the round-2 digest + salt
    md5_ctx_vector_t ctx_out;

    md5_init_vector (&ctx_out);

    ctx_out.w0[0] = M03610_HEX2 (m0,  0);
    ctx_out.w0[1] = M03610_HEX2 (m0, 16);
    ctx_out.w0[2] = M03610_HEX2 (m1,  0);
    ctx_out.w0[3] = M03610_HEX2 (m1, 16);
    ctx_out.w1[0] = M03610_HEX2 (m2,  0);
    ctx_out.w1[1] = M03610_HEX2 (m2, 16);
    ctx_out.w1[2] = M03610_HEX2 (m3,  0);
    ctx_out.w1[3] = M03610_HEX2 (m3, 16);

    ctx_out.len = 32;

    md5_update_vector (&ctx_out, salt_words, salt_len);

    md5_final_vector (&ctx_out);

    const u32x r0 = ctx_out.h[DGST_R0];
    const u32x r1 = ctx_out.h[DGST_R1];
    const u32x r2 = ctx_out.h[DGST_R2];
    const u32x r3 = ctx_out.h[DGST_R3];

    COMPARE_S_SIMD (r0, r1, r2, r3);
  }
}

#undef M03610_HEX2

@ -80,7 +80,7 @@ KERNEL_FQ void m03730_mxx (KERN_ATTR_VECTOR_ESALT (md5_double_salt_t))
const u32 salt_len = esalt_bufs[DIGESTS_OFFSET_HOST].salt1_len;
u32 s[64] = { 0 };
u32x s[64] = { 0 };
for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
{
@ -89,7 +89,7 @@ KERNEL_FQ void m03730_mxx (KERN_ATTR_VECTOR_ESALT (md5_double_salt_t))
const u32 salt_len2 = esalt_bufs[DIGESTS_OFFSET_HOST].salt2_len;
u32 s2[64] = { 0 };
u32x s2[64] = { 0 };
for (u32 i = 0, idx = 0; i < salt_len2; i += 4, idx += 1)
{
@ -232,7 +232,7 @@ KERNEL_FQ void m03730_sxx (KERN_ATTR_VECTOR_ESALT (md5_double_salt_t))
const u32 salt_len = esalt_bufs[DIGESTS_OFFSET_HOST].salt1_len;
u32 s[64] = { 0 };
u32x s[64] = { 0 };
for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
{
@ -241,7 +241,7 @@ KERNEL_FQ void m03730_sxx (KERN_ATTR_VECTOR_ESALT (md5_double_salt_t))
const u32 salt_len2 = esalt_bufs[DIGESTS_OFFSET_HOST].salt2_len;
u32 s2[64] = { 0 };
u32x s2[64] = { 0 };
for (u32 i = 0, idx = 0; i < salt_len2; i += 4, idx += 1)
{

@ -641,10 +641,10 @@ KERNEL_FQ void m04410_m04 (KERN_ATTR_RULES ())
MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33);
a += digest[0] - MD5M_A;
b += digest[1] - MD5M_B;
c += digest[2] - MD5M_C;
d += digest[3] - MD5M_D;
a += digest[0] - make_u32x (MD5M_A);
b += digest[1] - make_u32x (MD5M_B);
c += digest[2] - make_u32x (MD5M_C);
d += digest[3] - make_u32x (MD5M_D);
COMPARE_M_SIMD (a, d, c, b);
}
@ -1285,10 +1285,10 @@ KERNEL_FQ void m04410_s04 (KERN_ATTR_RULES ())
MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33);
a += digest[0] - MD5M_A;
b += digest[1] - MD5M_B;
c += digest[2] - MD5M_C;
d += digest[3] - MD5M_D;
a += digest[0] - make_u32x (MD5M_A);
b += digest[1] - make_u32x (MD5M_B);
c += digest[2] - make_u32x (MD5M_C);
d += digest[3] - make_u32x (MD5M_D);
COMPARE_S_SIMD (a, d, c, b);
}

@ -697,10 +697,10 @@ KERNEL_FQ void m04410_m04 (KERN_ATTR_BASIC ())
MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33);
a += digest[0] - MD5M_A;
b += digest[1] - MD5M_B;
c += digest[2] - MD5M_C;
d += digest[3] - MD5M_D;
a += digest[0] - make_u32x (MD5M_A);
b += digest[1] - make_u32x (MD5M_B);
c += digest[2] - make_u32x (MD5M_C);
d += digest[3] - make_u32x (MD5M_D);
COMPARE_M_SIMD (a, d, c, b);
}
@ -1399,10 +1399,10 @@ KERNEL_FQ void m04410_s04 (KERN_ATTR_BASIC ())
MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33);
a += digest[0] - MD5M_A;
b += digest[1] - MD5M_B;
c += digest[2] - MD5M_C;
d += digest[3] - MD5M_D;
a += digest[0] - make_u32x (MD5M_A);
b += digest[1] - make_u32x (MD5M_B);
c += digest[2] - make_u32x (MD5M_C);
d += digest[3] - make_u32x (MD5M_D);
COMPARE_S_SIMD (a, d, c, b);
}

@ -595,10 +595,10 @@ DECLSPEC void m04410m (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w
MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33);
a += digest[0] - MD5M_A;
b += digest[1] - MD5M_B;
c += digest[2] - MD5M_C;
d += digest[3] - MD5M_D;
a += digest[0] - make_u32x (MD5M_A);
b += digest[1] - make_u32x (MD5M_B);
c += digest[2] - make_u32x (MD5M_C);
d += digest[3] - make_u32x (MD5M_D);
COMPARE_M_SIMD (a, d, c, b);
}
@ -1187,10 +1187,10 @@ DECLSPEC void m04410s (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w
MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33);
a += digest[0] - MD5M_A;
b += digest[1] - MD5M_B;
c += digest[2] - MD5M_C;
d += digest[3] - MD5M_D;
a += digest[0] - make_u32x (MD5M_A);
b += digest[1] - make_u32x (MD5M_B);
c += digest[2] - make_u32x (MD5M_C);
d += digest[3] - make_u32x (MD5M_D);
COMPARE_S_SIMD (a, d, c, b);
}

@ -0,0 +1,813 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include M2S(INCLUDE_PATH/inc_vendor.h)
#include M2S(INCLUDE_PATH/inc_types.h)
#include M2S(INCLUDE_PATH/inc_platform.cl)
#include M2S(INCLUDE_PATH/inc_common.cl)
#include M2S(INCLUDE_PATH/inc_rp_optimized.h)
#include M2S(INCLUDE_PATH/inc_rp_optimized.cl)
#include M2S(INCLUDE_PATH/inc_simd.cl)
#include M2S(INCLUDE_PATH/inc_hash_md5.cl)
#include M2S(INCLUDE_PATH/inc_hash_sha1.cl)
#endif
/**
 * uint_to_hex_lower8 (v): look up every lane of v in l_bin2asc and gather the
 * results with make_u32x. One variant exists per supported VECT_SIZE
 * (1, 2, 4, 8, 16); for any other value the macro is left undefined.
 *
 * The macro refers to l_bin2asc by name, so a table with that name has to be
 * visible wherever the macro is expanded.
 */
#if VECT_SIZE == 1
#define uint_to_hex_lower8(v) make_u32x (l_bin2asc[(v)])
#elif VECT_SIZE == 2
#define uint_to_hex_lower8(v) \
  make_u32x (l_bin2asc[(v).s0], l_bin2asc[(v).s1])
#elif VECT_SIZE == 4
#define uint_to_hex_lower8(v) \
  make_u32x (l_bin2asc[(v).s0], l_bin2asc[(v).s1], l_bin2asc[(v).s2], l_bin2asc[(v).s3])
#elif VECT_SIZE == 8
#define uint_to_hex_lower8(v) \
  make_u32x (l_bin2asc[(v).s0], l_bin2asc[(v).s1], l_bin2asc[(v).s2], l_bin2asc[(v).s3], \
             l_bin2asc[(v).s4], l_bin2asc[(v).s5], l_bin2asc[(v).s6], l_bin2asc[(v).s7])
#elif VECT_SIZE == 16
#define uint_to_hex_lower8(v) \
  make_u32x (l_bin2asc[(v).s0], l_bin2asc[(v).s1], l_bin2asc[(v).s2], l_bin2asc[(v).s3], \
             l_bin2asc[(v).s4], l_bin2asc[(v).s5], l_bin2asc[(v).s6], l_bin2asc[(v).s7], \
             l_bin2asc[(v).s8], l_bin2asc[(v).s9], l_bin2asc[(v).sa], l_bin2asc[(v).sb], \
             l_bin2asc[(v).sc], l_bin2asc[(v).sd], l_bin2asc[(v).se], l_bin2asc[(v).sf])
#endif
KERNEL_FQ void m04420_m04 (KERN_ATTR_RULES ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
}
SYNC_THREADS ();
if (gid >= GID_CNT) return;
/**
* base
*/
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_len = pws[gid].pw_len & 63;
/**
* salt
*/
u32 salt_buf0[4];
u32 salt_buf1[4];
u32 salt_buf2[4];
u32 salt_buf3[4];
salt_buf0[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 0];
salt_buf0[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 1];
salt_buf0[2] = salt_bufs[SALT_POS_HOST].salt_buf[ 2];
salt_buf0[3] = salt_bufs[SALT_POS_HOST].salt_buf[ 3];
salt_buf1[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 4];
salt_buf1[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 5];
salt_buf1[2] = salt_bufs[SALT_POS_HOST].salt_buf[ 6];
salt_buf1[3] = salt_bufs[SALT_POS_HOST].salt_buf[ 7];
salt_buf2[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 8];
salt_buf2[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 9];
salt_buf2[2] = salt_bufs[SALT_POS_HOST].salt_buf[10];
salt_buf2[3] = salt_bufs[SALT_POS_HOST].salt_buf[11];
salt_buf3[0] = salt_bufs[SALT_POS_HOST].salt_buf[12];
salt_buf3[1] = salt_bufs[SALT_POS_HOST].salt_buf[13];
salt_buf3[2] = salt_bufs[SALT_POS_HOST].salt_buf[14];
salt_buf3[3] = salt_bufs[SALT_POS_HOST].salt_buf[15];
const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;
/**
* loop
*/
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
{
u32x w0[4] = { 0 };
u32x w1[4] = { 0 };
u32x w2[4] = { 0 };
u32x w3[4] = { 0 };
const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
/**
* append salt
*/
u32x s0[4];
u32x s1[4];
u32x s2[4];
u32x s3[4];
s0[0] = salt_buf0[0];
s0[1] = salt_buf0[1];
s0[2] = salt_buf0[2];
s0[3] = salt_buf0[3];
s1[0] = salt_buf1[0];
s1[1] = salt_buf1[1];
s1[2] = salt_buf1[2];
s1[3] = salt_buf1[3];
s2[0] = salt_buf2[0];
s2[1] = salt_buf2[1];
s2[2] = salt_buf2[2];
s2[3] = salt_buf2[3];
s3[0] = salt_buf3[0];
s3[1] = salt_buf3[1];
s3[2] = salt_buf3[2];
s3[3] = salt_buf3[3];
switch_buffer_by_offset_le_VV (s0, s1, s2, s3, out_len);
const u32x pw_salt_len = out_len + salt_len;
w0[0] |= s0[0];
w0[1] |= s0[1];
w0[2] |= s0[2];
w0[3] |= s0[3];
w1[0] |= s1[0];
w1[1] |= s1[1];
w1[2] |= s1[2];
w1[3] |= s1[3];
w2[0] |= s2[0];
w2[1] |= s2[1];
w2[2] |= s2[2];
w2[3] |= s2[3];
w3[0] |= s3[0];
w3[1] |= s3[1];
w3[2] |= s3[2];
w3[3] |= s3[3];
/**
* sha1
*/
u32x w0_t = hc_swap32 (w0[0]);
u32x w1_t = hc_swap32 (w0[1]);
u32x w2_t = hc_swap32 (w0[2]);
u32x w3_t = hc_swap32 (w0[3]);
u32x w4_t = hc_swap32 (w1[0]);
u32x w5_t = hc_swap32 (w1[1]);
u32x w6_t = hc_swap32 (w1[2]);
u32x w7_t = hc_swap32 (w1[3]);
u32x w8_t = hc_swap32 (w2[0]);
u32x w9_t = hc_swap32 (w2[1]);
u32x wa_t = hc_swap32 (w2[2]);
u32x wb_t = hc_swap32 (w2[3]);
u32x wc_t = hc_swap32 (w3[0]);
u32x wd_t = hc_swap32 (w3[1]);
u32x we_t = 0;
u32x wf_t = pw_salt_len * 8;
u32x a = SHA1M_A;
u32x b = SHA1M_B;
u32x c = SHA1M_C;
u32x d = SHA1M_D;
u32x e = SHA1M_E;
#undef K
#define K SHA1C00
SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t);
SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t);
SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t);
SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t);
SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t);
SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t);
SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t);
SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t);
SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t);
SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t);
SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t);
SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t);
SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t);
SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t);
SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t);
SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t);
#undef K
#define K SHA1C01
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t);
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t);
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t);
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t);
#undef K
#define K SHA1C02
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t);
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t);
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t);
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t);
#undef K
#define K SHA1C03
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t);
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t);
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t);
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t);
a += make_u32x (SHA1M_A);
b += make_u32x (SHA1M_B);
c += make_u32x (SHA1M_C);
d += make_u32x (SHA1M_D);
e += make_u32x (SHA1M_E);
/**
* md5
*/
w0_t = uint_to_hex_lower8 ((a >> 24) & 255) << 0
| uint_to_hex_lower8 ((a >> 16) & 255) << 16;
w1_t = uint_to_hex_lower8 ((a >> 8) & 255) << 0
| uint_to_hex_lower8 ((a >> 0) & 255) << 16;
w2_t = uint_to_hex_lower8 ((b >> 24) & 255) << 0
| uint_to_hex_lower8 ((b >> 16) & 255) << 16;
w3_t = uint_to_hex_lower8 ((b >> 8) & 255) << 0
| uint_to_hex_lower8 ((b >> 0) & 255) << 16;
w4_t = uint_to_hex_lower8 ((c >> 24) & 255) << 0
| uint_to_hex_lower8 ((c >> 16) & 255) << 16;
w5_t = uint_to_hex_lower8 ((c >> 8) & 255) << 0
| uint_to_hex_lower8 ((c >> 0) & 255) << 16;
w6_t = uint_to_hex_lower8 ((d >> 24) & 255) << 0
| uint_to_hex_lower8 ((d >> 16) & 255) << 16;
w7_t = uint_to_hex_lower8 ((d >> 8) & 255) << 0
| uint_to_hex_lower8 ((d >> 0) & 255) << 16;
w8_t = uint_to_hex_lower8 ((e >> 24) & 255) << 0
| uint_to_hex_lower8 ((e >> 16) & 255) << 16;
w9_t = uint_to_hex_lower8 ((e >> 8) & 255) << 0
| uint_to_hex_lower8 ((e >> 0) & 255) << 16;
wa_t = 0x80;
wb_t = 0;
wc_t = 0;
wd_t = 0;
we_t = 40 * 8;
wf_t = 0;
a = MD5M_A;
b = MD5M_B;
c = MD5M_C;
d = MD5M_D;
e = 0;
MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03);
MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13);
u32x t;
MD5_STEP (MD5_H1, a, b, c, d, w5_t, MD5C20, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w8_t, MD5C21, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, wb_t, MD5C22, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, we_t, MD5C23, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w1_t, MD5C24, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w4_t, MD5C25, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w7_t, MD5C26, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, wa_t, MD5C27, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, wd_t, MD5C28, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w0_t, MD5C29, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w3_t, MD5C2a, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w6_t, MD5C2b, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w9_t, MD5C2c, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, wc_t, MD5C2d, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, wf_t, MD5C2e, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w2_t, MD5C2f, MD5S23);
MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33);
COMPARE_M_SIMD (a, d, c, b);
}
}
/**
 * Multi-hash kernel entry point with an empty body: it performs no work.
 * NOTE(review): presumably kept only so that the _m08 entry point exists
 * alongside m04420_m04 -- confirm against the host-side kernel loader.
 */
KERNEL_FQ void m04420_m08 (KERN_ATTR_RULES ())
{
}
/**
 * Multi-hash kernel entry point with an empty body: it performs no work.
 * NOTE(review): presumably kept only so that the _m16 entry point exists
 * alongside m04420_m04 -- confirm against the host-side kernel loader.
 */
KERNEL_FQ void m04420_m16 (KERN_ATTR_RULES ())
{
}
/**
 * Single-hash kernel for rule-based attacks.
 *
 * For every rule in [0, IL_CNT) the base password is mutated, the salt is
 * appended, the result is run through one SHA-1 block, the five SHA-1 state
 * words are written out as 40 lowercase hex characters, and those characters
 * are run through one MD5 block. The resulting MD5 state is compared against
 * the single target digest held in `search`.
 */
KERNEL_FQ void m04420_s04 (KERN_ATTR_RULES ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*
* l_bin2asc[i] holds the two lowercase hex characters of byte value i:
* the character for the high nibble sits in bits 0..7 and the character
* for the low nibble in bits 8..15.
*/
LOCAL_VK u32 l_bin2asc[256];
// The 256 entries are spread over the work-items: each one starts at its
// local id and advances by the local size.
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
}
SYNC_THREADS ();
// The range check comes only after the table fill and SYNC_THREADS, so
// work-items with gid >= GID_CNT still take part in both.
if (gid >= GID_CNT) return;
/**
* base
*
* Only the first eight 32-bit words of the candidate are read; the length is
* masked to the range 0..63.
*/
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_len = pws[gid].pw_len & 63;
/**
* salt
*
* The first sixteen 32-bit words of the salt are copied into four groups of
* four words.
*/
u32 salt_buf0[4];
u32 salt_buf1[4];
u32 salt_buf2[4];
u32 salt_buf3[4];
salt_buf0[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 0];
salt_buf0[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 1];
salt_buf0[2] = salt_bufs[SALT_POS_HOST].salt_buf[ 2];
salt_buf0[3] = salt_bufs[SALT_POS_HOST].salt_buf[ 3];
salt_buf1[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 4];
salt_buf1[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 5];
salt_buf1[2] = salt_bufs[SALT_POS_HOST].salt_buf[ 6];
salt_buf1[3] = salt_bufs[SALT_POS_HOST].salt_buf[ 7];
salt_buf2[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 8];
salt_buf2[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 9];
salt_buf2[2] = salt_bufs[SALT_POS_HOST].salt_buf[10];
salt_buf2[3] = salt_bufs[SALT_POS_HOST].salt_buf[11];
salt_buf3[0] = salt_bufs[SALT_POS_HOST].salt_buf[12];
salt_buf3[1] = salt_bufs[SALT_POS_HOST].salt_buf[13];
salt_buf3[2] = salt_bufs[SALT_POS_HOST].salt_buf[14];
salt_buf3[3] = salt_bufs[SALT_POS_HOST].salt_buf[15];
const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;
/**
* digest
*
* The four words of the target digest, picked in DGST_R0..DGST_R3 order.
*/
const u32 search[4] =
{
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
};
/**
* loop
*
* Each iteration handles VECT_SIZE rules at once.
*/
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
{
u32x w0[4] = { 0 };
u32x w1[4] = { 0 };
u32x w2[4] = { 0 };
u32x w3[4] = { 0 };
// The mutated candidate is written to w0/w1; out_len is its new length.
const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
/**
* append salt
*/
u32x s0[4];
u32x s1[4];
u32x s2[4];
u32x s3[4];
s0[0] = salt_buf0[0];
s0[1] = salt_buf0[1];
s0[2] = salt_buf0[2];
s0[3] = salt_buf0[3];
s1[0] = salt_buf1[0];
s1[1] = salt_buf1[1];
s1[2] = salt_buf1[2];
s1[3] = salt_buf1[3];
s2[0] = salt_buf2[0];
s2[1] = salt_buf2[1];
s2[2] = salt_buf2[2];
s2[3] = salt_buf2[3];
s3[0] = salt_buf3[0];
s3[1] = salt_buf3[1];
s3[2] = salt_buf3[2];
s3[3] = salt_buf3[3];
// NOTE(review): presumably moves the salt out_len bytes further into the
// 64-byte buffer so that the OR below places it right behind the candidate
// -- confirm against the helper's definition.
switch_buffer_by_offset_le_VV (s0, s1, s2, s3, out_len);
const u32x pw_salt_len = out_len + salt_len;
w0[0] |= s0[0];
w0[1] |= s0[1];
w0[2] |= s0[2];
w0[3] |= s0[3];
w1[0] |= s1[0];
w1[1] |= s1[1];
w1[2] |= s1[2];
w1[3] |= s1[3];
w2[0] |= s2[0];
w2[1] |= s2[1];
w2[2] |= s2[2];
w2[3] |= s2[3];
w3[0] |= s3[0];
w3[1] |= s3[1];
w3[2] |= s3[2];
w3[3] |= s3[3];
/**
* sha1
*
* The first fourteen message words are byte-swapped copies of the combined
* buffer; the last two words are 0 and the message length in bits.
* NOTE(review): no 0x80 padding byte is appended in this kernel --
* presumably it is already part of the salt buffer prepared by the host;
* confirm against the module's option flags.
*/
u32x w0_t = hc_swap32 (w0[0]);
u32x w1_t = hc_swap32 (w0[1]);
u32x w2_t = hc_swap32 (w0[2]);
u32x w3_t = hc_swap32 (w0[3]);
u32x w4_t = hc_swap32 (w1[0]);
u32x w5_t = hc_swap32 (w1[1]);
u32x w6_t = hc_swap32 (w1[2]);
u32x w7_t = hc_swap32 (w1[3]);
u32x w8_t = hc_swap32 (w2[0]);
u32x w9_t = hc_swap32 (w2[1]);
u32x wa_t = hc_swap32 (w2[2]);
u32x wb_t = hc_swap32 (w2[3]);
u32x wc_t = hc_swap32 (w3[0]);
u32x wd_t = hc_swap32 (w3[1]);
u32x we_t = 0;
u32x wf_t = pw_salt_len * 8;
u32x a = SHA1M_A;
u32x b = SHA1M_B;
u32x c = SHA1M_C;
u32x d = SHA1M_D;
u32x e = SHA1M_E;
// K is redefined before each group of 20 steps.
// NOTE(review): presumably SHA1_STEP reads K as its round constant -- confirm.
#undef K
#define K SHA1C00
SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t);
SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t);
SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t);
SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t);
SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t);
SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t);
SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t);
SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t);
SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t);
SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t);
SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t);
SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t);
SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t);
SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t);
SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t);
SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t);
// From here on every message word is recomputed in place, right before the
// step that uses it, as the XOR of four earlier words rotated left by one.
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t);
#undef K
#define K SHA1C01
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t);
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t);
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t);
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t);
#undef K
#define K SHA1C02
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t);
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t);
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t);
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t);
#undef K
#define K SHA1C03
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t);
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t);
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t);
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t);
// Add the initial SHA-1 state back in to obtain the final digest words.
a += make_u32x (SHA1M_A);
b += make_u32x (SHA1M_B);
c += make_u32x (SHA1M_C);
d += make_u32x (SHA1M_D);
e += make_u32x (SHA1M_E);
/**
* md5
*
* Each SHA-1 word is turned into two message words, most significant byte
* first. One table lookup yields the two hex characters of a byte (16 bits),
* and two lookups are packed into each 32-bit message word.
*/
w0_t = uint_to_hex_lower8 ((a >> 24) & 255) << 0
| uint_to_hex_lower8 ((a >> 16) & 255) << 16;
w1_t = uint_to_hex_lower8 ((a >> 8) & 255) << 0
| uint_to_hex_lower8 ((a >> 0) & 255) << 16;
w2_t = uint_to_hex_lower8 ((b >> 24) & 255) << 0
| uint_to_hex_lower8 ((b >> 16) & 255) << 16;
w3_t = uint_to_hex_lower8 ((b >> 8) & 255) << 0
| uint_to_hex_lower8 ((b >> 0) & 255) << 16;
w4_t = uint_to_hex_lower8 ((c >> 24) & 255) << 0
| uint_to_hex_lower8 ((c >> 16) & 255) << 16;
w5_t = uint_to_hex_lower8 ((c >> 8) & 255) << 0
| uint_to_hex_lower8 ((c >> 0) & 255) << 16;
w6_t = uint_to_hex_lower8 ((d >> 24) & 255) << 0
| uint_to_hex_lower8 ((d >> 16) & 255) << 16;
w7_t = uint_to_hex_lower8 ((d >> 8) & 255) << 0
| uint_to_hex_lower8 ((d >> 0) & 255) << 16;
w8_t = uint_to_hex_lower8 ((e >> 24) & 255) << 0
| uint_to_hex_lower8 ((e >> 16) & 255) << 16;
w9_t = uint_to_hex_lower8 ((e >> 8) & 255) << 0
| uint_to_hex_lower8 ((e >> 0) & 255) << 16;
// Padding for a 40-byte message: the 0x80 marker directly after the data and
// the length in bits (40 * 8) in word 14.
wa_t = 0x80;
wb_t = 0;
wc_t = 0;
wd_t = 0;
we_t = 40 * 8;
wf_t = 0;
a = MD5M_A;
b = MD5M_B;
c = MD5M_C;
d = MD5M_D;
// e is not passed to any of the MD5 steps below.
e = 0;
MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03);
MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13);
// NOTE(review): t is never referenced directly in this kernel; presumably it
// is a scratch variable used inside the MD5_H1 / MD5_H2 macros -- confirm.
u32x t;
MD5_STEP (MD5_H1, a, b, c, d, w5_t, MD5C20, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w8_t, MD5C21, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, wb_t, MD5C22, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, we_t, MD5C23, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w1_t, MD5C24, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w4_t, MD5C25, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w7_t, MD5C26, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, wa_t, MD5C27, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, wd_t, MD5C28, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w0_t, MD5C29, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w3_t, MD5C2a, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w6_t, MD5C2b, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w9_t, MD5C2c, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, wc_t, MD5C2d, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, wf_t, MD5C2e, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w2_t, MD5C2f, MD5S23);
MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30);
// Early reject: none of the three remaining steps takes `a` as its first
// state argument, so presumably `a` already holds its final value here. If
// it does not match search[0], the rest of this iteration is skipped.
if (MATCHES_NONE_VS (a, search[0])) continue;
MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33);
// NOTE(review): the MD5M_* initial values are not added back before the
// comparison; presumably the stored digest is already adjusted for that --
// confirm against the host-side module.
COMPARE_S_SIMD (a, d, c, b);
}
}
/**
 * Single-hash kernel entry point with an empty body: it performs no work.
 * NOTE(review): presumably kept only so that the _s08 entry point exists
 * alongside m04420_s04 -- confirm against the host-side kernel loader.
 */
KERNEL_FQ void m04420_s08 (KERN_ATTR_RULES ())
{
}
/**
 * Single-hash kernel entry point with an empty body: it performs no work.
 * NOTE(review): presumably kept only so that the _s16 entry point exists
 * alongside m04420_s04 -- confirm against the host-side kernel loader.
 */
KERNEL_FQ void m04420_s16 (KERN_ATTR_RULES ())
{
}

@ -0,0 +1,280 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
//#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include M2S(INCLUDE_PATH/inc_vendor.h)
#include M2S(INCLUDE_PATH/inc_types.h)
#include M2S(INCLUDE_PATH/inc_platform.cl)
#include M2S(INCLUDE_PATH/inc_common.cl)
#include M2S(INCLUDE_PATH/inc_rp.h)
#include M2S(INCLUDE_PATH/inc_rp.cl)
#include M2S(INCLUDE_PATH/inc_scalar.cl)
#include M2S(INCLUDE_PATH/inc_hash_md5.cl)
#include M2S(INCLUDE_PATH/inc_hash_sha1.cl)
#endif
/**
 * uint_to_hex_lower8 (i): looks up every lane of `i` in `l_bin2asc` and
 * rebuilds a u32x from the results via make_u32x. One variant is provided per
 * supported VECT_SIZE (1, 2, 4, 8, 16); for any other value the macro is left
 * undefined.
 *
 * The macro refers to `l_bin2asc` by name, so an array with that name must be
 * in scope wherever it is expanded. The kernels in this file declare it with
 * 256 entries and mask the argument with 255 before the lookup.
 */
#if VECT_SIZE == 1
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
#elif VECT_SIZE == 2
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
#elif VECT_SIZE == 4
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
#elif VECT_SIZE == 8
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
#elif VECT_SIZE == 16
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
#endif
/**
 * Multi-hash kernel for rule-based attacks (scalar code path).
 *
 * For every rule in [0, IL_CNT): mutate the base password, hash
 * password || salt with SHA-1, write the five SHA-1 state words out as 40
 * lowercase hex characters, hash those with MD5 and hand the MD5 state words
 * selected by DGST_R0..DGST_R3 to COMPARE_M_SCALAR.
 *
 * NOTE(review): `gid`, `il_pos` and `l_bin2asc` keep their original names on
 * purpose. uint_to_hex_lower8 references `l_bin2asc` by name, and the
 * COMPARE_* macros presumably reference `gid` and `il_pos` -- confirm before
 * renaming any of them.
 */
KERNEL_FQ void m04420_mxx (KERN_ATTR_RULES ())
{
  /**
   * work-item coordinates
   */

  const u64 gid       = get_global_id (0);
  const u64 local_id  = get_local_id (0);
  const u64 local_cnt = get_local_size (0);

  /**
   * byte -> two lowercase hex characters
   *
   * High-nibble character in bits 0..7, low-nibble character in bits 8..15.
   * The 256 entries are spread over the work-items of the group.
   */

  LOCAL_VK u32 l_bin2asc[256];

  for (u32 byte = local_id; byte < 256; byte += local_cnt)
  {
    const u32 nib_lo = byte & 15;
    const u32 nib_hi = (byte >> 4) & 15;

    const u32 chr_lo = (nib_lo < 10) ? '0' + nib_lo : 'a' - 10 + nib_lo;
    const u32 chr_hi = (nib_hi < 10) ? '0' + nib_hi : 'a' - 10 + nib_hi;

    l_bin2asc[byte] = (chr_lo << 8) | chr_hi;
  }

  SYNC_THREADS ();

  // Checked only after the table fill so that every work-item takes part in it.
  if (gid >= GID_CNT) return;

  /**
   * base password and byte-swapped salt
   */

  COPY_PW (pws[gid]);

  const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;

  u32 salt_swapped[64] = { 0 };

  // One 32-bit word per four salt bytes, the last word possibly partial.
  for (u32 word = 0; (word * 4) < salt_len; word++)
  {
    salt_swapped[word] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[word]);
  }

  /**
   * one candidate per rule
   */

  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
  {
    pw_t cand = PASTE_PW;

    cand.pw_len = apply_rules (rules_buf[il_pos].cmds, cand.i, cand.pw_len);

    // inner hash: sha1 (password || salt)

    sha1_ctx_t inner;

    sha1_init (&inner);

    sha1_update_swap (&inner, cand.i, cand.pw_len);

    sha1_update (&inner, salt_swapped, salt_len);

    sha1_final (&inner);

    const u32 sha_a = inner.h[0];
    const u32 sha_b = inner.h[1];
    const u32 sha_c = inner.h[2];
    const u32 sha_d = inner.h[3];
    const u32 sha_e = inner.h[4];

    // hex-encode the digest, most significant byte first:
    // two bytes (four characters) per 32-bit message word

    u32 w0[4];
    u32 w1[4];
    u32 w2[4];
    u32 w3[4] = { 0 };

    w0[0] = uint_to_hex_lower8 ((sha_a >> 24) & 0xff) | (uint_to_hex_lower8 ((sha_a >> 16) & 0xff) << 16);
    w0[1] = uint_to_hex_lower8 ((sha_a >>  8) & 0xff) | (uint_to_hex_lower8 ((sha_a >>  0) & 0xff) << 16);
    w0[2] = uint_to_hex_lower8 ((sha_b >> 24) & 0xff) | (uint_to_hex_lower8 ((sha_b >> 16) & 0xff) << 16);
    w0[3] = uint_to_hex_lower8 ((sha_b >>  8) & 0xff) | (uint_to_hex_lower8 ((sha_b >>  0) & 0xff) << 16);

    w1[0] = uint_to_hex_lower8 ((sha_c >> 24) & 0xff) | (uint_to_hex_lower8 ((sha_c >> 16) & 0xff) << 16);
    w1[1] = uint_to_hex_lower8 ((sha_c >>  8) & 0xff) | (uint_to_hex_lower8 ((sha_c >>  0) & 0xff) << 16);
    w1[2] = uint_to_hex_lower8 ((sha_d >> 24) & 0xff) | (uint_to_hex_lower8 ((sha_d >> 16) & 0xff) << 16);
    w1[3] = uint_to_hex_lower8 ((sha_d >>  8) & 0xff) | (uint_to_hex_lower8 ((sha_d >>  0) & 0xff) << 16);

    w2[0] = uint_to_hex_lower8 ((sha_e >> 24) & 0xff) | (uint_to_hex_lower8 ((sha_e >> 16) & 0xff) << 16);
    w2[1] = uint_to_hex_lower8 ((sha_e >>  8) & 0xff) | (uint_to_hex_lower8 ((sha_e >>  0) & 0xff) << 16);
    w2[2] = 0;
    w2[3] = 0;

    // outer hash: md5 over the 40 hex characters

    md5_ctx_t outer;

    md5_init (&outer);

    md5_update_64 (&outer, w0, w1, w2, w3, 40);

    md5_final (&outer);

    const u32 r0 = outer.h[DGST_R0];
    const u32 r1 = outer.h[DGST_R1];
    const u32 r2 = outer.h[DGST_R2];
    const u32 r3 = outer.h[DGST_R3];

    COMPARE_M_SCALAR (r0, r1, r2, r3);
  }
}
/**
 * Single-hash kernel for rule-based attacks (scalar code path).
 *
 * For every rule in [0, IL_CNT): mutate the base password, hash
 * password || salt with SHA-1, write the five SHA-1 state words out as 40
 * lowercase hex characters, hash those with MD5 and hand the MD5 state words
 * selected by DGST_R0..DGST_R3 to COMPARE_S_SCALAR.
 *
 * NOTE(review): `gid`, `il_pos`, `search` and `l_bin2asc` keep their original
 * names on purpose. uint_to_hex_lower8 references `l_bin2asc` by name, and
 * the COMPARE_* macros presumably reference the other three -- confirm before
 * renaming any of them.
 */
KERNEL_FQ void m04420_sxx (KERN_ATTR_RULES ())
{
  /**
   * work-item coordinates
   */

  const u64 gid       = get_global_id (0);
  const u64 local_id  = get_local_id (0);
  const u64 local_cnt = get_local_size (0);

  /**
   * byte -> two lowercase hex characters
   *
   * High-nibble character in bits 0..7, low-nibble character in bits 8..15.
   * The 256 entries are spread over the work-items of the group.
   */

  LOCAL_VK u32 l_bin2asc[256];

  for (u32 byte = local_id; byte < 256; byte += local_cnt)
  {
    const u32 nib_lo = byte & 15;
    const u32 nib_hi = (byte >> 4) & 15;

    const u32 chr_lo = (nib_lo < 10) ? '0' + nib_lo : 'a' - 10 + nib_lo;
    const u32 chr_hi = (nib_hi < 10) ? '0' + nib_hi : 'a' - 10 + nib_hi;

    l_bin2asc[byte] = (chr_lo << 8) | chr_hi;
  }

  SYNC_THREADS ();

  // Checked only after the table fill so that every work-item takes part in it.
  if (gid >= GID_CNT) return;

  /**
   * target digest, words picked in DGST_R0..DGST_R3 order
   */

  const u32 search[4] =
  {
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
  };

  /**
   * base password and byte-swapped salt
   */

  COPY_PW (pws[gid]);

  const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;

  u32 salt_swapped[64] = { 0 };

  // One 32-bit word per four salt bytes, the last word possibly partial.
  for (u32 word = 0; (word * 4) < salt_len; word++)
  {
    salt_swapped[word] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[word]);
  }

  /**
   * one candidate per rule
   */

  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
  {
    pw_t cand = PASTE_PW;

    cand.pw_len = apply_rules (rules_buf[il_pos].cmds, cand.i, cand.pw_len);

    // inner hash: sha1 (password || salt)

    sha1_ctx_t inner;

    sha1_init (&inner);

    sha1_update_swap (&inner, cand.i, cand.pw_len);

    sha1_update (&inner, salt_swapped, salt_len);

    sha1_final (&inner);

    const u32 sha_a = inner.h[0];
    const u32 sha_b = inner.h[1];
    const u32 sha_c = inner.h[2];
    const u32 sha_d = inner.h[3];
    const u32 sha_e = inner.h[4];

    // hex-encode the digest, most significant byte first:
    // two bytes (four characters) per 32-bit message word

    u32 w0[4];
    u32 w1[4];
    u32 w2[4];
    u32 w3[4] = { 0 };

    w0[0] = uint_to_hex_lower8 ((sha_a >> 24) & 0xff) | (uint_to_hex_lower8 ((sha_a >> 16) & 0xff) << 16);
    w0[1] = uint_to_hex_lower8 ((sha_a >>  8) & 0xff) | (uint_to_hex_lower8 ((sha_a >>  0) & 0xff) << 16);
    w0[2] = uint_to_hex_lower8 ((sha_b >> 24) & 0xff) | (uint_to_hex_lower8 ((sha_b >> 16) & 0xff) << 16);
    w0[3] = uint_to_hex_lower8 ((sha_b >>  8) & 0xff) | (uint_to_hex_lower8 ((sha_b >>  0) & 0xff) << 16);

    w1[0] = uint_to_hex_lower8 ((sha_c >> 24) & 0xff) | (uint_to_hex_lower8 ((sha_c >> 16) & 0xff) << 16);
    w1[1] = uint_to_hex_lower8 ((sha_c >>  8) & 0xff) | (uint_to_hex_lower8 ((sha_c >>  0) & 0xff) << 16);
    w1[2] = uint_to_hex_lower8 ((sha_d >> 24) & 0xff) | (uint_to_hex_lower8 ((sha_d >> 16) & 0xff) << 16);
    w1[3] = uint_to_hex_lower8 ((sha_d >>  8) & 0xff) | (uint_to_hex_lower8 ((sha_d >>  0) & 0xff) << 16);

    w2[0] = uint_to_hex_lower8 ((sha_e >> 24) & 0xff) | (uint_to_hex_lower8 ((sha_e >> 16) & 0xff) << 16);
    w2[1] = uint_to_hex_lower8 ((sha_e >>  8) & 0xff) | (uint_to_hex_lower8 ((sha_e >>  0) & 0xff) << 16);
    w2[2] = 0;
    w2[3] = 0;

    // outer hash: md5 over the 40 hex characters

    md5_ctx_t outer;

    md5_init (&outer);

    md5_update_64 (&outer, w0, w1, w2, w3, 40);

    md5_final (&outer);

    const u32 r0 = outer.h[DGST_R0];
    const u32 r1 = outer.h[DGST_R1];
    const u32 r2 = outer.h[DGST_R2];
    const u32 r3 = outer.h[DGST_R3];

    COMPARE_S_SCALAR (r0, r1, r2, r3);
  }
}

@ -0,0 +1,931 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include M2S(INCLUDE_PATH/inc_vendor.h)
#include M2S(INCLUDE_PATH/inc_types.h)
#include M2S(INCLUDE_PATH/inc_platform.cl)
#include M2S(INCLUDE_PATH/inc_common.cl)
#include M2S(INCLUDE_PATH/inc_simd.cl)
#include M2S(INCLUDE_PATH/inc_hash_md5.cl)
#include M2S(INCLUDE_PATH/inc_hash_sha1.cl)
#endif
/**
 * uint_to_hex_lower8 (i): looks up every lane of `i` in `l_bin2asc` and
 * rebuilds a u32x from the results via make_u32x. One variant is provided per
 * supported VECT_SIZE (1, 2, 4, 8, 16); for any other value the macro is left
 * undefined.
 *
 * The macro refers to `l_bin2asc` by name, so an array with that name must be
 * in scope wherever it is expanded.
 */
#if VECT_SIZE == 1
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
#elif VECT_SIZE == 2
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
#elif VECT_SIZE == 4
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
#elif VECT_SIZE == 8
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
#elif VECT_SIZE == 16
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
#endif
/**
 * m04420_m04: joins this work-item's base word (pws[gid]) with the words in
 * combs_buf (IL_CNT entries, VECT_SIZE at a time), appends the salt, runs one
 * SHA-1 block over the result, writes the five SHA-1 output words as 40
 * lowercase hex characters, runs one MD5 block over those characters and
 * passes the resulting MD5 state to COMPARE_M_SIMD.
 *
 * NOTE(review): the "_m04" suffix presumably marks the multi-digest variant
 * for the shortest candidate size class - confirm against the host dispatcher.
 */
KERNEL_FQ void m04420_m04 (KERN_ATTR_BASIC ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
// Each work-item fills entries lid, lid + lsz, ... so the table is built
// cooperatively. Entry i holds two ASCII hex digits of byte value i: the
// high-nibble character in bits 0-7 and the low-nibble character in bits 8-15.
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
}
SYNC_THREADS ();
// The range check comes after SYNC_THREADS, so out-of-range work-items still
// take part in filling the table before they leave.
if (gid >= GID_CNT) return;
/**
* base
*/
// First 8 words (32 bytes) of this work-item's base word.
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
// Length is masked to the range 0..63.
const u32 pw_l_len = pws[gid].pw_len & 63;
/**
* salt
*/
// 16 words (64 bytes) of the salt buffer; salt_len is used unmasked.
u32 salt_buf0[4];
u32 salt_buf1[4];
u32 salt_buf2[4];
u32 salt_buf3[4];
salt_buf0[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 0];
salt_buf0[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 1];
salt_buf0[2] = salt_bufs[SALT_POS_HOST].salt_buf[ 2];
salt_buf0[3] = salt_bufs[SALT_POS_HOST].salt_buf[ 3];
salt_buf1[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 4];
salt_buf1[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 5];
salt_buf1[2] = salt_bufs[SALT_POS_HOST].salt_buf[ 6];
salt_buf1[3] = salt_bufs[SALT_POS_HOST].salt_buf[ 7];
salt_buf2[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 8];
salt_buf2[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 9];
salt_buf2[2] = salt_bufs[SALT_POS_HOST].salt_buf[10];
salt_buf2[3] = salt_bufs[SALT_POS_HOST].salt_buf[11];
salt_buf3[0] = salt_bufs[SALT_POS_HOST].salt_buf[12];
salt_buf3[1] = salt_bufs[SALT_POS_HOST].salt_buf[13];
salt_buf3[2] = salt_bufs[SALT_POS_HOST].salt_buf[14];
salt_buf3[3] = salt_bufs[SALT_POS_HOST].salt_buf[15];
const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;
/**
* loop
*/
// One iteration handles VECT_SIZE entries of combs_buf.
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
{
// Both the right-hand length and the combined length are masked to 0..63.
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63;
const u32x pw_len = (pw_l_len + pw_r_len) & 63;
/**
* concat password candidate
*/
// wordl* = base word from pws[gid], wordr* = word(s) from combs_buf; the
// upper halves (wordX2/wordX3) start out zeroed.
u32x wordl0[4] = { 0 };
u32x wordl1[4] = { 0 };
u32x wordl2[4] = { 0 };
u32x wordl3[4] = { 0 };
wordl0[0] = pw_buf0[0];
wordl0[1] = pw_buf0[1];
wordl0[2] = pw_buf0[2];
wordl0[3] = pw_buf0[3];
wordl1[0] = pw_buf1[0];
wordl1[1] = pw_buf1[1];
wordl1[2] = pw_buf1[2];
wordl1[3] = pw_buf1[3];
u32x wordr0[4] = { 0 };
u32x wordr1[4] = { 0 };
u32x wordr2[4] = { 0 };
u32x wordr3[4] = { 0 };
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
// Depending on COMBS_MODE either the combs_buf word is moved by the base
// word's length or the base word is moved by the combs_buf word's length.
// NOTE(review): switch_buffer_by_offset_le_VV presumably shifts the buffer
// towards higher byte offsets by the given amount - confirm in inc_common.cl.
if (COMBS_MODE == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
else
{
switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
}
// The two halves are merged with a bitwise OR.
u32x w0[4];
u32x w1[4];
u32x w2[4];
u32x w3[4];
w0[0] = wordl0[0] | wordr0[0];
w0[1] = wordl0[1] | wordr0[1];
w0[2] = wordl0[2] | wordr0[2];
w0[3] = wordl0[3] | wordr0[3];
w1[0] = wordl1[0] | wordr1[0];
w1[1] = wordl1[1] | wordr1[1];
w1[2] = wordl1[2] | wordr1[2];
w1[3] = wordl1[3] | wordr1[3];
w2[0] = wordl2[0] | wordr2[0];
w2[1] = wordl2[1] | wordr2[1];
w2[2] = wordl2[2] | wordr2[2];
w2[3] = wordl2[3] | wordr2[3];
w3[0] = wordl3[0] | wordr3[0];
w3[1] = wordl3[1] | wordr3[1];
w3[2] = wordl3[2] | wordr3[2];
w3[3] = wordl3[3] | wordr3[3];
/**
* append salt
*/
// A per-iteration copy of the salt is moved by pw_len and OR-ed into the
// candidate buffer.
u32x s0[4];
u32x s1[4];
u32x s2[4];
u32x s3[4];
s0[0] = salt_buf0[0];
s0[1] = salt_buf0[1];
s0[2] = salt_buf0[2];
s0[3] = salt_buf0[3];
s1[0] = salt_buf1[0];
s1[1] = salt_buf1[1];
s1[2] = salt_buf1[2];
s1[3] = salt_buf1[3];
s2[0] = salt_buf2[0];
s2[1] = salt_buf2[1];
s2[2] = salt_buf2[2];
s2[3] = salt_buf2[3];
s3[0] = salt_buf3[0];
s3[1] = salt_buf3[1];
s3[2] = salt_buf3[2];
s3[3] = salt_buf3[3];
switch_buffer_by_offset_le_VV (s0, s1, s2, s3, pw_len);
const u32x pw_salt_len = pw_len + salt_len;
w0[0] |= s0[0];
w0[1] |= s0[1];
w0[2] |= s0[2];
w0[3] |= s0[3];
w1[0] |= s1[0];
w1[1] |= s1[1];
w1[2] |= s1[2];
w1[3] |= s1[3];
w2[0] |= s2[0];
w2[1] |= s2[1];
w2[2] |= s2[2];
w2[3] |= s2[3];
w3[0] |= s3[0];
w3[1] |= s3[1];
w3[2] |= s3[2];
w3[3] |= s3[3];
/**
* sha1
*/
// The first 14 buffer words are byte-swapped with hc_swap32; w3[2] and w3[3]
// are not read. Word 14 is zero and word 15 carries the length in bits.
// NOTE(review): no 0x80 terminator byte is OR-ed in by this kernel; presumably
// it is already part of the salt buffer supplied by the host - confirm.
u32x w0_t = hc_swap32 (w0[0]);
u32x w1_t = hc_swap32 (w0[1]);
u32x w2_t = hc_swap32 (w0[2]);
u32x w3_t = hc_swap32 (w0[3]);
u32x w4_t = hc_swap32 (w1[0]);
u32x w5_t = hc_swap32 (w1[1]);
u32x w6_t = hc_swap32 (w1[2]);
u32x w7_t = hc_swap32 (w1[3]);
u32x w8_t = hc_swap32 (w2[0]);
u32x w9_t = hc_swap32 (w2[1]);
u32x wa_t = hc_swap32 (w2[2]);
u32x wb_t = hc_swap32 (w2[3]);
u32x wc_t = hc_swap32 (w3[0]);
u32x wd_t = hc_swap32 (w3[1]);
u32x we_t = 0;
u32x wf_t = pw_salt_len * 8;
u32x a = SHA1M_A;
u32x b = SHA1M_B;
u32x c = SHA1M_C;
u32x d = SHA1M_D;
u32x e = SHA1M_E;
// K is redefined before each group of 20 steps.
// NOTE(review): SHA1_STEP presumably picks up its round constant through K -
// confirm in inc_hash_sha1.cl.
#undef K
#define K SHA1C00
// Steps 0-15 consume the message words as loaded.
SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t);
SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t);
SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t);
SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t);
SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t);
SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t);
SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t);
SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t);
SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t);
SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t);
SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t);
SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t);
SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t);
SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t);
SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t);
SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t);
// From step 16 on, each word is overwritten in place with
// rotl1 (w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16]) right before it is used.
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t);
#undef K
#define K SHA1C01
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t);
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t);
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t);
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t);
#undef K
#define K SHA1C02
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t);
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t);
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t);
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t);
#undef K
#define K SHA1C03
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t);
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t);
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t);
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t);
// Add the initial values back to obtain the five SHA-1 output words.
a += make_u32x (SHA1M_A);
b += make_u32x (SHA1M_B);
c += make_u32x (SHA1M_C);
d += make_u32x (SHA1M_D);
e += make_u32x (SHA1M_E);
/**
* md5
*/
// Hex-encode a..e into message words 0-9: bytes are taken most significant
// first, two bytes (four characters) per 32-bit word, 40 characters in total.
w0_t = uint_to_hex_lower8 ((a >> 24) & 255) << 0
| uint_to_hex_lower8 ((a >> 16) & 255) << 16;
w1_t = uint_to_hex_lower8 ((a >> 8) & 255) << 0
| uint_to_hex_lower8 ((a >> 0) & 255) << 16;
w2_t = uint_to_hex_lower8 ((b >> 24) & 255) << 0
| uint_to_hex_lower8 ((b >> 16) & 255) << 16;
w3_t = uint_to_hex_lower8 ((b >> 8) & 255) << 0
| uint_to_hex_lower8 ((b >> 0) & 255) << 16;
w4_t = uint_to_hex_lower8 ((c >> 24) & 255) << 0
| uint_to_hex_lower8 ((c >> 16) & 255) << 16;
w5_t = uint_to_hex_lower8 ((c >> 8) & 255) << 0
| uint_to_hex_lower8 ((c >> 0) & 255) << 16;
w6_t = uint_to_hex_lower8 ((d >> 24) & 255) << 0
| uint_to_hex_lower8 ((d >> 16) & 255) << 16;
w7_t = uint_to_hex_lower8 ((d >> 8) & 255) << 0
| uint_to_hex_lower8 ((d >> 0) & 255) << 16;
w8_t = uint_to_hex_lower8 ((e >> 24) & 255) << 0
| uint_to_hex_lower8 ((e >> 16) & 255) << 16;
w9_t = uint_to_hex_lower8 ((e >> 8) & 255) << 0
| uint_to_hex_lower8 ((e >> 0) & 255) << 16;
// Word 10 holds the 0x80 terminator right after the 40 message bytes; word 14
// holds the message length in bits.
wa_t = 0x80;
wb_t = 0;
wc_t = 0;
wd_t = 0;
we_t = 40 * 8;
wf_t = 0;
a = MD5M_A;
b = MD5M_B;
c = MD5M_C;
d = MD5M_D;
// e is cleared here and not referenced by any of the MD5 steps below.
e = 0;
MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03);
MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13);
// NOTE(review): t is never referenced directly in this function; presumably it
// is a scratch variable used inside the MD5_H1 / MD5_H2 macros - confirm.
u32x t;
MD5_STEP (MD5_H1, a, b, c, d, w5_t, MD5C20, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w8_t, MD5C21, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, wb_t, MD5C22, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, we_t, MD5C23, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w1_t, MD5C24, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w4_t, MD5C25, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w7_t, MD5C26, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, wa_t, MD5C27, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, wd_t, MD5C28, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w0_t, MD5C29, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w3_t, MD5C2a, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w6_t, MD5C2b, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w9_t, MD5C2c, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, wc_t, MD5C2d, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, wf_t, MD5C2e, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w2_t, MD5C2f, MD5S23);
MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33);
// The state is handed over in the order a, d, c, b, and the MD5 initial values
// are not added back first.
// NOTE(review): presumably the stored digests are pre-adjusted by the host and
// the argument order follows DGST_R0..DGST_R3 - confirm.
COMPARE_M_SIMD (a, d, c, b);
}
}
/**
 * m04420_m08: empty entry point - the body contains no statements, so running
 * this kernel performs no work.
 * NOTE(review): presumably kept so the "_m08" symbol exists for the host side;
 * confirm before removing.
 */
KERNEL_FQ void m04420_m08 (KERN_ATTR_BASIC ())
{
}
/**
 * m04420_m16: empty entry point - the body contains no statements, so running
 * this kernel performs no work.
 * NOTE(review): presumably kept so the "_m16" symbol exists for the host side;
 * confirm before removing.
 */
KERNEL_FQ void m04420_m16 (KERN_ATTR_BASIC ())
{
}
/**
 * m04420_s04: joins this work-item's base word (pws[gid]) with the words in
 * combs_buf (IL_CNT entries, VECT_SIZE at a time), appends the salt, runs one
 * SHA-1 block over the result, writes the five SHA-1 output words as 40
 * lowercase hex characters, runs one MD5 block over those characters and
 * passes the resulting MD5 state to COMPARE_S_SIMD. A candidate is dropped
 * three MD5 steps early when MATCHES_NONE_VS (a, search[0]) holds.
 *
 * NOTE(review): the "_s04" suffix presumably marks the single-digest variant
 * for the shortest candidate size class - confirm against the host dispatcher.
 */
KERNEL_FQ void m04420_s04 (KERN_ATTR_BASIC ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
// Each work-item fills entries lid, lid + lsz, ... so the table is built
// cooperatively. Entry i holds two ASCII hex digits of byte value i: the
// high-nibble character in bits 0-7 and the low-nibble character in bits 8-15.
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
}
SYNC_THREADS ();
// The range check comes after SYNC_THREADS, so out-of-range work-items still
// take part in filling the table before they leave.
if (gid >= GID_CNT) return;
/**
* base
*/
// First 8 words (32 bytes) of this work-item's base word.
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
// Length is masked to the range 0..63.
const u32 pw_l_len = pws[gid].pw_len & 63;
/**
* salt
*/
// 16 words (64 bytes) of the salt buffer; salt_len is used unmasked.
u32 salt_buf0[4];
u32 salt_buf1[4];
u32 salt_buf2[4];
u32 salt_buf3[4];
salt_buf0[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 0];
salt_buf0[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 1];
salt_buf0[2] = salt_bufs[SALT_POS_HOST].salt_buf[ 2];
salt_buf0[3] = salt_bufs[SALT_POS_HOST].salt_buf[ 3];
salt_buf1[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 4];
salt_buf1[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 5];
salt_buf1[2] = salt_bufs[SALT_POS_HOST].salt_buf[ 6];
salt_buf1[3] = salt_bufs[SALT_POS_HOST].salt_buf[ 7];
salt_buf2[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 8];
salt_buf2[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 9];
salt_buf2[2] = salt_bufs[SALT_POS_HOST].salt_buf[10];
salt_buf2[3] = salt_bufs[SALT_POS_HOST].salt_buf[11];
salt_buf3[0] = salt_bufs[SALT_POS_HOST].salt_buf[12];
salt_buf3[1] = salt_bufs[SALT_POS_HOST].salt_buf[13];
salt_buf3[2] = salt_bufs[SALT_POS_HOST].salt_buf[14];
salt_buf3[3] = salt_bufs[SALT_POS_HOST].salt_buf[15];
const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;
/**
* digest
*/
// The four digest words at DIGESTS_OFFSET_HOST, picked in DGST_R0..DGST_R3
// order. search[0] is used for the early exit further down.
// NOTE(review): the remaining entries are presumably read inside
// COMPARE_S_SIMD - confirm.
const u32 search[4] =
{
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
};
/**
* loop
*/
// One iteration handles VECT_SIZE entries of combs_buf.
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
{
// Both the right-hand length and the combined length are masked to 0..63.
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63;
const u32x pw_len = (pw_l_len + pw_r_len) & 63;
/**
* concat password candidate
*/
// wordl* = base word from pws[gid], wordr* = word(s) from combs_buf; the
// upper halves (wordX2/wordX3) start out zeroed.
u32x wordl0[4] = { 0 };
u32x wordl1[4] = { 0 };
u32x wordl2[4] = { 0 };
u32x wordl3[4] = { 0 };
wordl0[0] = pw_buf0[0];
wordl0[1] = pw_buf0[1];
wordl0[2] = pw_buf0[2];
wordl0[3] = pw_buf0[3];
wordl1[0] = pw_buf1[0];
wordl1[1] = pw_buf1[1];
wordl1[2] = pw_buf1[2];
wordl1[3] = pw_buf1[3];
u32x wordr0[4] = { 0 };
u32x wordr1[4] = { 0 };
u32x wordr2[4] = { 0 };
u32x wordr3[4] = { 0 };
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
// Depending on COMBS_MODE either the combs_buf word is moved by the base
// word's length or the base word is moved by the combs_buf word's length.
// NOTE(review): switch_buffer_by_offset_le_VV presumably shifts the buffer
// towards higher byte offsets by the given amount - confirm in inc_common.cl.
if (COMBS_MODE == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
else
{
switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
}
// The two halves are merged with a bitwise OR.
u32x w0[4];
u32x w1[4];
u32x w2[4];
u32x w3[4];
w0[0] = wordl0[0] | wordr0[0];
w0[1] = wordl0[1] | wordr0[1];
w0[2] = wordl0[2] | wordr0[2];
w0[3] = wordl0[3] | wordr0[3];
w1[0] = wordl1[0] | wordr1[0];
w1[1] = wordl1[1] | wordr1[1];
w1[2] = wordl1[2] | wordr1[2];
w1[3] = wordl1[3] | wordr1[3];
w2[0] = wordl2[0] | wordr2[0];
w2[1] = wordl2[1] | wordr2[1];
w2[2] = wordl2[2] | wordr2[2];
w2[3] = wordl2[3] | wordr2[3];
w3[0] = wordl3[0] | wordr3[0];
w3[1] = wordl3[1] | wordr3[1];
w3[2] = wordl3[2] | wordr3[2];
w3[3] = wordl3[3] | wordr3[3];
/**
* append salt
*/
// A per-iteration copy of the salt is moved by pw_len and OR-ed into the
// candidate buffer.
u32x s0[4];
u32x s1[4];
u32x s2[4];
u32x s3[4];
s0[0] = salt_buf0[0];
s0[1] = salt_buf0[1];
s0[2] = salt_buf0[2];
s0[3] = salt_buf0[3];
s1[0] = salt_buf1[0];
s1[1] = salt_buf1[1];
s1[2] = salt_buf1[2];
s1[3] = salt_buf1[3];
s2[0] = salt_buf2[0];
s2[1] = salt_buf2[1];
s2[2] = salt_buf2[2];
s2[3] = salt_buf2[3];
s3[0] = salt_buf3[0];
s3[1] = salt_buf3[1];
s3[2] = salt_buf3[2];
s3[3] = salt_buf3[3];
switch_buffer_by_offset_le_VV (s0, s1, s2, s3, pw_len);
const u32x pw_salt_len = pw_len + salt_len;
w0[0] |= s0[0];
w0[1] |= s0[1];
w0[2] |= s0[2];
w0[3] |= s0[3];
w1[0] |= s1[0];
w1[1] |= s1[1];
w1[2] |= s1[2];
w1[3] |= s1[3];
w2[0] |= s2[0];
w2[1] |= s2[1];
w2[2] |= s2[2];
w2[3] |= s2[3];
w3[0] |= s3[0];
w3[1] |= s3[1];
w3[2] |= s3[2];
w3[3] |= s3[3];
/**
* sha1
*/
// The first 14 buffer words are byte-swapped with hc_swap32; w3[2] and w3[3]
// are not read. Word 14 is zero and word 15 carries the length in bits.
// NOTE(review): no 0x80 terminator byte is OR-ed in by this kernel; presumably
// it is already part of the salt buffer supplied by the host - confirm.
u32x w0_t = hc_swap32 (w0[0]);
u32x w1_t = hc_swap32 (w0[1]);
u32x w2_t = hc_swap32 (w0[2]);
u32x w3_t = hc_swap32 (w0[3]);
u32x w4_t = hc_swap32 (w1[0]);
u32x w5_t = hc_swap32 (w1[1]);
u32x w6_t = hc_swap32 (w1[2]);
u32x w7_t = hc_swap32 (w1[3]);
u32x w8_t = hc_swap32 (w2[0]);
u32x w9_t = hc_swap32 (w2[1]);
u32x wa_t = hc_swap32 (w2[2]);
u32x wb_t = hc_swap32 (w2[3]);
u32x wc_t = hc_swap32 (w3[0]);
u32x wd_t = hc_swap32 (w3[1]);
u32x we_t = 0;
u32x wf_t = pw_salt_len * 8;
u32x a = SHA1M_A;
u32x b = SHA1M_B;
u32x c = SHA1M_C;
u32x d = SHA1M_D;
u32x e = SHA1M_E;
// K is redefined before each group of 20 steps.
// NOTE(review): SHA1_STEP presumably picks up its round constant through K -
// confirm in inc_hash_sha1.cl.
#undef K
#define K SHA1C00
// Steps 0-15 consume the message words as loaded.
SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t);
SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t);
SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t);
SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t);
SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t);
SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t);
SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t);
SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t);
SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t);
SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t);
SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t);
SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t);
SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t);
SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t);
SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t);
SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t);
// From step 16 on, each word is overwritten in place with
// rotl1 (w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16]) right before it is used.
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t);
#undef K
#define K SHA1C01
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t);
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t);
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t);
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t);
#undef K
#define K SHA1C02
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t);
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t);
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t);
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t);
#undef K
#define K SHA1C03
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t);
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t);
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t);
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t);
// Add the initial values back to obtain the five SHA-1 output words.
a += make_u32x (SHA1M_A);
b += make_u32x (SHA1M_B);
c += make_u32x (SHA1M_C);
d += make_u32x (SHA1M_D);
e += make_u32x (SHA1M_E);
/**
* md5
*/
// Hex-encode a..e into message words 0-9: bytes are taken most significant
// first, two bytes (four characters) per 32-bit word, 40 characters in total.
w0_t = uint_to_hex_lower8 ((a >> 24) & 255) << 0
| uint_to_hex_lower8 ((a >> 16) & 255) << 16;
w1_t = uint_to_hex_lower8 ((a >> 8) & 255) << 0
| uint_to_hex_lower8 ((a >> 0) & 255) << 16;
w2_t = uint_to_hex_lower8 ((b >> 24) & 255) << 0
| uint_to_hex_lower8 ((b >> 16) & 255) << 16;
w3_t = uint_to_hex_lower8 ((b >> 8) & 255) << 0
| uint_to_hex_lower8 ((b >> 0) & 255) << 16;
w4_t = uint_to_hex_lower8 ((c >> 24) & 255) << 0
| uint_to_hex_lower8 ((c >> 16) & 255) << 16;
w5_t = uint_to_hex_lower8 ((c >> 8) & 255) << 0
| uint_to_hex_lower8 ((c >> 0) & 255) << 16;
w6_t = uint_to_hex_lower8 ((d >> 24) & 255) << 0
| uint_to_hex_lower8 ((d >> 16) & 255) << 16;
w7_t = uint_to_hex_lower8 ((d >> 8) & 255) << 0
| uint_to_hex_lower8 ((d >> 0) & 255) << 16;
w8_t = uint_to_hex_lower8 ((e >> 24) & 255) << 0
| uint_to_hex_lower8 ((e >> 16) & 255) << 16;
w9_t = uint_to_hex_lower8 ((e >> 8) & 255) << 0
| uint_to_hex_lower8 ((e >> 0) & 255) << 16;
// Word 10 holds the 0x80 terminator right after the 40 message bytes; word 14
// holds the message length in bits.
wa_t = 0x80;
wb_t = 0;
wc_t = 0;
wd_t = 0;
we_t = 40 * 8;
wf_t = 0;
a = MD5M_A;
b = MD5M_B;
c = MD5M_C;
d = MD5M_D;
// e is cleared here and not referenced by any of the MD5 steps below.
e = 0;
MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03);
MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13);
// NOTE(review): t is never referenced directly in this function; presumably it
// is a scratch variable used inside the MD5_H1 / MD5_H2 macros - confirm.
u32x t;
MD5_STEP (MD5_H1, a, b, c, d, w5_t, MD5C20, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w8_t, MD5C21, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, wb_t, MD5C22, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, we_t, MD5C23, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w1_t, MD5C24, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w4_t, MD5C25, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w7_t, MD5C26, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, wa_t, MD5C27, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, wd_t, MD5C28, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w0_t, MD5C29, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w3_t, MD5C2a, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w6_t, MD5C2b, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w9_t, MD5C2c, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, wc_t, MD5C2d, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, wf_t, MD5C2e, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w2_t, MD5C2f, MD5S23);
MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30);
// Early exit: the three remaining steps have d, c and b as their first
// argument, so a is not passed in that position again.
// NOTE(review): MATCHES_NONE_VS presumably holds when no vector lane of a
// equals search[0] - confirm in inc_simd.cl.
if (MATCHES_NONE_VS (a, search[0])) continue;
MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33);
// The state is handed over in the order a, d, c, b, and the MD5 initial values
// are not added back first.
// NOTE(review): presumably the stored digests are pre-adjusted by the host and
// the argument order follows DGST_R0..DGST_R3 - confirm.
COMPARE_S_SIMD (a, d, c, b);
}
}
/**
 * m04420_s08: empty entry point - the body contains no statements, so running
 * this kernel performs no work.
 * NOTE(review): presumably kept so the "_s08" symbol exists for the host side;
 * confirm before removing.
 */
KERNEL_FQ void m04420_s08 (KERN_ATTR_BASIC ())
{
}
/**
 * m04420_s16: empty entry point - the body contains no statements, so running
 * this kernel performs no work.
 * NOTE(review): presumably kept so the "_s16" symbol exists for the host side;
 * confirm before removing.
 */
KERNEL_FQ void m04420_s16 (KERN_ATTR_BASIC ())
{
}

@ -0,0 +1,274 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
//#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include M2S(INCLUDE_PATH/inc_vendor.h)
#include M2S(INCLUDE_PATH/inc_types.h)
#include M2S(INCLUDE_PATH/inc_platform.cl)
#include M2S(INCLUDE_PATH/inc_common.cl)
#include M2S(INCLUDE_PATH/inc_scalar.cl)
#include M2S(INCLUDE_PATH/inc_hash_md5.cl)
#include M2S(INCLUDE_PATH/inc_hash_sha1.cl)
#endif
/**
 * uint_to_hex_lower8 (i): looks up l_bin2asc[] once per lane of i and packs
 * the results with make_u32x. One definition exists per supported VECT_SIZE
 * (1, 2, 4, 8, 16); no definition is provided for any other value.
 * The macro names l_bin2asc directly, so an array with exactly that name must
 * be in scope wherever the macro is expanded. The argument is expanded once
 * per lane, so it should be free of side effects.
 */
#if VECT_SIZE == 1
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
#elif VECT_SIZE == 2
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
#elif VECT_SIZE == 4
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
#elif VECT_SIZE == 8
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
#elif VECT_SIZE == 16
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
#endif
/**
 * Multi-hash kernel. For every il_pos it:
 *   1. runs SHA-1 over pws[gid].i, then combs_buf[il_pos].i, then the salt,
 *   2. writes the five SHA-1 state words as 40 lowercase hex characters,
 *   3. runs MD5 over those 40 bytes,
 *   4. passes the MD5 words selected by DGST_R0..DGST_R3 to COMPARE_M_SCALAR.
 */
KERNEL_FQ void m04420_mxx (KERN_ATTR_BASIC ())
{
  const u64 gid = get_global_id (0);
  const u64 lid = get_local_id (0);
  const u64 lsz = get_local_size (0);

  /**
   * byte -> two hex characters lookup table; each work-item fills the
   * entries lid, lid + lsz, lid + 2 * lsz, ...
   */

  LOCAL_VK u32 l_bin2asc[256];

  for (u32 val = lid; val < 256; val += lsz)
  {
    const u32 nib_lo = (val >> 0) & 15;
    const u32 nib_hi = (val >> 4) & 15;

    const u32 chr_lo = (nib_lo < 10) ? '0' + nib_lo : 'a' - 10 + nib_lo;
    const u32 chr_hi = (nib_hi < 10) ? '0' + nib_hi : 'a' - 10 + nib_hi;

    // high-nibble character in bits 0..7, low-nibble character in bits 8..15
    l_bin2asc[val] = (chr_lo << 8) | (chr_hi << 0);
  }

  SYNC_THREADS ();

  // the range check sits after the table fill and the barrier, so work-items
  // beyond GID_CNT still take part in both
  if (gid >= GID_CNT) return;

  /**
   * salt words, each passed through hc_swap32_S
   */

  const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;

  u32 salt_swapped[64] = { 0 };

  for (u32 off = 0, pos = 0; off < salt_len; off += 4, pos += 1)
  {
    salt_swapped[pos] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[pos]);
  }

  /**
   * SHA-1 state after pws[gid] only; copied for every il_pos below
   */

  sha1_ctx_t ctx_pw;

  sha1_init (&ctx_pw);

  sha1_update_global_swap (&ctx_pw, pws[gid].i, pws[gid].pw_len);

  /**
   * loop over combs_buf entries
   */

  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
  {
    // inner hash: continue from the shared state with combs_buf[il_pos], then the salt
    sha1_ctx_t ctx_sha1 = ctx_pw;

    sha1_update_global_swap (&ctx_sha1, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);

    sha1_update (&ctx_sha1, salt_swapped, salt_len);

    sha1_final (&ctx_sha1);

    const u32 sha_a = ctx_sha1.h[0];
    const u32 sha_b = ctx_sha1.h[1];
    const u32 sha_c = ctx_sha1.h[2];
    const u32 sha_d = ctx_sha1.h[3];
    const u32 sha_e = ctx_sha1.h[4];

    // hex text of the digest: every output word carries four characters,
    // i.e. two digest bytes, most significant byte of each state word first
    u32 w0[4];
    u32 w1[4];
    u32 w2[4];
    u32 w3[4];

    w0[0] = uint_to_hex_lower8 ((sha_a >> 24) & 255) | (uint_to_hex_lower8 ((sha_a >> 16) & 255) << 16);
    w0[1] = uint_to_hex_lower8 ((sha_a >>  8) & 255) | (uint_to_hex_lower8 ((sha_a >>  0) & 255) << 16);
    w0[2] = uint_to_hex_lower8 ((sha_b >> 24) & 255) | (uint_to_hex_lower8 ((sha_b >> 16) & 255) << 16);
    w0[3] = uint_to_hex_lower8 ((sha_b >>  8) & 255) | (uint_to_hex_lower8 ((sha_b >>  0) & 255) << 16);

    w1[0] = uint_to_hex_lower8 ((sha_c >> 24) & 255) | (uint_to_hex_lower8 ((sha_c >> 16) & 255) << 16);
    w1[1] = uint_to_hex_lower8 ((sha_c >>  8) & 255) | (uint_to_hex_lower8 ((sha_c >>  0) & 255) << 16);
    w1[2] = uint_to_hex_lower8 ((sha_d >> 24) & 255) | (uint_to_hex_lower8 ((sha_d >> 16) & 255) << 16);
    w1[3] = uint_to_hex_lower8 ((sha_d >>  8) & 255) | (uint_to_hex_lower8 ((sha_d >>  0) & 255) << 16);

    w2[0] = uint_to_hex_lower8 ((sha_e >> 24) & 255) | (uint_to_hex_lower8 ((sha_e >> 16) & 255) << 16);
    w2[1] = uint_to_hex_lower8 ((sha_e >>  8) & 255) | (uint_to_hex_lower8 ((sha_e >>  0) & 255) << 16);

    // the remaining 24 bytes of the 64-byte buffer are zero
    w2[2] = 0; w2[3] = 0;
    w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0;

    // outer hash: MD5 over the 40 hex characters
    md5_ctx_t ctx_md5;

    md5_init (&ctx_md5);

    md5_update_64 (&ctx_md5, w0, w1, w2, w3, 40);

    md5_final (&ctx_md5);

    const u32 out0 = ctx_md5.h[DGST_R0];
    const u32 out1 = ctx_md5.h[DGST_R1];
    const u32 out2 = ctx_md5.h[DGST_R2];
    const u32 out3 = ctx_md5.h[DGST_R3];

    COMPARE_M_SCALAR (out0, out1, out2, out3);
  }
}
/**
 * Single-hash kernel. For every il_pos it:
 *   1. runs SHA-1 over pws[gid].i, then combs_buf[il_pos].i, then the salt,
 *   2. writes the five SHA-1 state words as 40 lowercase hex characters,
 *   3. runs MD5 over those 40 bytes,
 *   4. passes the MD5 words selected by DGST_R0..DGST_R3 to COMPARE_S_SCALAR.
 */
KERNEL_FQ void m04420_sxx (KERN_ATTR_BASIC ())
{
  const u64 gid = get_global_id (0);
  const u64 lid = get_local_id (0);
  const u64 lsz = get_local_size (0);

  /**
   * byte -> two hex characters lookup table; each work-item fills the
   * entries lid, lid + lsz, lid + 2 * lsz, ...
   */

  LOCAL_VK u32 l_bin2asc[256];

  for (u32 val = lid; val < 256; val += lsz)
  {
    const u32 nib_lo = (val >> 0) & 15;
    const u32 nib_hi = (val >> 4) & 15;

    const u32 chr_lo = (nib_lo < 10) ? '0' + nib_lo : 'a' - 10 + nib_lo;
    const u32 chr_hi = (nib_hi < 10) ? '0' + nib_hi : 'a' - 10 + nib_hi;

    // high-nibble character in bits 0..7, low-nibble character in bits 8..15
    l_bin2asc[val] = (chr_lo << 8) | (chr_hi << 0);
  }

  SYNC_THREADS ();

  // the range check sits after the table fill and the barrier, so work-items
  // beyond GID_CNT still take part in both
  if (gid >= GID_CNT) return;

  /**
   * the four digest words of the hash at DIGESTS_OFFSET_HOST
   * NOTE(review): `search` is not referenced by name anywhere else in this
   * kernel; presumably COMPARE_S_SCALAR reads it - keep the name.
   */

  const u32 search[4] =
  {
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
  };

  /**
   * salt words, each passed through hc_swap32_S
   */

  const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;

  u32 salt_swapped[64] = { 0 };

  for (u32 off = 0, pos = 0; off < salt_len; off += 4, pos += 1)
  {
    salt_swapped[pos] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[pos]);
  }

  /**
   * SHA-1 state after pws[gid] only; copied for every il_pos below
   */

  sha1_ctx_t ctx_pw;

  sha1_init (&ctx_pw);

  sha1_update_global_swap (&ctx_pw, pws[gid].i, pws[gid].pw_len);

  /**
   * loop over combs_buf entries
   */

  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
  {
    // inner hash: continue from the shared state with combs_buf[il_pos], then the salt
    sha1_ctx_t ctx_sha1 = ctx_pw;

    sha1_update_global_swap (&ctx_sha1, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);

    sha1_update (&ctx_sha1, salt_swapped, salt_len);

    sha1_final (&ctx_sha1);

    const u32 sha_a = ctx_sha1.h[0];
    const u32 sha_b = ctx_sha1.h[1];
    const u32 sha_c = ctx_sha1.h[2];
    const u32 sha_d = ctx_sha1.h[3];
    const u32 sha_e = ctx_sha1.h[4];

    // hex text of the digest: every output word carries four characters,
    // i.e. two digest bytes, most significant byte of each state word first
    u32 w0[4];
    u32 w1[4];
    u32 w2[4];
    u32 w3[4];

    w0[0] = uint_to_hex_lower8 ((sha_a >> 24) & 255) | (uint_to_hex_lower8 ((sha_a >> 16) & 255) << 16);
    w0[1] = uint_to_hex_lower8 ((sha_a >>  8) & 255) | (uint_to_hex_lower8 ((sha_a >>  0) & 255) << 16);
    w0[2] = uint_to_hex_lower8 ((sha_b >> 24) & 255) | (uint_to_hex_lower8 ((sha_b >> 16) & 255) << 16);
    w0[3] = uint_to_hex_lower8 ((sha_b >>  8) & 255) | (uint_to_hex_lower8 ((sha_b >>  0) & 255) << 16);

    w1[0] = uint_to_hex_lower8 ((sha_c >> 24) & 255) | (uint_to_hex_lower8 ((sha_c >> 16) & 255) << 16);
    w1[1] = uint_to_hex_lower8 ((sha_c >>  8) & 255) | (uint_to_hex_lower8 ((sha_c >>  0) & 255) << 16);
    w1[2] = uint_to_hex_lower8 ((sha_d >> 24) & 255) | (uint_to_hex_lower8 ((sha_d >> 16) & 255) << 16);
    w1[3] = uint_to_hex_lower8 ((sha_d >>  8) & 255) | (uint_to_hex_lower8 ((sha_d >>  0) & 255) << 16);

    w2[0] = uint_to_hex_lower8 ((sha_e >> 24) & 255) | (uint_to_hex_lower8 ((sha_e >> 16) & 255) << 16);
    w2[1] = uint_to_hex_lower8 ((sha_e >>  8) & 255) | (uint_to_hex_lower8 ((sha_e >>  0) & 255) << 16);

    // the remaining 24 bytes of the 64-byte buffer are zero
    w2[2] = 0; w2[3] = 0;
    w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0;

    // outer hash: MD5 over the 40 hex characters
    md5_ctx_t ctx_md5;

    md5_init (&ctx_md5);

    md5_update_64 (&ctx_md5, w0, w1, w2, w3, 40);

    md5_final (&ctx_md5);

    const u32 out0 = ctx_md5.h[DGST_R0];
    const u32 out1 = ctx_md5.h[DGST_R1];
    const u32 out2 = ctx_md5.h[DGST_R2];
    const u32 out3 = ctx_md5.h[DGST_R3];

    COMPARE_S_SCALAR (out0, out1, out2, out3);
  }
}

File diff suppressed because it is too large Load Diff

@ -0,0 +1,300 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include M2S(INCLUDE_PATH/inc_vendor.h)
#include M2S(INCLUDE_PATH/inc_types.h)
#include M2S(INCLUDE_PATH/inc_platform.cl)
#include M2S(INCLUDE_PATH/inc_common.cl)
#include M2S(INCLUDE_PATH/inc_simd.cl)
#include M2S(INCLUDE_PATH/inc_hash_md5.cl)
#include M2S(INCLUDE_PATH/inc_hash_sha1.cl)
#endif
/**
 * uint_to_hex_lower8 (i): looks up l_bin2asc[] once per lane of i and packs
 * the results with make_u32x. One definition exists per supported VECT_SIZE
 * (1, 2, 4, 8, 16); no definition is provided for any other value.
 * The macro names l_bin2asc directly, so an array with exactly that name must
 * be in scope wherever the macro is expanded. The argument is expanded once
 * per lane, so it should be free of side effects.
 */
#if VECT_SIZE == 1
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
#elif VECT_SIZE == 2
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
#elif VECT_SIZE == 4
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
#elif VECT_SIZE == 8
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
#elif VECT_SIZE == 16
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
#endif
/**
 * Vectorised multi-hash kernel. For every group of VECT_SIZE candidates it:
 *   1. ORs words_buf_r[il_pos / VECT_SIZE] into the first password word,
 *   2. runs SHA-1 over the password words, then the salt words,
 *   3. writes the five SHA-1 state words as 40 lowercase hex characters,
 *   4. runs MD5 over those 40 bytes,
 *   5. passes the MD5 words selected by DGST_R0..DGST_R3 to COMPARE_M_SIMD.
 */
KERNEL_FQ void m04420_mxx (KERN_ATTR_VECTOR ())
{
  /**
   * modifier
   */

  const u64 gid = get_global_id (0);
  const u64 lid = get_local_id (0);
  const u64 lsz = get_local_size (0);

  /**
   * bin2asc table: entry i holds the two lowercase hex characters of byte i
   * (high-nibble character in bits 0..7, low-nibble character in bits 8..15).
   * Each work-item fills the entries lid, lid + lsz, lid + 2 * lsz, ...
   */

  LOCAL_VK u32 l_bin2asc[256];

  for (u32 i = lid; i < 256; i += lsz)
  {
    const u32 i0 = (i >> 0) & 15;
    const u32 i1 = (i >> 4) & 15;

    l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
                 | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
  }

  SYNC_THREADS ();

  // checked only after the table fill and the barrier, so work-items beyond
  // GID_CNT still take part in both
  if (gid >= GID_CNT) return;

  /**
   * base
   */

  const u32 pw_len = pws[gid].pw_len;

  u32x w[64] = { 0 };

  for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
  {
    w[idx] = pws[gid].i[idx];
  }

  const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;

  // The salt is passed to sha1_update_vector below, the same call that takes
  // the u32x array w[]. It therefore has to be a u32x array as well: a plain
  // u32 array only has a matching pointer type when VECT_SIZE == 1. Each
  // scalar salt word is assigned to a vector element exactly like w[idx]
  // above.
  u32x s[64] = { 0 };

  for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
  {
    s[idx] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[idx]);
  }

  /**
   * loop
   */

  u32x _w0[4];
  u32x _w1[4];
  u32x _w2[4];
  u32x _w3[4];

  // first password word as loaded from pws[gid]; every iteration ORs the
  // current words_buf_r entry into it
  u32x w0l = w[0];

  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
  {
    const u32x w0r = words_buf_r[il_pos / VECT_SIZE];

    const u32x w0 = w0l | w0r;

    w[0] = w0;

    // inner hash: SHA-1 over the password words followed by the salt words
    sha1_ctx_vector_t ctx0;

    sha1_init_vector (&ctx0);

    sha1_update_vector (&ctx0, w, pw_len);

    sha1_update_vector (&ctx0, s, salt_len);

    sha1_final_vector (&ctx0);

    const u32x a = ctx0.h[0];
    const u32x b = ctx0.h[1];
    const u32x c = ctx0.h[2];
    const u32x d = ctx0.h[3];
    const u32x e = ctx0.h[4];

    // outer hash: MD5 over the 40 hex characters of the SHA-1 digest; every
    // word below carries four characters, i.e. two digest bytes, most
    // significant byte of each state word first
    md5_ctx_vector_t ctx;

    md5_init_vector (&ctx);

    _w0[0] = uint_to_hex_lower8 ((a >> 24) & 255) <<  0
           | uint_to_hex_lower8 ((a >> 16) & 255) << 16;
    _w0[1] = uint_to_hex_lower8 ((a >>  8) & 255) <<  0
           | uint_to_hex_lower8 ((a >>  0) & 255) << 16;
    _w0[2] = uint_to_hex_lower8 ((b >> 24) & 255) <<  0
           | uint_to_hex_lower8 ((b >> 16) & 255) << 16;
    _w0[3] = uint_to_hex_lower8 ((b >>  8) & 255) <<  0
           | uint_to_hex_lower8 ((b >>  0) & 255) << 16;
    _w1[0] = uint_to_hex_lower8 ((c >> 24) & 255) <<  0
           | uint_to_hex_lower8 ((c >> 16) & 255) << 16;
    _w1[1] = uint_to_hex_lower8 ((c >>  8) & 255) <<  0
           | uint_to_hex_lower8 ((c >>  0) & 255) << 16;
    _w1[2] = uint_to_hex_lower8 ((d >> 24) & 255) <<  0
           | uint_to_hex_lower8 ((d >> 16) & 255) << 16;
    _w1[3] = uint_to_hex_lower8 ((d >>  8) & 255) <<  0
           | uint_to_hex_lower8 ((d >>  0) & 255) << 16;
    _w2[0] = uint_to_hex_lower8 ((e >> 24) & 255) <<  0
           | uint_to_hex_lower8 ((e >> 16) & 255) << 16;
    _w2[1] = uint_to_hex_lower8 ((e >>  8) & 255) <<  0
           | uint_to_hex_lower8 ((e >>  0) & 255) << 16;

    // the remaining 24 bytes of the 64-byte buffer are zero
    _w2[2] = 0;
    _w2[3] = 0;
    _w3[0] = 0;
    _w3[1] = 0;
    _w3[2] = 0;
    _w3[3] = 0;

    md5_update_vector_64 (&ctx, _w0, _w1, _w2, _w3, 40);

    md5_final_vector (&ctx);

    const u32x r0 = ctx.h[DGST_R0];
    const u32x r1 = ctx.h[DGST_R1];
    const u32x r2 = ctx.h[DGST_R2];
    const u32x r3 = ctx.h[DGST_R3];

    COMPARE_M_SIMD (r0, r1, r2, r3);
  }
}
/**
 * Vectorised single-hash kernel. For every group of VECT_SIZE candidates it:
 *   1. ORs words_buf_r[il_pos / VECT_SIZE] into the first password word,
 *   2. runs SHA-1 over the password words, then the salt words,
 *   3. writes the five SHA-1 state words as 40 lowercase hex characters,
 *   4. runs MD5 over those 40 bytes,
 *   5. passes the MD5 words selected by DGST_R0..DGST_R3 to COMPARE_S_SIMD.
 */
KERNEL_FQ void m04420_sxx (KERN_ATTR_VECTOR ())
{
  /**
   * modifier
   */

  const u64 gid = get_global_id (0);
  const u64 lid = get_local_id (0);
  const u64 lsz = get_local_size (0);

  /**
   * bin2asc table: entry i holds the two lowercase hex characters of byte i
   * (high-nibble character in bits 0..7, low-nibble character in bits 8..15).
   * Each work-item fills the entries lid, lid + lsz, lid + 2 * lsz, ...
   */

  LOCAL_VK u32 l_bin2asc[256];

  for (u32 i = lid; i < 256; i += lsz)
  {
    const u32 i0 = (i >> 0) & 15;
    const u32 i1 = (i >> 4) & 15;

    l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
                 | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
  }

  SYNC_THREADS ();

  // checked only after the table fill and the barrier, so work-items beyond
  // GID_CNT still take part in both
  if (gid >= GID_CNT) return;

  /**
   * digest: the four words of the hash at DIGESTS_OFFSET_HOST
   * NOTE(review): `search` is not referenced by name anywhere else in this
   * kernel; presumably COMPARE_S_SIMD reads it - keep the name.
   */

  const u32 search[4] =
  {
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
  };

  /**
   * base
   */

  const u32 pw_len = pws[gid].pw_len;

  u32x w[64] = { 0 };

  for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
  {
    w[idx] = pws[gid].i[idx];
  }

  const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;

  // The salt is passed to sha1_update_vector below, the same call that takes
  // the u32x array w[]. It therefore has to be a u32x array as well: a plain
  // u32 array only has a matching pointer type when VECT_SIZE == 1. Each
  // scalar salt word is assigned to a vector element exactly like w[idx]
  // above.
  u32x s[64] = { 0 };

  for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
  {
    s[idx] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[idx]);
  }

  /**
   * loop
   */

  u32x _w0[4];
  u32x _w1[4];
  u32x _w2[4];
  u32x _w3[4];

  // first password word as loaded from pws[gid]; every iteration ORs the
  // current words_buf_r entry into it
  u32x w0l = w[0];

  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
  {
    const u32x w0r = words_buf_r[il_pos / VECT_SIZE];

    const u32x w0 = w0l | w0r;

    w[0] = w0;

    // inner hash: SHA-1 over the password words followed by the salt words
    sha1_ctx_vector_t ctx0;

    sha1_init_vector (&ctx0);

    sha1_update_vector (&ctx0, w, pw_len);

    sha1_update_vector (&ctx0, s, salt_len);

    sha1_final_vector (&ctx0);

    const u32x a = ctx0.h[0];
    const u32x b = ctx0.h[1];
    const u32x c = ctx0.h[2];
    const u32x d = ctx0.h[3];
    const u32x e = ctx0.h[4];

    // outer hash: MD5 over the 40 hex characters of the SHA-1 digest; every
    // word below carries four characters, i.e. two digest bytes, most
    // significant byte of each state word first
    md5_ctx_vector_t ctx;

    md5_init_vector (&ctx);

    _w0[0] = uint_to_hex_lower8 ((a >> 24) & 255) <<  0
           | uint_to_hex_lower8 ((a >> 16) & 255) << 16;
    _w0[1] = uint_to_hex_lower8 ((a >>  8) & 255) <<  0
           | uint_to_hex_lower8 ((a >>  0) & 255) << 16;
    _w0[2] = uint_to_hex_lower8 ((b >> 24) & 255) <<  0
           | uint_to_hex_lower8 ((b >> 16) & 255) << 16;
    _w0[3] = uint_to_hex_lower8 ((b >>  8) & 255) <<  0
           | uint_to_hex_lower8 ((b >>  0) & 255) << 16;
    _w1[0] = uint_to_hex_lower8 ((c >> 24) & 255) <<  0
           | uint_to_hex_lower8 ((c >> 16) & 255) << 16;
    _w1[1] = uint_to_hex_lower8 ((c >>  8) & 255) <<  0
           | uint_to_hex_lower8 ((c >>  0) & 255) << 16;
    _w1[2] = uint_to_hex_lower8 ((d >> 24) & 255) <<  0
           | uint_to_hex_lower8 ((d >> 16) & 255) << 16;
    _w1[3] = uint_to_hex_lower8 ((d >>  8) & 255) <<  0
           | uint_to_hex_lower8 ((d >>  0) & 255) << 16;
    _w2[0] = uint_to_hex_lower8 ((e >> 24) & 255) <<  0
           | uint_to_hex_lower8 ((e >> 16) & 255) << 16;
    _w2[1] = uint_to_hex_lower8 ((e >>  8) & 255) <<  0
           | uint_to_hex_lower8 ((e >>  0) & 255) << 16;

    // the remaining 24 bytes of the 64-byte buffer are zero
    _w2[2] = 0;
    _w2[3] = 0;
    _w3[0] = 0;
    _w3[1] = 0;
    _w3[2] = 0;
    _w3[3] = 0;

    md5_update_vector_64 (&ctx, _w0, _w1, _w2, _w3, 40);

    md5_final_vector (&ctx);

    const u32x r0 = ctx.h[DGST_R0];
    const u32x r1 = ctx.h[DGST_R1];
    const u32x r2 = ctx.h[DGST_R2];
    const u32x r3 = ctx.h[DGST_R3];

    COMPARE_S_SIMD (r0, r1, r2, r3);
  }
}

@ -0,0 +1,773 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include M2S(INCLUDE_PATH/inc_vendor.h)
#include M2S(INCLUDE_PATH/inc_types.h)
#include M2S(INCLUDE_PATH/inc_platform.cl)
#include M2S(INCLUDE_PATH/inc_common.cl)
#include M2S(INCLUDE_PATH/inc_rp_optimized.h)
#include M2S(INCLUDE_PATH/inc_rp_optimized.cl)
#include M2S(INCLUDE_PATH/inc_simd.cl)
#include M2S(INCLUDE_PATH/inc_hash_md5.cl)
#include M2S(INCLUDE_PATH/inc_hash_sha1.cl)
#endif
/**
 * uint_to_hex_lower8 (i): looks up l_bin2asc[] once per lane of i and packs
 * the results with make_u32x. One definition exists per supported VECT_SIZE
 * (1, 2, 4, 8, 16); no definition is provided for any other value.
 * The macro names l_bin2asc directly, so an array with exactly that name must
 * be in scope wherever the macro is expanded. The argument is expanded once
 * per lane, so it should be free of side effects.
 */
#if VECT_SIZE == 1
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
#elif VECT_SIZE == 2
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
#elif VECT_SIZE == 4
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
#elif VECT_SIZE == 8
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
#elif VECT_SIZE == 16
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
#endif
/**
 * Multi-hash kernel with fully unrolled hashing. For every group of VECT_SIZE
 * rule positions it:
 *   1. applies the rules to the base password (apply_rules_vect_optimized),
 *   2. places the salt in front of the result and appends the 0x80 marker,
 *   3. runs one unrolled SHA-1 block over that buffer,
 *   4. writes the five SHA-1 state words as 40 lowercase hex characters,
 *   5. runs one unrolled MD5 block over that text,
 *   6. passes the MD5 working variables to COMPARE_M_SIMD.
 * Only a single 64-byte block is processed per hash, so salt plus candidate
 * has to fit into one block together with the padding.
 */
KERNEL_FQ void m04430_m04 (KERN_ATTR_RULES ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table: entry i holds the two lowercase hex characters of byte i
* (high-nibble character in bits 0..7, low-nibble character in bits 8..15).
* Each work-item fills the entries lid, lid + lsz, lid + 2 * lsz, ...
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
}
SYNC_THREADS ();
// checked only after the table fill and the barrier, so work-items beyond
// GID_CNT still take part in both
if (gid >= GID_CNT) return;
/**
* base: the first eight words of the password
*/
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
// the mask keeps the length in the range 0..63
const u32 pw_len = pws[gid].pw_len & 63;
/**
* salt: the first sixteen salt words, copied without byte swapping
*/
u32 salt_buf0[4];
u32 salt_buf1[4];
u32 salt_buf2[4];
u32 salt_buf3[4];
salt_buf0[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 0];
salt_buf0[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 1];
salt_buf0[2] = salt_bufs[SALT_POS_HOST].salt_buf[ 2];
salt_buf0[3] = salt_bufs[SALT_POS_HOST].salt_buf[ 3];
salt_buf1[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 4];
salt_buf1[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 5];
salt_buf1[2] = salt_bufs[SALT_POS_HOST].salt_buf[ 6];
salt_buf1[3] = salt_bufs[SALT_POS_HOST].salt_buf[ 7];
salt_buf2[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 8];
salt_buf2[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 9];
salt_buf2[2] = salt_bufs[SALT_POS_HOST].salt_buf[10];
salt_buf2[3] = salt_bufs[SALT_POS_HOST].salt_buf[11];
salt_buf3[0] = salt_bufs[SALT_POS_HOST].salt_buf[12];
salt_buf3[1] = salt_bufs[SALT_POS_HOST].salt_buf[13];
salt_buf3[2] = salt_bufs[SALT_POS_HOST].salt_buf[14];
salt_buf3[3] = salt_bufs[SALT_POS_HOST].salt_buf[15];
const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;
/**
* loop over rule positions, VECT_SIZE at a time
*/
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
{
u32x w0[4] = { 0 };
u32x w1[4] = { 0 };
u32x w2[4] = { 0 };
u32x w3[4] = { 0 };
// the mutated candidate is written to w0/w1; out_len is its length
const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
/**
* prepend salt
* NOTE(review): presumably switch_buffer_by_offset_le_VV moves the candidate
* bytes up by salt_len so that the ORs below drop the salt into the freed
* leading bytes - confirm against the helper's definition.
*/
const u32x out_salt_len = out_len + salt_len;
switch_buffer_by_offset_le_VV (w0, w1, w2, w3, salt_len);
w0[0] |= salt_buf0[0];
w0[1] |= salt_buf0[1];
w0[2] |= salt_buf0[2];
w0[3] |= salt_buf0[3];
w1[0] |= salt_buf1[0];
w1[1] |= salt_buf1[1];
w1[2] |= salt_buf1[2];
w1[3] |= salt_buf1[3];
w2[0] |= salt_buf2[0];
w2[1] |= salt_buf2[1];
w2[2] |= salt_buf2[2];
w2[3] |= salt_buf2[3];
w3[0] |= salt_buf3[0];
w3[1] |= salt_buf3[1];
w3[2] |= salt_buf3[2];
w3[3] |= salt_buf3[3];
append_0x80_4x4_VV (w0, w1, w2, w3, out_salt_len);
/**
* sha1: message words 0..13 are the byte-swapped buffer words; words 14 and
* 15 are not taken from w3[2]/w3[3] but set to 0 and to the message length
* in bits
*/
u32x w0_t = hc_swap32 (w0[0]);
u32x w1_t = hc_swap32 (w0[1]);
u32x w2_t = hc_swap32 (w0[2]);
u32x w3_t = hc_swap32 (w0[3]);
u32x w4_t = hc_swap32 (w1[0]);
u32x w5_t = hc_swap32 (w1[1]);
u32x w6_t = hc_swap32 (w1[2]);
u32x w7_t = hc_swap32 (w1[3]);
u32x w8_t = hc_swap32 (w2[0]);
u32x w9_t = hc_swap32 (w2[1]);
u32x wa_t = hc_swap32 (w2[2]);
u32x wb_t = hc_swap32 (w2[3]);
u32x wc_t = hc_swap32 (w3[0]);
u32x wd_t = hc_swap32 (w3[1]);
u32x we_t = 0;
u32x wf_t = out_salt_len * 8;
u32x a = SHA1M_A;
u32x b = SHA1M_B;
u32x c = SHA1M_C;
u32x d = SHA1M_D;
u32x e = SHA1M_E;
// steps 0..19 (SHA1_F0o); K is redefined before each group of 20 steps,
// presumably because SHA1_STEP reads it - confirm in the macro definition
#undef K
#define K SHA1C00
SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t);
SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t);
SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t);
SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t);
SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t);
SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t);
SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t);
SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t);
SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t);
SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t);
SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t);
SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t);
SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t);
SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t);
SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t);
SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t);
// from here on each message word is recomputed in place from four earlier
// words (XOR, rotate left by 1) right before the step that consumes it
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t);
// steps 20..39 (SHA1_F1)
#undef K
#define K SHA1C01
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t);
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t);
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t);
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t);
// steps 40..59 (SHA1_F2o)
#undef K
#define K SHA1C02
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t);
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t);
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t);
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t);
// steps 60..79 (SHA1_F1 again)
#undef K
#define K SHA1C03
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t);
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t);
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t);
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t);
// add the initial SHA-1 values back onto the working variables
a += make_u32x (SHA1M_A);
b += make_u32x (SHA1M_B);
c += make_u32x (SHA1M_C);
d += make_u32x (SHA1M_D);
e += make_u32x (SHA1M_E);
/**
* md5: the message is the 40 hex characters of the SHA-1 result; every word
* below carries four characters, i.e. two digest bytes, most significant byte
* of each state word first. The w*_t variables are reused as MD5 message words.
*/
w0_t = uint_to_hex_lower8 ((a >> 24) & 255) << 0
| uint_to_hex_lower8 ((a >> 16) & 255) << 16;
w1_t = uint_to_hex_lower8 ((a >> 8) & 255) << 0
| uint_to_hex_lower8 ((a >> 0) & 255) << 16;
w2_t = uint_to_hex_lower8 ((b >> 24) & 255) << 0
| uint_to_hex_lower8 ((b >> 16) & 255) << 16;
w3_t = uint_to_hex_lower8 ((b >> 8) & 255) << 0
| uint_to_hex_lower8 ((b >> 0) & 255) << 16;
w4_t = uint_to_hex_lower8 ((c >> 24) & 255) << 0
| uint_to_hex_lower8 ((c >> 16) & 255) << 16;
w5_t = uint_to_hex_lower8 ((c >> 8) & 255) << 0
| uint_to_hex_lower8 ((c >> 0) & 255) << 16;
w6_t = uint_to_hex_lower8 ((d >> 24) & 255) << 0
| uint_to_hex_lower8 ((d >> 16) & 255) << 16;
w7_t = uint_to_hex_lower8 ((d >> 8) & 255) << 0
| uint_to_hex_lower8 ((d >> 0) & 255) << 16;
w8_t = uint_to_hex_lower8 ((e >> 24) & 255) << 0
| uint_to_hex_lower8 ((e >> 16) & 255) << 16;
w9_t = uint_to_hex_lower8 ((e >> 8) & 255) << 0
| uint_to_hex_lower8 ((e >> 0) & 255) << 16;
// 0x80 marker byte directly after the 40 message bytes
wa_t = 0x80;
wb_t = 0;
wc_t = 0;
wd_t = 0;
// message length in bits (40 bytes)
we_t = 40 * 8;
wf_t = 0;
a = MD5M_A;
b = MD5M_B;
c = MD5M_C;
d = MD5M_D;
// e is not used by any of the MD5 steps below
e = 0;
// MD5 steps 0..15 (MD5_Fo)
MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03);
// MD5 steps 16..31 (MD5_Go)
MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13);
// NOTE(review): t is never referenced by name in this kernel; presumably it
// is scratch storage used inside MD5_H1 / MD5_H2 - confirm before removing.
u32x t;
// MD5 steps 32..47 (MD5_H1 and MD5_H2 alternating)
MD5_STEP (MD5_H1, a, b, c, d, w5_t, MD5C20, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w8_t, MD5C21, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, wb_t, MD5C22, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, we_t, MD5C23, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w1_t, MD5C24, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w4_t, MD5C25, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w7_t, MD5C26, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, wa_t, MD5C27, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, wd_t, MD5C28, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w0_t, MD5C29, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w3_t, MD5C2a, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w6_t, MD5C2b, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w9_t, MD5C2c, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, wc_t, MD5C2d, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, wf_t, MD5C2e, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w2_t, MD5C2f, MD5S23);
// MD5 steps 48..63 (MD5_I)
MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33);
// The working variables are compared in the order a, d, c, b, and the MD5
// initial values are not added back here.
// NOTE(review): presumably the stored digests are adjusted on the host side
// to match - confirm against the module for this hash mode.
COMPARE_M_SIMD (a, d, c, b);
}
}
// Empty kernel entry point: the body contains no statements, so running it
// performs no work.
// NOTE(review): presumably kept only so the symbol exists for the host side;
// confirm that this variant is never expected to produce results.
KERNEL_FQ void m04430_m08 (KERN_ATTR_RULES ())
{
}
// Empty kernel entry point: the body contains no statements, so running it
// performs no work.
// NOTE(review): presumably kept only so the symbol exists for the host side;
// confirm that this variant is never expected to produce results.
KERNEL_FQ void m04430_m16 (KERN_ATTR_RULES ())
{
}
KERNEL_FQ void m04430_s04 (KERN_ATTR_RULES ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
}
SYNC_THREADS ();
if (gid >= GID_CNT) return;
/**
* base
*/
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_len = pws[gid].pw_len & 63;
/**
* salt
*/
u32 salt_buf0[4];
u32 salt_buf1[4];
u32 salt_buf2[4];
u32 salt_buf3[4];
salt_buf0[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 0];
salt_buf0[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 1];
salt_buf0[2] = salt_bufs[SALT_POS_HOST].salt_buf[ 2];
salt_buf0[3] = salt_bufs[SALT_POS_HOST].salt_buf[ 3];
salt_buf1[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 4];
salt_buf1[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 5];
salt_buf1[2] = salt_bufs[SALT_POS_HOST].salt_buf[ 6];
salt_buf1[3] = salt_bufs[SALT_POS_HOST].salt_buf[ 7];
salt_buf2[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 8];
salt_buf2[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 9];
salt_buf2[2] = salt_bufs[SALT_POS_HOST].salt_buf[10];
salt_buf2[3] = salt_bufs[SALT_POS_HOST].salt_buf[11];
salt_buf3[0] = salt_bufs[SALT_POS_HOST].salt_buf[12];
salt_buf3[1] = salt_bufs[SALT_POS_HOST].salt_buf[13];
salt_buf3[2] = salt_bufs[SALT_POS_HOST].salt_buf[14];
salt_buf3[3] = salt_bufs[SALT_POS_HOST].salt_buf[15];
const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
};
/**
* loop
*/
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
{
u32x w0[4] = { 0 };
u32x w1[4] = { 0 };
u32x w2[4] = { 0 };
u32x w3[4] = { 0 };
const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
/**
* prepend salt
*/
const u32x out_salt_len = out_len + salt_len;
switch_buffer_by_offset_le_VV (w0, w1, w2, w3, salt_len);
w0[0] |= salt_buf0[0];
w0[1] |= salt_buf0[1];
w0[2] |= salt_buf0[2];
w0[3] |= salt_buf0[3];
w1[0] |= salt_buf1[0];
w1[1] |= salt_buf1[1];
w1[2] |= salt_buf1[2];
w1[3] |= salt_buf1[3];
w2[0] |= salt_buf2[0];
w2[1] |= salt_buf2[1];
w2[2] |= salt_buf2[2];
w2[3] |= salt_buf2[3];
w3[0] |= salt_buf3[0];
w3[1] |= salt_buf3[1];
w3[2] |= salt_buf3[2];
w3[3] |= salt_buf3[3];
append_0x80_4x4_VV (w0, w1, w2, w3, out_salt_len);
/**
* sha1
*/
u32x w0_t = hc_swap32 (w0[0]);
u32x w1_t = hc_swap32 (w0[1]);
u32x w2_t = hc_swap32 (w0[2]);
u32x w3_t = hc_swap32 (w0[3]);
u32x w4_t = hc_swap32 (w1[0]);
u32x w5_t = hc_swap32 (w1[1]);
u32x w6_t = hc_swap32 (w1[2]);
u32x w7_t = hc_swap32 (w1[3]);
u32x w8_t = hc_swap32 (w2[0]);
u32x w9_t = hc_swap32 (w2[1]);
u32x wa_t = hc_swap32 (w2[2]);
u32x wb_t = hc_swap32 (w2[3]);
u32x wc_t = hc_swap32 (w3[0]);
u32x wd_t = hc_swap32 (w3[1]);
u32x we_t = 0;
u32x wf_t = out_salt_len * 8;
u32x a = SHA1M_A;
u32x b = SHA1M_B;
u32x c = SHA1M_C;
u32x d = SHA1M_D;
u32x e = SHA1M_E;
#undef K
#define K SHA1C00
SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t);
SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t);
SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t);
SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t);
SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t);
SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t);
SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t);
SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t);
SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t);
SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t);
SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t);
SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t);
SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t);
SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t);
SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t);
SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t);
#undef K
#define K SHA1C01
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t);
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t);
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t);
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t);
#undef K
#define K SHA1C02
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t);
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t);
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t);
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t);
#undef K
#define K SHA1C03
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t);
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t);
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t);
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t);
a += make_u32x (SHA1M_A);
b += make_u32x (SHA1M_B);
c += make_u32x (SHA1M_C);
d += make_u32x (SHA1M_D);
e += make_u32x (SHA1M_E);
/**
* md5
*/
w0_t = uint_to_hex_lower8 ((a >> 24) & 255) << 0
| uint_to_hex_lower8 ((a >> 16) & 255) << 16;
w1_t = uint_to_hex_lower8 ((a >> 8) & 255) << 0
| uint_to_hex_lower8 ((a >> 0) & 255) << 16;
w2_t = uint_to_hex_lower8 ((b >> 24) & 255) << 0
| uint_to_hex_lower8 ((b >> 16) & 255) << 16;
w3_t = uint_to_hex_lower8 ((b >> 8) & 255) << 0
| uint_to_hex_lower8 ((b >> 0) & 255) << 16;
w4_t = uint_to_hex_lower8 ((c >> 24) & 255) << 0
| uint_to_hex_lower8 ((c >> 16) & 255) << 16;
w5_t = uint_to_hex_lower8 ((c >> 8) & 255) << 0
| uint_to_hex_lower8 ((c >> 0) & 255) << 16;
w6_t = uint_to_hex_lower8 ((d >> 24) & 255) << 0
| uint_to_hex_lower8 ((d >> 16) & 255) << 16;
w7_t = uint_to_hex_lower8 ((d >> 8) & 255) << 0
| uint_to_hex_lower8 ((d >> 0) & 255) << 16;
w8_t = uint_to_hex_lower8 ((e >> 24) & 255) << 0
| uint_to_hex_lower8 ((e >> 16) & 255) << 16;
w9_t = uint_to_hex_lower8 ((e >> 8) & 255) << 0
| uint_to_hex_lower8 ((e >> 0) & 255) << 16;
wa_t = 0x80;
wb_t = 0;
wc_t = 0;
wd_t = 0;
we_t = 40 * 8;
wf_t = 0;
a = MD5M_A;
b = MD5M_B;
c = MD5M_C;
d = MD5M_D;
e = 0;
MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03);
MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13);
u32x t;
MD5_STEP (MD5_H1, a, b, c, d, w5_t, MD5C20, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w8_t, MD5C21, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, wb_t, MD5C22, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, we_t, MD5C23, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w1_t, MD5C24, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w4_t, MD5C25, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w7_t, MD5C26, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, wa_t, MD5C27, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, wd_t, MD5C28, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w0_t, MD5C29, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w3_t, MD5C2a, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w6_t, MD5C2b, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w9_t, MD5C2c, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, wc_t, MD5C2d, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, wf_t, MD5C2e, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w2_t, MD5C2f, MD5S23);
MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30);
if (MATCHES_NONE_VS (a, search[0])) continue;
MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33);
COMPARE_S_SIMD (a, d, c, b);
}
}
/*
 * m04430_s08: kernel entry point whose body is empty; invoking it does no work.
 * NOTE(review): presumably kept only so that this kernel symbol exists for the
 * host side to resolve - confirm against the caller before removing it.
 */
KERNEL_FQ void m04430_s08 (KERN_ATTR_RULES ())
{
}
/*
 * m04430_s16: kernel entry point whose body is empty; invoking it does no work.
 * NOTE(review): presumably kept only so that this kernel symbol exists for the
 * host side to resolve - confirm against the caller before removing it.
 */
KERNEL_FQ void m04430_s16 (KERN_ATTR_RULES ())
{
}

@ -0,0 +1,266 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
//#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include M2S(INCLUDE_PATH/inc_vendor.h)
#include M2S(INCLUDE_PATH/inc_types.h)
#include M2S(INCLUDE_PATH/inc_platform.cl)
#include M2S(INCLUDE_PATH/inc_common.cl)
#include M2S(INCLUDE_PATH/inc_rp.h)
#include M2S(INCLUDE_PATH/inc_rp.cl)
#include M2S(INCLUDE_PATH/inc_scalar.cl)
#include M2S(INCLUDE_PATH/inc_hash_md5.cl)
#include M2S(INCLUDE_PATH/inc_hash_sha1.cl)
#endif
/*
 * uint_to_hex_lower8(i): index l_bin2asc[] with each lane of i and pack the
 * looked-up entries with make_u32x. One variant is provided per VECT_SIZE
 * (1, 2, 4, 8, 16); for any other VECT_SIZE the macro is left undefined.
 *
 * The macro refers to l_bin2asc by name, so a table with exactly that name
 * must be in scope wherever the macro is expanded. The argument is expanded
 * once per lane, so it should be free of side effects.
 */
#if VECT_SIZE == 1
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
#elif VECT_SIZE == 2
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
#elif VECT_SIZE == 4
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
#elif VECT_SIZE == 8
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
#elif VECT_SIZE == 16
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
#endif
/*
 * m04430_mxx: rule-based kernel that hands its result to COMPARE_M_SCALAR.
 *
 * For every rule index il_pos in [0, IL_CNT) the work-item:
 *   1. applies rules_buf[il_pos].cmds to a copy of its base password,
 *   2. feeds the salt followed by the mutated password through SHA-1,
 *   3. writes the five SHA-1 state words as 40 lowercase hex characters,
 *   4. runs MD5 over those 40 bytes,
 *   5. passes ctx.h[DGST_R0..DGST_R3] to COMPARE_M_SCALAR.
 *
 * NOTE(review): COPY_PW, PASTE_PW and COMPARE_M_SCALAR are macros defined
 * outside this file section; they may rely on local names used here (gid,
 * il_pos, ...). Verify before renaming any local.
 */
KERNEL_FQ void m04430_mxx (KERN_ATTR_RULES ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*
* l_bin2asc[byte] holds the two lowercase hex characters of that byte:
* the high-nibble character in bits 0..7, the low-nibble character in bits 8..15.
* The 256 entries are filled cooperatively: each work-item writes entries
* lid, lid + lsz, lid + 2 * lsz, ...
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15; // low nibble
const u32 i1 = (i >> 4) & 15; // high nibble
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
}
// The bounds check comes after SYNC_THREADS (), so work-items with
// gid >= GID_CNT still take part in filling the table before they return.
SYNC_THREADS ();
if (gid >= GID_CNT) return;
/**
* base
*/
// NOTE(review): presumably stashes pws[gid] so that PASTE_PW can yield a
// fresh copy on every loop iteration - confirm against the macro definition.
COPY_PW (pws[gid]);
// ctx1 absorbs the salt once, before the loop; each iteration starts from a
// copy of it, so the salt is not re-hashed per rule.
sha1_ctx_t ctx1;
sha1_init (&ctx1);
sha1_update_global_swap (&ctx1, salt_bufs[SALT_POS_HOST].salt_buf, salt_bufs[SALT_POS_HOST].salt_len);
/**
* loop
*/
// Scratch block for the MD5 input; all 16 words are rewritten on every iteration.
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
{
// Mutate a private copy of the base password with rule il_pos;
// apply_rules returns the resulting length.
pw_t tmp = PASTE_PW;
tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
// SHA-1 over salt (already in ctx1) followed by the mutated password.
sha1_ctx_t ctx0 = ctx1;
sha1_update_swap (&ctx0, tmp.i, tmp.pw_len);
sha1_final (&ctx0);
const u32 a = ctx0.h[0];
const u32 b = ctx0.h[1];
const u32 c = ctx0.h[2];
const u32 d = ctx0.h[3];
const u32 e = ctx0.h[4];
md5_ctx_t ctx;
md5_init (&ctx);
// Hex-encode the SHA-1 state: each state word becomes two output words of
// four characters each, most significant byte first. Within an output word
// the first byte's character pair sits in bits 0..15 and the second byte's
// pair in bits 16..31.
w0[0] = uint_to_hex_lower8 ((a >> 24) & 255) << 0
| uint_to_hex_lower8 ((a >> 16) & 255) << 16;
w0[1] = uint_to_hex_lower8 ((a >> 8) & 255) << 0
| uint_to_hex_lower8 ((a >> 0) & 255) << 16;
w0[2] = uint_to_hex_lower8 ((b >> 24) & 255) << 0
| uint_to_hex_lower8 ((b >> 16) & 255) << 16;
w0[3] = uint_to_hex_lower8 ((b >> 8) & 255) << 0
| uint_to_hex_lower8 ((b >> 0) & 255) << 16;
w1[0] = uint_to_hex_lower8 ((c >> 24) & 255) << 0
| uint_to_hex_lower8 ((c >> 16) & 255) << 16;
w1[1] = uint_to_hex_lower8 ((c >> 8) & 255) << 0
| uint_to_hex_lower8 ((c >> 0) & 255) << 16;
w1[2] = uint_to_hex_lower8 ((d >> 24) & 255) << 0
| uint_to_hex_lower8 ((d >> 16) & 255) << 16;
w1[3] = uint_to_hex_lower8 ((d >> 8) & 255) << 0
| uint_to_hex_lower8 ((d >> 0) & 255) << 16;
w2[0] = uint_to_hex_lower8 ((e >> 24) & 255) << 0
| uint_to_hex_lower8 ((e >> 16) & 255) << 16;
w2[1] = uint_to_hex_lower8 ((e >> 8) & 255) << 0
| uint_to_hex_lower8 ((e >> 0) & 255) << 16;
// The remaining six words of the block are zeroed.
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
// 40 = 10 words * 4 hex characters: the length of the hex-encoded SHA-1 digest.
md5_update_64 (&ctx, w0, w1, w2, w3, 40);
md5_final (&ctx);
// Select the digest words in the order given by DGST_R0..DGST_R3.
const u32 r0 = ctx.h[DGST_R0];
const u32 r1 = ctx.h[DGST_R1];
const u32 r2 = ctx.h[DGST_R2];
const u32 r3 = ctx.h[DGST_R3];
COMPARE_M_SCALAR (r0, r1, r2, r3);
}
}
/*
 * m04430_sxx: rule-based kernel that hands its result to COMPARE_S_SCALAR.
 *
 * For every rule index il_pos in [0, IL_CNT) the work-item:
 *   1. applies rules_buf[il_pos].cmds to a copy of its base password,
 *   2. feeds the salt followed by the mutated password through SHA-1,
 *   3. writes the five SHA-1 state words as 40 lowercase hex characters,
 *   4. runs MD5 over those 40 bytes,
 *   5. passes ctx.h[DGST_R0..DGST_R3] to COMPARE_S_SCALAR.
 *
 * NOTE(review): COPY_PW, PASTE_PW and COMPARE_S_SCALAR are macros defined
 * outside this file section; they may rely on local names used here (gid,
 * il_pos, search, ...). Verify before renaming any local.
 */
KERNEL_FQ void m04430_sxx (KERN_ATTR_RULES ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*
* l_bin2asc[byte] holds the two lowercase hex characters of that byte:
* the high-nibble character in bits 0..7, the low-nibble character in bits 8..15.
* The 256 entries are filled cooperatively: each work-item writes entries
* lid, lid + lsz, lid + 2 * lsz, ...
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15; // low nibble
const u32 i1 = (i >> 4) & 15; // high nibble
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
}
// The bounds check comes after SYNC_THREADS (), so work-items with
// gid >= GID_CNT still take part in filling the table before they return.
SYNC_THREADS ();
if (gid >= GID_CNT) return;
/**
* digest
*
* The four target digest words at DIGESTS_OFFSET_HOST, in DGST_R0..DGST_R3 order.
* NOTE(review): search is never referenced by name in this body; presumably
* COMPARE_S_SCALAR reads it - confirm before renaming or removing.
*/
const u32 search[4] =
{
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
};
/**
* base
*/
// NOTE(review): presumably stashes pws[gid] so that PASTE_PW can yield a
// fresh copy on every loop iteration - confirm against the macro definition.
COPY_PW (pws[gid]);
// ctx1 absorbs the salt once, before the loop; each iteration starts from a
// copy of it, so the salt is not re-hashed per rule.
sha1_ctx_t ctx1;
sha1_init (&ctx1);
sha1_update_global_swap (&ctx1, salt_bufs[SALT_POS_HOST].salt_buf, salt_bufs[SALT_POS_HOST].salt_len);
/**
* loop
*/
// Scratch block for the MD5 input; all 16 words are rewritten on every iteration.
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
{
// Mutate a private copy of the base password with rule il_pos;
// apply_rules returns the resulting length.
pw_t tmp = PASTE_PW;
tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
// SHA-1 over salt (already in ctx1) followed by the mutated password.
sha1_ctx_t ctx0 = ctx1;
sha1_update_swap (&ctx0, tmp.i, tmp.pw_len);
sha1_final (&ctx0);
const u32 a = ctx0.h[0];
const u32 b = ctx0.h[1];
const u32 c = ctx0.h[2];
const u32 d = ctx0.h[3];
const u32 e = ctx0.h[4];
md5_ctx_t ctx;
md5_init (&ctx);
// Hex-encode the SHA-1 state: each state word becomes two output words of
// four characters each, most significant byte first. Within an output word
// the first byte's character pair sits in bits 0..15 and the second byte's
// pair in bits 16..31.
w0[0] = uint_to_hex_lower8 ((a >> 24) & 255) << 0
| uint_to_hex_lower8 ((a >> 16) & 255) << 16;
w0[1] = uint_to_hex_lower8 ((a >> 8) & 255) << 0
| uint_to_hex_lower8 ((a >> 0) & 255) << 16;
w0[2] = uint_to_hex_lower8 ((b >> 24) & 255) << 0
| uint_to_hex_lower8 ((b >> 16) & 255) << 16;
w0[3] = uint_to_hex_lower8 ((b >> 8) & 255) << 0
| uint_to_hex_lower8 ((b >> 0) & 255) << 16;
w1[0] = uint_to_hex_lower8 ((c >> 24) & 255) << 0
| uint_to_hex_lower8 ((c >> 16) & 255) << 16;
w1[1] = uint_to_hex_lower8 ((c >> 8) & 255) << 0
| uint_to_hex_lower8 ((c >> 0) & 255) << 16;
w1[2] = uint_to_hex_lower8 ((d >> 24) & 255) << 0
| uint_to_hex_lower8 ((d >> 16) & 255) << 16;
w1[3] = uint_to_hex_lower8 ((d >> 8) & 255) << 0
| uint_to_hex_lower8 ((d >> 0) & 255) << 16;
w2[0] = uint_to_hex_lower8 ((e >> 24) & 255) << 0
| uint_to_hex_lower8 ((e >> 16) & 255) << 16;
w2[1] = uint_to_hex_lower8 ((e >> 8) & 255) << 0
| uint_to_hex_lower8 ((e >> 0) & 255) << 16;
// The remaining six words of the block are zeroed.
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
// 40 = 10 words * 4 hex characters: the length of the hex-encoded SHA-1 digest.
md5_update_64 (&ctx, w0, w1, w2, w3, 40);
md5_final (&ctx);
// Select the digest words in the order given by DGST_R0..DGST_R3.
const u32 r0 = ctx.h[DGST_R0];
const u32 r1 = ctx.h[DGST_R1];
const u32 r2 = ctx.h[DGST_R2];
const u32 r3 = ctx.h[DGST_R3];
COMPARE_S_SCALAR (r0, r1, r2, r3);
}
}

@ -0,0 +1,886 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include M2S(INCLUDE_PATH/inc_vendor.h)
#include M2S(INCLUDE_PATH/inc_types.h)
#include M2S(INCLUDE_PATH/inc_platform.cl)
#include M2S(INCLUDE_PATH/inc_common.cl)
#include M2S(INCLUDE_PATH/inc_simd.cl)
#include M2S(INCLUDE_PATH/inc_hash_md5.cl)
#include M2S(INCLUDE_PATH/inc_hash_sha1.cl)
#endif
/*
 * uint_to_hex_lower8(i): index l_bin2asc[] with each lane of i and pack the
 * looked-up entries with make_u32x. One variant is provided per VECT_SIZE
 * (1, 2, 4, 8, 16); for any other VECT_SIZE the macro is left undefined.
 *
 * The macro refers to l_bin2asc by name, so a table with exactly that name
 * must be in scope wherever the macro is expanded. The argument is expanded
 * once per lane, so it should be free of side effects.
 */
#if VECT_SIZE == 1
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
#elif VECT_SIZE == 2
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
#elif VECT_SIZE == 4
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
#elif VECT_SIZE == 8
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
#elif VECT_SIZE == 16
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
#endif
KERNEL_FQ void m04430_m04 (KERN_ATTR_BASIC ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
}
SYNC_THREADS ();
if (gid >= GID_CNT) return;
/**
* base
*/
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_l_len = pws[gid].pw_len & 63;
/**
* salt
*/
u32 salt_buf0[4];
u32 salt_buf1[4];
u32 salt_buf2[4];
u32 salt_buf3[4];
salt_buf0[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 0];
salt_buf0[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 1];
salt_buf0[2] = salt_bufs[SALT_POS_HOST].salt_buf[ 2];
salt_buf0[3] = salt_bufs[SALT_POS_HOST].salt_buf[ 3];
salt_buf1[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 4];
salt_buf1[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 5];
salt_buf1[2] = salt_bufs[SALT_POS_HOST].salt_buf[ 6];
salt_buf1[3] = salt_bufs[SALT_POS_HOST].salt_buf[ 7];
salt_buf2[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 8];
salt_buf2[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 9];
salt_buf2[2] = salt_bufs[SALT_POS_HOST].salt_buf[10];
salt_buf2[3] = salt_bufs[SALT_POS_HOST].salt_buf[11];
salt_buf3[0] = salt_bufs[SALT_POS_HOST].salt_buf[12];
salt_buf3[1] = salt_bufs[SALT_POS_HOST].salt_buf[13];
salt_buf3[2] = salt_bufs[SALT_POS_HOST].salt_buf[14];
salt_buf3[3] = salt_bufs[SALT_POS_HOST].salt_buf[15];
const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;
/**
* loop
*/
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
{
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63;
const u32x pw_len = (pw_l_len + pw_r_len) & 63;
/**
* concat password candidate
*/
u32x wordl0[4] = { 0 };
u32x wordl1[4] = { 0 };
u32x wordl2[4] = { 0 };
u32x wordl3[4] = { 0 };
wordl0[0] = pw_buf0[0];
wordl0[1] = pw_buf0[1];
wordl0[2] = pw_buf0[2];
wordl0[3] = pw_buf0[3];
wordl1[0] = pw_buf1[0];
wordl1[1] = pw_buf1[1];
wordl1[2] = pw_buf1[2];
wordl1[3] = pw_buf1[3];
u32x wordr0[4] = { 0 };
u32x wordr1[4] = { 0 };
u32x wordr2[4] = { 0 };
u32x wordr3[4] = { 0 };
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
if (COMBS_MODE == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
else
{
switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
}
u32x w0[4];
u32x w1[4];
u32x w2[4];
u32x w3[4];
w0[0] = wordl0[0] | wordr0[0];
w0[1] = wordl0[1] | wordr0[1];
w0[2] = wordl0[2] | wordr0[2];
w0[3] = wordl0[3] | wordr0[3];
w1[0] = wordl1[0] | wordr1[0];
w1[1] = wordl1[1] | wordr1[1];
w1[2] = wordl1[2] | wordr1[2];
w1[3] = wordl1[3] | wordr1[3];
w2[0] = wordl2[0] | wordr2[0];
w2[1] = wordl2[1] | wordr2[1];
w2[2] = wordl2[2] | wordr2[2];
w2[3] = wordl2[3] | wordr2[3];
w3[0] = wordl3[0] | wordr3[0];
w3[1] = wordl3[1] | wordr3[1];
w3[2] = wordl3[2] | wordr3[2];
w3[3] = wordl3[3] | wordr3[3];
/**
* prepend salt
*/
switch_buffer_by_offset_le (w0, w1, w2, w3, salt_len);
const u32x pw_salt_len = pw_len + salt_len;
w0[0] |= salt_buf0[0];
w0[1] |= salt_buf0[1];
w0[2] |= salt_buf0[2];
w0[3] |= salt_buf0[3];
w1[0] |= salt_buf1[0];
w1[1] |= salt_buf1[1];
w1[2] |= salt_buf1[2];
w1[3] |= salt_buf1[3];
w2[0] |= salt_buf2[0];
w2[1] |= salt_buf2[1];
w2[2] |= salt_buf2[2];
w2[3] |= salt_buf2[3];
w3[0] |= salt_buf3[0];
w3[1] |= salt_buf3[1];
w3[2] |= salt_buf3[2];
w3[3] |= salt_buf3[3];
/**
* sha1
*/
u32x w0_t = hc_swap32 (w0[0]);
u32x w1_t = hc_swap32 (w0[1]);
u32x w2_t = hc_swap32 (w0[2]);
u32x w3_t = hc_swap32 (w0[3]);
u32x w4_t = hc_swap32 (w1[0]);
u32x w5_t = hc_swap32 (w1[1]);
u32x w6_t = hc_swap32 (w1[2]);
u32x w7_t = hc_swap32 (w1[3]);
u32x w8_t = hc_swap32 (w2[0]);
u32x w9_t = hc_swap32 (w2[1]);
u32x wa_t = hc_swap32 (w2[2]);
u32x wb_t = hc_swap32 (w2[3]);
u32x wc_t = hc_swap32 (w3[0]);
u32x wd_t = hc_swap32 (w3[1]);
u32x we_t = 0;
u32x wf_t = pw_salt_len * 8;
u32x a = SHA1M_A;
u32x b = SHA1M_B;
u32x c = SHA1M_C;
u32x d = SHA1M_D;
u32x e = SHA1M_E;
#undef K
#define K SHA1C00
SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t);
SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t);
SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t);
SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t);
SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t);
SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t);
SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t);
SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t);
SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t);
SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t);
SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t);
SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t);
SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t);
SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t);
SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t);
SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t);
#undef K
#define K SHA1C01
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t);
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t);
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t);
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t);
#undef K
#define K SHA1C02
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t);
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t);
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t);
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t);
#undef K
#define K SHA1C03
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t);
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t);
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t);
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t);
a += make_u32x (SHA1M_A);
b += make_u32x (SHA1M_B);
c += make_u32x (SHA1M_C);
d += make_u32x (SHA1M_D);
e += make_u32x (SHA1M_E);
/**
* md5
*/
w0_t = uint_to_hex_lower8 ((a >> 24) & 255) << 0
| uint_to_hex_lower8 ((a >> 16) & 255) << 16;
w1_t = uint_to_hex_lower8 ((a >> 8) & 255) << 0
| uint_to_hex_lower8 ((a >> 0) & 255) << 16;
w2_t = uint_to_hex_lower8 ((b >> 24) & 255) << 0
| uint_to_hex_lower8 ((b >> 16) & 255) << 16;
w3_t = uint_to_hex_lower8 ((b >> 8) & 255) << 0
| uint_to_hex_lower8 ((b >> 0) & 255) << 16;
w4_t = uint_to_hex_lower8 ((c >> 24) & 255) << 0
| uint_to_hex_lower8 ((c >> 16) & 255) << 16;
w5_t = uint_to_hex_lower8 ((c >> 8) & 255) << 0
| uint_to_hex_lower8 ((c >> 0) & 255) << 16;
w6_t = uint_to_hex_lower8 ((d >> 24) & 255) << 0
| uint_to_hex_lower8 ((d >> 16) & 255) << 16;
w7_t = uint_to_hex_lower8 ((d >> 8) & 255) << 0
| uint_to_hex_lower8 ((d >> 0) & 255) << 16;
w8_t = uint_to_hex_lower8 ((e >> 24) & 255) << 0
| uint_to_hex_lower8 ((e >> 16) & 255) << 16;
w9_t = uint_to_hex_lower8 ((e >> 8) & 255) << 0
| uint_to_hex_lower8 ((e >> 0) & 255) << 16;
wa_t = 0x80;
wb_t = 0;
wc_t = 0;
wd_t = 0;
we_t = 40 * 8;
wf_t = 0;
a = MD5M_A;
b = MD5M_B;
c = MD5M_C;
d = MD5M_D;
e = 0;
MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03);
MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13);
u32x t;
MD5_STEP (MD5_H1, a, b, c, d, w5_t, MD5C20, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w8_t, MD5C21, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, wb_t, MD5C22, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, we_t, MD5C23, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w1_t, MD5C24, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w4_t, MD5C25, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w7_t, MD5C26, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, wa_t, MD5C27, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, wd_t, MD5C28, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w0_t, MD5C29, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w3_t, MD5C2a, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w6_t, MD5C2b, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w9_t, MD5C2c, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, wc_t, MD5C2d, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, wf_t, MD5C2e, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w2_t, MD5C2f, MD5S23);
MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33);
COMPARE_M_SIMD (a, d, c, b);
}
}
// m04430_m08: kernel entry point with an empty body; it performs no work.
// NOTE(review): presumably only kept so that the _m08 symbol exists next to
// m04430_m04 - confirm against the host-side kernel loader.
KERNEL_FQ void m04430_m08 (KERN_ATTR_BASIC ())
{
}
// m04430_m16: kernel entry point with an empty body; it performs no work.
// NOTE(review): presumably only kept so that the _m16 symbol exists next to
// m04430_m04 - confirm against the host-side kernel loader.
KERNEL_FQ void m04430_m16 (KERN_ATTR_BASIC ())
{
}
// m04430_s04: for every combinator candidate, runs SHA-1 over the salt followed
// by the combined candidate, hex-encodes the five SHA-1 state words as 40
// lowercase ASCII characters, runs the MD5 rounds over that text and compares
// the outcome with the one target digest loaded into search[].
KERNEL_FQ void m04430_s04 (KERN_ATTR_BASIC ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
// Entry i holds the ASCII hex digit of the high nibble of i in bits 0..7 and
// the digit of the low nibble in bits 8..15 (digits '0'-'9', 'a'-'f').
// The fill loop starts at lid and advances by lsz, so the 256 entries are
// spread over the work-items returned by get_local_size ().
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
}
// SYNC_THREADS () runs before the gid check, i.e. also for work-items that
// return right below.
SYNC_THREADS ();
if (gid >= GID_CNT) return;
/**
* base
*/
// First eight 32-bit words of this work-item's entry in pws[].
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
// Length is masked to the range 0..63.
const u32 pw_l_len = pws[gid].pw_len & 63;
/**
* salt
*/
// Sixteen salt words are copied into private arrays.
u32 salt_buf0[4];
u32 salt_buf1[4];
u32 salt_buf2[4];
u32 salt_buf3[4];
salt_buf0[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 0];
salt_buf0[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 1];
salt_buf0[2] = salt_bufs[SALT_POS_HOST].salt_buf[ 2];
salt_buf0[3] = salt_bufs[SALT_POS_HOST].salt_buf[ 3];
salt_buf1[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 4];
salt_buf1[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 5];
salt_buf1[2] = salt_bufs[SALT_POS_HOST].salt_buf[ 6];
salt_buf1[3] = salt_bufs[SALT_POS_HOST].salt_buf[ 7];
salt_buf2[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 8];
salt_buf2[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 9];
salt_buf2[2] = salt_bufs[SALT_POS_HOST].salt_buf[10];
salt_buf2[3] = salt_bufs[SALT_POS_HOST].salt_buf[11];
salt_buf3[0] = salt_bufs[SALT_POS_HOST].salt_buf[12];
salt_buf3[1] = salt_bufs[SALT_POS_HOST].salt_buf[13];
salt_buf3[2] = salt_bufs[SALT_POS_HOST].salt_buf[14];
salt_buf3[3] = salt_bufs[SALT_POS_HOST].salt_buf[15];
const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;
/**
* digest
*/
// Target digest words, picked in DGST_R0..DGST_R3 order.
const u32 search[4] =
{
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
};
/**
* loop
*/
// One iteration handles VECT_SIZE entries of combs_buf.
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
{
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63;
const u32x pw_len = (pw_l_len + pw_r_len) & 63;
/**
* concat password candidate
*/
// wordl* carries the pws[] word, wordr* the combs_buf word; only the first
// eight words of each are loaded, the rest start as zero.
u32x wordl0[4] = { 0 };
u32x wordl1[4] = { 0 };
u32x wordl2[4] = { 0 };
u32x wordl3[4] = { 0 };
wordl0[0] = pw_buf0[0];
wordl0[1] = pw_buf0[1];
wordl0[2] = pw_buf0[2];
wordl0[3] = pw_buf0[3];
wordl1[0] = pw_buf1[0];
wordl1[1] = pw_buf1[1];
wordl1[2] = pw_buf1[2];
wordl1[3] = pw_buf1[3];
u32x wordr0[4] = { 0 };
u32x wordr1[4] = { 0 };
u32x wordr2[4] = { 0 };
u32x wordr3[4] = { 0 };
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
// Depending on COMBS_MODE either the combs word is passed through
// switch_buffer_by_offset_le_VV with pw_l_len, or the pws word with pw_r_len.
// NOTE(review): presumably a byte shift so the two halves do not overlap when
// they are OR'ed together below - confirm against the helper.
if (COMBS_MODE == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
else
{
switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
}
u32x w0[4];
u32x w1[4];
u32x w2[4];
u32x w3[4];
w0[0] = wordl0[0] | wordr0[0];
w0[1] = wordl0[1] | wordr0[1];
w0[2] = wordl0[2] | wordr0[2];
w0[3] = wordl0[3] | wordr0[3];
w1[0] = wordl1[0] | wordr1[0];
w1[1] = wordl1[1] | wordr1[1];
w1[2] = wordl1[2] | wordr1[2];
w1[3] = wordl1[3] | wordr1[3];
w2[0] = wordl2[0] | wordr2[0];
w2[1] = wordl2[1] | wordr2[1];
w2[2] = wordl2[2] | wordr2[2];
w2[3] = wordl2[3] | wordr2[3];
w3[0] = wordl3[0] | wordr3[0];
w3[1] = wordl3[1] | wordr3[1];
w3[2] = wordl3[2] | wordr3[2];
w3[3] = wordl3[3] | wordr3[3];
/**
* prepend salt
*/
// The candidate is passed through switch_buffer_by_offset_le with salt_len
// and the salt words are OR'ed in from word 0 onwards.
// salt_buf3[3] is never merged, and w3[2] / w3[3] are not read afterwards.
switch_buffer_by_offset_le (w0, w1, w2, w3, salt_len);
const u32x pw_salt_len = pw_len + salt_len;
w0[0] |= salt_buf0[0];
w0[1] |= salt_buf0[1];
w0[2] |= salt_buf0[2];
w0[3] |= salt_buf0[3];
w1[0] |= salt_buf1[0];
w1[1] |= salt_buf1[1];
w1[2] |= salt_buf1[2];
w1[3] |= salt_buf1[3];
w2[0] |= salt_buf2[0];
w2[1] |= salt_buf2[1];
w2[2] |= salt_buf2[2];
w2[3] |= salt_buf2[3];
w3[0] |= salt_buf3[0];
w3[1] |= salt_buf3[1];
w3[2] |= salt_buf3[2];
/**
* sha1
*/
// Words 0..13 of the message block are the byte-swapped buffer words; word 14
// is fixed to 0 and word 15 carries the message length in bits.
// NOTE(review): no 0x80 padding byte is added in this function; presumably it
// already arrives as part of the candidate data - confirm with the module.
u32x w0_t = hc_swap32 (w0[0]);
u32x w1_t = hc_swap32 (w0[1]);
u32x w2_t = hc_swap32 (w0[2]);
u32x w3_t = hc_swap32 (w0[3]);
u32x w4_t = hc_swap32 (w1[0]);
u32x w5_t = hc_swap32 (w1[1]);
u32x w6_t = hc_swap32 (w1[2]);
u32x w7_t = hc_swap32 (w1[3]);
u32x w8_t = hc_swap32 (w2[0]);
u32x w9_t = hc_swap32 (w2[1]);
u32x wa_t = hc_swap32 (w2[2]);
u32x wb_t = hc_swap32 (w2[3]);
u32x wc_t = hc_swap32 (w3[0]);
u32x wd_t = hc_swap32 (w3[1]);
u32x we_t = 0;
u32x wf_t = pw_salt_len * 8;
u32x a = SHA1M_A;
u32x b = SHA1M_B;
u32x c = SHA1M_C;
u32x d = SHA1M_D;
u32x e = SHA1M_E;
// 80 SHA1_STEP invocations in four groups of 20. K is redefined before each
// group (SHA1C00..SHA1C03); NOTE(review): presumably SHA1_STEP reads K - confirm.
// From the 17th step on, each message word is recomputed in place as
// rotl32 (w[-3] ^ w[-8] ^ w[-14] ^ w[-16], 1) right before it is used.
#undef K
#define K SHA1C00
SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t);
SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t);
SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t);
SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t);
SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t);
SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t);
SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t);
SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t);
SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t);
SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t);
SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t);
SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t);
SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t);
SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t);
SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t);
SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t);
#undef K
#define K SHA1C01
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t);
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t);
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t);
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t);
#undef K
#define K SHA1C02
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t);
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t);
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t);
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t);
#undef K
#define K SHA1C03
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t);
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t);
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t);
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t);
// Add the SHA-1 initial values back onto the working variables.
a += make_u32x (SHA1M_A);
b += make_u32x (SHA1M_B);
c += make_u32x (SHA1M_C);
d += make_u32x (SHA1M_D);
e += make_u32x (SHA1M_E);
/**
* md5
*/
// Each MD5 input word packs two l_bin2asc lookups (one per digest byte, most
// significant byte first): one in the low 16 bits, the next shifted up by 16.
// Ten words hold the 40 hex characters of a..e.
w0_t = uint_to_hex_lower8 ((a >> 24) & 255) << 0
| uint_to_hex_lower8 ((a >> 16) & 255) << 16;
w1_t = uint_to_hex_lower8 ((a >> 8) & 255) << 0
| uint_to_hex_lower8 ((a >> 0) & 255) << 16;
w2_t = uint_to_hex_lower8 ((b >> 24) & 255) << 0
| uint_to_hex_lower8 ((b >> 16) & 255) << 16;
w3_t = uint_to_hex_lower8 ((b >> 8) & 255) << 0
| uint_to_hex_lower8 ((b >> 0) & 255) << 16;
w4_t = uint_to_hex_lower8 ((c >> 24) & 255) << 0
| uint_to_hex_lower8 ((c >> 16) & 255) << 16;
w5_t = uint_to_hex_lower8 ((c >> 8) & 255) << 0
| uint_to_hex_lower8 ((c >> 0) & 255) << 16;
w6_t = uint_to_hex_lower8 ((d >> 24) & 255) << 0
| uint_to_hex_lower8 ((d >> 16) & 255) << 16;
w7_t = uint_to_hex_lower8 ((d >> 8) & 255) << 0
| uint_to_hex_lower8 ((d >> 0) & 255) << 16;
w8_t = uint_to_hex_lower8 ((e >> 24) & 255) << 0
| uint_to_hex_lower8 ((e >> 16) & 255) << 16;
w9_t = uint_to_hex_lower8 ((e >> 8) & 255) << 0
| uint_to_hex_lower8 ((e >> 0) & 255) << 16;
// 0x80 directly after the 40 text bytes, then zero words; word 14 holds the
// text length in bits (40 * 8).
wa_t = 0x80;
wb_t = 0;
wc_t = 0;
wd_t = 0;
we_t = 40 * 8;
wf_t = 0;
a = MD5M_A;
b = MD5M_B;
c = MD5M_C;
d = MD5M_D;
// e is cleared here and is not passed to any MD5_STEP below.
e = 0;
// 64 MD5_STEP invocations: 16 each with MD5_Fo, MD5_Go, MD5_H1/MD5_H2
// (alternating) and MD5_I.
MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03);
MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13);
// t is not referenced by name anywhere else in this function.
// NOTE(review): presumably scratch storage used inside MD5_H1 / MD5_H2 - confirm
// before removing.
u32x t;
MD5_STEP (MD5_H1, a, b, c, d, w5_t, MD5C20, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w8_t, MD5C21, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, wb_t, MD5C22, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, we_t, MD5C23, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w1_t, MD5C24, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w4_t, MD5C25, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w7_t, MD5C26, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, wa_t, MD5C27, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, wd_t, MD5C28, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w0_t, MD5C29, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w3_t, MD5C2a, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w6_t, MD5C2b, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w9_t, MD5C2c, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, wc_t, MD5C2d, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, wf_t, MD5C2e, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w2_t, MD5C2f, MD5S23);
MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30);
// The step above is the last one whose target is a, so a can already be
// tested against search[0]; on a miss the remaining three steps are skipped.
// NOTE(review): presumably MATCHES_NONE_VS is true when no vector lane equals
// the scalar - confirm against its definition.
if (MATCHES_NONE_VS (a, search[0])) continue;
MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33);
// The MD5 initial values are not added back to a..d before the compare.
// NOTE(review): presumably the stored target digest is adjusted accordingly on
// the host side - confirm with the module.
COMPARE_S_SIMD (a, d, c, b);
}
}
// m04430_s08: kernel entry point with an empty body; it performs no work.
// NOTE(review): presumably only kept so that the _s08 symbol exists next to
// m04430_s04 - confirm against the host-side kernel loader.
KERNEL_FQ void m04430_s08 (KERN_ATTR_BASIC ())
{
}
// m04430_s16: kernel entry point with an empty body; it performs no work.
// NOTE(review): presumably only kept so that the _s16 symbol exists next to
// m04430_s04 - confirm against the host-side kernel loader.
KERNEL_FQ void m04430_s16 (KERN_ATTR_BASIC ())
{
}

@ -0,0 +1,265 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
//#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include M2S(INCLUDE_PATH/inc_vendor.h)
#include M2S(INCLUDE_PATH/inc_types.h)
#include M2S(INCLUDE_PATH/inc_platform.cl)
#include M2S(INCLUDE_PATH/inc_common.cl)
#include M2S(INCLUDE_PATH/inc_scalar.cl)
#include M2S(INCLUDE_PATH/inc_hash_md5.cl)
#include M2S(INCLUDE_PATH/inc_hash_sha1.cl)
#endif
// uint_to_hex_lower8(i): looks up l_bin2asc[] with i (VECT_SIZE == 1) or with
// every vector component .s0, .s1, ... of i (VECT_SIZE 2/4/8/16) and packs the
// looked-up values with make_u32x ().
// The macro refers to l_bin2asc by name, so an array of that name has to be in
// scope wherever the macro is expanded. No definition exists for any other
// VECT_SIZE value.
#if VECT_SIZE == 1
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
#elif VECT_SIZE == 2
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
#elif VECT_SIZE == 4
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
#elif VECT_SIZE == 8
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
#elif VECT_SIZE == 16
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
#endif
// m04430_mxx: for every combinator entry, finishes SHA-1 over
// salt | pws word | combs word, hex-encodes the 20-byte digest as 40 lowercase
// ASCII characters, hashes that text with MD5 and hands the four result words
// (in DGST_R0..DGST_R3 order) to COMPARE_M_SCALAR.
KERNEL_FQ void m04430_mxx (KERN_ATTR_BASIC ())
{
  /**
   * work-item ids
   */

  const u64 gid = get_global_id (0);
  const u64 lid = get_local_id (0);
  const u64 lsz = get_local_size (0);

  /**
   * bin2asc table: entry for a byte holds the hex digit of its high nibble in
   * bits 0..7 and the hex digit of its low nibble in bits 8..15
   */

  LOCAL_VK u32 l_bin2asc[256];

  for (u32 byte = lid; byte < 256; byte += lsz)
  {
    const u32 lo_nibble = (byte >> 0) & 15;
    const u32 hi_nibble = (byte >> 4) & 15;

    const u32 lo_char = (lo_nibble < 10) ? '0' + lo_nibble : 'a' - 10 + lo_nibble;
    const u32 hi_char = (hi_nibble < 10) ? '0' + hi_nibble : 'a' - 10 + hi_nibble;

    l_bin2asc[byte] = (lo_char << 8) | (hi_char << 0);
  }

  // runs before the gid check, i.e. also for work-items that return below
  SYNC_THREADS ();

  if (gid >= GID_CNT) return;

  /**
   * sha1 state common to all candidates: salt, then this work-item's pws word
   */

  sha1_ctx_t sha1_base;

  sha1_init (&sha1_base);

  sha1_update_global_swap (&sha1_base, salt_bufs[SALT_POS_HOST].salt_buf, salt_bufs[SALT_POS_HOST].salt_len);
  sha1_update_global_swap (&sha1_base, pws[gid].i, pws[gid].pw_len);

  /**
   * loop over the combinator entries
   */

  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
  {
    // continue from a copy of the shared state with the combs word and finalize

    sha1_ctx_t sha1_ctx = sha1_base;

    sha1_update_global_swap (&sha1_ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);

    sha1_final (&sha1_ctx);

    const u32 sha1_h0 = sha1_ctx.h[0];
    const u32 sha1_h1 = sha1_ctx.h[1];
    const u32 sha1_h2 = sha1_ctx.h[2];
    const u32 sha1_h3 = sha1_ctx.h[3];
    const u32 sha1_h4 = sha1_ctx.h[4];

    // 40 hex characters: every word packs the table entries of two digest
    // bytes (most significant byte first), the second one shifted up by 16

    u32 w0[4];
    u32 w1[4];
    u32 w2[4];
    u32 w3[4];

    w0[0] = uint_to_hex_lower8 ((sha1_h0 >> 24) & 255) | (uint_to_hex_lower8 ((sha1_h0 >> 16) & 255) << 16);
    w0[1] = uint_to_hex_lower8 ((sha1_h0 >>  8) & 255) | (uint_to_hex_lower8 ( sha1_h0        & 255) << 16);
    w0[2] = uint_to_hex_lower8 ((sha1_h1 >> 24) & 255) | (uint_to_hex_lower8 ((sha1_h1 >> 16) & 255) << 16);
    w0[3] = uint_to_hex_lower8 ((sha1_h1 >>  8) & 255) | (uint_to_hex_lower8 ( sha1_h1        & 255) << 16);
    w1[0] = uint_to_hex_lower8 ((sha1_h2 >> 24) & 255) | (uint_to_hex_lower8 ((sha1_h2 >> 16) & 255) << 16);
    w1[1] = uint_to_hex_lower8 ((sha1_h2 >>  8) & 255) | (uint_to_hex_lower8 ( sha1_h2        & 255) << 16);
    w1[2] = uint_to_hex_lower8 ((sha1_h3 >> 24) & 255) | (uint_to_hex_lower8 ((sha1_h3 >> 16) & 255) << 16);
    w1[3] = uint_to_hex_lower8 ((sha1_h3 >>  8) & 255) | (uint_to_hex_lower8 ( sha1_h3        & 255) << 16);
    w2[0] = uint_to_hex_lower8 ((sha1_h4 >> 24) & 255) | (uint_to_hex_lower8 ((sha1_h4 >> 16) & 255) << 16);
    w2[1] = uint_to_hex_lower8 ((sha1_h4 >>  8) & 255) | (uint_to_hex_lower8 ( sha1_h4        & 255) << 16);

    // remaining words of the 64-byte block are zero

    w2[2] = 0;
    w2[3] = 0;
    w3[0] = 0;
    w3[1] = 0;
    w3[2] = 0;
    w3[3] = 0;

    // md5 over the 40 text bytes

    md5_ctx_t md5_ctx;

    md5_init (&md5_ctx);

    md5_update_64 (&md5_ctx, w0, w1, w2, w3, 40);

    md5_final (&md5_ctx);

    const u32 r0 = md5_ctx.h[DGST_R0];
    const u32 r1 = md5_ctx.h[DGST_R1];
    const u32 r2 = md5_ctx.h[DGST_R2];
    const u32 r3 = md5_ctx.h[DGST_R3];

    COMPARE_M_SCALAR (r0, r1, r2, r3);
  }
}
// m04430_sxx: for every combinator entry, finishes SHA-1 over
// salt | pws word | combs word, hex-encodes the 20-byte digest as 40 lowercase
// ASCII characters, hashes that text with MD5 and hands the four result words
// (in DGST_R0..DGST_R3 order) to COMPARE_S_SCALAR, with the target digest
// available in search[].
KERNEL_FQ void m04430_sxx (KERN_ATTR_BASIC ())
{
  /**
   * modifier
   */

  const u64 gid = get_global_id (0);
  const u64 lid = get_local_id (0);
  const u64 lsz = get_local_size (0);

  /**
   * bin2asc table: entry i holds the hex digit of the high nibble of i in
   * bits 0..7 and the hex digit of the low nibble in bits 8..15
   */

  LOCAL_VK u32 l_bin2asc[256];

  for (u32 i = lid; i < 256; i += lsz)
  {
    const u32 i0 = (i >> 0) & 15;
    const u32 i1 = (i >> 4) & 15;

    l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
                 | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
  }

  // runs before the gid check, i.e. also for work-items that return below
  SYNC_THREADS ();

  if (gid >= GID_CNT) return;

  /**
   * digest
   */

  // not referenced by name below; NOTE(review): presumably read by
  // COMPARE_S_SCALAR - confirm before renaming or removing
  const u32 search[4] =
  {
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
  };

  /**
   * base: the salt and this work-item's pws word are absorbed straight from
   * global memory (no private, byte-swapped copy of the salt is needed, as
   * nothing below would read it)
   */

  sha1_ctx_t ctx0;

  sha1_init (&ctx0);

  sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS_HOST].salt_buf, salt_bufs[SALT_POS_HOST].salt_len);
  sha1_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len);

  /**
   * loop
   */

  u32 w0[4];
  u32 w1[4];
  u32 w2[4];
  u32 w3[4];

  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
  {
    // continue from a copy of the shared state with the combs word and finalize

    sha1_ctx_t ctx1 = ctx0;

    sha1_update_global_swap (&ctx1, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);

    sha1_final (&ctx1);

    const u32 a = ctx1.h[0];
    const u32 b = ctx1.h[1];
    const u32 c = ctx1.h[2];
    const u32 d = ctx1.h[3];
    const u32 e = ctx1.h[4];

    md5_ctx_t ctx;

    md5_init (&ctx);

    // 40 hex characters: every word packs the table entries of two digest
    // bytes (most significant byte first), the second one shifted up by 16

    w0[0] = uint_to_hex_lower8 ((a >> 24) & 255) <<  0
          | uint_to_hex_lower8 ((a >> 16) & 255) << 16;
    w0[1] = uint_to_hex_lower8 ((a >>  8) & 255) <<  0
          | uint_to_hex_lower8 ((a >>  0) & 255) << 16;
    w0[2] = uint_to_hex_lower8 ((b >> 24) & 255) <<  0
          | uint_to_hex_lower8 ((b >> 16) & 255) << 16;
    w0[3] = uint_to_hex_lower8 ((b >>  8) & 255) <<  0
          | uint_to_hex_lower8 ((b >>  0) & 255) << 16;
    w1[0] = uint_to_hex_lower8 ((c >> 24) & 255) <<  0
          | uint_to_hex_lower8 ((c >> 16) & 255) << 16;
    w1[1] = uint_to_hex_lower8 ((c >>  8) & 255) <<  0
          | uint_to_hex_lower8 ((c >>  0) & 255) << 16;
    w1[2] = uint_to_hex_lower8 ((d >> 24) & 255) <<  0
          | uint_to_hex_lower8 ((d >> 16) & 255) << 16;
    w1[3] = uint_to_hex_lower8 ((d >>  8) & 255) <<  0
          | uint_to_hex_lower8 ((d >>  0) & 255) << 16;
    w2[0] = uint_to_hex_lower8 ((e >> 24) & 255) <<  0
          | uint_to_hex_lower8 ((e >> 16) & 255) << 16;
    w2[1] = uint_to_hex_lower8 ((e >>  8) & 255) <<  0
          | uint_to_hex_lower8 ((e >>  0) & 255) << 16;

    // remaining words of the 64-byte block are zero

    w2[2] = 0;
    w2[3] = 0;
    w3[0] = 0;
    w3[1] = 0;
    w3[2] = 0;
    w3[3] = 0;

    // md5 over the 40 text bytes

    md5_update_64 (&ctx, w0, w1, w2, w3, 40);

    md5_final (&ctx);

    const u32 r0 = ctx.h[DGST_R0];
    const u32 r1 = ctx.h[DGST_R1];
    const u32 r2 = ctx.h[DGST_R2];
    const u32 r3 = ctx.h[DGST_R3];

    COMPARE_S_SCALAR (r0, r1, r2, r3);
  }
}

File diff suppressed because it is too large Load Diff

@ -0,0 +1,290 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include M2S(INCLUDE_PATH/inc_vendor.h)
#include M2S(INCLUDE_PATH/inc_types.h)
#include M2S(INCLUDE_PATH/inc_platform.cl)
#include M2S(INCLUDE_PATH/inc_common.cl)
#include M2S(INCLUDE_PATH/inc_simd.cl)
#include M2S(INCLUDE_PATH/inc_hash_md5.cl)
#include M2S(INCLUDE_PATH/inc_hash_sha1.cl)
#endif
// uint_to_hex_lower8(i): looks up l_bin2asc[] with i (VECT_SIZE == 1) or with
// every vector component .s0, .s1, ... of i (VECT_SIZE 2/4/8/16) and packs the
// looked-up values with make_u32x ().
// The macro refers to l_bin2asc by name, so an array of that name has to be in
// scope wherever the macro is expanded. No definition exists for any other
// VECT_SIZE value.
#if VECT_SIZE == 1
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
#elif VECT_SIZE == 2
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
#elif VECT_SIZE == 4
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
#elif VECT_SIZE == 8
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
#elif VECT_SIZE == 16
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
#endif
// m04430_mxx: for every group of VECT_SIZE entries of words_buf_r, finishes
// SHA-1 over salt | candidate, hex-encodes the 20-byte digest as 40 lowercase
// ASCII characters, hashes that text with MD5 and hands the four result
// vectors (in DGST_R0..DGST_R3 order) to COMPARE_M_SIMD.
KERNEL_FQ void m04430_mxx (KERN_ATTR_VECTOR ())
{
  /**
   * work-item ids
   */

  const u64 gid = get_global_id (0);
  const u64 lid = get_local_id (0);
  const u64 lsz = get_local_size (0);

  /**
   * bin2asc table: entry for a byte holds the hex digit of its high nibble in
   * bits 0..7 and the hex digit of its low nibble in bits 8..15
   */

  LOCAL_VK u32 l_bin2asc[256];

  for (u32 byte = lid; byte < 256; byte += lsz)
  {
    const u32 lo_nibble = (byte >> 0) & 15;
    const u32 hi_nibble = (byte >> 4) & 15;

    const u32 lo_char = (lo_nibble < 10) ? '0' + lo_nibble : 'a' - 10 + lo_nibble;
    const u32 hi_char = (hi_nibble < 10) ? '0' + hi_nibble : 'a' - 10 + hi_nibble;

    l_bin2asc[byte] = (lo_char << 8) | (hi_char << 0);
  }

  // runs before the gid check, i.e. also for work-items that return below
  SYNC_THREADS ();

  if (gid >= GID_CNT) return;

  /**
   * base: copy the words covered by pw_len and absorb the salt into a scalar
   * sha1 state that every candidate starts from
   */

  const u32 pw_len = pws[gid].pw_len;

  u32x pw_words[64] = { 0 };

  for (u32 idx = 0; (idx * 4) < pw_len; idx++)
  {
    pw_words[idx] = pws[gid].i[idx];
  }

  sha1_ctx_t salt_ctx;

  sha1_init (&salt_ctx);

  sha1_update_global_swap (&salt_ctx, salt_bufs[SALT_POS_HOST].salt_buf, salt_bufs[SALT_POS_HOST].salt_len);

  /**
   * loop
   */

  // word 0 as stored in pws; it is combined with words_buf_r on every pass
  const u32x w0l = pw_words[0];

  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
  {
    pw_words[0] = w0l | words_buf_r[il_pos / VECT_SIZE];

    // sha1: start from the salted scalar state, add the candidate, finalize

    sha1_ctx_vector_t sha1_vec;

    sha1_init_vector_from_scalar (&sha1_vec, &salt_ctx);

    sha1_update_vector (&sha1_vec, pw_words, pw_len);

    sha1_final_vector (&sha1_vec);

    const u32x sha1_h0 = sha1_vec.h[0];
    const u32x sha1_h1 = sha1_vec.h[1];
    const u32x sha1_h2 = sha1_vec.h[2];
    const u32x sha1_h3 = sha1_vec.h[3];
    const u32x sha1_h4 = sha1_vec.h[4];

    // 40 hex characters: every word packs the table entries of two digest
    // bytes (most significant byte first), the second one shifted up by 16

    u32x _w0[4];
    u32x _w1[4];
    u32x _w2[4];
    u32x _w3[4];

    _w0[0] = uint_to_hex_lower8 ((sha1_h0 >> 24) & 255) | (uint_to_hex_lower8 ((sha1_h0 >> 16) & 255) << 16);
    _w0[1] = uint_to_hex_lower8 ((sha1_h0 >>  8) & 255) | (uint_to_hex_lower8 ( sha1_h0        & 255) << 16);
    _w0[2] = uint_to_hex_lower8 ((sha1_h1 >> 24) & 255) | (uint_to_hex_lower8 ((sha1_h1 >> 16) & 255) << 16);
    _w0[3] = uint_to_hex_lower8 ((sha1_h1 >>  8) & 255) | (uint_to_hex_lower8 ( sha1_h1        & 255) << 16);
    _w1[0] = uint_to_hex_lower8 ((sha1_h2 >> 24) & 255) | (uint_to_hex_lower8 ((sha1_h2 >> 16) & 255) << 16);
    _w1[1] = uint_to_hex_lower8 ((sha1_h2 >>  8) & 255) | (uint_to_hex_lower8 ( sha1_h2        & 255) << 16);
    _w1[2] = uint_to_hex_lower8 ((sha1_h3 >> 24) & 255) | (uint_to_hex_lower8 ((sha1_h3 >> 16) & 255) << 16);
    _w1[3] = uint_to_hex_lower8 ((sha1_h3 >>  8) & 255) | (uint_to_hex_lower8 ( sha1_h3        & 255) << 16);
    _w2[0] = uint_to_hex_lower8 ((sha1_h4 >> 24) & 255) | (uint_to_hex_lower8 ((sha1_h4 >> 16) & 255) << 16);
    _w2[1] = uint_to_hex_lower8 ((sha1_h4 >>  8) & 255) | (uint_to_hex_lower8 ( sha1_h4        & 255) << 16);

    // remaining words of the 64-byte block are zero

    _w2[2] = 0;
    _w2[3] = 0;
    _w3[0] = 0;
    _w3[1] = 0;
    _w3[2] = 0;
    _w3[3] = 0;

    // md5 over the 40 text bytes

    md5_ctx_vector_t md5_vec;

    md5_init_vector (&md5_vec);

    md5_update_vector_64 (&md5_vec, _w0, _w1, _w2, _w3, 40);

    md5_final_vector (&md5_vec);

    const u32x r0 = md5_vec.h[DGST_R0];
    const u32x r1 = md5_vec.h[DGST_R1];
    const u32x r2 = md5_vec.h[DGST_R2];
    const u32x r3 = md5_vec.h[DGST_R3];

    COMPARE_M_SIMD (r0, r1, r2, r3);
  }
}
// Single-hash kernel for mode 04430. For every candidate it computes
// md5 (lowercase_hex (sha1 (salt . password))) and passes the selected MD5
// state words to COMPARE_S_SIMD.
KERNEL_FQ void m04430_sxx (KERN_ATTR_VECTOR ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
// l_bin2asc[b] packs the two lowercase hex characters of byte value b into 16 bits:
// the character of the high nibble in bits 0..7, the character of the low nibble in bits 8..15.
// Each work-item fills the entries lid, lid + lsz, lid + 2 * lsz, ...
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
}
// NOTE(review): presumably a work-group barrier that makes the table complete before
// anyone reads it -- confirm against the SYNC_THREADS definition. The gid bounds check
// is deliberately placed after it, so out-of-range work-items still help fill the table.
SYNC_THREADS ();
if (gid >= GID_CNT) return;
/**
* digest
*/
// the four target digest words; not referenced by name in this body --
// presumably consumed by the COMPARE_S_SIMD macro (TODO confirm)
const u32 search[4] =
{
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
};
/**
* base
*/
// copy the password words (one u32 per 4 password bytes, rounded up) into a zeroed buffer
const u32 pw_len = pws[gid].pw_len;
u32x w[64] = { 0 };
for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
{
w[idx] = pws[gid].i[idx];
}
// the salt is hashed first and only once; every candidate below starts from this SHA-1 state
sha1_ctx_t ctx1;
sha1_init (&ctx1);
sha1_update_global_swap (&ctx1, salt_bufs[SALT_POS_HOST].salt_buf, salt_bufs[SALT_POS_HOST].salt_len);
/**
* loop
*/
u32x _w0[4];
u32x _w1[4];
u32x _w2[4];
u32x _w3[4];
// w0l is the fixed part of the first password word; words_buf_r supplies the part that
// changes per inner-loop position (assumes both parts occupy disjoint bits -- TODO confirm with host code)
u32x w0l = w[0];
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
{
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32x w0 = w0l | w0r;
w[0] = w0;
// inner hash: continue the salt-primed SHA-1 state with the password
sha1_ctx_vector_t ctx0;
sha1_init_vector_from_scalar (&ctx0, &ctx1);
sha1_update_vector (&ctx0, w, pw_len);
sha1_final_vector (&ctx0);
const u32x a = ctx0.h[0];
const u32x b = ctx0.h[1];
const u32x c = ctx0.h[2];
const u32x d = ctx0.h[3];
const u32x e = ctx0.h[4];
// outer hash: MD5 over the 40 hex characters of the SHA-1 digest.
// Each u32 below holds four hex characters, i.e. two digest bytes taken from the
// most significant byte of the SHA-1 word downwards.
md5_ctx_vector_t ctx;
md5_init_vector (&ctx);
_w0[0] = uint_to_hex_lower8 ((a >> 24) & 255) << 0
| uint_to_hex_lower8 ((a >> 16) & 255) << 16;
_w0[1] = uint_to_hex_lower8 ((a >> 8) & 255) << 0
| uint_to_hex_lower8 ((a >> 0) & 255) << 16;
_w0[2] = uint_to_hex_lower8 ((b >> 24) & 255) << 0
| uint_to_hex_lower8 ((b >> 16) & 255) << 16;
_w0[3] = uint_to_hex_lower8 ((b >> 8) & 255) << 0
| uint_to_hex_lower8 ((b >> 0) & 255) << 16;
_w1[0] = uint_to_hex_lower8 ((c >> 24) & 255) << 0
| uint_to_hex_lower8 ((c >> 16) & 255) << 16;
_w1[1] = uint_to_hex_lower8 ((c >> 8) & 255) << 0
| uint_to_hex_lower8 ((c >> 0) & 255) << 16;
_w1[2] = uint_to_hex_lower8 ((d >> 24) & 255) << 0
| uint_to_hex_lower8 ((d >> 16) & 255) << 16;
_w1[3] = uint_to_hex_lower8 ((d >> 8) & 255) << 0
| uint_to_hex_lower8 ((d >> 0) & 255) << 16;
_w2[0] = uint_to_hex_lower8 ((e >> 24) & 255) << 0
| uint_to_hex_lower8 ((e >> 16) & 255) << 16;
_w2[1] = uint_to_hex_lower8 ((e >> 8) & 255) << 0
| uint_to_hex_lower8 ((e >> 0) & 255) << 16;
// the remaining 24 bytes of the 64-byte block are zero; only 40 bytes are hashed
_w2[2] = 0;
_w2[3] = 0;
_w3[0] = 0;
_w3[1] = 0;
_w3[2] = 0;
_w3[3] = 0;
md5_update_vector_64 (&ctx, _w0, _w1, _w2, _w3, 40);
md5_final_vector (&ctx);
// DGST_R0..DGST_R3 select which MD5 state words take part in the comparison
const u32x r0 = ctx.h[DGST_R0];
const u32x r1 = ctx.h[DGST_R1];
const u32x r2 = ctx.h[DGST_R2];
const u32x r3 = ctx.h[DGST_R3];
COMPARE_S_SIMD (r0, r1, r2, r3);
}
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -436,7 +436,7 @@ KERNEL_FQ void m08800_comp (KERN_ATTR_TMPS_ESALT (androidfde_tmp_t, androidfde_t
u32 r[16];
// not needed because of cbc mode -- implementation flaw !!. first 16 byte are not interessting
// not needed because of cbc mode -- implementation flaw !!. first 16 bytes are not interesting
r[0] = 0;
r[1] = 0;

@ -327,6 +327,9 @@ KERNEL_FQ void m08900_init (KERN_ATTR_TMPS (scrypt_tmp_t))
#if defined IS_CUDA || defined IS_HIP
const uint4 tmp0 = make_uint4 (digest[0], digest[1], digest[2], digest[3]);
const uint4 tmp1 = make_uint4 (digest[4], digest[5], digest[6], digest[7]);
#elif defined IS_METAL
const uint4 tmp0 = uint4 (digest[0], digest[1], digest[2], digest[3]);
const uint4 tmp1 = uint4 (digest[4], digest[5], digest[6], digest[7]);
#else
const uint4 tmp0 = (uint4) (digest[0], digest[1], digest[2], digest[3]);
const uint4 tmp1 = (uint4) (digest[4], digest[5], digest[6], digest[7]);
@ -357,6 +360,11 @@ KERNEL_FQ void m08900_init (KERN_ATTR_TMPS (scrypt_tmp_t))
X[1] = make_uint4 (T[1].x, T[2].y, T[3].z, T[0].w);
X[2] = make_uint4 (T[2].x, T[3].y, T[0].z, T[1].w);
X[3] = make_uint4 (T[3].x, T[0].y, T[1].z, T[2].w);
#elif defined IS_METAL
X[0] = uint4 (T[0].x, T[1].y, T[2].z, T[3].w);
X[1] = uint4 (T[1].x, T[2].y, T[3].z, T[0].w);
X[2] = uint4 (T[2].x, T[3].y, T[0].z, T[1].w);
X[3] = uint4 (T[3].x, T[0].y, T[1].z, T[2].w);
#else
X[0] = (uint4) (T[0].x, T[1].y, T[2].z, T[3].w);
X[1] = (uint4) (T[1].x, T[2].y, T[3].z, T[0].w);
@ -467,6 +475,11 @@ KERNEL_FQ void m08900_comp (KERN_ATTR_TMPS (scrypt_tmp_t))
T[1] = make_uint4 (X[1].x, X[0].y, X[3].z, X[2].w);
T[2] = make_uint4 (X[2].x, X[1].y, X[0].z, X[3].w);
T[3] = make_uint4 (X[3].x, X[2].y, X[1].z, X[0].w);
#elif defined IS_METAL
T[0] = uint4 (X[0].x, X[3].y, X[2].z, X[1].w);
T[1] = uint4 (X[1].x, X[0].y, X[3].z, X[2].w);
T[2] = uint4 (X[2].x, X[1].y, X[0].z, X[3].w);
T[3] = uint4 (X[3].x, X[2].y, X[1].z, X[0].w);
#else
T[0] = (uint4) (X[0].x, X[3].y, X[2].z, X[1].w);
T[1] = (uint4) (X[1].x, X[0].y, X[3].z, X[2].w);

@ -340,7 +340,7 @@ DECLSPEC void SET_KEY32 (LOCAL_AS u32 *S, const u64 key, const u32 val)
#else
// access pattern: linear access with S offset already set to right offset based on thread ID saving it from compuation
// access pattern: linear access with S offset already set to right offset based on thread ID saving it from computation
// makes sense if there are not thread ID's (for instance on CPU)
DECLSPEC inline u32 GET_KEY32 (LOCAL_AS u32 *S, const u64 key)

@ -20,22 +20,24 @@
typedef struct pdf
{
int V;
int R;
int P;
int V;
int R;
int P;
int enc_md;
int enc_md;
u32 id_buf[8];
u32 u_buf[32];
u32 o_buf[32];
u32 id_buf[8];
u32 u_buf[32];
u32 o_buf[32];
int id_len;
int o_len;
int u_len;
int id_len;
int o_len;
int u_len;
u32 rc4key[2];
u32 rc4data[2];
u32 rc4key[2];
u32 rc4data[2];
int P_minus;
} pdf_t;

@ -18,22 +18,24 @@
typedef struct pdf
{
int V;
int R;
int P;
int V;
int R;
int P;
int enc_md;
int enc_md;
u32 id_buf[8];
u32 u_buf[32];
u32 o_buf[32];
u32 id_buf[8];
u32 u_buf[32];
u32 o_buf[32];
int id_len;
int o_len;
int u_len;
int id_len;
int o_len;
int u_len;
u32 rc4key[2];
u32 rc4data[2];
u32 rc4key[2];
u32 rc4data[2];
int P_minus;
} pdf_t;

@ -18,22 +18,24 @@
typedef struct pdf
{
int V;
int R;
int P;
int V;
int R;
int P;
int enc_md;
int enc_md;
u32 id_buf[8];
u32 u_buf[32];
u32 o_buf[32];
u32 id_buf[8];
u32 u_buf[32];
u32 o_buf[32];
int id_len;
int o_len;
int u_len;
int id_len;
int o_len;
int u_len;
u32 rc4key[2];
u32 rc4data[2];
u32 rc4key[2];
u32 rc4data[2];
int P_minus;
} pdf_t;

@ -20,22 +20,24 @@
typedef struct pdf
{
int V;
int R;
int P;
int V;
int R;
int P;
int enc_md;
int enc_md;
u32 id_buf[8];
u32 u_buf[32];
u32 o_buf[32];
u32 id_buf[8];
u32 u_buf[32];
u32 o_buf[32];
int id_len;
int o_len;
int u_len;
int id_len;
int o_len;
int u_len;
u32 rc4key[2];
u32 rc4data[2];
u32 rc4key[2];
u32 rc4data[2];
int P_minus;
} pdf_t;

@ -18,22 +18,24 @@
typedef struct pdf
{
int V;
int R;
int P;
int V;
int R;
int P;
int enc_md;
int enc_md;
u32 id_buf[8];
u32 u_buf[32];
u32 o_buf[32];
u32 id_buf[8];
u32 u_buf[32];
u32 o_buf[32];
int id_len;
int o_len;
int u_len;
int id_len;
int o_len;
int u_len;
u32 rc4key[2];
u32 rc4data[2];
u32 rc4key[2];
u32 rc4data[2];
int P_minus;
} pdf_t;

@ -18,22 +18,24 @@
typedef struct pdf
{
int V;
int R;
int P;
int V;
int R;
int P;
int enc_md;
int enc_md;
u32 id_buf[8];
u32 u_buf[32];
u32 o_buf[32];
u32 id_buf[8];
u32 u_buf[32];
u32 o_buf[32];
int id_len;
int o_len;
int u_len;
int id_len;
int o_len;
int u_len;
u32 rc4key[2];
u32 rc4data[2];
u32 rc4key[2];
u32 rc4data[2];
int P_minus;
} pdf_t;

@ -30,22 +30,24 @@ CONSTANT_VK u32a padding[8] =
typedef struct pdf
{
int V;
int R;
int P;
int V;
int R;
int P;
int enc_md;
int enc_md;
u32 id_buf[8];
u32 u_buf[32];
u32 o_buf[32];
u32 id_buf[8];
u32 u_buf[32];
u32 o_buf[32];
int id_len;
int o_len;
int u_len;
int id_len;
int o_len;
int u_len;
u32 rc4key[2];
u32 rc4data[2];
u32 rc4key[2];
u32 rc4data[2];
int P_minus;
} pdf_t;

@ -28,22 +28,24 @@ CONSTANT_VK u32a padding[8] =
typedef struct pdf
{
int V;
int R;
int P;
int V;
int R;
int P;
int enc_md;
int enc_md;
u32 id_buf[8];
u32 u_buf[32];
u32 o_buf[32];
u32 id_buf[8];
u32 u_buf[32];
u32 o_buf[32];
int id_len;
int o_len;
int u_len;
int id_len;
int o_len;
int u_len;
u32 rc4key[2];
u32 rc4data[2];
u32 rc4key[2];
u32 rc4data[2];
int P_minus;
} pdf_t;

@ -28,22 +28,24 @@ CONSTANT_VK u32a padding[8] =
typedef struct pdf
{
int V;
int R;
int P;
int V;
int R;
int P;
int enc_md;
int enc_md;
u32 id_buf[8];
u32 u_buf[32];
u32 o_buf[32];
u32 id_buf[8];
u32 u_buf[32];
u32 o_buf[32];
int id_len;
int o_len;
int u_len;
int id_len;
int o_len;
int u_len;
u32 rc4key[2];
u32 rc4data[2];
u32 rc4key[2];
u32 rc4data[2];
int P_minus;
} pdf_t;

@ -17,22 +17,24 @@
typedef struct pdf
{
int V;
int R;
int P;
int V;
int R;
int P;
int enc_md;
int enc_md;
u32 id_buf[8];
u32 u_buf[32];
u32 o_buf[32];
u32 id_buf[8];
u32 u_buf[32];
u32 o_buf[32];
int id_len;
int o_len;
int u_len;
int id_len;
int o_len;
int u_len;
u32 rc4key[2];
u32 rc4data[2];
u32 rc4key[2];
u32 rc4data[2];
int P_minus;
} pdf_t;

@ -19,22 +19,24 @@
typedef struct pdf
{
int V;
int R;
int P;
int V;
int R;
int P;
int enc_md;
int enc_md;
u32 id_buf[8];
u32 u_buf[32];
u32 o_buf[32];
u32 id_buf[8];
u32 u_buf[32];
u32 o_buf[32];
int id_len;
int o_len;
int u_len;
int id_len;
int o_len;
int u_len;
u32 rc4key[2];
u32 rc4data[2];
u32 rc4key[2];
u32 rc4data[2];
int P_minus;
} pdf_t;
@ -321,7 +323,7 @@ DECLSPEC u32 do_round (LOCAL_AS u32 *sc, PRIVATE_AS const u32 *pw, const u32 pw_
make_sc (sc, pw, pw_len, ctx->dgst32, ctx->dgst_len);
// make sure pwbl_len is calculcated before it gets changed
// make sure pwbl_len is calculated before it gets changed
const u32 pwbl_len = pw_len + ctx->dgst_len;

@ -25,22 +25,24 @@
typedef struct pdf
{
int V;
int R;
int P;
int V;
int R;
int P;
int enc_md;
int enc_md;
u32 id_buf[8];
u32 u_buf[32];
u32 o_buf[32];
u32 id_buf[8];
u32 u_buf[32];
u32 o_buf[32];
int id_len;
int o_len;
int u_len;
int id_len;
int o_len;
int u_len;
u32 rc4key[2];
u32 rc4data[2];
u32 rc4key[2];
u32 rc4data[2];
int P_minus;
} pdf_t;

@ -357,54 +357,98 @@ KERNEL_FQ void m13772_init (KERN_ATTR_TMPS_ESALT (vc64_sbog_tmp_t, vc_t))
streebog512_hmac_update_global_swap (&streebog512_hmac_ctx, salt_bufs[SALT_POS_HOST].salt_buf, 64);
for (u32 i = 0, j = 1; i < 16; i += 8, j += 1)
{
streebog512_hmac_ctx_t streebog512_hmac_ctx2 = streebog512_hmac_ctx;
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
w0[0] = j;
w0[1] = 0;
w0[2] = 0;
w0[3] = 0;
w1[0] = 0;
w1[1] = 0;
w1[2] = 0;
w1[3] = 0;
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
streebog512_hmac_update_64 (&streebog512_hmac_ctx2, w0, w1, w2, w3, 4);
streebog512_hmac_final (&streebog512_hmac_ctx2);
tmps[gid].dgst[i + 0] = streebog512_hmac_ctx2.opad.h[0];
tmps[gid].dgst[i + 1] = streebog512_hmac_ctx2.opad.h[1];
tmps[gid].dgst[i + 2] = streebog512_hmac_ctx2.opad.h[2];
tmps[gid].dgst[i + 3] = streebog512_hmac_ctx2.opad.h[3];
tmps[gid].dgst[i + 4] = streebog512_hmac_ctx2.opad.h[4];
tmps[gid].dgst[i + 5] = streebog512_hmac_ctx2.opad.h[5];
tmps[gid].dgst[i + 6] = streebog512_hmac_ctx2.opad.h[6];
tmps[gid].dgst[i + 7] = streebog512_hmac_ctx2.opad.h[7];
tmps[gid].out[i + 0] = tmps[gid].dgst[i + 0];
tmps[gid].out[i + 1] = tmps[gid].dgst[i + 1];
tmps[gid].out[i + 2] = tmps[gid].dgst[i + 2];
tmps[gid].out[i + 3] = tmps[gid].dgst[i + 3];
tmps[gid].out[i + 4] = tmps[gid].dgst[i + 4];
tmps[gid].out[i + 5] = tmps[gid].dgst[i + 5];
tmps[gid].out[i + 6] = tmps[gid].dgst[i + 6];
tmps[gid].out[i + 7] = tmps[gid].dgst[i + 7];
}
u32 i = 0;
u32 j = 1;
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
streebog512_hmac_ctx_t streebog512_hmac_ctx_v1 = streebog512_hmac_ctx;
w0[0] = j;
w0[1] = 0;
w0[2] = 0;
w0[3] = 0;
w1[0] = 0;
w1[1] = 0;
w1[2] = 0;
w1[3] = 0;
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
streebog512_hmac_update_64 (&streebog512_hmac_ctx_v1, w0, w1, w2, w3, 4);
streebog512_hmac_final (&streebog512_hmac_ctx_v1);
tmps[gid].dgst[i + 0] = streebog512_hmac_ctx_v1.opad.h[0];
tmps[gid].dgst[i + 1] = streebog512_hmac_ctx_v1.opad.h[1];
tmps[gid].dgst[i + 2] = streebog512_hmac_ctx_v1.opad.h[2];
tmps[gid].dgst[i + 3] = streebog512_hmac_ctx_v1.opad.h[3];
tmps[gid].dgst[i + 4] = streebog512_hmac_ctx_v1.opad.h[4];
tmps[gid].dgst[i + 5] = streebog512_hmac_ctx_v1.opad.h[5];
tmps[gid].dgst[i + 6] = streebog512_hmac_ctx_v1.opad.h[6];
tmps[gid].dgst[i + 7] = streebog512_hmac_ctx_v1.opad.h[7];
tmps[gid].out[i + 0] = tmps[gid].dgst[i + 0];
tmps[gid].out[i + 1] = tmps[gid].dgst[i + 1];
tmps[gid].out[i + 2] = tmps[gid].dgst[i + 2];
tmps[gid].out[i + 3] = tmps[gid].dgst[i + 3];
tmps[gid].out[i + 4] = tmps[gid].dgst[i + 4];
tmps[gid].out[i + 5] = tmps[gid].dgst[i + 5];
tmps[gid].out[i + 6] = tmps[gid].dgst[i + 6];
tmps[gid].out[i + 7] = tmps[gid].dgst[i + 7];
i=8;
j=2;
streebog512_hmac_ctx_t streebog512_hmac_ctx_v2 = streebog512_hmac_ctx;
w0[0] = j;
w0[1] = 0;
w0[2] = 0;
w0[3] = 0;
w1[0] = 0;
w1[1] = 0;
w1[2] = 0;
w1[3] = 0;
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
streebog512_hmac_update_64 (&streebog512_hmac_ctx_v2, w0, w1, w2, w3, 4);
streebog512_hmac_final (&streebog512_hmac_ctx_v2);
tmps[gid].dgst[i + 0] = streebog512_hmac_ctx_v2.opad.h[0];
tmps[gid].dgst[i + 1] = streebog512_hmac_ctx_v2.opad.h[1];
tmps[gid].dgst[i + 2] = streebog512_hmac_ctx_v2.opad.h[2];
tmps[gid].dgst[i + 3] = streebog512_hmac_ctx_v2.opad.h[3];
tmps[gid].dgst[i + 4] = streebog512_hmac_ctx_v2.opad.h[4];
tmps[gid].dgst[i + 5] = streebog512_hmac_ctx_v2.opad.h[5];
tmps[gid].dgst[i + 6] = streebog512_hmac_ctx_v2.opad.h[6];
tmps[gid].dgst[i + 7] = streebog512_hmac_ctx_v2.opad.h[7];
tmps[gid].out[i + 0] = tmps[gid].dgst[i + 0];
tmps[gid].out[i + 1] = tmps[gid].dgst[i + 1];
tmps[gid].out[i + 2] = tmps[gid].dgst[i + 2];
tmps[gid].out[i + 3] = tmps[gid].dgst[i + 3];
tmps[gid].out[i + 4] = tmps[gid].dgst[i + 4];
tmps[gid].out[i + 5] = tmps[gid].dgst[i + 5];
tmps[gid].out[i + 6] = tmps[gid].dgst[i + 6];
tmps[gid].out[i + 7] = tmps[gid].dgst[i + 7];
}
KERNEL_FQ void m13772_loop (KERN_ATTR_TMPS_ESALT (vc64_sbog_tmp_t, vc_t))

@ -422,54 +422,142 @@ KERNEL_FQ void m13773_init (KERN_ATTR_TMPS_ESALT (vc64_sbog_tmp_t, vc_t))
streebog512_hmac_update_global_swap (&streebog512_hmac_ctx, salt_bufs[SALT_POS_HOST].salt_buf, 64);
for (u32 i = 0, j = 1; i < 24; i += 8, j += 1)
{
streebog512_hmac_ctx_t streebog512_hmac_ctx2 = streebog512_hmac_ctx;
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
w0[0] = j;
w0[1] = 0;
w0[2] = 0;
w0[3] = 0;
w1[0] = 0;
w1[1] = 0;
w1[2] = 0;
w1[3] = 0;
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
streebog512_hmac_update_64 (&streebog512_hmac_ctx2, w0, w1, w2, w3, 4);
streebog512_hmac_final (&streebog512_hmac_ctx2);
tmps[gid].dgst[i + 0] = streebog512_hmac_ctx2.opad.h[0];
tmps[gid].dgst[i + 1] = streebog512_hmac_ctx2.opad.h[1];
tmps[gid].dgst[i + 2] = streebog512_hmac_ctx2.opad.h[2];
tmps[gid].dgst[i + 3] = streebog512_hmac_ctx2.opad.h[3];
tmps[gid].dgst[i + 4] = streebog512_hmac_ctx2.opad.h[4];
tmps[gid].dgst[i + 5] = streebog512_hmac_ctx2.opad.h[5];
tmps[gid].dgst[i + 6] = streebog512_hmac_ctx2.opad.h[6];
tmps[gid].dgst[i + 7] = streebog512_hmac_ctx2.opad.h[7];
tmps[gid].out[i + 0] = tmps[gid].dgst[i + 0];
tmps[gid].out[i + 1] = tmps[gid].dgst[i + 1];
tmps[gid].out[i + 2] = tmps[gid].dgst[i + 2];
tmps[gid].out[i + 3] = tmps[gid].dgst[i + 3];
tmps[gid].out[i + 4] = tmps[gid].dgst[i + 4];
tmps[gid].out[i + 5] = tmps[gid].dgst[i + 5];
tmps[gid].out[i + 6] = tmps[gid].dgst[i + 6];
tmps[gid].out[i + 7] = tmps[gid].dgst[i + 7];
}
u32 i = 0;
u32 j = 1;
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
streebog512_hmac_ctx_t streebog512_hmac_ctx_v1 = streebog512_hmac_ctx;
w0[0] = j;
w0[1] = 0;
w0[2] = 0;
w0[3] = 0;
w1[0] = 0;
w1[1] = 0;
w1[2] = 0;
w1[3] = 0;
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
streebog512_hmac_update_64 (&streebog512_hmac_ctx_v1, w0, w1, w2, w3, 4);
streebog512_hmac_final (&streebog512_hmac_ctx_v1);
tmps[gid].dgst[i + 0] = streebog512_hmac_ctx_v1.opad.h[0];
tmps[gid].dgst[i + 1] = streebog512_hmac_ctx_v1.opad.h[1];
tmps[gid].dgst[i + 2] = streebog512_hmac_ctx_v1.opad.h[2];
tmps[gid].dgst[i + 3] = streebog512_hmac_ctx_v1.opad.h[3];
tmps[gid].dgst[i + 4] = streebog512_hmac_ctx_v1.opad.h[4];
tmps[gid].dgst[i + 5] = streebog512_hmac_ctx_v1.opad.h[5];
tmps[gid].dgst[i + 6] = streebog512_hmac_ctx_v1.opad.h[6];
tmps[gid].dgst[i + 7] = streebog512_hmac_ctx_v1.opad.h[7];
tmps[gid].out[i + 0] = tmps[gid].dgst[i + 0];
tmps[gid].out[i + 1] = tmps[gid].dgst[i + 1];
tmps[gid].out[i + 2] = tmps[gid].dgst[i + 2];
tmps[gid].out[i + 3] = tmps[gid].dgst[i + 3];
tmps[gid].out[i + 4] = tmps[gid].dgst[i + 4];
tmps[gid].out[i + 5] = tmps[gid].dgst[i + 5];
tmps[gid].out[i + 6] = tmps[gid].dgst[i + 6];
tmps[gid].out[i + 7] = tmps[gid].dgst[i + 7];
i = 8;
j = 2;
streebog512_hmac_ctx_t streebog512_hmac_ctx_v2 = streebog512_hmac_ctx;
w0[0] = j;
w0[1] = 0;
w0[2] = 0;
w0[3] = 0;
w1[0] = 0;
w1[1] = 0;
w1[2] = 0;
w1[3] = 0;
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
streebog512_hmac_update_64 (&streebog512_hmac_ctx_v2, w0, w1, w2, w3, 4);
streebog512_hmac_final (&streebog512_hmac_ctx_v2);
tmps[gid].dgst[i + 0] = streebog512_hmac_ctx_v2.opad.h[0];
tmps[gid].dgst[i + 1] = streebog512_hmac_ctx_v2.opad.h[1];
tmps[gid].dgst[i + 2] = streebog512_hmac_ctx_v2.opad.h[2];
tmps[gid].dgst[i + 3] = streebog512_hmac_ctx_v2.opad.h[3];
tmps[gid].dgst[i + 4] = streebog512_hmac_ctx_v2.opad.h[4];
tmps[gid].dgst[i + 5] = streebog512_hmac_ctx_v2.opad.h[5];
tmps[gid].dgst[i + 6] = streebog512_hmac_ctx_v2.opad.h[6];
tmps[gid].dgst[i + 7] = streebog512_hmac_ctx_v2.opad.h[7];
tmps[gid].out[i + 0] = tmps[gid].dgst[i + 0];
tmps[gid].out[i + 1] = tmps[gid].dgst[i + 1];
tmps[gid].out[i + 2] = tmps[gid].dgst[i + 2];
tmps[gid].out[i + 3] = tmps[gid].dgst[i + 3];
tmps[gid].out[i + 4] = tmps[gid].dgst[i + 4];
tmps[gid].out[i + 5] = tmps[gid].dgst[i + 5];
tmps[gid].out[i + 6] = tmps[gid].dgst[i + 6];
tmps[gid].out[i + 7] = tmps[gid].dgst[i + 7];
i = 16;
j = 3;
streebog512_hmac_ctx_t streebog512_hmac_ctx_v3 = streebog512_hmac_ctx;
w0[0] = j;
w0[1] = 0;
w0[2] = 0;
w0[3] = 0;
w1[0] = 0;
w1[1] = 0;
w1[2] = 0;
w1[3] = 0;
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
streebog512_hmac_update_64 (&streebog512_hmac_ctx_v3, w0, w1, w2, w3, 4);
streebog512_hmac_final (&streebog512_hmac_ctx_v3);
tmps[gid].dgst[i + 0] = streebog512_hmac_ctx_v3.opad.h[0];
tmps[gid].dgst[i + 1] = streebog512_hmac_ctx_v3.opad.h[1];
tmps[gid].dgst[i + 2] = streebog512_hmac_ctx_v3.opad.h[2];
tmps[gid].dgst[i + 3] = streebog512_hmac_ctx_v3.opad.h[3];
tmps[gid].dgst[i + 4] = streebog512_hmac_ctx_v3.opad.h[4];
tmps[gid].dgst[i + 5] = streebog512_hmac_ctx_v3.opad.h[5];
tmps[gid].dgst[i + 6] = streebog512_hmac_ctx_v3.opad.h[6];
tmps[gid].dgst[i + 7] = streebog512_hmac_ctx_v3.opad.h[7];
tmps[gid].out[i + 0] = tmps[gid].dgst[i + 0];
tmps[gid].out[i + 1] = tmps[gid].dgst[i + 1];
tmps[gid].out[i + 2] = tmps[gid].dgst[i + 2];
tmps[gid].out[i + 3] = tmps[gid].dgst[i + 3];
tmps[gid].out[i + 4] = tmps[gid].dgst[i + 4];
tmps[gid].out[i + 5] = tmps[gid].dgst[i + 5];
tmps[gid].out[i + 6] = tmps[gid].dgst[i + 6];
tmps[gid].out[i + 7] = tmps[gid].dgst[i + 7];
}
KERNEL_FQ void m13773_loop (KERN_ATTR_TMPS_ESALT (vc64_sbog_tmp_t, vc_t))

@ -39,7 +39,7 @@ typedef struct dpapimk
u32 SID_offset;
/* here only for possible
forward compatibiliy
forward compatibility
*/
// u8 cipher_algo[16];
// u8 hash_algo[16];

@ -41,7 +41,7 @@ typedef struct dpapimk
u32 SID_offset;
/* here only for possible
forward compatibiliy
forward compatibility
*/
// u8 cipher_algo[16];
// u8 hash_algo[16];

@ -463,6 +463,9 @@ KERNEL_FQ void m15700_init (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, ethereum_scrypt_
#if defined IS_CUDA || defined IS_HIP
const uint4 tmp0 = make_uint4 (digest[0], digest[1], digest[2], digest[3]);
const uint4 tmp1 = make_uint4 (digest[4], digest[5], digest[6], digest[7]);
#elif defined IS_METAL
const uint4 tmp0 = uint4 (digest[0], digest[1], digest[2], digest[3]);
const uint4 tmp1 = uint4 (digest[4], digest[5], digest[6], digest[7]);
#else
const uint4 tmp0 = (uint4) (digest[0], digest[1], digest[2], digest[3]);
const uint4 tmp1 = (uint4) (digest[4], digest[5], digest[6], digest[7]);
@ -493,6 +496,11 @@ KERNEL_FQ void m15700_init (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, ethereum_scrypt_
X[1] = make_uint4 (T[1].x, T[2].y, T[3].z, T[0].w);
X[2] = make_uint4 (T[2].x, T[3].y, T[0].z, T[1].w);
X[3] = make_uint4 (T[3].x, T[0].y, T[1].z, T[2].w);
#elif defined IS_METAL
X[0] = uint4 (T[0].x, T[1].y, T[2].z, T[3].w);
X[1] = uint4 (T[1].x, T[2].y, T[3].z, T[0].w);
X[2] = uint4 (T[2].x, T[3].y, T[0].z, T[1].w);
X[3] = uint4 (T[3].x, T[0].y, T[1].z, T[2].w);
#else
X[0] = (uint4) (T[0].x, T[1].y, T[2].z, T[3].w);
X[1] = (uint4) (T[1].x, T[2].y, T[3].z, T[0].w);
@ -603,6 +611,11 @@ KERNEL_FQ void m15700_comp (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, ethereum_scrypt_
T[1] = make_uint4 (X[1].x, X[0].y, X[3].z, X[2].w);
T[2] = make_uint4 (X[2].x, X[1].y, X[0].z, X[3].w);
T[3] = make_uint4 (X[3].x, X[2].y, X[1].z, X[0].w);
#elif defined IS_METAL
T[0] = uint4 (X[0].x, X[3].y, X[2].z, X[1].w);
T[1] = uint4 (X[1].x, X[0].y, X[3].z, X[2].w);
T[2] = uint4 (X[2].x, X[1].y, X[0].z, X[3].w);
T[3] = uint4 (X[3].x, X[2].y, X[1].z, X[0].w);
#else
T[0] = (uint4) (X[0].x, X[3].y, X[2].z, X[1].w);
T[1] = (uint4) (X[1].x, X[0].y, X[3].z, X[2].w);

@ -40,7 +40,7 @@ typedef struct dpapimk
u32 SID_offset;
/* here only for possible
forward compatibiliy
forward compatibility
*/
// u8 cipher_algo[16];
// u8 hash_algo[16];

@ -48,7 +48,7 @@ typedef struct dpapimk
u32 SID_offset;
/* here only for possible
forward compatibiliy
forward compatibility
*/
// u8 cipher_algo[16];
// u8 hash_algo[16];

@ -0,0 +1,404 @@
/**
* Author......: Netherlands Forensic Institute
* based upon 17010
* License.....: MIT
*/
//#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include M2S(INCLUDE_PATH/inc_vendor.h)
#include M2S(INCLUDE_PATH/inc_types.h)
#include M2S(INCLUDE_PATH/inc_platform.cl)
#include M2S(INCLUDE_PATH/inc_common.cl)
#include M2S(INCLUDE_PATH/inc_hash_sha1.cl)
#include M2S(INCLUDE_PATH/inc_cipher_cast.h)
#endif
// Per-hash GPG data passed to the m17040 kernels as their esalt
// (second argument of KERN_ATTR_TMPS_ESALT).
typedef struct gpg
{
u32 cipher_algo;
u32 iv[4]; // sized for the largest IV (16 bytes); TODO: make the used size depend on the input hash, the IV can be 8 or 16 bytes
u32 modulus_size;
u32 encrypted_data[384]; // ciphertext buffer, 384 * 4 = 1536 bytes capacity
u32 encrypted_data_size; // NOTE(review): presumably the number of valid bytes in encrypted_data -- confirm with the host module
} gpg_t;
// Per-candidate scratch state kept in global memory between the m17040 kernel stages.
typedef struct gpg_tmp
{
// buffer for a maximum of 256 + 8 characters, we extend it to 320 characters so it's always 64 byte aligned
u32 salted_pw_block[80];
// actual number of bytes in 'salted_pw_block' that are used since salt and password are copied multiple times into the buffer
u32 salted_pw_block_len;
// SHA-1 chaining values; the init and loop_prepare kernels seed h[0..4] and h[5..9] with the
// same five SHA1M_* constants, the loop kernel loads and stores h[0..4] only
u32 h[10];
// saved sha1_ctx_t message buffer (w0..w3) and byte counter (len)
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
u32 len;
} gpg_tmp_t;
// Scalar byte-align helper: combines 'a' and 'b' and returns 32 bits of the pair,
// selected by a byte offset of (c & 3). Only the two lowest bits of 'c' are used,
// so the effective shift is always 0..3 bytes.
// NOTE(review): which of a/b ends up in the upper half depends on v64_from_v32ab_S /
// hc_byte_perm, both defined elsewhere -- confirm there before relying on the byte order.
DECLSPEC u32 hc_bytealign_le_S (const u32 a, const u32 b, const int c)
{
const int c_mod_4 = c & 3;
// generic path: build a 64-bit value from (b, a), shift right by whole bytes, keep the low 32 bits
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
const u32 r = l32_from_64_S ((v64_from_v32ab_S (b, a) >> (c_mod_4 * 8)));
#endif
// byte-permute path: derive a byte selector from the offset and let hc_byte_perm pick the bytes
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
// one nibble per output byte
const int selector = (0x76543210 >> (c_mod_4 * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
// one byte per output byte
const int selector = l32_from_64_S (0x0706050403020100UL >> (c_mod_4 * 8));
#endif
const u32 r = hc_byte_perm (b, a, selector);
#endif
// NOTE(review): if neither preprocessor branch above is active, 'r' is never declared
// and this function does not compile
return r;
}
/**
 * Append the words of 'append' to 'block' at byte position 'offset'.
 *
 * block  : destination buffer, modified in place
 * offset : byte position in 'block' at which the appended data starts
 * append : source words; may be the same buffer as 'block', which is why the
 *          words are processed strictly front to back
 * len    : number of bytes to append
 *
 * The first touched word is OR-ed into, every following word is overwritten.
 */
DECLSPEC void memcat_le_S (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS const u32 *append, u32 len)
{
  // word that receives the first appended bytes
  const u32 dst_base = (offset - 1) >> 2;

  // number of destination words touched, counted from dst_base
  const u32 words = ((offset + len + 3) >> 2) - dst_base;

  // alignment argument handed to hc_bytealign_le_S for every word
  const int align = 4 - (int) (offset & 3);

  // first word: merge with what is already stored there
  block[dst_base] |= hc_bytealign_le_S (append[0], 0, align);

  // remaining words: each one is built from two neighbouring source words
  u32 src = 1;

  while (src < words)
  {
    block[dst_base + src] = hc_bytealign_le_S (append[src], append[src - 1], align);

    src++;
  }
}
/**
 * Zero 'block' from byte 'start_offset' up to 'end_offset', little-endian byte order.
 *
 * In the word that contains 'start_offset' the low-order bytes below the offset are kept
 * and the rest is cleared. All following words up to (end_offset + 3) / 4 are cleared
 * completely, so the range is rounded up to a whole word.
 */
DECLSPEC void memzero_le_S (PRIVATE_AS u32 *block, const u32 start_offset, const u32 end_offset)
{
  const u32 first_word = start_offset >> 2;
  const u32 stop_word  = (end_offset + 3) >> 2;

  // number of low-order bits to keep in the first word: 0, 8, 16 or 24.
  // Shifting the all-ones mask left by that amount never needs a 32-bit shift.
  const u32 keep_bits = (start_offset & 3) * 8;

  block[first_word] &= ~(0xffffffff << keep_bits);

  // clear whole words -- note that the last u32 is completely zeroed!
  u32 word = first_word + 1;

  while (word < stop_word)
  {
    block[word] = 0;

    word++;
  }
}
/**
 * Zero 'block' from byte 'start_offset' up to 'end_offset', big-endian byte order.
 *
 * In the word that contains 'start_offset' the high-order bytes below the offset are kept
 * and the rest is cleared. All following words up to (end_offset + 3) / 4 are cleared
 * completely, so the range is rounded up to a whole word.
 */
DECLSPEC void memzero_be_S (PRIVATE_AS u32 *block, const u32 start_offset, const u32 end_offset)
{
  const u32 first_word = start_offset >> 2;
  const u32 stop_word  = (end_offset + 3) >> 2;

  // number of high-order bits to keep in the first word: 0, 8, 16 or 24.
  // Shifting the all-ones mask right by that amount never needs a 32-bit shift.
  const u32 keep_bits = (start_offset & 3) * 8;

  block[first_word] &= ~(0xffffffff >> keep_bits);

  // clear whole words -- note that the last u32 is completely zeroed!
  u32 word = first_word + 1;

  while (word < stop_word)
  {
    block[word] = 0;

    word++;
  }
}
/**
 * Decrypt CAST5 (CAST-128) ciphertext in CFB mode with a 16-byte key.
 *
 * encrypted_data : ciphertext in global memory
 * data_len       : ciphertext length in bytes
 * iv             : currently ignored, the feedback register always starts at zero
 *                  (TODO: load the IV dynamically)
 * key            : 16-byte cipher key
 * decrypted_data : receives the plaintext; written in whole 8-byte blocks, so the
 *                  caller's buffer must cover data_len rounded up to a multiple of 8
 * s_S            : CAST S-box tables
 */
DECLSPEC void cast128_decrypt_cfb (GLOBAL_AS const u32 *encrypted_data, int data_len, PRIVATE_AS const u32 *iv, PRIVATE_AS const u32 *key, PRIVATE_AS u32 *decrypted_data, SHM_TYPE u32 (*s_S)[256])
{
  // a negative length is treated as empty instead of being reinterpreted as a huge unsigned value
  const u32 byte_cnt = (data_len > 0) ? (u32) data_len : 0;

  // CFB feedback register, one CAST5 block (8 bytes)
  u8 essiv[8];

  for (int j = 0; j < 8; j++) essiv[j] = 0;

  // TODO remove this copy, would speedup the attack
  // The ciphertext is copied so it can be addressed byte-wise through a private pointer.
  u32 lencrypted_data[384]; // I'd prefer not to hardcode to 384, but rest of kernel uses the same value

  // copies in groups of four words, so up to three words more than needed are transferred
  for (u32 i = 0; i < (byte_cnt + 3) / 4; i += 4)
  {
    lencrypted_data[i + 0] = encrypted_data[i + 0];
    lencrypted_data[i + 1] = encrypted_data[i + 1];
    lencrypted_data[i + 2] = encrypted_data[i + 2];
    lencrypted_data[i + 3] = encrypted_data[i + 3];
  }

  PRIVATE_AS u8 *lencrypted_data8 = (PRIVATE_AS u8*)lencrypted_data;
  PRIVATE_AS u8 *decrypted_data8 = (PRIVATE_AS u8*)decrypted_data;
  PRIVATE_AS u8 *key8 = (PRIVATE_AS u8*)key;

  CAST_KEY ck;

  Cast5SetKey(&ck, 16, key8, s_S);

  // CFB decryption: P[i] = E (C[i-1]) ^ C[i], with C[-1] = feedback register start value.
  // One iteration handles one 8-byte block, so the bound is the byte count itself. The
  // former bound 'data_len + 3' was a leftover of the u32 rounding and processed one extra
  // block whenever data_len % 8 was 0, 6 or 7: that block read ciphertext bytes that were
  // never copied and wrote 8 bytes past the end of the data.
  for (u32 i = 0; i < byte_cnt; i += 8)
  {
    Cast5Encrypt(essiv, &decrypted_data8[i], &ck, s_S);

    for (int j = 0; j < 8; j++) decrypted_data8[i + j] ^= lencrypted_data8[i + j];

    // the ciphertext block becomes the feedback for the next block
    // Note: Not necessary if you are only decrypting a single block!
    for (int j = 0; j < 8; j++) essiv[j] = lencrypted_data8[i + j];
  }
}
/**
 * Verify decrypted data against the SHA-1 stored in its last 20 bytes.
 *
 * decoded_data      : decrypted buffer; everything from the stored checksum up to
 *                     384 words is zeroed as a side effect
 * decoded_data_size : number of decrypted bytes, including the trailing 20-byte SHA-1
 *
 * Returns 1 if the SHA-1 of the leading (decoded_data_size - 20) bytes equals the
 * stored checksum, 0 otherwise.
 */
DECLSPEC int check_decoded_data (PRIVATE_AS u32 *decoded_data, const u32 decoded_data_size)
{
  // byte and word position at which the stored SHA-1 starts
  const u32 tail_byte = (decoded_data_size - 20);
  const u32 tail_word = tail_byte / 4;

  // the checksum may start at any byte, so every word is assembled from two neighbours
  u32 stored[5];

  for (u32 k = 0; k < 5; k++)
  {
    stored[k] = hc_bytealign_le_S (decoded_data[tail_word + k + 1], decoded_data[tail_word + k], tail_byte);
  }

  // wipe the checksum and everything behind it so only the payload gets hashed
  memzero_le_S (decoded_data, tail_byte, 384 * sizeof(u32));

  sha1_ctx_t ctx;

  sha1_init (&ctx);

  sha1_update_swap (&ctx, decoded_data, tail_byte);

  sha1_final (&ctx);

  // compare word by word against the byte-swapped digest
  for (u32 k = 0; k < 5; k++)
  {
    if (stored[k] != hc_swap32_S (ctx.h[k])) return 0;
  }

  return 1;
}
// Init kernel: builds the salt + password block for one candidate, repeats it as often
// as it fits into 80 words, stores the byte-swapped result in tmps[gid] and seeds the
// saved hash state with the SHA1M_* constants.
KERNEL_FQ void m17040_init (KERN_ATTR_TMPS_ESALT (gpg_tmp_t, gpg_t))
{
  const u64 gid = get_global_id (0);

  if (gid >= GID_CNT) return;

  const u32 pw_len = pws[gid].pw_len;

  const u32 salted_pw_len = (salt_bufs[SALT_POS_HOST].salt_len + pw_len);

  u32 salted_pw_block[80];

  // zero the tail that the salt and password words below do not cover
  for (u32 pos = 66; pos < 80; pos++) salted_pw_block[pos] = 0;

  // salt first -- the salt is always 8 bytes, i.e. two words
  salted_pw_block[0] = salt_bufs[SALT_POS_HOST].salt_buf[0];
  salted_pw_block[1] = salt_bufs[SALT_POS_HOST].salt_buf[1];

  // then all 64 password words
  for (u32 pos = 0; pos < 64; pos++) salted_pw_block[2 + pos] = pws[gid].i[pos];

  // create a number of copies for efficiency: as many as fit into the buffer
  const u32 copies = sizeof (salted_pw_block) / salted_pw_len;

  u32 copy = 1;

  while (copy < copies)
  {
    memcat_le_S (salted_pw_block, copy * salted_pw_len, salted_pw_block, salted_pw_len);

    copy++;
  }

  // the block is kept byte-swapped in global memory
  for (u32 pos = 0; pos < 80; pos++)
  {
    tmps[gid].salted_pw_block[pos] = hc_swap32_S (salted_pw_block[pos]);
  }

  tmps[gid].salted_pw_block_len = (copies * salted_pw_len);

  // both halves of h[] start from the same five constants
  const u32 sha1_iv[5] = { SHA1M_A, SHA1M_B, SHA1M_C, SHA1M_D, SHA1M_E };

  for (int k = 0; k < 5; k++)
  {
    tmps[gid].h[k + 0] = sha1_iv[k];
    tmps[gid].h[k + 5] = sha1_iv[k];
  }

  tmps[gid].len = 0;
}
// Resets the saved hash state of one candidate: h[] is re-seeded with the SHA1M_*
// constants, the message buffer w0..w3 and the byte counter are cleared.
KERNEL_FQ void m17040_loop_prepare (KERN_ATTR_TMPS_ESALT (gpg_tmp_t, gpg_t))
{
  const u64 gid = get_global_id (0);

  if (gid >= GID_CNT) return;

  // both halves of h[] start from the same five constants
  const u32 sha1_iv[5] = { SHA1M_A, SHA1M_B, SHA1M_C, SHA1M_D, SHA1M_E };

  for (int k = 0; k < 5; k++)
  {
    tmps[gid].h[k + 0] = sha1_iv[k];
    tmps[gid].h[k + 5] = sha1_iv[k];
  }

  // empty message buffer
  for (int k = 0; k < 4; k++)
  {
    tmps[gid].w0[k] = 0;
    tmps[gid].w1[k] = 0;
    tmps[gid].w2[k] = 0;
    tmps[gid].w3[k] = 0;
  }

  tmps[gid].len = 0;
}
// Loop kernel: restores the saved SHA-1 context of one candidate, hashes a single
// copy of salt + password from the prepared block, finalizes the hash and stores the
// context back into tmps[gid].
KERNEL_FQ void m17040_loop (KERN_ATTR_TMPS_ESALT (gpg_tmp_t, gpg_t))
{
  const u64 gid = get_global_id (0);

  if (gid >= GID_CNT) return;

  // get the prepared buffer from the gpg_tmp_t struct into a private buffer
  u32 salted_pw_block[80];

  for (int i = 0; i < 80; i++) salted_pw_block[i] = tmps[gid].salted_pw_block[i];

  const u32 salted_pw_block_len = tmps[gid].salted_pw_block_len;

  // do we really need this, since the salt is always length 8?
  if (salted_pw_block_len == 0) return;

  /**
   * context load
   */

  sha1_ctx_t ctx;

  for (int i = 0; i < 5; i++) ctx.h[i] = tmps[gid].h[i];

  for (int i = 0; i < 4; i++) ctx.w0[i] = tmps[gid].w0[i];
  for (int i = 0; i < 4; i++) ctx.w1[i] = tmps[gid].w1[i];
  for (int i = 0; i < 4; i++) ctx.w2[i] = tmps[gid].w2[i];
  for (int i = 0; i < 4; i++) ctx.w3[i] = tmps[gid].w3[i];

  ctx.len = tmps[gid].len;

  const u32 pw_len = pws[gid].pw_len;

  const u32 salted_pw_len = (salt_bufs[SALT_POS_HOST].salt_len + pw_len);

  // keep only the first copy of salt + password: clear everything behind it, including
  // the unused bytes of the last word if the length is not a multiple of 4
  // TODO do we need this? we don't feed the remainder to the hashing algorithm anyway
  memzero_be_S (salted_pw_block, salted_pw_len, salted_pw_block_len);

  sha1_update (&ctx, salted_pw_block, salted_pw_len);

  sha1_final (&ctx);

  /**
   * context save
   */

  // after sha1_final this is the sha1 hash of the salt+password:
  for (int i = 0; i < 5; i++) tmps[gid].h[i] = ctx.h[i];

  for (int i = 0; i < 4; i++) tmps[gid].w0[i] = ctx.w0[i];
  for (int i = 0; i < 4; i++) tmps[gid].w1[i] = ctx.w1[i];
  for (int i = 0; i < 4; i++) tmps[gid].w2[i] = ctx.w2[i];
  for (int i = 0; i < 4; i++) tmps[gid].w3[i] = ctx.w3[i];

  tmps[gid].len = ctx.len;
}
// Comparison kernel entry point for mode 17040. Intentionally a no-op: in this
// file candidates are verified and marked by m17040_aux1 instead.
// NOTE(review): presumably the host module schedules aux1 in place of comp -
// confirm against the module source.
KERNEL_FQ void m17040_comp (KERN_ATTR_TMPS_ESALT (gpg_tmp_t, gpg_t))
{
// not in use here, special case...
}
/**
 * Auxiliary kernel for mode 17040.
 *
 * Takes the five SHA-1 state words saved in tmps[gid].h[0..4], byte-swaps them
 * into a key, decrypts the stored encrypted data with cast128_decrypt_cfb and,
 * if check_decoded_data accepts the result, marks the hash as found (only the
 * first work-item to bump hashes_shown does the marking).
 */
KERNEL_FQ void m17040_aux1 (KERN_ATTR_TMPS_ESALT (gpg_tmp_t, gpg_t))
{
  /**
   * modifier
   */

  const u64 gid = get_global_id (0);
  const u64 lid = get_local_id (0);
  const u64 lsz = get_local_size (0);

  /**
   * S table (8 x 256 words) handed to cast128_decrypt_cfb:
   * copied into local memory when REAL_SHM is defined, otherwise used in place
   */

  #ifdef REAL_SHM

  LOCAL_VK u32 s_S[8][256];

  // the work-items of the group share the copy, each handling every lsz-th column
  for (u32 i = lid; i < 256; i += lsz)
  {
    for (u32 box = 0; box < 8; box++) s_S[box][i] = S[box][i];
  }

  SYNC_THREADS ();

  #else

  CONSTANT_AS u32a (*s_S)[256] = S;

  #endif

  // out-of-range work-items leave only after the shared copy above is complete
  if (gid >= GID_CNT) return;

  // the saved SHA-1 state words become the cipher key
  u32 cast_key[5];

  for (int word = 0; word < 5; word++)
  {
    cast_key[word] = hc_swap32_S (tmps[gid].h[word]);
  }

  u32 iv[4] = {0};

  for (int word = 0; word < 4; word++)
  {
    iv[word] = esalt_bufs[DIGESTS_OFFSET_HOST].iv[word];
  }

  const u32 enc_data_size = esalt_bufs[DIGESTS_OFFSET_HOST].encrypted_data_size;

  u32 decoded_data[384];

  cast128_decrypt_cfb (esalt_bufs[DIGESTS_OFFSET_HOST].encrypted_data, enc_data_size, iv, cast_key, decoded_data, s_S);

  // wrong candidate: nothing to report
  if (!check_decoded_data (decoded_data, enc_data_size)) return;

  // another work-item already reported this digest
  if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET_HOST]) != 0) return;

  mark_hash (plains_buf, d_return_buf, SALT_POS_HOST, DIGESTS_CNT, 0, DIGESTS_OFFSET_HOST + 0, gid, 0, 0, 0);
}

@ -35,6 +35,7 @@ typedef struct odf12
u32 iv[4];
u32 checksum[8];
u32 encrypted_data[256];
int encrypted_len;
} odf12_t;
@ -352,111 +353,49 @@ KERNEL_FQ void m18400_comp (KERN_ATTR_TMPS_ESALT (odf12_tmp_t, odf12_t))
iv[2] = es->iv[2];
iv[3] = es->iv[3];
u32 ct[4];
u32 pt[256];
u32 pt1[4];
u32 pt2[4];
u32 pt3[4];
u32 pt4[4];
sha256_ctx_t sha256_ctx;
sha256_init (&sha256_ctx);
// decrypt aes-cbc and calculate plaintext checksum at the same time
for (int i = 0; i < 16; i++)
for (int i = 0, j = 0; i < es->encrypted_len; i += 16, j += 4)
{
const int i16 = i * 16;
u32 ct[4];
ct[0] = es->encrypted_data[i16 + 0];
ct[1] = es->encrypted_data[i16 + 1];
ct[2] = es->encrypted_data[i16 + 2];
ct[3] = es->encrypted_data[i16 + 3];
ct[0] = es->encrypted_data[j + 0];
ct[1] = es->encrypted_data[j + 1];
ct[2] = es->encrypted_data[j + 2];
ct[3] = es->encrypted_data[j + 3];
aes256_decrypt (ks, ct, pt1, s_td0, s_td1, s_td2, s_td3, s_td4);
aes256_decrypt (ks, ct, pt + j, s_td0, s_td1, s_td2, s_td3, s_td4);
pt1[0] ^= iv[0];
pt1[1] ^= iv[1];
pt1[2] ^= iv[2];
pt1[3] ^= iv[3];
iv[0] = ct[0];
iv[1] = ct[1];
iv[2] = ct[2];
iv[3] = ct[3];
ct[0] = es->encrypted_data[i16 + 4];
ct[1] = es->encrypted_data[i16 + 5];
ct[2] = es->encrypted_data[i16 + 6];
ct[3] = es->encrypted_data[i16 + 7];
aes256_decrypt (ks, ct, pt2, s_td0, s_td1, s_td2, s_td3, s_td4);
pt2[0] ^= iv[0];
pt2[1] ^= iv[1];
pt2[2] ^= iv[2];
pt2[3] ^= iv[3];
iv[0] = ct[0];
iv[1] = ct[1];
iv[2] = ct[2];
iv[3] = ct[3];
ct[0] = es->encrypted_data[i16 + 8];
ct[1] = es->encrypted_data[i16 + 9];
ct[2] = es->encrypted_data[i16 + 10];
ct[3] = es->encrypted_data[i16 + 11];
aes256_decrypt (ks, ct, pt3, s_td0, s_td1, s_td2, s_td3, s_td4);
pt3[0] ^= iv[0];
pt3[1] ^= iv[1];
pt3[2] ^= iv[2];
pt3[3] ^= iv[3];
pt[j + 0] ^= iv[0];
pt[j + 1] ^= iv[1];
pt[j + 2] ^= iv[2];
pt[j + 3] ^= iv[3];
iv[0] = ct[0];
iv[1] = ct[1];
iv[2] = ct[2];
iv[3] = ct[3];
}
ct[0] = es->encrypted_data[i16 + 12];
ct[1] = es->encrypted_data[i16 + 13];
ct[2] = es->encrypted_data[i16 + 14];
ct[3] = es->encrypted_data[i16 + 15];
const int full64 = es->encrypted_len / 64;
aes256_decrypt (ks, ct, pt4, s_td0, s_td1, s_td2, s_td3, s_td4);
const int encrypted_len64 = full64 * 64;
pt4[0] ^= iv[0];
pt4[1] ^= iv[1];
pt4[2] ^= iv[2];
pt4[3] ^= iv[3];
sha256_ctx_t sha256_ctx;
iv[0] = ct[0];
iv[1] = ct[1];
iv[2] = ct[2];
iv[3] = ct[3];
sha256_init (&sha256_ctx);
pt1[0] = hc_swap32_S (pt1[0]);
pt1[1] = hc_swap32_S (pt1[1]);
pt1[2] = hc_swap32_S (pt1[2]);
pt1[3] = hc_swap32_S (pt1[3]);
sha256_update_swap (&sha256_ctx, pt, encrypted_len64);
pt2[0] = hc_swap32_S (pt2[0]);
pt2[1] = hc_swap32_S (pt2[1]);
pt2[2] = hc_swap32_S (pt2[2]);
pt2[3] = hc_swap32_S (pt2[3]);
const int remaining64 = es->encrypted_len - encrypted_len64;
pt3[0] = hc_swap32_S (pt3[0]);
pt3[1] = hc_swap32_S (pt3[1]);
pt3[2] = hc_swap32_S (pt3[2]);
pt3[3] = hc_swap32_S (pt3[3]);
if (remaining64)
{
PRIVATE_AS u32 *pt_remaining = pt + (encrypted_len64 / 4);
pt4[0] = hc_swap32_S (pt4[0]);
pt4[1] = hc_swap32_S (pt4[1]);
pt4[2] = hc_swap32_S (pt4[2]);
pt4[3] = hc_swap32_S (pt4[3]);
truncate_block_16x4_be_S (pt_remaining + 0, pt_remaining + 4, pt_remaining + 8, pt_remaining + 12, remaining64);
sha256_update_64 (&sha256_ctx, pt1, pt2, pt3, pt4, 64);
sha256_update_swap (&sha256_ctx, pt_remaining, remaining64);
}
sha256_final (&sha256_ctx);

@ -33,6 +33,7 @@ typedef struct odf11
u32 iv[2];
u32 checksum[5];
u32 encrypted_data[256];
int encrypted_len;
} odf11_t;
@ -349,7 +350,7 @@ DECLSPEC void SET_KEY32 (LOCAL_AS u32 *S, const u64 key, const u32 val)
#else
// access pattern: linear access with S offset already set to right offset based on thread ID saving it from compuation
// access pattern: linear access with S offset already set to right offset based on thread ID saving it from computation
// makes sense if there are not thread ID's (for instance on CPU)
DECLSPEC inline u32 GET_KEY32 (LOCAL_AS u32 *S, const u64 key)
@ -759,116 +760,48 @@ KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE_COMP) m18600_comp (KERN_ATTR_
GLOBAL_AS const odf11_t *es = &esalt_bufs[DIGESTS_OFFSET_HOST];
u32 ct[2];
u32 pt0[4];
u32 pt1[4];
u32 pt2[4];
u32 pt3[4];
u32 buf[2];
buf[0] = es->iv[0];
buf[1] = es->iv[1];
u32 iv[2];
sha1_ctx_t sha1_ctx;
iv[0] = es->iv[0];
iv[1] = es->iv[1];
sha1_init (&sha1_ctx);
u32 pt[256];
// decrypt blowfish-cfb and calculate plaintext checksum at the same time
for (int i = 0; i < 16; i++)
for (int i = 0, j = 0; i < es->encrypted_len; i += 8, j += 2)
{
const int i16 = i * 16;
ct[0] = es->encrypted_data[i16 + 0];
ct[1] = es->encrypted_data[i16 + 1];
BF_ENCRYPT (buf[0], buf[1]);
pt0[0] = ct[0] ^ buf[0];
pt0[1] = ct[1] ^ buf[1];
buf[0] = ct[0];
buf[1] = ct[1];
ct[0] = es->encrypted_data[i16 + 2];
ct[1] = es->encrypted_data[i16 + 3];
BF_ENCRYPT (buf[0], buf[1]);
pt0[2] = ct[0] ^ buf[0];
pt0[3] = ct[1] ^ buf[1];
buf[0] = ct[0];
buf[1] = ct[1];
ct[0] = es->encrypted_data[i16 + 4];
ct[1] = es->encrypted_data[i16 + 5];
u32 ct[2];
BF_ENCRYPT (buf[0], buf[1]);
ct[0] = es->encrypted_data[j + 0];
ct[1] = es->encrypted_data[j + 1];
pt1[0] = ct[0] ^ buf[0];
pt1[1] = ct[1] ^ buf[1];
BF_ENCRYPT (iv[0], iv[1]);
buf[0] = ct[0];
buf[1] = ct[1];
pt[j + 0] = ct[0] ^ iv[0];
pt[j + 1] = ct[1] ^ iv[1];
ct[0] = es->encrypted_data[i16 + 6];
ct[1] = es->encrypted_data[i16 + 7];
BF_ENCRYPT (buf[0], buf[1]);
pt1[2] = ct[0] ^ buf[0];
pt1[3] = ct[1] ^ buf[1];
buf[0] = ct[0];
buf[1] = ct[1];
ct[0] = es->encrypted_data[i16 + 8];
ct[1] = es->encrypted_data[i16 + 9];
BF_ENCRYPT (buf[0], buf[1]);
pt2[0] = ct[0] ^ buf[0];
pt2[1] = ct[1] ^ buf[1];
buf[0] = ct[0];
buf[1] = ct[1];
ct[0] = es->encrypted_data[i16 + 10];
ct[1] = es->encrypted_data[i16 + 11];
BF_ENCRYPT (buf[0], buf[1]);
pt2[2] = ct[0] ^ buf[0];
pt2[3] = ct[1] ^ buf[1];
buf[0] = ct[0];
buf[1] = ct[1];
iv[0] = ct[0];
iv[1] = ct[1];
}
ct[0] = es->encrypted_data[i16 + 12];
ct[1] = es->encrypted_data[i16 + 13];
const int full64 = es->encrypted_len / 64;
BF_ENCRYPT (buf[0], buf[1]);
const int encrypted_len64 = full64 * 64;
pt3[0] = ct[0] ^ buf[0];
pt3[1] = ct[1] ^ buf[1];
sha1_ctx_t sha1_ctx;
buf[0] = ct[0];
buf[1] = ct[1];
sha1_init (&sha1_ctx);
ct[0] = es->encrypted_data[i16 + 14];
ct[1] = es->encrypted_data[i16 + 15];
sha1_update (&sha1_ctx, pt, encrypted_len64);
BF_ENCRYPT (buf[0], buf[1]);
const int remaining64 = es->encrypted_len - encrypted_len64;
pt3[2] = ct[0] ^ buf[0];
pt3[3] = ct[1] ^ buf[1];
if (remaining64)
{
PRIVATE_AS u32 *pt_remaining = pt + (encrypted_len64 / 4);
buf[0] = ct[0];
buf[1] = ct[1];
truncate_block_16x4_be_S (pt_remaining + 0, pt_remaining + 4, pt_remaining + 8, pt_remaining + 12, remaining64);
sha1_update_64 (&sha1_ctx, pt0, pt1, pt2, pt3, 64);
sha1_update (&sha1_ctx, pt_remaining, remaining64);
}
sha1_final (&sha1_ctx);

File diff suppressed because it is too large Load Diff

@ -0,0 +1,265 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
//#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include M2S(INCLUDE_PATH/inc_vendor.h)
#include M2S(INCLUDE_PATH/inc_types.h)
#include M2S(INCLUDE_PATH/inc_platform.cl)
#include M2S(INCLUDE_PATH/inc_common.cl)
#include M2S(INCLUDE_PATH/inc_rp.h)
#include M2S(INCLUDE_PATH/inc_rp.cl)
#include M2S(INCLUDE_PATH/inc_scalar.cl)
#include M2S(INCLUDE_PATH/inc_hash_sha256.cl)
#endif
// uint_to_hex_lower8_le (i): for each lane of i (VECT_SIZE lanes; i itself when
// VECT_SIZE == 1) fetch the l_bin2asc entry indexed by that lane and rebuild a
// value with make_u32x. The expansion names l_bin2asc directly, so a table with
// that name has to be in scope at every use site.
// NOTE(review): despite "lower" in the macro name, the kernels in this file
// fill l_bin2asc with uppercase digits ('A' - 10 + nibble).
#if VECT_SIZE == 1
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
#elif VECT_SIZE == 2
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
#elif VECT_SIZE == 4
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
#elif VECT_SIZE == 8
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
#elif VECT_SIZE == 16
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
#endif
// Kernel m20712_mxx, rule-based candidates (KERN_ATTR_RULES).
// For every entry of rules_buf it applies the rule to the base password,
// computes SHA-256 of the result, expands the 32-byte digest into 64 ASCII hex
// characters via l_bin2asc, hashes those 64 characters followed by the salt
// with a second SHA-256 and passes four result words to COMPARE_M_SCALAR.
KERNEL_FQ void m20712_mxx (KERN_ATTR_RULES ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
// Entry i packs the two hex digits of byte value i as ASCII: the low-nibble
// digit in bits 0..7 and the high-nibble digit in bits 8..15. Digits above 9
// map to uppercase 'A'..'F'. Work-items fill the table cooperatively, each one
// handling every lsz-th entry starting at its lid.
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'A' - 10 + i0) << 0
| ((i1 < 10) ? '0' + i1 : 'A' - 10 + i1) << 8;
}
SYNC_THREADS ();
// the range check comes after SYNC_THREADS so that out-of-range work-items
// still take part in filling the table
if (gid >= GID_CNT) return;
/**
* base
*/
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
// NOTE(review): COPY_PW / PASTE_PW are defined outside this view; presumably
// they keep an unmodified copy of the base password for each rule - confirm
COPY_PW (pws[gid]);
const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;
// private copy of the salt, one word per started 4 bytes, each word passed
// through hc_swap32_S; unused words stay zero
u32 s[64] = { 0 };
for (int i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
{
s[idx] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[idx]);
}
/**
* loop
*/
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
{
pw_t tmp = PASTE_PW;
tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
// inner hash: SHA-256 over the rule-modified candidate
sha256_ctx_t ctx0;
sha256_init (&ctx0);
sha256_update_swap (&ctx0, tmp.i, tmp.pw_len);
sha256_final (&ctx0);
const u32 a = ctx0.h[0];
const u32 b = ctx0.h[1];
const u32 c = ctx0.h[2];
const u32 d = ctx0.h[3];
const u32 e = ctx0.h[4];
const u32 f = ctx0.h[5];
const u32 g = ctx0.h[6];
const u32 h = ctx0.h[7];
sha256_ctx_t ctx;
sha256_init (&ctx);
// Hex-encode the digest: every digest word becomes two output words of four
// characters each. Within an output word the digits of the more significant
// byte sit in the upper 16 bits, those of the next byte in the lower 16 bits.
w0[0] = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 | uint_to_hex_lower8_le ((a >> 24) & 255) << 16;
w0[1] = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 | uint_to_hex_lower8_le ((a >> 8) & 255) << 16;
w0[2] = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 | uint_to_hex_lower8_le ((b >> 24) & 255) << 16;
w0[3] = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 | uint_to_hex_lower8_le ((b >> 8) & 255) << 16;
w1[0] = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 | uint_to_hex_lower8_le ((c >> 24) & 255) << 16;
w1[1] = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 | uint_to_hex_lower8_le ((c >> 8) & 255) << 16;
w1[2] = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 | uint_to_hex_lower8_le ((d >> 24) & 255) << 16;
w1[3] = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 | uint_to_hex_lower8_le ((d >> 8) & 255) << 16;
w2[0] = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 | uint_to_hex_lower8_le ((e >> 24) & 255) << 16;
w2[1] = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 | uint_to_hex_lower8_le ((e >> 8) & 255) << 16;
w2[2] = uint_to_hex_lower8_le ((f >> 16) & 255) << 0 | uint_to_hex_lower8_le ((f >> 24) & 255) << 16;
w2[3] = uint_to_hex_lower8_le ((f >> 0) & 255) << 0 | uint_to_hex_lower8_le ((f >> 8) & 255) << 16;
w3[0] = uint_to_hex_lower8_le ((g >> 16) & 255) << 0 | uint_to_hex_lower8_le ((g >> 24) & 255) << 16;
w3[1] = uint_to_hex_lower8_le ((g >> 0) & 255) << 0 | uint_to_hex_lower8_le ((g >> 8) & 255) << 16;
w3[2] = uint_to_hex_lower8_le ((h >> 16) & 255) << 0 | uint_to_hex_lower8_le ((h >> 24) & 255) << 16;
w3[3] = uint_to_hex_lower8_le ((h >> 0) & 255) << 0 | uint_to_hex_lower8_le ((h >> 8) & 255) << 16;
// outer hash: SHA-256 over the 64 hex characters followed by the salt
sha256_update_64 (&ctx, w0, w1, w2, w3, 64);
sha256_update (&ctx, s, salt_len);
sha256_final (&ctx);
const u32 r0 = ctx.h[DGST_R0];
const u32 r1 = ctx.h[DGST_R1];
const u32 r2 = ctx.h[DGST_R2];
const u32 r3 = ctx.h[DGST_R3];
// NOTE(review): COMPARE_M_SCALAR is defined outside this view; presumably it
// looks the four words up among all loaded digests and marks matches
COMPARE_M_SCALAR (r0, r1, r2, r3);
}
}
// Kernel m20712_sxx, rule-based candidates (KERN_ATTR_RULES).
// Same computation as the _mxx kernel of this file - SHA-256 of the
// rule-modified password, hex-encoded, then SHA-256 of the 64 hex characters
// followed by the salt - but the digest words at DIGESTS_OFFSET_HOST are
// loaded into search[] first and the result goes to COMPARE_S_SCALAR.
KERNEL_FQ void m20712_sxx (KERN_ATTR_RULES ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
// Entry i packs the two hex digits of byte value i as ASCII: the low-nibble
// digit in bits 0..7 and the high-nibble digit in bits 8..15. Digits above 9
// map to uppercase 'A'..'F'. Work-items fill the table cooperatively, each one
// handling every lsz-th entry starting at its lid.
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'A' - 10 + i0) << 0
| ((i1 < 10) ? '0' + i1 : 'A' - 10 + i1) << 8;
}
SYNC_THREADS ();
// the range check comes after SYNC_THREADS so that out-of-range work-items
// still take part in filling the table
if (gid >= GID_CNT) return;
/**
* digest
*/
// NOTE(review): search is not referenced by name below; presumably the
// COMPARE_S_SCALAR expansion reads it - do not rename without checking
const u32 search[4] =
{
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
};
/**
* base
*/
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
// NOTE(review): COPY_PW / PASTE_PW are defined outside this view; presumably
// they keep an unmodified copy of the base password for each rule - confirm
COPY_PW (pws[gid]);
const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;
// private copy of the salt, one word per started 4 bytes, each word passed
// through hc_swap32_S; unused words stay zero
u32 s[64] = { 0 };
for (int i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
{
s[idx] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[idx]);
}
/**
* loop
*/
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
{
pw_t tmp = PASTE_PW;
tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
// inner hash: SHA-256 over the rule-modified candidate
sha256_ctx_t ctx0;
sha256_init (&ctx0);
sha256_update_swap (&ctx0, tmp.i, tmp.pw_len);
sha256_final (&ctx0);
const u32 a = ctx0.h[0];
const u32 b = ctx0.h[1];
const u32 c = ctx0.h[2];
const u32 d = ctx0.h[3];
const u32 e = ctx0.h[4];
const u32 f = ctx0.h[5];
const u32 g = ctx0.h[6];
const u32 h = ctx0.h[7];
sha256_ctx_t ctx;
sha256_init (&ctx);
// Hex-encode the digest: every digest word becomes two output words of four
// characters each. Within an output word the digits of the more significant
// byte sit in the upper 16 bits, those of the next byte in the lower 16 bits.
w0[0] = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 | uint_to_hex_lower8_le ((a >> 24) & 255) << 16;
w0[1] = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 | uint_to_hex_lower8_le ((a >> 8) & 255) << 16;
w0[2] = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 | uint_to_hex_lower8_le ((b >> 24) & 255) << 16;
w0[3] = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 | uint_to_hex_lower8_le ((b >> 8) & 255) << 16;
w1[0] = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 | uint_to_hex_lower8_le ((c >> 24) & 255) << 16;
w1[1] = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 | uint_to_hex_lower8_le ((c >> 8) & 255) << 16;
w1[2] = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 | uint_to_hex_lower8_le ((d >> 24) & 255) << 16;
w1[3] = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 | uint_to_hex_lower8_le ((d >> 8) & 255) << 16;
w2[0] = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 | uint_to_hex_lower8_le ((e >> 24) & 255) << 16;
w2[1] = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 | uint_to_hex_lower8_le ((e >> 8) & 255) << 16;
w2[2] = uint_to_hex_lower8_le ((f >> 16) & 255) << 0 | uint_to_hex_lower8_le ((f >> 24) & 255) << 16;
w2[3] = uint_to_hex_lower8_le ((f >> 0) & 255) << 0 | uint_to_hex_lower8_le ((f >> 8) & 255) << 16;
w3[0] = uint_to_hex_lower8_le ((g >> 16) & 255) << 0 | uint_to_hex_lower8_le ((g >> 24) & 255) << 16;
w3[1] = uint_to_hex_lower8_le ((g >> 0) & 255) << 0 | uint_to_hex_lower8_le ((g >> 8) & 255) << 16;
w3[2] = uint_to_hex_lower8_le ((h >> 16) & 255) << 0 | uint_to_hex_lower8_le ((h >> 24) & 255) << 16;
w3[3] = uint_to_hex_lower8_le ((h >> 0) & 255) << 0 | uint_to_hex_lower8_le ((h >> 8) & 255) << 16;
// outer hash: SHA-256 over the 64 hex characters followed by the salt
sha256_update_64 (&ctx, w0, w1, w2, w3, 64);
sha256_update (&ctx, s, salt_len);
sha256_final (&ctx);
const u32 r0 = ctx.h[DGST_R0];
const u32 r1 = ctx.h[DGST_R1];
const u32 r2 = ctx.h[DGST_R2];
const u32 r3 = ctx.h[DGST_R3];
// NOTE(review): COMPARE_S_SCALAR is defined outside this view; presumably it
// compares the four words against search[] and marks a match
COMPARE_S_SCALAR (r0, r1, r2, r3);
}
}

File diff suppressed because it is too large Load Diff

@ -0,0 +1,259 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
//#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include M2S(INCLUDE_PATH/inc_vendor.h)
#include M2S(INCLUDE_PATH/inc_types.h)
#include M2S(INCLUDE_PATH/inc_platform.cl)
#include M2S(INCLUDE_PATH/inc_common.cl)
#include M2S(INCLUDE_PATH/inc_scalar.cl)
#include M2S(INCLUDE_PATH/inc_hash_sha256.cl)
#endif
// uint_to_hex_lower8_le (i): for each lane of i (VECT_SIZE lanes; i itself when
// VECT_SIZE == 1) fetch the l_bin2asc entry indexed by that lane and rebuild a
// value with make_u32x. The expansion names l_bin2asc directly, so a table with
// that name has to be in scope at every use site.
// NOTE(review): despite "lower" in the macro name, the kernels in this file
// fill l_bin2asc with uppercase digits ('A' - 10 + nibble).
#if VECT_SIZE == 1
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
#elif VECT_SIZE == 2
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
#elif VECT_SIZE == 4
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
#elif VECT_SIZE == 8
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
#elif VECT_SIZE == 16
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
#endif
// Kernel m20712_mxx, combined candidates (KERN_ATTR_BASIC).
// The candidate is pws[gid] followed by combs_buf[il_pos]. For each il_pos the
// kernel computes SHA-256 of the candidate, expands the 32-byte digest into 64
// ASCII hex characters via l_bin2asc, hashes those 64 characters followed by
// the salt with a second SHA-256 and passes four result words to
// COMPARE_M_SCALAR.
KERNEL_FQ void m20712_mxx (KERN_ATTR_BASIC ())
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
const u64 gid = get_global_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
// Entry i packs the two hex digits of byte value i as ASCII: the low-nibble
// digit in bits 0..7 and the high-nibble digit in bits 8..15. Digits above 9
// map to uppercase 'A'..'F'. Work-items fill the table cooperatively, each one
// handling every lsz-th entry starting at its lid.
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'A' - 10 + i0) << 0
| ((i1 < 10) ? '0' + i1 : 'A' - 10 + i1) << 8;
}
SYNC_THREADS ();
// the range check comes after SYNC_THREADS so that out-of-range work-items
// still take part in filling the table
if (gid >= GID_CNT) return;
/**
* base
*/
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
// private copy of the salt, one word per started 4 bytes, each word passed
// through hc_swap32_S; unused words stay zero
u32 s[64] = { 0 };
const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;
for (int i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
{
s[idx] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[idx]);
}
// ctx1 absorbs pws[gid] once; every loop iteration starts from a copy of it
sha256_ctx_t ctx1;
sha256_init (&ctx1);
sha256_update_global_swap (&ctx1, pws[gid].i, pws[gid].pw_len);
/**
* loop
*/
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
{
// inner hash: continue from the shared prefix state with combs_buf[il_pos]
sha256_ctx_t ctx0 = ctx1;
sha256_update_global_swap (&ctx0, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);
sha256_final (&ctx0);
const u32 a = ctx0.h[0];
const u32 b = ctx0.h[1];
const u32 c = ctx0.h[2];
const u32 d = ctx0.h[3];
const u32 e = ctx0.h[4];
const u32 f = ctx0.h[5];
const u32 g = ctx0.h[6];
const u32 h = ctx0.h[7];
sha256_ctx_t ctx;
sha256_init (&ctx);
// Hex-encode the digest: every digest word becomes two output words of four
// characters each. Within an output word the digits of the more significant
// byte sit in the upper 16 bits, those of the next byte in the lower 16 bits.
w0[0] = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 | uint_to_hex_lower8_le ((a >> 24) & 255) << 16;
w0[1] = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 | uint_to_hex_lower8_le ((a >> 8) & 255) << 16;
w0[2] = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 | uint_to_hex_lower8_le ((b >> 24) & 255) << 16;
w0[3] = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 | uint_to_hex_lower8_le ((b >> 8) & 255) << 16;
w1[0] = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 | uint_to_hex_lower8_le ((c >> 24) & 255) << 16;
w1[1] = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 | uint_to_hex_lower8_le ((c >> 8) & 255) << 16;
w1[2] = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 | uint_to_hex_lower8_le ((d >> 24) & 255) << 16;
w1[3] = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 | uint_to_hex_lower8_le ((d >> 8) & 255) << 16;
w2[0] = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 | uint_to_hex_lower8_le ((e >> 24) & 255) << 16;
w2[1] = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 | uint_to_hex_lower8_le ((e >> 8) & 255) << 16;
w2[2] = uint_to_hex_lower8_le ((f >> 16) & 255) << 0 | uint_to_hex_lower8_le ((f >> 24) & 255) << 16;
w2[3] = uint_to_hex_lower8_le ((f >> 0) & 255) << 0 | uint_to_hex_lower8_le ((f >> 8) & 255) << 16;
w3[0] = uint_to_hex_lower8_le ((g >> 16) & 255) << 0 | uint_to_hex_lower8_le ((g >> 24) & 255) << 16;
w3[1] = uint_to_hex_lower8_le ((g >> 0) & 255) << 0 | uint_to_hex_lower8_le ((g >> 8) & 255) << 16;
w3[2] = uint_to_hex_lower8_le ((h >> 16) & 255) << 0 | uint_to_hex_lower8_le ((h >> 24) & 255) << 16;
w3[3] = uint_to_hex_lower8_le ((h >> 0) & 255) << 0 | uint_to_hex_lower8_le ((h >> 8) & 255) << 16;
// outer hash: SHA-256 over the 64 hex characters followed by the salt
sha256_update_64 (&ctx, w0, w1, w2, w3, 64);
sha256_update (&ctx, s, salt_len);
sha256_final (&ctx);
const u32 r0 = ctx.h[DGST_R0];
const u32 r1 = ctx.h[DGST_R1];
const u32 r2 = ctx.h[DGST_R2];
const u32 r3 = ctx.h[DGST_R3];
// NOTE(review): COMPARE_M_SCALAR is defined outside this view; presumably it
// looks the four words up among all loaded digests and marks matches
COMPARE_M_SCALAR (r0, r1, r2, r3);
}
}
// Kernel m20712_sxx, combined candidates (KERN_ATTR_BASIC).
// Same computation as the _mxx kernel of this file - SHA-256 of pws[gid]
// followed by combs_buf[il_pos], hex-encoded, then SHA-256 of the 64 hex
// characters followed by the salt - but the digest words at
// DIGESTS_OFFSET_HOST are loaded into search[] first and the result goes to
// COMPARE_S_SCALAR.
KERNEL_FQ void m20712_sxx (KERN_ATTR_BASIC ())
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
const u64 gid = get_global_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
// Entry i packs the two hex digits of byte value i as ASCII: the low-nibble
// digit in bits 0..7 and the high-nibble digit in bits 8..15. Digits above 9
// map to uppercase 'A'..'F'. Work-items fill the table cooperatively, each one
// handling every lsz-th entry starting at its lid.
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'A' - 10 + i0) << 0
| ((i1 < 10) ? '0' + i1 : 'A' - 10 + i1) << 8;
}
SYNC_THREADS ();
// the range check comes after SYNC_THREADS so that out-of-range work-items
// still take part in filling the table
if (gid >= GID_CNT) return;
/**
* digest
*/
// NOTE(review): search is not referenced by name below; presumably the
// COMPARE_S_SCALAR expansion reads it - do not rename without checking
const u32 search[4] =
{
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
};
/**
* base
*/
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;
// private copy of the salt, one word per started 4 bytes, each word passed
// through hc_swap32_S; unused words stay zero
u32 s[64] = { 0 };
for (int i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
{
s[idx] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[idx]);
}
// ctx1 absorbs pws[gid] once; every loop iteration starts from a copy of it
sha256_ctx_t ctx1;
sha256_init (&ctx1);
sha256_update_global_swap (&ctx1, pws[gid].i, pws[gid].pw_len);
/**
* loop
*/
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
{
// inner hash: continue from the shared prefix state with combs_buf[il_pos]
sha256_ctx_t ctx0 = ctx1;
sha256_update_global_swap (&ctx0, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);
sha256_final (&ctx0);
const u32 a = ctx0.h[0];
const u32 b = ctx0.h[1];
const u32 c = ctx0.h[2];
const u32 d = ctx0.h[3];
const u32 e = ctx0.h[4];
const u32 f = ctx0.h[5];
const u32 g = ctx0.h[6];
const u32 h = ctx0.h[7];
sha256_ctx_t ctx;
sha256_init (&ctx);
// Hex-encode the digest: every digest word becomes two output words of four
// characters each. Within an output word the digits of the more significant
// byte sit in the upper 16 bits, those of the next byte in the lower 16 bits.
w0[0] = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 | uint_to_hex_lower8_le ((a >> 24) & 255) << 16;
w0[1] = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 | uint_to_hex_lower8_le ((a >> 8) & 255) << 16;
w0[2] = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 | uint_to_hex_lower8_le ((b >> 24) & 255) << 16;
w0[3] = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 | uint_to_hex_lower8_le ((b >> 8) & 255) << 16;
w1[0] = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 | uint_to_hex_lower8_le ((c >> 24) & 255) << 16;
w1[1] = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 | uint_to_hex_lower8_le ((c >> 8) & 255) << 16;
w1[2] = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 | uint_to_hex_lower8_le ((d >> 24) & 255) << 16;
w1[3] = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 | uint_to_hex_lower8_le ((d >> 8) & 255) << 16;
w2[0] = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 | uint_to_hex_lower8_le ((e >> 24) & 255) << 16;
w2[1] = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 | uint_to_hex_lower8_le ((e >> 8) & 255) << 16;
w2[2] = uint_to_hex_lower8_le ((f >> 16) & 255) << 0 | uint_to_hex_lower8_le ((f >> 24) & 255) << 16;
w2[3] = uint_to_hex_lower8_le ((f >> 0) & 255) << 0 | uint_to_hex_lower8_le ((f >> 8) & 255) << 16;
w3[0] = uint_to_hex_lower8_le ((g >> 16) & 255) << 0 | uint_to_hex_lower8_le ((g >> 24) & 255) << 16;
w3[1] = uint_to_hex_lower8_le ((g >> 0) & 255) << 0 | uint_to_hex_lower8_le ((g >> 8) & 255) << 16;
w3[2] = uint_to_hex_lower8_le ((h >> 16) & 255) << 0 | uint_to_hex_lower8_le ((h >> 24) & 255) << 16;
w3[3] = uint_to_hex_lower8_le ((h >> 0) & 255) << 0 | uint_to_hex_lower8_le ((h >> 8) & 255) << 16;
// outer hash: SHA-256 over the 64 hex characters followed by the salt
sha256_update_64 (&ctx, w0, w1, w2, w3, 64);
sha256_update (&ctx, s, salt_len);
sha256_final (&ctx);
const u32 r0 = ctx.h[DGST_R0];
const u32 r1 = ctx.h[DGST_R1];
const u32 r2 = ctx.h[DGST_R2];
const u32 r3 = ctx.h[DGST_R3];
// NOTE(review): COMPARE_S_SCALAR is defined outside this view; presumably it
// compares the four words against search[] and marks a match
COMPARE_S_SCALAR (r0, r1, r2, r3);
}
}

File diff suppressed because it is too large Load Diff

@ -0,0 +1,285 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include M2S(INCLUDE_PATH/inc_vendor.h)
#include M2S(INCLUDE_PATH/inc_types.h)
#include M2S(INCLUDE_PATH/inc_platform.cl)
#include M2S(INCLUDE_PATH/inc_common.cl)
#include M2S(INCLUDE_PATH/inc_simd.cl)
#include M2S(INCLUDE_PATH/inc_hash_sha256.cl)
#endif
// uint_to_hex_lower8_le (i): for each lane of i (VECT_SIZE lanes; i itself when
// VECT_SIZE == 1) fetch the l_bin2asc entry indexed by that lane and rebuild a
// u32x with make_u32x. The expansion names l_bin2asc directly, so a table with
// that name has to be in scope at every use site.
// NOTE(review): despite "lower" in the macro name, the kernels in this file
// fill l_bin2asc with uppercase digits ('A' - 10 + nibble).
#if VECT_SIZE == 1
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
#elif VECT_SIZE == 2
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
#elif VECT_SIZE == 4
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
#elif VECT_SIZE == 8
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
#elif VECT_SIZE == 16
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
#endif
// Kernel m20712_mxx, vectorized candidates (KERN_ATTR_VECTOR).
// Each iteration ORs one words_buf_r vector into the first password word, so
// VECT_SIZE candidates are processed at once. For them it computes SHA-256 of
// the password, expands the 32-byte digest into 64 ASCII hex characters via
// l_bin2asc, hashes those 64 characters followed by the salt with a second
// SHA-256 and passes four result vectors to COMPARE_M_SIMD.
KERNEL_FQ void m20712_mxx (KERN_ATTR_VECTOR ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
// Entry i packs the two hex digits of byte value i as ASCII: the low-nibble
// digit in bits 0..7 and the high-nibble digit in bits 8..15. Digits above 9
// map to uppercase 'A'..'F'. Work-items fill the table cooperatively, each one
// handling every lsz-th entry starting at its lid.
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'A' - 10 + i0) << 0
| ((i1 < 10) ? '0' + i1 : 'A' - 10 + i1) << 8;
}
SYNC_THREADS ();
// the range check comes after SYNC_THREADS so that out-of-range work-items
// still take part in filling the table
if (gid >= GID_CNT) return;
/**
* base
*/
u32x _w0[4];
u32x _w1[4];
u32x _w2[4];
u32x _w3[4];
const u32 pw_len = pws[gid].pw_len;
// private copy of the password words, copied as stored (no byte swap here);
// unused words stay zero
u32x w[64] = { 0 };
for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
{
w[idx] = pws[gid].i[idx];
}
const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;
// private copy of the salt, one word per started 4 bytes, each word passed
// through hc_swap32_S; unused words stay zero
u32x s[64] = { 0 };
for (int i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
{
s[idx] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[idx]);
}
/**
* loop
*/
// w0l is the first password word as loaded; it is kept so w[0] can be rebuilt
// from it on every iteration
u32x w0l = w[0];
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
{
// NOTE(review): words_buf_r presumably holds the varying part of the first
// password word for each candidate - confirm against the host side
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32x w0 = w0l | w0r;
w[0] = w0;
// inner hash: SHA-256 over the pw_len bytes of the candidate
sha256_ctx_vector_t ctx0;
sha256_init_vector (&ctx0);
sha256_update_vector (&ctx0, w, pw_len);
sha256_final_vector (&ctx0);
const u32x a = ctx0.h[0];
const u32x b = ctx0.h[1];
const u32x c = ctx0.h[2];
const u32x d = ctx0.h[3];
const u32x e = ctx0.h[4];
const u32x f = ctx0.h[5];
const u32x g = ctx0.h[6];
const u32x h = ctx0.h[7];
sha256_ctx_vector_t ctx;
sha256_init_vector (&ctx);
// Hex-encode the digest: every digest word becomes two output words of four
// characters each. Within an output word the digits of the more significant
// byte sit in the upper 16 bits, those of the next byte in the lower 16 bits.
_w0[0] = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 | uint_to_hex_lower8_le ((a >> 24) & 255) << 16;
_w0[1] = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 | uint_to_hex_lower8_le ((a >> 8) & 255) << 16;
_w0[2] = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 | uint_to_hex_lower8_le ((b >> 24) & 255) << 16;
_w0[3] = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 | uint_to_hex_lower8_le ((b >> 8) & 255) << 16;
_w1[0] = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 | uint_to_hex_lower8_le ((c >> 24) & 255) << 16;
_w1[1] = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 | uint_to_hex_lower8_le ((c >> 8) & 255) << 16;
_w1[2] = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 | uint_to_hex_lower8_le ((d >> 24) & 255) << 16;
_w1[3] = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 | uint_to_hex_lower8_le ((d >> 8) & 255) << 16;
_w2[0] = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 | uint_to_hex_lower8_le ((e >> 24) & 255) << 16;
_w2[1] = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 | uint_to_hex_lower8_le ((e >> 8) & 255) << 16;
_w2[2] = uint_to_hex_lower8_le ((f >> 16) & 255) << 0 | uint_to_hex_lower8_le ((f >> 24) & 255) << 16;
_w2[3] = uint_to_hex_lower8_le ((f >> 0) & 255) << 0 | uint_to_hex_lower8_le ((f >> 8) & 255) << 16;
_w3[0] = uint_to_hex_lower8_le ((g >> 16) & 255) << 0 | uint_to_hex_lower8_le ((g >> 24) & 255) << 16;
_w3[1] = uint_to_hex_lower8_le ((g >> 0) & 255) << 0 | uint_to_hex_lower8_le ((g >> 8) & 255) << 16;
_w3[2] = uint_to_hex_lower8_le ((h >> 16) & 255) << 0 | uint_to_hex_lower8_le ((h >> 24) & 255) << 16;
_w3[3] = uint_to_hex_lower8_le ((h >> 0) & 255) << 0 | uint_to_hex_lower8_le ((h >> 8) & 255) << 16;
// outer hash: SHA-256 over the 64 hex characters followed by the salt
sha256_update_vector_64 (&ctx, _w0, _w1, _w2, _w3, 64);
sha256_update_vector (&ctx, s, salt_len);
sha256_final_vector (&ctx);
const u32x r0 = ctx.h[DGST_R0];
const u32x r1 = ctx.h[DGST_R1];
const u32x r2 = ctx.h[DGST_R2];
const u32x r3 = ctx.h[DGST_R3];
// NOTE(review): COMPARE_M_SIMD is defined outside this view; presumably it
// looks each lane up among all loaded digests and marks matches
COMPARE_M_SIMD (r0, r1, r2, r3);
}
}
/**
 * m20712_sxx: for every candidate this kernel computes SHA-256 over the
 * password, expands the eight digest words into 64 ASCII hex characters,
 * hashes those characters followed by the salt with a second SHA-256 and
 * hands four words of that result to COMPARE_S_SIMD.
 *
 * NOTE(review): COMPARE_S_SIMD and uint_to_hex_lower8_le are defined outside
 * this block; they presumably rely on the local names `search`, `il_pos`,
 * `gid` and `l_bin2asc` - do not rename those without checking the macros.
 */
KERNEL_FQ void m20712_sxx (KERN_ATTR_VECTOR ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
// 256-entry table mapping a byte value to its two hex characters:
// low-nibble character in bits 0..7, high-nibble character in bits 8..15.
// Digits above 9 use 'A'..'F' here, even though the lookup macro is named
// "lower".
LOCAL_VK u32 l_bin2asc[256];
// Work-items of the group fill the table cooperatively, lsz entries apart.
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'A' - 10 + i0) << 0
| ((i1 < 10) ? '0' + i1 : 'A' - 10 + i1) << 8;
}
// The table must be complete before any work-item reads it; the bounds
// check comes after the barrier so every work-item reaches SYNC_THREADS.
SYNC_THREADS ();
if (gid >= GID_CNT) return;
/**
* digest
*/
// Target digest words; not referenced directly below, presumably consumed
// by COMPARE_S_SIMD.
const u32 search[4] =
{
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
};
/**
* base
*/
// Scratch words that receive the 64 hex characters of the inner digest.
u32x _w0[4];
u32x _w1[4];
u32x _w2[4];
u32x _w3[4];
const u32 pw_len = pws[gid].pw_len;
// Copy the password words (one u32 per 4 bytes of pw_len) into a zeroed buffer.
u32x w[64] = { 0 };
for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
{
w[idx] = pws[gid].i[idx];
}
const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;
// Copy the salt words, byte-swapping each one with hc_swap32_S.
u32x s[64] = { 0 };
for (int i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
{
s[idx] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[idx]);
}
/**
* loop
*/
// w[0] is rebuilt on every iteration from this saved base word.
u32x w0l = w[0];
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
{
// Combine the base word with the per-iteration word from words_buf_r.
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32x w0 = w0l | w0r;
w[0] = w0;
// Inner hash: SHA-256 over the candidate password.
sha256_ctx_vector_t ctx0;
sha256_init_vector (&ctx0);
sha256_update_vector (&ctx0, w, pw_len);
sha256_final_vector (&ctx0);
const u32x a = ctx0.h[0];
const u32x b = ctx0.h[1];
const u32x c = ctx0.h[2];
const u32x d = ctx0.h[3];
const u32x e = ctx0.h[4];
const u32x f = ctx0.h[5];
const u32x g = ctx0.h[6];
const u32x h = ctx0.h[7];
// Outer hash: SHA-256 over hex (inner digest) followed by the salt.
sha256_ctx_vector_t ctx;
sha256_init_vector (&ctx);
// Each digest word becomes two output words (8 hex characters); every
// table lookup yields the two characters of one digest byte.
_w0[0] = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 | uint_to_hex_lower8_le ((a >> 24) & 255) << 16;
_w0[1] = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 | uint_to_hex_lower8_le ((a >> 8) & 255) << 16;
_w0[2] = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 | uint_to_hex_lower8_le ((b >> 24) & 255) << 16;
_w0[3] = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 | uint_to_hex_lower8_le ((b >> 8) & 255) << 16;
_w1[0] = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 | uint_to_hex_lower8_le ((c >> 24) & 255) << 16;
_w1[1] = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 | uint_to_hex_lower8_le ((c >> 8) & 255) << 16;
_w1[2] = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 | uint_to_hex_lower8_le ((d >> 24) & 255) << 16;
_w1[3] = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 | uint_to_hex_lower8_le ((d >> 8) & 255) << 16;
_w2[0] = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 | uint_to_hex_lower8_le ((e >> 24) & 255) << 16;
_w2[1] = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 | uint_to_hex_lower8_le ((e >> 8) & 255) << 16;
_w2[2] = uint_to_hex_lower8_le ((f >> 16) & 255) << 0 | uint_to_hex_lower8_le ((f >> 24) & 255) << 16;
_w2[3] = uint_to_hex_lower8_le ((f >> 0) & 255) << 0 | uint_to_hex_lower8_le ((f >> 8) & 255) << 16;
_w3[0] = uint_to_hex_lower8_le ((g >> 16) & 255) << 0 | uint_to_hex_lower8_le ((g >> 24) & 255) << 16;
_w3[1] = uint_to_hex_lower8_le ((g >> 0) & 255) << 0 | uint_to_hex_lower8_le ((g >> 8) & 255) << 16;
_w3[2] = uint_to_hex_lower8_le ((h >> 16) & 255) << 0 | uint_to_hex_lower8_le ((h >> 24) & 255) << 16;
_w3[3] = uint_to_hex_lower8_le ((h >> 0) & 255) << 0 | uint_to_hex_lower8_le ((h >> 8) & 255) << 16;
// 64 = 8 digest words * 8 hex characters.
sha256_update_vector_64 (&ctx, _w0, _w1, _w2, _w3, 64);
sha256_update_vector (&ctx, s, salt_len);
sha256_final_vector (&ctx);
const u32x r0 = ctx.h[DGST_R0];
const u32x r1 = ctx.h[DGST_R1];
const u32x r2 = ctx.h[DGST_R2];
const u32x r3 = ctx.h[DGST_R3];
COMPARE_S_SIMD (r0, r1, r2, r3);
}
}

@ -0,0 +1,282 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
//#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include M2S(INCLUDE_PATH/inc_vendor.h)
#include M2S(INCLUDE_PATH/inc_types.h)
#include M2S(INCLUDE_PATH/inc_platform.cl)
#include M2S(INCLUDE_PATH/inc_common.cl)
#include M2S(INCLUDE_PATH/inc_rp.h)
#include M2S(INCLUDE_PATH/inc_rp.cl)
#include M2S(INCLUDE_PATH/inc_scalar.cl)
#include M2S(INCLUDE_PATH/inc_hash_md5.cl)
#include M2S(INCLUDE_PATH/inc_hash_sha1.cl)
#endif
// uint_to_hex_lower8 (i): looks up l_bin2asc[] for each lane of `i` and
// repacks the results with make_u32x. The expansion names `l_bin2asc`
// directly, so it is only usable where a table of that name is in scope
// (every kernel in this file declares one). The call sites in this file pass
// values already masked with & 255, matching the 256-entry table.
#if VECT_SIZE == 1
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
#elif VECT_SIZE == 2
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
#elif VECT_SIZE == 4
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
#elif VECT_SIZE == 8
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
#elif VECT_SIZE == 16
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
#endif
// Per-hash salt pair read by the m21310 kernels through esalt_bufs.
// NOTE(review): the layout is presumably shared with host code - do not
// reorder or resize fields without checking the host side.
typedef struct md5_double_salt
{
u32 salt1_buf[64]; // fed to md5_update_global ahead of the hex-encoded SHA-1 digest
int salt1_len; // length passed along with salt1_buf (presumably bytes - confirm)
u32 salt2_buf[64]; // fed to sha1_update_global ahead of the password
int salt2_len; // length passed along with salt2_buf (presumably bytes - confirm)
} md5_double_salt_t;
/**
 * m21310_mxx (rule attack): for the base password of this work-item and each
 * of the IL_CNT rules, computes
 *
 *   md5 (salt1 . hex (sha1 (salt2 . rule (pass))))
 *
 * and hands four words of the MD5 state to COMPARE_M_SCALAR.
 *
 * Fix: the rule selected by il_pos is now applied to the pasted password
 * before it is hashed, exactly as m21310_sxx in this file does. Without that
 * call every iteration hashed the unmodified base word, so rule-generated
 * candidates were never tested by this kernel.
 *
 * NOTE(review): COPY_PW, PASTE_PW and COMPARE_M_SCALAR are defined elsewhere
 * and presumably depend on local names used here (gid, il_pos, ...).
 */
KERNEL_FQ void m21310_mxx (KERN_ATTR_RULES_ESALT (md5_double_salt_t))
{
  /**
   * modifier
   */

  const u64 gid = get_global_id (0);
  const u64 lid = get_local_id (0);
  const u64 lsz = get_local_size (0);

  /**
   * bin2asc table
   */

  // Byte value -> two lowercase hex characters. The high-nibble character
  // sits in bits 0..7 and the low-nibble character in bits 8..15.
  LOCAL_VK u32 l_bin2asc[256];

  // Work-items of the group fill the table cooperatively, lsz entries apart.
  for (u32 i = lid; i < 256; i += lsz)
  {
    const u32 i0 = (i >> 0) & 15;
    const u32 i1 = (i >> 4) & 15;

    l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
                 | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
  }

  // The bounds check comes after the barrier so that every work-item of the
  // group reaches SYNC_THREADS.
  SYNC_THREADS ();

  if (gid >= GID_CNT) return;

  /**
   * base
   */

  COPY_PW (pws[gid]);

  // SHA-1 state pre-loaded with salt2; copied for every candidate.
  sha1_ctx_t ctx00;

  sha1_init (&ctx00);

  sha1_update_global (&ctx00, esalt_bufs[DIGESTS_OFFSET_HOST].salt2_buf, esalt_bufs[DIGESTS_OFFSET_HOST].salt2_len);

  // MD5 state pre-loaded with salt1; copied for every candidate.
  md5_ctx_t ctx11;

  md5_init (&ctx11);

  md5_update_global (&ctx11, esalt_bufs[DIGESTS_OFFSET_HOST].salt1_buf, esalt_bufs[DIGESTS_OFFSET_HOST].salt1_len);

  /**
   * loop
   */

  u32 w0[4];
  u32 w1[4];
  u32 w2[4];
  u32 w3[4];

  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
  {
    pw_t tmp = PASTE_PW;

    // Mutate the base word with the rule for this iteration.
    tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);

    // Inner hash: sha1 (salt2 . candidate).
    sha1_ctx_t ctx0 = ctx00;

    sha1_update_swap (&ctx0, tmp.i, tmp.pw_len);

    sha1_final (&ctx0);

    const u32 a = ctx0.h[0];
    const u32 b = ctx0.h[1];
    const u32 c = ctx0.h[2];
    const u32 d = ctx0.h[3];
    const u32 e = ctx0.h[4];

    // Outer hash: md5 (salt1 . hex (inner digest)).
    md5_ctx_t ctx = ctx11;

    // Each SHA-1 word yields 8 hex characters (two output words), starting
    // with its most significant byte.
    w0[0] = uint_to_hex_lower8 ((a >> 24) & 255) <<  0
          | uint_to_hex_lower8 ((a >> 16) & 255) << 16;
    w0[1] = uint_to_hex_lower8 ((a >>  8) & 255) <<  0
          | uint_to_hex_lower8 ((a >>  0) & 255) << 16;
    w0[2] = uint_to_hex_lower8 ((b >> 24) & 255) <<  0
          | uint_to_hex_lower8 ((b >> 16) & 255) << 16;
    w0[3] = uint_to_hex_lower8 ((b >>  8) & 255) <<  0
          | uint_to_hex_lower8 ((b >>  0) & 255) << 16;
    w1[0] = uint_to_hex_lower8 ((c >> 24) & 255) <<  0
          | uint_to_hex_lower8 ((c >> 16) & 255) << 16;
    w1[1] = uint_to_hex_lower8 ((c >>  8) & 255) <<  0
          | uint_to_hex_lower8 ((c >>  0) & 255) << 16;
    w1[2] = uint_to_hex_lower8 ((d >> 24) & 255) <<  0
          | uint_to_hex_lower8 ((d >> 16) & 255) << 16;
    w1[3] = uint_to_hex_lower8 ((d >>  8) & 255) <<  0
          | uint_to_hex_lower8 ((d >>  0) & 255) << 16;
    w2[0] = uint_to_hex_lower8 ((e >> 24) & 255) <<  0
          | uint_to_hex_lower8 ((e >> 16) & 255) << 16;
    w2[1] = uint_to_hex_lower8 ((e >>  8) & 255) <<  0
          | uint_to_hex_lower8 ((e >>  0) & 255) << 16;
    w2[2] = 0;
    w2[3] = 0;
    w3[0] = 0;
    w3[1] = 0;
    w3[2] = 0;
    w3[3] = 0;

    // 40 = 5 digest words * 8 hex characters.
    md5_update_64 (&ctx, w0, w1, w2, w3, 40);

    md5_final (&ctx);

    const u32 r0 = ctx.h[DGST_R0];
    const u32 r1 = ctx.h[DGST_R1];
    const u32 r2 = ctx.h[DGST_R2];
    const u32 r3 = ctx.h[DGST_R3];

    COMPARE_M_SCALAR (r0, r1, r2, r3);
  }
}
/**
 * m21310_sxx (rule attack): for the base password of this work-item and each
 * of the IL_CNT rules, computes
 *   md5 (salt1 . hex (sha1 (salt2 . rule (pass))))
 * and hands four words of the MD5 state to COMPARE_S_SCALAR.
 *
 * NOTE(review): COPY_PW, PASTE_PW and COMPARE_S_SCALAR are defined elsewhere
 * and presumably depend on local names used here (search, gid, il_pos, ...).
 */
KERNEL_FQ void m21310_sxx (KERN_ATTR_RULES_ESALT (md5_double_salt_t))
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
// Byte value -> two lowercase hex characters. The high-nibble character sits
// in bits 0..7 and the low-nibble character in bits 8..15.
LOCAL_VK u32 l_bin2asc[256];
// Work-items of the group fill the table cooperatively, lsz entries apart.
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
}
// The bounds check comes after the barrier so that every work-item of the
// group reaches SYNC_THREADS.
SYNC_THREADS ();
if (gid >= GID_CNT) return;
/**
* digest
*/
// Target digest words; not referenced directly below, presumably consumed
// by COMPARE_S_SCALAR.
const u32 search[4] =
{
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
};
/**
* base
*/
COPY_PW (pws[gid]);
// SHA-1 state pre-loaded with salt2; copied for every candidate.
sha1_ctx_t ctx00;
sha1_init(&ctx00);
sha1_update_global (&ctx00, esalt_bufs[DIGESTS_OFFSET_HOST].salt2_buf, esalt_bufs[DIGESTS_OFFSET_HOST].salt2_len);
// MD5 state pre-loaded with salt1; copied for every candidate.
md5_ctx_t ctx11;
md5_init (&ctx11);
md5_update_global (&ctx11, esalt_bufs[DIGESTS_OFFSET_HOST].salt1_buf, esalt_bufs[DIGESTS_OFFSET_HOST].salt1_len);
/**
* loop
*/
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
{
pw_t tmp = PASTE_PW;
// Mutate the base word with the rule for this iteration.
tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
// Inner hash: sha1 (salt2 . candidate).
sha1_ctx_t ctx0 = ctx00;
sha1_update_swap (&ctx0, tmp.i, tmp.pw_len);
sha1_final (&ctx0);
const u32 a = ctx0.h[0];
const u32 b = ctx0.h[1];
const u32 c = ctx0.h[2];
const u32 d = ctx0.h[3];
const u32 e = ctx0.h[4];
// Outer hash: md5 (salt1 . hex (inner digest)).
md5_ctx_t ctx = ctx11;
// Each SHA-1 word yields 8 hex characters (two output words), starting with
// its most significant byte.
w0[0] = uint_to_hex_lower8 ((a >> 24) & 255) << 0
| uint_to_hex_lower8 ((a >> 16) & 255) << 16;
w0[1] = uint_to_hex_lower8 ((a >> 8) & 255) << 0
| uint_to_hex_lower8 ((a >> 0) & 255) << 16;
w0[2] = uint_to_hex_lower8 ((b >> 24) & 255) << 0
| uint_to_hex_lower8 ((b >> 16) & 255) << 16;
w0[3] = uint_to_hex_lower8 ((b >> 8) & 255) << 0
| uint_to_hex_lower8 ((b >> 0) & 255) << 16;
w1[0] = uint_to_hex_lower8 ((c >> 24) & 255) << 0
| uint_to_hex_lower8 ((c >> 16) & 255) << 16;
w1[1] = uint_to_hex_lower8 ((c >> 8) & 255) << 0
| uint_to_hex_lower8 ((c >> 0) & 255) << 16;
w1[2] = uint_to_hex_lower8 ((d >> 24) & 255) << 0
| uint_to_hex_lower8 ((d >> 16) & 255) << 16;
w1[3] = uint_to_hex_lower8 ((d >> 8) & 255) << 0
| uint_to_hex_lower8 ((d >> 0) & 255) << 16;
w2[0] = uint_to_hex_lower8 ((e >> 24) & 255) << 0
| uint_to_hex_lower8 ((e >> 16) & 255) << 16;
w2[1] = uint_to_hex_lower8 ((e >> 8) & 255) << 0
| uint_to_hex_lower8 ((e >> 0) & 255) << 16;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
// 40 = 5 digest words * 8 hex characters.
md5_update_64 (&ctx, w0, w1, w2, w3, 40);
md5_final (&ctx);
const u32 r0 = ctx.h[DGST_R0];
const u32 r1 = ctx.h[DGST_R1];
const u32 r2 = ctx.h[DGST_R2];
const u32 r3 = ctx.h[DGST_R3];
COMPARE_S_SCALAR (r0, r1, r2, r3);
}
}

@ -0,0 +1,275 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
//#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include M2S(INCLUDE_PATH/inc_vendor.h)
#include M2S(INCLUDE_PATH/inc_types.h)
#include M2S(INCLUDE_PATH/inc_platform.cl)
#include M2S(INCLUDE_PATH/inc_common.cl)
#include M2S(INCLUDE_PATH/inc_scalar.cl)
#include M2S(INCLUDE_PATH/inc_hash_md5.cl)
#include M2S(INCLUDE_PATH/inc_hash_sha1.cl)
#endif
// uint_to_hex_lower8 (i): looks up l_bin2asc[] for each lane of `i` and
// repacks the results with make_u32x. The expansion names `l_bin2asc`
// directly, so it is only usable where a table of that name is in scope
// (every kernel in this file declares one). The call sites in this file pass
// values already masked with & 255, matching the 256-entry table.
#if VECT_SIZE == 1
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
#elif VECT_SIZE == 2
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
#elif VECT_SIZE == 4
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
#elif VECT_SIZE == 8
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
#elif VECT_SIZE == 16
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
#endif
// Per-hash salt pair read by the m21310 kernels through esalt_bufs.
// NOTE(review): the layout is presumably shared with host code - do not
// reorder or resize fields without checking the host side.
typedef struct md5_double_salt
{
u32 salt1_buf[64]; // fed to md5_update_global ahead of the hex-encoded SHA-1 digest
int salt1_len; // length passed along with salt1_buf (presumably bytes - confirm)
u32 salt2_buf[64]; // fed to sha1_update_global ahead of the password
int salt2_len; // length passed along with salt2_buf (presumably bytes - confirm)
} md5_double_salt_t;
/**
 * m21310_mxx (combinator attack): for the left word of this work-item and
 * each of the IL_CNT right-hand words in combs_buf, computes
 *
 *   md5 (salt1 . hex (sha1 (salt2 . left . right)))
 *
 * and hands four words of the MD5 state to COMPARE_M_SCALAR.
 *
 * Fix: the esalt type argument is now the typedef name md5_double_salt_t
 * instead of the bare struct tag md5_double_salt. A tag on its own is not a
 * type name in C-based OpenCL C; the typedef names the same struct and is
 * valid wherever the tag was.
 *
 * NOTE(review): COMPARE_M_SCALAR is defined elsewhere and presumably depends
 * on local names used here (gid, il_pos, ...).
 */
KERNEL_FQ void m21310_mxx (KERN_ATTR_ESALT (md5_double_salt_t))
{
  /**
   * modifier
   */

  const u64 gid = get_global_id (0);
  const u64 lid = get_local_id (0);
  const u64 lsz = get_local_size (0);

  /**
   * bin2asc table
   */

  // Byte value -> two lowercase hex characters. The high-nibble character
  // sits in bits 0..7 and the low-nibble character in bits 8..15.
  LOCAL_VK u32 l_bin2asc[256];

  // Work-items of the group fill the table cooperatively, lsz entries apart.
  for (u32 i = lid; i < 256; i += lsz)
  {
    const u32 i0 = (i >> 0) & 15;
    const u32 i1 = (i >> 4) & 15;

    l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
                 | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
  }

  // The bounds check comes after the barrier so that every work-item of the
  // group reaches SYNC_THREADS.
  SYNC_THREADS ();

  if (gid >= GID_CNT) return;

  /**
   * base
   */

  // SHA-1 state pre-loaded with salt2 and the left word; copied per candidate.
  sha1_ctx_t ctx00;

  sha1_init (&ctx00);

  sha1_update_global (&ctx00, esalt_bufs[DIGESTS_OFFSET_HOST].salt2_buf, esalt_bufs[DIGESTS_OFFSET_HOST].salt2_len);

  sha1_update_global_swap (&ctx00, pws[gid].i, pws[gid].pw_len);

  // MD5 state pre-loaded with salt1; copied per candidate.
  md5_ctx_t ctx11;

  md5_init (&ctx11);

  md5_update_global (&ctx11, esalt_bufs[DIGESTS_OFFSET_HOST].salt1_buf, esalt_bufs[DIGESTS_OFFSET_HOST].salt1_len);

  /**
   * loop
   */

  u32 w0[4];
  u32 w1[4];
  u32 w2[4];
  u32 w3[4];

  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
  {
    // Inner hash: append the right-hand word and finish SHA-1.
    sha1_ctx_t ctx0 = ctx00;

    sha1_update_global_swap (&ctx0, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);

    sha1_final (&ctx0);

    const u32 a = ctx0.h[0];
    const u32 b = ctx0.h[1];
    const u32 c = ctx0.h[2];
    const u32 d = ctx0.h[3];
    const u32 e = ctx0.h[4];

    // Outer hash: md5 (salt1 . hex (inner digest)).
    md5_ctx_t ctx = ctx11;

    // Each SHA-1 word yields 8 hex characters (two output words), starting
    // with its most significant byte.
    w0[0] = uint_to_hex_lower8 ((a >> 24) & 255) <<  0
          | uint_to_hex_lower8 ((a >> 16) & 255) << 16;
    w0[1] = uint_to_hex_lower8 ((a >>  8) & 255) <<  0
          | uint_to_hex_lower8 ((a >>  0) & 255) << 16;
    w0[2] = uint_to_hex_lower8 ((b >> 24) & 255) <<  0
          | uint_to_hex_lower8 ((b >> 16) & 255) << 16;
    w0[3] = uint_to_hex_lower8 ((b >>  8) & 255) <<  0
          | uint_to_hex_lower8 ((b >>  0) & 255) << 16;
    w1[0] = uint_to_hex_lower8 ((c >> 24) & 255) <<  0
          | uint_to_hex_lower8 ((c >> 16) & 255) << 16;
    w1[1] = uint_to_hex_lower8 ((c >>  8) & 255) <<  0
          | uint_to_hex_lower8 ((c >>  0) & 255) << 16;
    w1[2] = uint_to_hex_lower8 ((d >> 24) & 255) <<  0
          | uint_to_hex_lower8 ((d >> 16) & 255) << 16;
    w1[3] = uint_to_hex_lower8 ((d >>  8) & 255) <<  0
          | uint_to_hex_lower8 ((d >>  0) & 255) << 16;
    w2[0] = uint_to_hex_lower8 ((e >> 24) & 255) <<  0
          | uint_to_hex_lower8 ((e >> 16) & 255) << 16;
    w2[1] = uint_to_hex_lower8 ((e >>  8) & 255) <<  0
          | uint_to_hex_lower8 ((e >>  0) & 255) << 16;
    w2[2] = 0;
    w2[3] = 0;
    w3[0] = 0;
    w3[1] = 0;
    w3[2] = 0;
    w3[3] = 0;

    // 40 = 5 digest words * 8 hex characters.
    md5_update_64 (&ctx, w0, w1, w2, w3, 40);

    md5_final (&ctx);

    const u32 r0 = ctx.h[DGST_R0];
    const u32 r1 = ctx.h[DGST_R1];
    const u32 r2 = ctx.h[DGST_R2];
    const u32 r3 = ctx.h[DGST_R3];

    COMPARE_M_SCALAR (r0, r1, r2, r3);
  }
}
/**
 * m21310_sxx (combinator attack): for the left word of this work-item and
 * each of the IL_CNT right-hand words in combs_buf, computes
 *
 *   md5 (salt1 . hex (sha1 (salt2 . left . right)))
 *
 * and hands four words of the MD5 state to COMPARE_S_SCALAR.
 *
 * Fix: the esalt type argument is now the typedef name md5_double_salt_t
 * instead of the bare struct tag md5_double_salt. A tag on its own is not a
 * type name in C-based OpenCL C; the typedef names the same struct and is
 * valid wherever the tag was.
 *
 * NOTE(review): COMPARE_S_SCALAR is defined elsewhere and presumably depends
 * on local names used here (search, gid, il_pos, ...).
 */
KERNEL_FQ void m21310_sxx (KERN_ATTR_ESALT (md5_double_salt_t))
{
  /**
   * modifier
   */

  const u64 gid = get_global_id (0);
  const u64 lid = get_local_id (0);
  const u64 lsz = get_local_size (0);

  /**
   * bin2asc table
   */

  // Byte value -> two lowercase hex characters. The high-nibble character
  // sits in bits 0..7 and the low-nibble character in bits 8..15.
  LOCAL_VK u32 l_bin2asc[256];

  // Work-items of the group fill the table cooperatively, lsz entries apart.
  for (u32 i = lid; i < 256; i += lsz)
  {
    const u32 i0 = (i >> 0) & 15;
    const u32 i1 = (i >> 4) & 15;

    l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
                 | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
  }

  // The bounds check comes after the barrier so that every work-item of the
  // group reaches SYNC_THREADS.
  SYNC_THREADS ();

  if (gid >= GID_CNT) return;

  /**
   * digest
   */

  // Target digest words; not referenced directly below, presumably consumed
  // by COMPARE_S_SCALAR.
  const u32 search[4] =
  {
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
  };

  /**
   * base
   */

  // SHA-1 state pre-loaded with salt2 and the left word; copied per candidate.
  sha1_ctx_t ctx00;

  sha1_init (&ctx00);

  sha1_update_global (&ctx00, esalt_bufs[DIGESTS_OFFSET_HOST].salt2_buf, esalt_bufs[DIGESTS_OFFSET_HOST].salt2_len);

  sha1_update_global_swap (&ctx00, pws[gid].i, pws[gid].pw_len);

  // MD5 state pre-loaded with salt1; copied per candidate.
  md5_ctx_t ctx11;

  md5_init (&ctx11);

  md5_update_global (&ctx11, esalt_bufs[DIGESTS_OFFSET_HOST].salt1_buf, esalt_bufs[DIGESTS_OFFSET_HOST].salt1_len);

  /**
   * loop
   */

  u32 w0[4];
  u32 w1[4];
  u32 w2[4];
  u32 w3[4];

  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
  {
    // Inner hash: append the right-hand word and finish SHA-1.
    sha1_ctx_t ctx0 = ctx00;

    sha1_update_global_swap (&ctx0, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);

    sha1_final (&ctx0);

    const u32 a = ctx0.h[0];
    const u32 b = ctx0.h[1];
    const u32 c = ctx0.h[2];
    const u32 d = ctx0.h[3];
    const u32 e = ctx0.h[4];

    // Outer hash: md5 (salt1 . hex (inner digest)).
    md5_ctx_t ctx = ctx11;

    // Each SHA-1 word yields 8 hex characters (two output words), starting
    // with its most significant byte.
    w0[0] = uint_to_hex_lower8 ((a >> 24) & 255) <<  0
          | uint_to_hex_lower8 ((a >> 16) & 255) << 16;
    w0[1] = uint_to_hex_lower8 ((a >>  8) & 255) <<  0
          | uint_to_hex_lower8 ((a >>  0) & 255) << 16;
    w0[2] = uint_to_hex_lower8 ((b >> 24) & 255) <<  0
          | uint_to_hex_lower8 ((b >> 16) & 255) << 16;
    w0[3] = uint_to_hex_lower8 ((b >>  8) & 255) <<  0
          | uint_to_hex_lower8 ((b >>  0) & 255) << 16;
    w1[0] = uint_to_hex_lower8 ((c >> 24) & 255) <<  0
          | uint_to_hex_lower8 ((c >> 16) & 255) << 16;
    w1[1] = uint_to_hex_lower8 ((c >>  8) & 255) <<  0
          | uint_to_hex_lower8 ((c >>  0) & 255) << 16;
    w1[2] = uint_to_hex_lower8 ((d >> 24) & 255) <<  0
          | uint_to_hex_lower8 ((d >> 16) & 255) << 16;
    w1[3] = uint_to_hex_lower8 ((d >>  8) & 255) <<  0
          | uint_to_hex_lower8 ((d >>  0) & 255) << 16;
    w2[0] = uint_to_hex_lower8 ((e >> 24) & 255) <<  0
          | uint_to_hex_lower8 ((e >> 16) & 255) << 16;
    w2[1] = uint_to_hex_lower8 ((e >>  8) & 255) <<  0
          | uint_to_hex_lower8 ((e >>  0) & 255) << 16;
    w2[2] = 0;
    w2[3] = 0;
    w3[0] = 0;
    w3[1] = 0;
    w3[2] = 0;
    w3[3] = 0;

    // 40 = 5 digest words * 8 hex characters.
    md5_update_64 (&ctx, w0, w1, w2, w3, 40);

    md5_final (&ctx);

    const u32 r0 = ctx.h[DGST_R0];
    const u32 r1 = ctx.h[DGST_R1];
    const u32 r2 = ctx.h[DGST_R2];
    const u32 r3 = ctx.h[DGST_R3];

    COMPARE_S_SCALAR (r0, r1, r2, r3);
  }
}

@ -0,0 +1,312 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include M2S(INCLUDE_PATH/inc_vendor.h)
#include M2S(INCLUDE_PATH/inc_types.h)
#include M2S(INCLUDE_PATH/inc_platform.cl)
#include M2S(INCLUDE_PATH/inc_common.cl)
#include M2S(INCLUDE_PATH/inc_simd.cl)
#include M2S(INCLUDE_PATH/inc_hash_md5.cl)
#include M2S(INCLUDE_PATH/inc_hash_sha1.cl)
#endif
// uint_to_hex_lower8 (i): looks up l_bin2asc[] for each lane of `i` and
// repacks the results with make_u32x. The expansion names `l_bin2asc`
// directly, so it is only usable where a table of that name is in scope
// (every kernel in this file declares one). The call sites in this file pass
// values already masked with & 255, matching the 256-entry table.
#if VECT_SIZE == 1
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
#elif VECT_SIZE == 2
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
#elif VECT_SIZE == 4
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
#elif VECT_SIZE == 8
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
#elif VECT_SIZE == 16
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
#endif
// Per-hash salt pair read by the m21310 kernels through esalt_bufs.
// NOTE(review): the layout is presumably shared with host code - do not
// reorder or resize fields without checking the host side.
typedef struct md5_double_salt
{
u32 salt1_buf[64]; // fed to md5_update_global ahead of the hex-encoded SHA-1 digest
int salt1_len; // length passed along with salt1_buf (presumably bytes - confirm)
u32 salt2_buf[64]; // fed to sha1_update_global ahead of the password
int salt2_len; // length passed along with salt2_buf (presumably bytes - confirm)
} md5_double_salt_t;
/**
 * m21310_mxx (vector attack): for each group of VECT_SIZE candidates built
 * from this work-item's base password and words_buf_r, computes
 *
 *   md5 (salt1 . hex (sha1 (salt2 . pass)))
 *
 * and hands four words of the MD5 state to COMPARE_M_SIMD.
 *
 * Fix: the esalt type argument is now the typedef name md5_double_salt_t
 * instead of the bare struct tag md5_double_salt. A tag on its own is not a
 * type name in C-based OpenCL C; the typedef names the same struct and is
 * valid wherever the tag was.
 *
 * NOTE(review): COMPARE_M_SIMD is defined elsewhere and presumably depends
 * on local names used here (gid, il_pos, ...).
 */
KERNEL_FQ void m21310_mxx (KERN_ATTR_VECTOR_ESALT (md5_double_salt_t))
{
  /**
   * modifier
   */

  const u64 gid = get_global_id (0);
  const u64 lid = get_local_id (0);
  const u64 lsz = get_local_size (0);

  /**
   * bin2asc table
   */

  // Byte value -> two lowercase hex characters. The high-nibble character
  // sits in bits 0..7 and the low-nibble character in bits 8..15.
  LOCAL_VK u32 l_bin2asc[256];

  // Work-items of the group fill the table cooperatively, lsz entries apart.
  for (u32 i = lid; i < 256; i += lsz)
  {
    const u32 i0 = (i >> 0) & 15;
    const u32 i1 = (i >> 4) & 15;

    l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
                 | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
  }

  // The bounds check comes after the barrier so that every work-item of the
  // group reaches SYNC_THREADS.
  SYNC_THREADS ();

  if (gid >= GID_CNT) return;

  /**
   * base
   */

  const u32 pw_len = pws[gid].pw_len;

  // Copy the password words (one u32 per 4 bytes of pw_len) into a zeroed buffer.
  u32x w[64] = { 0 };

  for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
  {
    w[idx] = pws[gid].i[idx];
  }

  // Scalar SHA-1 state pre-loaded with salt2; widened per iteration.
  sha1_ctx_t ctx00;

  sha1_init (&ctx00);

  sha1_update_global (&ctx00, esalt_bufs[DIGESTS_OFFSET_HOST].salt2_buf, esalt_bufs[DIGESTS_OFFSET_HOST].salt2_len);

  // Scalar MD5 state pre-loaded with salt1; widened per iteration.
  md5_ctx_t ctx11;

  md5_init (&ctx11);

  md5_update_global (&ctx11, esalt_bufs[DIGESTS_OFFSET_HOST].salt1_buf, esalt_bufs[DIGESTS_OFFSET_HOST].salt1_len);

  /**
   * loop
   */

  u32x _w0[4];
  u32x _w1[4];
  u32x _w2[4];
  u32x _w3[4];

  // w[0] is rebuilt on every iteration from this saved base word.
  u32x w0l = w[0];

  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
  {
    // Combine the base word with the per-iteration word from words_buf_r.
    const u32x w0r = words_buf_r[il_pos / VECT_SIZE];

    const u32x w0 = w0l | w0r;

    w[0] = w0;

    // Inner hash: sha1 (salt2 . candidate).
    sha1_ctx_vector_t ctx0;

    sha1_init_vector_from_scalar (&ctx0, &ctx00);

    sha1_update_vector_swap (&ctx0, w, pw_len);

    sha1_final_vector (&ctx0);

    const u32x a = ctx0.h[0];
    const u32x b = ctx0.h[1];
    const u32x c = ctx0.h[2];
    const u32x d = ctx0.h[3];
    const u32x e = ctx0.h[4];

    // Outer hash: md5 (salt1 . hex (inner digest)).
    md5_ctx_vector_t ctx;

    md5_init_vector_from_scalar (&ctx, &ctx11);

    // Each SHA-1 word yields 8 hex characters (two output words), starting
    // with its most significant byte.
    _w0[0] = uint_to_hex_lower8 ((a >> 24) & 255) <<  0
           | uint_to_hex_lower8 ((a >> 16) & 255) << 16;
    _w0[1] = uint_to_hex_lower8 ((a >>  8) & 255) <<  0
           | uint_to_hex_lower8 ((a >>  0) & 255) << 16;
    _w0[2] = uint_to_hex_lower8 ((b >> 24) & 255) <<  0
           | uint_to_hex_lower8 ((b >> 16) & 255) << 16;
    _w0[3] = uint_to_hex_lower8 ((b >>  8) & 255) <<  0
           | uint_to_hex_lower8 ((b >>  0) & 255) << 16;
    _w1[0] = uint_to_hex_lower8 ((c >> 24) & 255) <<  0
           | uint_to_hex_lower8 ((c >> 16) & 255) << 16;
    _w1[1] = uint_to_hex_lower8 ((c >>  8) & 255) <<  0
           | uint_to_hex_lower8 ((c >>  0) & 255) << 16;
    _w1[2] = uint_to_hex_lower8 ((d >> 24) & 255) <<  0
           | uint_to_hex_lower8 ((d >> 16) & 255) << 16;
    _w1[3] = uint_to_hex_lower8 ((d >>  8) & 255) <<  0
           | uint_to_hex_lower8 ((d >>  0) & 255) << 16;
    _w2[0] = uint_to_hex_lower8 ((e >> 24) & 255) <<  0
           | uint_to_hex_lower8 ((e >> 16) & 255) << 16;
    _w2[1] = uint_to_hex_lower8 ((e >>  8) & 255) <<  0
           | uint_to_hex_lower8 ((e >>  0) & 255) << 16;
    _w2[2] = 0;
    _w2[3] = 0;
    _w3[0] = 0;
    _w3[1] = 0;
    _w3[2] = 0;
    _w3[3] = 0;

    // 40 = 5 digest words * 8 hex characters.
    md5_update_vector_64 (&ctx, _w0, _w1, _w2, _w3, 40);

    md5_final_vector (&ctx);

    const u32x r0 = ctx.h[DGST_R0];
    const u32x r1 = ctx.h[DGST_R1];
    const u32x r2 = ctx.h[DGST_R2];
    const u32x r3 = ctx.h[DGST_R3];

    COMPARE_M_SIMD (r0, r1, r2, r3);
  }
}
/**
 * m21310_sxx (vector attack): for each group of VECT_SIZE candidates built
 * from this work-item's base password and words_buf_r, computes
 *
 *   md5 (salt1 . hex (sha1 (salt2 . pass)))
 *
 * and hands four words of the MD5 state to COMPARE_S_SIMD.
 *
 * Fix: the esalt type argument is now the typedef name md5_double_salt_t
 * instead of the bare struct tag md5_double_salt. A tag on its own is not a
 * type name in C-based OpenCL C; the typedef names the same struct and is
 * valid wherever the tag was.
 *
 * NOTE(review): COMPARE_S_SIMD is defined elsewhere and presumably depends
 * on local names used here (search, gid, il_pos, ...).
 */
KERNEL_FQ void m21310_sxx (KERN_ATTR_VECTOR_ESALT (md5_double_salt_t))
{
  /**
   * modifier
   */

  const u64 gid = get_global_id (0);
  const u64 lid = get_local_id (0);
  const u64 lsz = get_local_size (0);

  /**
   * bin2asc table
   */

  // Byte value -> two lowercase hex characters. The high-nibble character
  // sits in bits 0..7 and the low-nibble character in bits 8..15.
  LOCAL_VK u32 l_bin2asc[256];

  // Work-items of the group fill the table cooperatively, lsz entries apart.
  for (u32 i = lid; i < 256; i += lsz)
  {
    const u32 i0 = (i >> 0) & 15;
    const u32 i1 = (i >> 4) & 15;

    l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
                 | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
  }

  // The bounds check comes after the barrier so that every work-item of the
  // group reaches SYNC_THREADS.
  SYNC_THREADS ();

  if (gid >= GID_CNT) return;

  /**
   * digest
   */

  // Target digest words; not referenced directly below, presumably consumed
  // by COMPARE_S_SIMD.
  const u32 search[4] =
  {
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
  };

  /**
   * base
   */

  const u32 pw_len = pws[gid].pw_len;

  // Copy the password words (one u32 per 4 bytes of pw_len) into a zeroed buffer.
  u32x w[64] = { 0 };

  for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
  {
    w[idx] = pws[gid].i[idx];
  }

  // Scalar SHA-1 state pre-loaded with salt2; widened per iteration.
  sha1_ctx_t ctx00;

  sha1_init (&ctx00);

  sha1_update_global (&ctx00, esalt_bufs[DIGESTS_OFFSET_HOST].salt2_buf, esalt_bufs[DIGESTS_OFFSET_HOST].salt2_len);

  // Scalar MD5 state pre-loaded with salt1; widened per iteration.
  md5_ctx_t ctx11;

  md5_init (&ctx11);

  md5_update_global (&ctx11, esalt_bufs[DIGESTS_OFFSET_HOST].salt1_buf, esalt_bufs[DIGESTS_OFFSET_HOST].salt1_len);

  /**
   * loop
   */

  u32x _w0[4];
  u32x _w1[4];
  u32x _w2[4];
  u32x _w3[4];

  // w[0] is rebuilt on every iteration from this saved base word.
  u32x w0l = w[0];

  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
  {
    // Combine the base word with the per-iteration word from words_buf_r.
    const u32x w0r = words_buf_r[il_pos / VECT_SIZE];

    const u32x w0 = w0l | w0r;

    w[0] = w0;

    // Inner hash: sha1 (salt2 . candidate).
    sha1_ctx_vector_t ctx0;

    sha1_init_vector_from_scalar (&ctx0, &ctx00);

    sha1_update_vector_swap (&ctx0, w, pw_len);

    sha1_final_vector (&ctx0);

    const u32x a = ctx0.h[0];
    const u32x b = ctx0.h[1];
    const u32x c = ctx0.h[2];
    const u32x d = ctx0.h[3];
    const u32x e = ctx0.h[4];

    // Outer hash: md5 (salt1 . hex (inner digest)).
    md5_ctx_vector_t ctx;

    md5_init_vector_from_scalar (&ctx, &ctx11);

    // Each SHA-1 word yields 8 hex characters (two output words), starting
    // with its most significant byte.
    _w0[0] = uint_to_hex_lower8 ((a >> 24) & 255) <<  0
           | uint_to_hex_lower8 ((a >> 16) & 255) << 16;
    _w0[1] = uint_to_hex_lower8 ((a >>  8) & 255) <<  0
           | uint_to_hex_lower8 ((a >>  0) & 255) << 16;
    _w0[2] = uint_to_hex_lower8 ((b >> 24) & 255) <<  0
           | uint_to_hex_lower8 ((b >> 16) & 255) << 16;
    _w0[3] = uint_to_hex_lower8 ((b >>  8) & 255) <<  0
           | uint_to_hex_lower8 ((b >>  0) & 255) << 16;
    _w1[0] = uint_to_hex_lower8 ((c >> 24) & 255) <<  0
           | uint_to_hex_lower8 ((c >> 16) & 255) << 16;
    _w1[1] = uint_to_hex_lower8 ((c >>  8) & 255) <<  0
           | uint_to_hex_lower8 ((c >>  0) & 255) << 16;
    _w1[2] = uint_to_hex_lower8 ((d >> 24) & 255) <<  0
           | uint_to_hex_lower8 ((d >> 16) & 255) << 16;
    _w1[3] = uint_to_hex_lower8 ((d >>  8) & 255) <<  0
           | uint_to_hex_lower8 ((d >>  0) & 255) << 16;
    _w2[0] = uint_to_hex_lower8 ((e >> 24) & 255) <<  0
           | uint_to_hex_lower8 ((e >> 16) & 255) << 16;
    _w2[1] = uint_to_hex_lower8 ((e >>  8) & 255) <<  0
           | uint_to_hex_lower8 ((e >>  0) & 255) << 16;
    _w2[2] = 0;
    _w2[3] = 0;
    _w3[0] = 0;
    _w3[1] = 0;
    _w3[2] = 0;
    _w3[3] = 0;

    // 40 = 5 digest words * 8 hex characters.
    md5_update_vector_64 (&ctx, _w0, _w1, _w2, _w3, 40);

    md5_final_vector (&ctx);

    const u32x r0 = ctx.h[DGST_R0];
    const u32x r1 = ctx.h[DGST_R1];
    const u32x r2 = ctx.h[DGST_R2];
    const u32x r3 = ctx.h[DGST_R3];

    COMPARE_S_SIMD (r0, r1, r2, r3);
  }
}

@ -400,6 +400,9 @@ KERNEL_FQ void m22700_init (KERN_ATTR_TMPS (scrypt_tmp_t))
#if defined IS_CUDA || defined IS_HIP
const uint4 tmp0 = make_uint4 (digest[0], digest[1], digest[2], digest[3]);
const uint4 tmp1 = make_uint4 (digest[4], digest[5], digest[6], digest[7]);
#elif defined IS_METAL
const uint4 tmp0 = uint4 (digest[0], digest[1], digest[2], digest[3]);
const uint4 tmp1 = uint4 (digest[4], digest[5], digest[6], digest[7]);
#else
const uint4 tmp0 = (uint4) (digest[0], digest[1], digest[2], digest[3]);
const uint4 tmp1 = (uint4) (digest[4], digest[5], digest[6], digest[7]);
@ -430,6 +433,11 @@ KERNEL_FQ void m22700_init (KERN_ATTR_TMPS (scrypt_tmp_t))
X[1] = make_uint4 (T[1].x, T[2].y, T[3].z, T[0].w);
X[2] = make_uint4 (T[2].x, T[3].y, T[0].z, T[1].w);
X[3] = make_uint4 (T[3].x, T[0].y, T[1].z, T[2].w);
#elif defined IS_METAL
X[0] = uint4 (T[0].x, T[1].y, T[2].z, T[3].w);
X[1] = uint4 (T[1].x, T[2].y, T[3].z, T[0].w);
X[2] = uint4 (T[2].x, T[3].y, T[0].z, T[1].w);
X[3] = uint4 (T[3].x, T[0].y, T[1].z, T[2].w);
#else
X[0] = (uint4) (T[0].x, T[1].y, T[2].z, T[3].w);
X[1] = (uint4) (T[1].x, T[2].y, T[3].z, T[0].w);
@ -605,6 +613,11 @@ KERNEL_FQ void m22700_comp (KERN_ATTR_TMPS (scrypt_tmp_t))
T[1] = make_uint4 (X[1].x, X[0].y, X[3].z, X[2].w);
T[2] = make_uint4 (X[2].x, X[1].y, X[0].z, X[3].w);
T[3] = make_uint4 (X[3].x, X[2].y, X[1].z, X[0].w);
#elif defined IS_METAL
T[0] = uint4 (X[0].x, X[3].y, X[2].z, X[1].w);
T[1] = uint4 (X[1].x, X[0].y, X[3].z, X[2].w);
T[2] = uint4 (X[2].x, X[1].y, X[0].z, X[3].w);
T[3] = uint4 (X[3].x, X[2].y, X[1].z, X[0].w);
#else
T[0] = (uint4) (X[0].x, X[3].y, X[2].z, X[1].w);
T[1] = (uint4) (X[1].x, X[0].y, X[3].z, X[2].w);

@ -38,6 +38,8 @@ typedef struct pdf
u32 rc4key[2];
u32 rc4data[2];
int P_minus;
} pdf_t;
typedef struct pdf14_tmp

@ -371,7 +371,7 @@ KERNEL_FQ void m25500_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sh
*/
/*
// decrypt buffer is not usefull here, skip
// decrypt buffer is not useful here, skip
u32 dec[14] = { 0 };
AES_GCM_GCTR (key, J0, enc, enc_len, dec, s_te0, s_te1, s_te2, s_te3, s_te4);

@ -352,7 +352,7 @@ DECLSPEC void SET_KEY32 (LOCAL_AS u32 *S, const u64 key, const u32 val)
#else
// access pattern: linear access with S offset already set to right offset based on thread ID saving it from compuation
// access pattern: linear access with S offset already set to right offset based on thread ID saving it from computation
// makes sense if there are not thread ID's (for instance on CPU)
DECLSPEC inline u32 GET_KEY32 (LOCAL_AS u32 *S, const u64 key)

@ -348,7 +348,7 @@ DECLSPEC void SET_KEY32 (LOCAL_AS u32 *S, const u64 key, const u32 val, const u6
#else
// access pattern: linear access with S offset already set to right offset based on thread ID saving it from compuation
// access pattern: linear access with S offset already set to right offset based on thread ID saving it from computation
// makes sense if there are not thread ID's (for instance on CPU)
DECLSPEC inline u32 GET_KEY32 (LOCAL_AS u32 *S, const u64 key, MAYBE_UNUSED const u64 lid)

@ -21,11 +21,11 @@
typedef struct pbkdf2_sha256_tmp
{
u32 ipad[8];
u32 opad[8];
u32 ipad[8];
u32 opad[8];
u32 dgst[32];
u32 out[32];
u32 dgst[32];
u32 out[32];
} pbkdf2_sha256_tmp_t;
@ -34,7 +34,7 @@ typedef struct pbkdf2_sha256_aes_gcm
u32 salt_buf[64];
u32 iv_buf[4];
u32 iv_len;
u32 ct_buf[784]; // TODO this can be smaller and would speedup the attack, only 64 bytes of ciphertext are allowed
u32 ct_buf[16];
u32 ct_len;
} pbkdf2_sha256_aes_gcm_t;
@ -368,7 +368,6 @@ KERNEL_FQ void m26610_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sh
AES_GCM_decrypt (key, J0, ct, 32, pt, s_te0, s_te1, s_te2, s_te3, s_te4);
const int correct = is_valid_printable_32 (pt[0])
+ is_valid_printable_32 (pt[1])
+ is_valid_printable_32 (pt[2])
@ -382,7 +381,7 @@ KERNEL_FQ void m26610_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sh
/*
const int pt_len = 28; // not using 32 byte but 28 because our UTF8 allows up to 4 byte per character and since we decrypt 32 byte
// only we can't garantee it is not in the middle of a UTF8 byte stream at that point
// only we can't guarantee it is not in the middle of a UTF8 byte stream at that point
if (hc_enc_scan (pt, pt_len))
{

@ -351,6 +351,9 @@ KERNEL_FQ void m27700_init (KERN_ATTR_TMPS (scrypt_tmp_t))
#if defined IS_CUDA || defined IS_HIP
const uint4 tmp0 = make_uint4 (digest[0], digest[1], digest[2], digest[3]);
const uint4 tmp1 = make_uint4 (digest[4], digest[5], digest[6], digest[7]);
#elif defined IS_METAL
const uint4 tmp0 = uint4 (digest[0], digest[1], digest[2], digest[3]);
const uint4 tmp1 = uint4 (digest[4], digest[5], digest[6], digest[7]);
#else
const uint4 tmp0 = (uint4) (digest[0], digest[1], digest[2], digest[3]);
const uint4 tmp1 = (uint4) (digest[4], digest[5], digest[6], digest[7]);
@ -381,6 +384,11 @@ KERNEL_FQ void m27700_init (KERN_ATTR_TMPS (scrypt_tmp_t))
X[1] = make_uint4 (T[1].x, T[2].y, T[3].z, T[0].w);
X[2] = make_uint4 (T[2].x, T[3].y, T[0].z, T[1].w);
X[3] = make_uint4 (T[3].x, T[0].y, T[1].z, T[2].w);
#elif defined IS_METAL
X[0] = uint4 (T[0].x, T[1].y, T[2].z, T[3].w);
X[1] = uint4 (T[1].x, T[2].y, T[3].z, T[0].w);
X[2] = uint4 (T[2].x, T[3].y, T[0].z, T[1].w);
X[3] = uint4 (T[3].x, T[0].y, T[1].z, T[2].w);
#else
X[0] = (uint4) (T[0].x, T[1].y, T[2].z, T[3].w);
X[1] = (uint4) (T[1].x, T[2].y, T[3].z, T[0].w);
@ -557,6 +565,11 @@ KERNEL_FQ void m27700_comp (KERN_ATTR_TMPS (scrypt_tmp_t))
T[1] = make_uint4 (X[1].x, X[0].y, X[3].z, X[2].w);
T[2] = make_uint4 (X[2].x, X[1].y, X[0].z, X[3].w);
T[3] = make_uint4 (X[3].x, X[2].y, X[1].z, X[0].w);
#elif defined IS_METAL
T[0] = uint4 (X[0].x, X[3].y, X[2].z, X[1].w);
T[1] = uint4 (X[1].x, X[0].y, X[3].z, X[2].w);
T[2] = uint4 (X[2].x, X[1].y, X[0].z, X[3].w);
T[3] = uint4 (X[3].x, X[2].y, X[1].z, X[0].w);
#else
T[0] = (uint4) (X[0].x, X[3].y, X[2].z, X[1].w);
T[1] = (uint4) (X[1].x, X[0].y, X[3].z, X[2].w);

@ -337,6 +337,9 @@ KERNEL_FQ void m28200_init (KERN_ATTR_TMPS_ESALT (exodus_tmp_t, exodus_t))
#if defined IS_CUDA || defined IS_HIP
const uint4 tmp0 = make_uint4 (digest[0], digest[1], digest[2], digest[3]);
const uint4 tmp1 = make_uint4 (digest[4], digest[5], digest[6], digest[7]);
#elif defined IS_METAL
const uint4 tmp0 = uint4 (digest[0], digest[1], digest[2], digest[3]);
const uint4 tmp1 = uint4 (digest[4], digest[5], digest[6], digest[7]);
#else
const uint4 tmp0 = (uint4) (digest[0], digest[1], digest[2], digest[3]);
const uint4 tmp1 = (uint4) (digest[4], digest[5], digest[6], digest[7]);
@ -367,6 +370,11 @@ KERNEL_FQ void m28200_init (KERN_ATTR_TMPS_ESALT (exodus_tmp_t, exodus_t))
X[1] = make_uint4 (T[1].x, T[2].y, T[3].z, T[0].w);
X[2] = make_uint4 (T[2].x, T[3].y, T[0].z, T[1].w);
X[3] = make_uint4 (T[3].x, T[0].y, T[1].z, T[2].w);
#elif defined IS_METAL
X[0] = uint4 (T[0].x, T[1].y, T[2].z, T[3].w);
X[1] = uint4 (T[1].x, T[2].y, T[3].z, T[0].w);
X[2] = uint4 (T[2].x, T[3].y, T[0].z, T[1].w);
X[3] = uint4 (T[3].x, T[0].y, T[1].z, T[2].w);
#else
X[0] = (uint4) (T[0].x, T[1].y, T[2].z, T[3].w);
X[1] = (uint4) (T[1].x, T[2].y, T[3].z, T[0].w);
@ -525,6 +533,11 @@ KERNEL_FQ void m28200_comp (KERN_ATTR_TMPS_ESALT (exodus_tmp_t, exodus_t))
T[1] = make_uint4 (X[1].x, X[0].y, X[3].z, X[2].w);
T[2] = make_uint4 (X[2].x, X[1].y, X[0].z, X[3].w);
T[3] = make_uint4 (X[3].x, X[2].y, X[1].z, X[0].w);
#elif defined IS_METAL
T[0] = uint4 (X[0].x, X[3].y, X[2].z, X[1].w);
T[1] = uint4 (X[1].x, X[0].y, X[3].z, X[2].w);
T[2] = uint4 (X[2].x, X[1].y, X[0].z, X[3].w);
T[3] = uint4 (X[3].x, X[2].y, X[1].z, X[0].w);
#else
T[0] = (uint4) (X[0].x, X[3].y, X[2].z, X[1].w);
T[1] = (uint4) (X[1].x, X[0].y, X[3].z, X[2].w);

@ -348,7 +348,7 @@ DECLSPEC void SET_KEY32 (LOCAL_AS u32 *S, const u64 key, const u32 val, const u6
#else
// access pattern: linear access with S offset already set to right offset based on thread ID saving it from compuation
// access pattern: linear access with S offset already set to right offset based on thread ID saving it from computation
// makes sense if there are not thread ID's (for instance on CPU)
DECLSPEC inline u32 GET_KEY32 (LOCAL_AS u32 *S, const u64 key, MAYBE_UNUSED const u64 lid)

@ -351,6 +351,9 @@ KERNEL_FQ void m29800_init (KERN_ATTR_TMPS (scrypt_tmp_t))
#if defined IS_CUDA || defined IS_HIP
const uint4 tmp0 = make_uint4 (digest[0], digest[1], digest[2], digest[3]);
const uint4 tmp1 = make_uint4 (digest[4], digest[5], digest[6], digest[7]);
#elif defined IS_METAL
const uint4 tmp0 = uint4 (digest[0], digest[1], digest[2], digest[3]);
const uint4 tmp1 = uint4 (digest[4], digest[5], digest[6], digest[7]);
#else
const uint4 tmp0 = (uint4) (digest[0], digest[1], digest[2], digest[3]);
const uint4 tmp1 = (uint4) (digest[4], digest[5], digest[6], digest[7]);
@ -381,6 +384,11 @@ KERNEL_FQ void m29800_init (KERN_ATTR_TMPS (scrypt_tmp_t))
X[1] = make_uint4 (T[1].x, T[2].y, T[3].z, T[0].w);
X[2] = make_uint4 (T[2].x, T[3].y, T[0].z, T[1].w);
X[3] = make_uint4 (T[3].x, T[0].y, T[1].z, T[2].w);
#elif defined IS_METAL
X[0] = uint4 (T[0].x, T[1].y, T[2].z, T[3].w);
X[1] = uint4 (T[1].x, T[2].y, T[3].z, T[0].w);
X[2] = uint4 (T[2].x, T[3].y, T[0].z, T[1].w);
X[3] = uint4 (T[3].x, T[0].y, T[1].z, T[2].w);
#else
X[0] = (uint4) (T[0].x, T[1].y, T[2].z, T[3].w);
X[1] = (uint4) (T[1].x, T[2].y, T[3].z, T[0].w);
@ -557,6 +565,11 @@ KERNEL_FQ void m29800_comp (KERN_ATTR_TMPS (scrypt_tmp_t))
T[1] = make_uint4 (X[1].x, X[0].y, X[3].z, X[2].w);
T[2] = make_uint4 (X[2].x, X[1].y, X[0].z, X[3].w);
T[3] = make_uint4 (X[3].x, X[2].y, X[1].z, X[0].w);
#elif defined IS_METAL
T[0] = uint4 (X[0].x, X[3].y, X[2].z, X[1].w);
T[1] = uint4 (X[1].x, X[0].y, X[3].z, X[2].w);
T[2] = uint4 (X[2].x, X[1].y, X[0].z, X[3].w);
T[3] = uint4 (X[3].x, X[2].y, X[1].z, X[0].w);
#else
T[0] = (uint4) (X[0].x, X[3].y, X[2].z, X[1].w);
T[1] = (uint4) (X[1].x, X[0].y, X[3].z, X[2].w);

@ -349,7 +349,7 @@ DECLSPEC void SET_KEY32 (LOCAL_AS u32 *S, const u64 key, const u32 val, const u6
#else
// access pattern: linear access with S offset already set to right offset based on thread ID saving it from compuation
// access pattern: linear access with S offset already set to right offset based on thread ID saving it from computation
// makes sense if there are not thread ID's (for instance on CPU)
DECLSPEC inline u32 GET_KEY32 (LOCAL_AS u32 *S, const u64 key, MAYBE_UNUSED const u64 lid)

@ -48,8 +48,6 @@ KERNEL_FQ void m31000_m04 (KERN_ATTR_RULES ())
{
u32x w0[4] = { 0 };
u32x w1[4] = { 0 };
u32x w2[4] = { 0 };
u32x w3[4] = { 0 };
const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
@ -185,7 +183,7 @@ KERNEL_FQ void m31000_s04 (KERN_ATTR_RULES ())
const u32x r1 = h[DGST_R1];
const u32x r2 = h[DGST_R2];
const u32x r3 = h[DGST_R3];
COMPARE_S_SIMD (r0, r1, r2, r3);
}
}

@ -47,7 +47,7 @@ KERNEL_FQ void m31000_mxx (KERN_ATTR_RULES ())
blake2s_init (&ctx);
blake2s_update (&ctx, tmp.i, tmp.pw_len);
blake2s_final (&ctx);
const u32 r0 = ctx.h[DGST_R0];
const u32 r1 = ctx.h[DGST_R1];
const u32 r2 = ctx.h[DGST_R2];

@ -145,10 +145,10 @@ KERNEL_FQ void m31300_m04 (KERN_ATTR_RULES ())
MD4_STEP (MD4_H , c, d, a, b, w1[3], MD4C02, MD4S22);
MD4_STEP (MD4_H , b, c, d, a, w3[3], MD4C02, MD4S23);
w0[0] = a + MD4M_A;
w0[1] = b + MD4M_B;
w0[2] = c + MD4M_C;
w0[3] = d + MD4M_D;
w0[0] = a + make_u32x (MD4M_A);
w0[1] = b + make_u32x (MD4M_B);
w0[2] = c + make_u32x (MD4M_C);
w0[3] = d + make_u32x (MD4M_D);
w1[0] = salt_buf[ 0];
w1[1] = salt_buf[ 1];
w1[2] = salt_buf[ 2];
@ -237,10 +237,10 @@ KERNEL_FQ void m31300_m04 (KERN_ATTR_RULES ())
MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33);
a = a + MD5M_A;
b = b + MD5M_B;
c = c + MD5M_C;
d = d + MD5M_D;
a = a + make_u32x (MD5M_A);
b = b + make_u32x (MD5M_B);
c = c + make_u32x (MD5M_C);
d = d + make_u32x (MD5M_D);
const u32x a1 = a;
const u32x b1 = b;
@ -491,10 +491,10 @@ KERNEL_FQ void m31300_s04 (KERN_ATTR_RULES ())
MD4_STEP (MD4_H , c, d, a, b, w1[3], MD4C02, MD4S22);
MD4_STEP (MD4_H , b, c, d, a, w3[3], MD4C02, MD4S23);
w0[0] = a + MD4M_A;
w0[1] = b + MD4M_B;
w0[2] = c + MD4M_C;
w0[3] = d + MD4M_D;
w0[0] = a + make_u32x (MD4M_A);
w0[1] = b + make_u32x (MD4M_B);
w0[2] = c + make_u32x (MD4M_C);
w0[3] = d + make_u32x (MD4M_D);
w1[0] = salt_buf[ 0];
w1[1] = salt_buf[ 1];
w1[2] = salt_buf[ 2];
@ -583,10 +583,10 @@ KERNEL_FQ void m31300_s04 (KERN_ATTR_RULES ())
MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33);
a = a + MD5M_A;
b = b + MD5M_B;
c = c + MD5M_C;
d = d + MD5M_D;
a = a + make_u32x (MD5M_A);
b = b + make_u32x (MD5M_B);
c = c + make_u32x (MD5M_C);
d = d + make_u32x (MD5M_D);
const u32x a1 = a;
const u32x b1 = b;

@ -205,11 +205,10 @@ KERNEL_FQ void m31300_m04 (KERN_ATTR_BASIC ())
MD4_STEP (MD4_H , c, d, a, b, w1[3], MD4C02, MD4S22);
MD4_STEP (MD4_H , b, c, d, a, w3[3], MD4C02, MD4S23);
w0[0] = a + MD4M_A;
w0[1] = b + MD4M_B;
w0[2] = c + MD4M_C;
w0[3] = d + MD4M_D;
w0[0] = a + make_u32x (MD4M_A);
w0[1] = b + make_u32x (MD4M_B);
w0[2] = c + make_u32x (MD4M_C);
w0[3] = d + make_u32x (MD4M_D);
w1[0] = salt_buf[ 0];
w1[1] = salt_buf[ 1];
w1[2] = salt_buf[ 2];
@ -298,10 +297,10 @@ KERNEL_FQ void m31300_m04 (KERN_ATTR_BASIC ())
MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33);
a = a + MD5M_A;
b = b + MD5M_B;
c = c + MD5M_C;
d = d + MD5M_D;
a = a + make_u32x (MD5M_A);
b = b + make_u32x (MD5M_B);
c = c + make_u32x (MD5M_C);
d = d + make_u32x (MD5M_D);
const u32x a1 = a;
const u32x b1 = b;
@ -615,10 +614,10 @@ KERNEL_FQ void m31300_s04 (KERN_ATTR_BASIC ())
MD4_STEP (MD4_H , b, c, d, a, w3[3], MD4C02, MD4S23);
w0[0] = a + MD4M_A;
w0[1] = b + MD4M_B;
w0[2] = c + MD4M_C;
w0[3] = d + MD4M_D;
w0[0] = a + make_u32x (MD4M_A);
w0[1] = b + make_u32x (MD4M_B);
w0[2] = c + make_u32x (MD4M_C);
w0[3] = d + make_u32x (MD4M_D);
w1[0] = salt_buf[ 0];
w1[1] = salt_buf[ 1];
w1[2] = salt_buf[ 2];
@ -707,10 +706,10 @@ KERNEL_FQ void m31300_s04 (KERN_ATTR_BASIC ())
MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33);
a = a + MD5M_A;
b = b + MD5M_B;
c = c + MD5M_C;
d = d + MD5M_D;
a = a + make_u32x (MD5M_A);
b = b + make_u32x (MD5M_B);
c = c + make_u32x (MD5M_C);
d = d + make_u32x (MD5M_D);
const u32x a1 = a;
const u32x b1 = b;

@ -166,10 +166,10 @@ DECLSPEC void m31300m (PRIVATE_AS u32 *w, const u32 pw_len, KERN_ATTR_FUNC_VECTO
u32x w2[4];
u32x w3[4];
w0[0] = a + MD4M_A;
w0[1] = b + MD4M_B;
w0[2] = c + MD4M_C;
w0[3] = d + MD4M_D;
w0[0] = a + make_u32x (MD4M_A);
w0[1] = b + make_u32x (MD4M_B);
w0[2] = c + make_u32x (MD4M_C);
w0[3] = d + make_u32x (MD4M_D);
w1[0] = salt_buf[ 0];
w1[1] = salt_buf[ 1];
w1[2] = salt_buf[ 2];
@ -258,10 +258,10 @@ DECLSPEC void m31300m (PRIVATE_AS u32 *w, const u32 pw_len, KERN_ATTR_FUNC_VECTO
MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33);
a = a + MD5M_A;
b = b + MD5M_B;
c = c + MD5M_C;
d = d + MD5M_D;
a = a + make_u32x (MD5M_A);
b = b + make_u32x (MD5M_B);
c = c + make_u32x (MD5M_C);
d = d + make_u32x (MD5M_D);
const u32x a1 = a;
const u32x b1 = b;
@ -527,10 +527,10 @@ DECLSPEC void m31300s (PRIVATE_AS u32 *w, const u32 pw_len, KERN_ATTR_FUNC_VECTO
u32x w2[4];
u32x w3[4];
w0[0] = a + MD4M_A;
w0[1] = b + MD4M_B;
w0[2] = c + MD4M_C;
w0[3] = d + MD4M_D;
w0[0] = a + make_u32x (MD4M_A);
w0[1] = b + make_u32x (MD4M_B);
w0[2] = c + make_u32x (MD4M_C);
w0[3] = d + make_u32x (MD4M_D);
w1[0] = salt_buf[ 0];
w1[1] = salt_buf[ 1];
w1[2] = salt_buf[ 2];
@ -619,10 +619,10 @@ DECLSPEC void m31300s (PRIVATE_AS u32 *w, const u32 pw_len, KERN_ATTR_FUNC_VECTO
MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33);
a = a + MD5M_A;
b = b + MD5M_B;
c = c + MD5M_C;
d = d + MD5M_D;
a = a + make_u32x (MD5M_A);
b = b + make_u32x (MD5M_B);
c = c + make_u32x (MD5M_C);
d = d + make_u32x (MD5M_D);
const u32x a1 = a;
const u32x b1 = b;

@ -0,0 +1,750 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include M2S(INCLUDE_PATH/inc_vendor.h)
#include M2S(INCLUDE_PATH/inc_types.h)
#include M2S(INCLUDE_PATH/inc_platform.cl)
#include M2S(INCLUDE_PATH/inc_common.cl)
#include M2S(INCLUDE_PATH/inc_rp_optimized.h)
#include M2S(INCLUDE_PATH/inc_rp_optimized.cl)
#include M2S(INCLUDE_PATH/inc_simd.cl)
#include M2S(INCLUDE_PATH/inc_hash_sha256.cl)
#include M2S(INCLUDE_PATH/inc_cipher_aes.h)
#include M2S(INCLUDE_PATH/inc_cipher_aes.cl)
#endif
/**
 * Extra per-hash data type of this hash mode: the kernels in this file name it
 * in KERN_ATTR_RULES_ESALT (scrtv2_t). None of the kernel bodies in this file
 * read its fields directly.
 * NOTE(review): the field meanings below are inferred from the names only -
 * confirm against the host-side module that fills this struct.
 */
typedef struct scrtv2
{
// presumably the ciphertext, stored as 32-bit words (64 words = 256 bytes of room)
u32 ct_buf[64];
// presumably the used length of ct_buf; the unit (bytes or words) is not visible here
int ct_len;
} scrtv2_t;
/**
 * Shifts the four words w0[0..3] in place towards higher indexes and fills the
 * vacated low words with zero. Only w0[0] .. w0[3] are read or written, even
 * if the caller's buffer is larger.
 *
 * w0     : buffer to shift (modified in place)
 * offset : shift distance; offset / 4 selects how many whole words are moved,
 *          values of 16 and above clear all four words
 *
 * Two implementations are selected by the preprocessor; if neither condition
 * holds the function leaves w0 untouched.
 */
DECLSPEC void shift_buffer_by_offset (PRIVATE_AS u32 *w0, const u32 offset)
{
// number of whole 32-bit words covered by the shift, used as the switch selector
const int offset_switch = offset / 4;
// variant 1: built on hc_bytealign_be_S, which receives the full offset
// NOTE(review): presumably the helper only uses the low two bits of offset - confirm
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
switch (offset_switch)
{
// in every case the highest word is written first, so each statement still
// reads the original contents of the lower words it depends on
case 0:
w0[3] = hc_bytealign_be_S (w0[2], w0[3], offset);
w0[2] = hc_bytealign_be_S (w0[1], w0[2], offset);
w0[1] = hc_bytealign_be_S (w0[0], w0[1], offset);
w0[0] = hc_bytealign_be_S ( 0, w0[0], offset);
break;
case 1:
w0[3] = hc_bytealign_be_S (w0[1], w0[2], offset);
w0[2] = hc_bytealign_be_S (w0[0], w0[1], offset);
w0[1] = hc_bytealign_be_S ( 0, w0[0], offset);
w0[0] = 0;
break;
case 2:
w0[3] = hc_bytealign_be_S (w0[0], w0[1], offset);
w0[2] = hc_bytealign_be_S ( 0, w0[0], offset);
w0[1] = 0;
w0[0] = 0;
break;
case 3:
w0[3] = hc_bytealign_be_S ( 0, w0[0], offset);
w0[2] = 0;
w0[1] = 0;
w0[0] = 0;
break;
// offset >= 16: everything is shifted out of the four words
default:
w0[3] = 0;
w0[2] = 0;
w0[1] = 0;
w0[0] = 0;
break;
}
#endif
// variant 2: built on hc_byte_perm_S with a selector derived from (offset & 3)
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
// nibble pattern 0x76543210 moved right by (offset & 3) nibbles, low 16 bits kept
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
// byte pattern 0x0706050403020100 moved right by (offset & 3) bytes
// NOTE(review): l32_from_64_S presumably returns the low 32 bits - confirm
const int selector = l32_from_64_S(0x0706050403020100UL >> ((offset & 3) * 8));
#endif
switch (offset_switch)
{
// same write order as in variant 1: highest word first
case 0:
w0[3] = hc_byte_perm_S (w0[3], w0[2], selector);
w0[2] = hc_byte_perm_S (w0[2], w0[1], selector);
w0[1] = hc_byte_perm_S (w0[1], w0[0], selector);
w0[0] = hc_byte_perm_S (w0[0], 0, selector);
break;
case 1:
w0[3] = hc_byte_perm_S (w0[2], w0[1], selector);
w0[2] = hc_byte_perm_S (w0[1], w0[0], selector);
w0[1] = hc_byte_perm_S (w0[0], 0, selector);
w0[0] = 0;
break;
case 2:
w0[3] = hc_byte_perm_S (w0[1], w0[0], selector);
w0[2] = hc_byte_perm_S (w0[0], 0, selector);
w0[1] = 0;
w0[0] = 0;
break;
case 3:
w0[3] = hc_byte_perm_S (w0[0], 0, selector);
w0[2] = 0;
w0[1] = 0;
w0[0] = 0;
break;
// offset >= 16: everything is shifted out of the four words
default:
w0[3] = 0;
w0[2] = 0;
w0[1] = 0;
w0[0] = 0;
break;
}
#endif
}
/**
 * Builds one AES block from the password and the hash and encrypts it.
 *
 * Steps, in this order:
 *   1. the unmodified hash is handed to AES256_set_encrypt_key as key material
 *   2. hash[0..3] is shifted by (pw_len + 4) bytes via shift_buffer_by_offset
 *   3. hash[0] is replaced by the byte-swapped pw_len
 *   4. the byte-swapped pw[0..2] are OR-ed into hash[1..3]
 *   5. hash is encrypted into out with the key schedule from step 1
 *
 * aes_ks       : scratch buffer receiving the AES key schedule
 * pw           : password words; only pw[0], pw[1] and pw[2] are read
 * pw_len       : password length
 * hash         : key material on entry; hash[0..3] is overwritten with the plaintext block
 * out          : receives the AES256_encrypt result
 * s_te0..s_te4 : AES lookup tables passed through to the AES helpers
 */
DECLSPEC void aes256_scrt_format (PRIVATE_AS u32 *aes_ks, PRIVATE_AS u32 *pw, const u32 pw_len, PRIVATE_AS u32 *hash, PRIVATE_AS u32 *out, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4)
{
  // key setup has to happen before the hash buffer is reused as plaintext
  AES256_set_encrypt_key (aes_ks, hash, s_te0, s_te1, s_te2, s_te3);

  // 4 extra bytes: hash[0] is taken by the length word written below
  const u32 shift_bytes = pw_len + 4;

  shift_buffer_by_offset (hash, shift_bytes);

  hash[0] = hc_swap32_S (pw_len);

  for (u32 idx = 0; idx < 3; idx++)
  {
    hash[idx + 1] |= hc_swap32_S (pw[idx]);
  }

  AES256_encrypt (aes_ks, hash, out, s_te0, s_te1, s_te2, s_te3, s_te4);
}
/**
 * Vector front-end of aes256_scrt_format ().
 *
 * With VECT_SIZE == 1 the arguments are forwarded unchanged. For wider vectors
 * every lane is processed on its own: the lane's four password words and eight
 * hash words are copied into scalar scratch arrays, aes256_scrt_format () is
 * called with the lane's pw_len, and the four result words are written back
 * into the same lane of out. Lanes are handled in ascending order s0 .. sf and
 * all of them reuse the same aes_ks buffer.
 *
 * aes_ks       : scratch buffer for the AES key schedule (rewritten per lane)
 * w            : password words, w[0..3] are read
 * pw_len       : per-lane password length
 * h            : hash words, h[0..7] are read
 * out          : out[0..3] receive the per-lane results
 * s_te0..s_te4 : AES lookup tables passed through
 */
DECLSPEC void aes256_scrt_format_VV (PRIVATE_AS u32 *aes_ks, PRIVATE_AS u32x *w, const u32x pw_len, PRIVATE_AS u32x *h, PRIVATE_AS u32x *out, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4)
{
  // gather one lane, run the scalar routine, scatter the result back
  #define SCRT_FORMAT_LANE(lane)                                                                               \
  do                                                                                                           \
  {                                                                                                            \
    for (u32 i = 0; i < 4; i++) lane_w[i] = w[i].lane;                                                         \
    for (u32 i = 0; i < 8; i++) lane_h[i] = h[i].lane;                                                         \
                                                                                                               \
    aes256_scrt_format (aes_ks, lane_w, pw_len.lane, lane_h, lane_out, s_te0, s_te1, s_te2, s_te3, s_te4);     \
                                                                                                               \
    for (u32 i = 0; i < 4; i++) out[i].lane = lane_out[i];                                                     \
  } while (0)

  #if VECT_SIZE == 1
  aes256_scrt_format (aes_ks, w, pw_len, h, out, s_te0, s_te1, s_te2, s_te3, s_te4);
  #endif

  #if VECT_SIZE >= 2
  u32 lane_w[4];
  u32 lane_h[8];
  u32 lane_out[4];

  SCRT_FORMAT_LANE (s0);
  SCRT_FORMAT_LANE (s1);
  #endif

  #if VECT_SIZE >= 4
  SCRT_FORMAT_LANE (s2);
  SCRT_FORMAT_LANE (s3);
  #endif

  #if VECT_SIZE >= 8
  SCRT_FORMAT_LANE (s4);
  SCRT_FORMAT_LANE (s5);
  SCRT_FORMAT_LANE (s6);
  SCRT_FORMAT_LANE (s7);
  #endif

  #if VECT_SIZE >= 16
  SCRT_FORMAT_LANE (s8);
  SCRT_FORMAT_LANE (s9);
  SCRT_FORMAT_LANE (sa);
  SCRT_FORMAT_LANE (sb);
  SCRT_FORMAT_LANE (sc);
  SCRT_FORMAT_LANE (sd);
  SCRT_FORMAT_LANE (se);
  SCRT_FORMAT_LANE (sf);
  #endif

  #undef SCRT_FORMAT_LANE
}
// Empty kernel body: the _m16 entry point exists but performs no work in this file.
// NOTE(review): presumably only the _m04 variant is needed for this mode because
// the encrypted block holds just the first password words - confirm with the host module.
KERNEL_FQ void m31400_m16 (KERN_ATTR_RULES_ESALT (scrtv2_t))
{
}
// Empty kernel body: the _m08 entry point exists but performs no work in this file.
// NOTE(review): presumably only the _m04 variant is needed for this mode because
// the encrypted block holds just the first password words - confirm with the host module.
KERNEL_FQ void m31400_m08 (KERN_ATTR_RULES_ESALT (scrtv2_t))
{
}
/**
 * Rule-based kernel that compares its result with COMPARE_M_SIMD.
 *
 * Per rule candidate it:
 *   1. applies the rule to the base password (apply_rules_vect_optimized)
 *   2. hashes the padded candidate with one unrolled SHA-256 block
 *   3. passes the un-padded password words, the candidate length and the
 *      SHA-256 digest to aes256_scrt_format_VV
 *   4. compares the four selected output words via COMPARE_M_SIMD
 */
KERNEL_FQ void m31400_m04 (KERN_ATTR_RULES_ESALT (scrtv2_t))
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
const u64 gid = get_global_id (0);
const u64 lsz = get_local_size (0);
/**
* aes shared
*/
#ifdef REAL_SHM
// local copies of the five AES tables; each work-item copies the entries
// lid, lid + lsz, lid + 2 * lsz, ... so the group fills the tables together
LOCAL_VK u32 s_te0[256];
LOCAL_VK u32 s_te1[256];
LOCAL_VK u32 s_te2[256];
LOCAL_VK u32 s_te3[256];
LOCAL_VK u32 s_te4[256];
for (u32 i = lid; i < 256; i += lsz)
{
s_te0[i] = te0[i];
s_te1[i] = te1[i];
s_te2[i] = te2[i];
s_te3[i] = te3[i];
s_te4[i] = te4[i];
}
SYNC_THREADS();
#else
// without REAL_SHM the constant tables are used directly
CONSTANT_AS u32a *s_te0 = te0;
CONSTANT_AS u32a *s_te1 = te1;
CONSTANT_AS u32a *s_te2 = te2;
CONSTANT_AS u32a *s_te3 = te3;
CONSTANT_AS u32a *s_te4 = te4;
#endif
// the range check comes after the table copy and SYNC_THREADS above
// NOTE(review): presumably so that out-of-range work-items still help with the copy - confirm
if (gid >= GID_CNT) return;
/**
* base
*/
// AES key schedule scratch buffer, handed to aes256_scrt_format_VV as aes_ks
u32 ks[60];
// first eight words of the base password
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
// length masked to the range 0..63
const u32 pw_len = pws[gid].pw_len & 63;
// holds the first four candidate words as they are before padding is appended
u32x wt[4];
/**
* loop
*/
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
{
u32x w0[4] = { 0 };
u32x w1[4] = { 0 };
// w2 and w3 are never written, so they contribute zero words to the message block
u32x w2[4] = { 0 };
u32x w3[4] = { 0 };
const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
// snapshot taken before append_0x80_2x4_VV modifies w0 / w1
wt[0] = w0[0];
wt[1] = w0[1];
wt[2] = w0[2];
wt[3] = w0[3];
append_0x80_2x4_VV (w0, w1, out_len);
/**
* sha256
*/
// message block: 14 byte-swapped data words, a zero word and out_len * 8 (the length in bits)
u32x w0_t = hc_swap32 (w0[0]);
u32x w1_t = hc_swap32 (w0[1]);
u32x w2_t = hc_swap32 (w0[2]);
u32x w3_t = hc_swap32 (w0[3]);
u32x w4_t = hc_swap32 (w1[0]);
u32x w5_t = hc_swap32 (w1[1]);
u32x w6_t = hc_swap32 (w1[2]);
u32x w7_t = hc_swap32 (w1[3]);
u32x w8_t = hc_swap32 (w2[0]);
u32x w9_t = hc_swap32 (w2[1]);
u32x wa_t = hc_swap32 (w2[2]);
u32x wb_t = hc_swap32 (w2[3]);
u32x wc_t = hc_swap32 (w3[0]);
u32x wd_t = hc_swap32 (w3[1]);
u32x we_t = 0;
u32x wf_t = out_len * 8;
// working state starts from the SHA256M_* initial values
u32x a = SHA256M_A;
u32x b = SHA256M_B;
u32x c = SHA256M_C;
u32x d = SHA256M_D;
u32x e = SHA256M_E;
u32x f = SHA256M_F;
u32x g = SHA256M_G;
u32x h = SHA256M_H;
// steps 0x00 - 0x0f: message words are used as loaded; the state variables
// rotate by one position in the argument list from step to step
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00);
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01);
SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02);
SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03);
SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04);
SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05);
SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06);
SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07);
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08);
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09);
SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a);
SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b);
SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c);
SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d);
SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e);
SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f);
// steps 0x10 - 0x3f: each schedule word is expanded in place right before its step
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18);
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28);
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38);
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f);
// final digest: working state plus the initial values
u32x digest[8];
digest[0] = a + make_u32x (SHA256M_A);
digest[1] = b + make_u32x (SHA256M_B);
digest[2] = c + make_u32x (SHA256M_C);
digest[3] = d + make_u32x (SHA256M_D);
digest[4] = e + make_u32x (SHA256M_E);
digest[5] = f + make_u32x (SHA256M_F);
digest[6] = g + make_u32x (SHA256M_G);
digest[7] = h + make_u32x (SHA256M_H);
// wt (password without padding), out_len and the digest go into the AES step
u32x out[4] = { 0 };
aes256_scrt_format_VV (ks, wt, out_len, digest, out, s_te0, s_te1, s_te2, s_te3, s_te4);
const u32x r0 = out[DGST_R0];
const u32x r1 = out[DGST_R1];
const u32x r2 = out[DGST_R2];
const u32x r3 = out[DGST_R3];
COMPARE_M_SIMD (r0, r1, r2, r3);
}
}
// Empty kernel body: the _s16 entry point exists but performs no work in this file.
// NOTE(review): presumably only the _s04 variant is needed for this mode because
// the encrypted block holds just the first password words - confirm with the host module.
KERNEL_FQ void m31400_s16 (KERN_ATTR_RULES_ESALT (scrtv2_t))
{
}
// Empty kernel body: the _s08 entry point exists but performs no work in this file.
// NOTE(review): presumably only the _s04 variant is needed for this mode because
// the encrypted block holds just the first password words - confirm with the host module.
KERNEL_FQ void m31400_s08 (KERN_ATTR_RULES_ESALT (scrtv2_t))
{
}
/**
 * Rule-based kernel that compares its result with COMPARE_S_SIMD.
 *
 * Same pipeline as the COMPARE_M_SIMD variant, plus a `search` array loaded
 * from digests_buf[DIGESTS_OFFSET_HOST] before the loop. Per rule candidate it:
 *   1. applies the rule to the base password (apply_rules_vect_optimized)
 *   2. hashes the padded candidate with one unrolled SHA-256 block
 *   3. passes the un-padded password words, the candidate length and the
 *      SHA-256 digest to aes256_scrt_format_VV
 *   4. compares the four selected output words via COMPARE_S_SIMD
 */
KERNEL_FQ void m31400_s04 (KERN_ATTR_RULES_ESALT (scrtv2_t))
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
const u64 gid = get_global_id (0);
const u64 lsz = get_local_size (0);
/**
* aes shared
*/
#ifdef REAL_SHM
// local copies of the five AES tables; each work-item copies the entries
// lid, lid + lsz, lid + 2 * lsz, ... so the group fills the tables together
LOCAL_VK u32 s_te0[256];
LOCAL_VK u32 s_te1[256];
LOCAL_VK u32 s_te2[256];
LOCAL_VK u32 s_te3[256];
LOCAL_VK u32 s_te4[256];
for (u32 i = lid; i < 256; i += lsz)
{
s_te0[i] = te0[i];
s_te1[i] = te1[i];
s_te2[i] = te2[i];
s_te3[i] = te3[i];
s_te4[i] = te4[i];
}
SYNC_THREADS();
#else
// without REAL_SHM the constant tables are used directly
CONSTANT_AS u32a *s_te0 = te0;
CONSTANT_AS u32a *s_te1 = te1;
CONSTANT_AS u32a *s_te2 = te2;
CONSTANT_AS u32a *s_te3 = te3;
CONSTANT_AS u32a *s_te4 = te4;
#endif
// the range check comes after the table copy and SYNC_THREADS above
// NOTE(review): presumably so that out-of-range work-items still help with the copy - confirm
if (gid >= GID_CNT) return;
/**
* base
*/
// AES key schedule scratch buffer, handed to aes256_scrt_format_VV as aes_ks
u32 ks[60];
// first eight words of the base password
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
// length masked to the range 0..63
const u32 pw_len = pws[gid].pw_len & 63;
// holds the first four candidate words as they are before padding is appended
u32x wt[4];
/**
* digest
*/
// the four target words of the digest at DIGESTS_OFFSET_HOST
// NOTE(review): not referenced by name below - presumably consumed inside COMPARE_S_SIMD, confirm
const u32 search[4] =
{
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
};
/**
* loop
*/
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
{
u32x w0[4] = { 0 };
u32x w1[4] = { 0 };
// w2 and w3 are never written, so they contribute zero words to the message block
u32x w2[4] = { 0 };
u32x w3[4] = { 0 };
const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
// snapshot taken before append_0x80_2x4_VV modifies w0 / w1
wt[0] = w0[0];
wt[1] = w0[1];
wt[2] = w0[2];
wt[3] = w0[3];
append_0x80_2x4_VV (w0, w1, out_len);
/**
* sha256
*/
// message block: 14 byte-swapped data words, a zero word and out_len * 8 (the length in bits)
u32x w0_t = hc_swap32 (w0[0]);
u32x w1_t = hc_swap32 (w0[1]);
u32x w2_t = hc_swap32 (w0[2]);
u32x w3_t = hc_swap32 (w0[3]);
u32x w4_t = hc_swap32 (w1[0]);
u32x w5_t = hc_swap32 (w1[1]);
u32x w6_t = hc_swap32 (w1[2]);
u32x w7_t = hc_swap32 (w1[3]);
u32x w8_t = hc_swap32 (w2[0]);
u32x w9_t = hc_swap32 (w2[1]);
u32x wa_t = hc_swap32 (w2[2]);
u32x wb_t = hc_swap32 (w2[3]);
u32x wc_t = hc_swap32 (w3[0]);
u32x wd_t = hc_swap32 (w3[1]);
u32x we_t = 0;
u32x wf_t = out_len * 8;
// working state starts from the SHA256M_* initial values
u32x a = SHA256M_A;
u32x b = SHA256M_B;
u32x c = SHA256M_C;
u32x d = SHA256M_D;
u32x e = SHA256M_E;
u32x f = SHA256M_F;
u32x g = SHA256M_G;
u32x h = SHA256M_H;
// steps 0x00 - 0x0f: message words are used as loaded; the state variables
// rotate by one position in the argument list from step to step
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00);
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01);
SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02);
SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03);
SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04);
SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05);
SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06);
SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07);
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08);
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09);
SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a);
SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b);
SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c);
SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d);
SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e);
SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f);
// steps 0x10 - 0x3f: each schedule word is expanded in place right before its step
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18);
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28);
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38);
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f);
// final digest: working state plus the initial values
u32x digest[8];
digest[0] = a + make_u32x (SHA256M_A);
digest[1] = b + make_u32x (SHA256M_B);
digest[2] = c + make_u32x (SHA256M_C);
digest[3] = d + make_u32x (SHA256M_D);
digest[4] = e + make_u32x (SHA256M_E);
digest[5] = f + make_u32x (SHA256M_F);
digest[6] = g + make_u32x (SHA256M_G);
digest[7] = h + make_u32x (SHA256M_H);
// wt (password without padding), out_len and the digest go into the AES step
u32x out[4] = { 0 };
aes256_scrt_format_VV (ks, wt, out_len, digest, out, s_te0, s_te1, s_te2, s_te3, s_te4);
const u32x r0 = out[DGST_R0];
const u32x r1 = out[DGST_R1];
const u32x r2 = out[DGST_R2];
const u32x r3 = out[DGST_R3];
COMPARE_S_SIMD (r0, r1, r2, r3);
}
}

@ -184,10 +184,10 @@ KERNEL_FQ void m31400_mxx (KERN_ATTR_RULES_ESALT (scrtv2_t))
* base
*/
COPY_PW (pws[gid]);
u32 ks[60];
COPY_PW (pws[gid]);
/**
* loop
*/
@ -208,7 +208,7 @@ KERNEL_FQ void m31400_mxx (KERN_ATTR_RULES_ESALT (scrtv2_t))
u32 out[4] = { 0 };
aes256_scrt_format (ks, tmp.i, tmp.pw_len, ctx.h, out,s_te0, s_te1, s_te2, s_te3, s_te4);
aes256_scrt_format (ks, tmp.i, tmp.pw_len, ctx.h, out, s_te0, s_te1, s_te2, s_te3, s_te4);
const u32 r0 = out[DGST_R0];
const u32 r1 = out[DGST_R1];
@ -280,10 +280,10 @@ KERNEL_FQ void m31400_sxx (KERN_ATTR_RULES_ESALT (scrtv2_t))
* base
*/
COPY_PW (pws[gid]);
u32 ks[60];
COPY_PW (pws[gid]);
/**
* loop
*/
@ -304,7 +304,7 @@ KERNEL_FQ void m31400_sxx (KERN_ATTR_RULES_ESALT (scrtv2_t))
u32 out[4] = { 0 };
aes256_scrt_format (ks, tmp.i, tmp.pw_len, ctx.h, out,s_te0, s_te1, s_te2, s_te3, s_te4);
aes256_scrt_format (ks, tmp.i, tmp.pw_len, ctx.h, out, s_te0, s_te1, s_te2, s_te3, s_te4);
const u32 r0 = out[DGST_R0];
const u32 r1 = out[DGST_R1];

@ -0,0 +1,901 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include M2S(INCLUDE_PATH/inc_vendor.h)
#include M2S(INCLUDE_PATH/inc_types.h)
#include M2S(INCLUDE_PATH/inc_platform.cl)
#include M2S(INCLUDE_PATH/inc_common.cl)
#include M2S(INCLUDE_PATH/inc_simd.cl)
#include M2S(INCLUDE_PATH/inc_hash_sha256.cl)
#include M2S(INCLUDE_PATH/inc_cipher_aes.cl)
#endif
// Extra per-hash data type for the m31400 kernels; it is the type handed to
// KERN_ATTR_ESALT / KERN_ATTR_RULES_ESALT in the kernel signatures.
// NOTE(review): the kernels visible in this file never read these fields, so
// their meaning below is inferred from the names only - confirm against the host module.
typedef struct scrtv2
{
u32 ct_buf[64]; // 64 u32 words of payload (presumably ciphertext - TODO confirm)
int ct_len; // presumably the used length of ct_buf - TODO confirm unit (bytes vs. words)
} scrtv2_t;
/**
 * Shift the four-word buffer w0[0..3] towards higher word indices, in place.
 *
 * w0     : private buffer; exactly the words w0[0]..w0[3] are read and rewritten
 * offset : shift distance in bytes
 *
 * The whole-word part of the shift (offset / 4) selects how many leading words
 * become zero; anything from 16 bytes upwards clears all four words. The
 * sub-word part is delegated to hc_bytealign_be_S () or hc_byte_perm_S (),
 * depending on the platform (presumably both produce the same result - the
 * helpers are defined outside this file).
 */
DECLSPEC void shift_buffer_by_offset (PRIVATE_AS u32 *w0, const u32 offset)
{
  // number of whole 32-bit words the data moves by
  const int word_shift = offset / 4;

  // snapshot of the input, so every output word can be written independently
  const u32 in0 = w0[0];
  const u32 in1 = w0[1];
  const u32 in2 = w0[2];
  const u32 in3 = w0[3];

  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC

  // byte-align variant: the helper receives the full byte offset
  switch (word_shift)
  {
    case 0:
      w0[0] = hc_bytealign_be_S (  0, in0, offset);
      w0[1] = hc_bytealign_be_S (in0, in1, offset);
      w0[2] = hc_bytealign_be_S (in1, in2, offset);
      w0[3] = hc_bytealign_be_S (in2, in3, offset);
      break;

    case 1:
      w0[0] = 0;
      w0[1] = hc_bytealign_be_S (  0, in0, offset);
      w0[2] = hc_bytealign_be_S (in0, in1, offset);
      w0[3] = hc_bytealign_be_S (in1, in2, offset);
      break;

    case 2:
      w0[0] = 0;
      w0[1] = 0;
      w0[2] = hc_bytealign_be_S (  0, in0, offset);
      w0[3] = hc_bytealign_be_S (in0, in1, offset);
      break;

    case 3:
      w0[0] = 0;
      w0[1] = 0;
      w0[2] = 0;
      w0[3] = hc_bytealign_be_S (  0, in0, offset);
      break;

    default:
      w0[0] = 0;
      w0[1] = 0;
      w0[2] = 0;
      w0[3] = 0;
      break;
  }

  #endif

  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV

  // byte-permute variant: the sub-word shift (offset & 3) is encoded in a selector
  #if defined IS_NV
  const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
  #endif

  #if (defined IS_AMD || defined IS_HIP)
  const int selector = l32_from_64_S(0x0706050403020100UL >> ((offset & 3) * 8));
  #endif

  switch (word_shift)
  {
    case 0:
      w0[0] = hc_byte_perm_S (in0,   0, selector);
      w0[1] = hc_byte_perm_S (in1, in0, selector);
      w0[2] = hc_byte_perm_S (in2, in1, selector);
      w0[3] = hc_byte_perm_S (in3, in2, selector);
      break;

    case 1:
      w0[0] = 0;
      w0[1] = hc_byte_perm_S (in0,   0, selector);
      w0[2] = hc_byte_perm_S (in1, in0, selector);
      w0[3] = hc_byte_perm_S (in2, in1, selector);
      break;

    case 2:
      w0[0] = 0;
      w0[1] = 0;
      w0[2] = hc_byte_perm_S (in0,   0, selector);
      w0[3] = hc_byte_perm_S (in1, in0, selector);
      break;

    case 3:
      w0[0] = 0;
      w0[1] = 0;
      w0[2] = 0;
      w0[3] = hc_byte_perm_S (in0,   0, selector);
      break;

    default:
      w0[0] = 0;
      w0[1] = 0;
      w0[2] = 0;
      w0[3] = 0;
      break;
  }

  #endif
}
/**
 * Build one 16-byte block from the password and its digest and encrypt it
 * with AES-256, using the digest as the key.
 *
 * aes_ks      : scratch space for the key schedule (callers in this file pass u32[60])
 * pw          : candidate password words; pw[0..2] are read and byte-swapped here
 * pw_len      : candidate length in bytes
 * hash        : 8-word digest; used as the AES key and then MODIFIED IN PLACE
 *               (hash[0..3] become the plaintext block)
 * out         : receives the output of AES256_encrypt ()
 * s_te0..4    : AES lookup tables forwarded to the AES helpers
 *
 * Resulting block layout in hash[0..3]: byte-swapped pw_len in word 0, then
 * the first three password words OR-ed over the digest words that
 * shift_buffer_by_offset () moved up by (pw_len + 4) bytes.
 */
DECLSPEC void aes256_scrt_format (PRIVATE_AS u32 *aes_ks, PRIVATE_AS u32 *pw, const u32 pw_len, PRIVATE_AS u32 *hash, PRIVATE_AS u32 *out, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4)
{
  // the key schedule has to be derived before hash[] gets overwritten below
  AES256_set_encrypt_key (aes_ks, hash, s_te0, s_te1, s_te2, s_te3);

  // room needed in front of the digest: 4 bytes length field + the password bytes
  const u32 prefix_len = pw_len + 4;

  shift_buffer_by_offset (hash, prefix_len);

  // merge the first three password words into words 1..3 of the block
  for (u32 idx = 0; idx < 3; idx++)
  {
    hash[idx + 1] |= hc_swap32_S (pw[idx]);
  }

  // word 0 is replaced (not OR-ed) by the length field
  hash[0] = hc_swap32_S (pw_len);

  AES256_encrypt (aes_ks, hash, out, s_te0, s_te1, s_te2, s_te3, s_te4);
}
/**
 * Vector front end for aes256_scrt_format ().
 *
 * With VECT_SIZE == 1 the arguments are forwarded unchanged. For wider vectors
 * every lane (s0, s1, ... up to sf for VECT_SIZE 16) is processed on its own:
 * the lane's four password words and eight digest words are copied into scalar
 * scratch arrays, aes256_scrt_format () is called, and the four result words
 * are written back into the same lane of out[].
 *
 * aes_ks   : scalar key-schedule scratch space, reused for every lane
 * w        : password words, w[0..3] are read
 * pw_len   : per-lane password length
 * h        : per-lane digest, h[0..7] are read (not modified for VECT_SIZE >= 2,
 *            because only the scalar copy is handed on)
 * out      : out[0..3] receive the per-lane results
 * s_te0..4 : AES lookup tables forwarded to aes256_scrt_format ()
 */
DECLSPEC void aes256_scrt_format_VV (PRIVATE_AS u32 *aes_ks, PRIVATE_AS u32x *w, const u32x pw_len, PRIVATE_AS u32x *h, PRIVATE_AS u32x *out, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4)
{
  // extract lane `sx`, run the scalar routine on it, store the result back into lane `sx`
  #define SCRT_FORMAT_LANE(sx)                                                                                      \
  {                                                                                                                 \
    for (u32 i = 0; i < 4; i++) lane_pw[i] = w[i].sx;                                                               \
    for (u32 i = 0; i < 8; i++) lane_digest[i] = h[i].sx;                                                           \
    aes256_scrt_format (aes_ks, lane_pw, pw_len.sx, lane_digest, lane_out, s_te0, s_te1, s_te2, s_te3, s_te4);      \
    for (u32 i = 0; i < 4; i++) out[i].sx = lane_out[i];                                                            \
  }

  #if VECT_SIZE == 1

  aes256_scrt_format (aes_ks, w, pw_len, h, out, s_te0, s_te1, s_te2, s_te3, s_te4);

  #endif

  #if VECT_SIZE >= 2

  // scalar scratch buffers shared by all lanes
  u32 lane_pw[4];
  u32 lane_digest[8];
  u32 lane_out[4];

  SCRT_FORMAT_LANE (s0);
  SCRT_FORMAT_LANE (s1);

  #endif

  #if VECT_SIZE >= 4

  SCRT_FORMAT_LANE (s2);
  SCRT_FORMAT_LANE (s3);

  #endif

  #if VECT_SIZE >= 8

  SCRT_FORMAT_LANE (s4);
  SCRT_FORMAT_LANE (s5);
  SCRT_FORMAT_LANE (s6);
  SCRT_FORMAT_LANE (s7);

  #endif

  #if VECT_SIZE >= 16

  SCRT_FORMAT_LANE (s8);
  SCRT_FORMAT_LANE (s9);
  SCRT_FORMAT_LANE (sa);
  SCRT_FORMAT_LANE (sb);
  SCRT_FORMAT_LANE (sc);
  SCRT_FORMAT_LANE (sd);
  SCRT_FORMAT_LANE (se);
  SCRT_FORMAT_LANE (sf);

  #endif

  #undef SCRT_FORMAT_LANE
}
// m31400_m04: combinator kernel (left word from pws[], right word from combs_buf[]).
// Per candidate it joins both words, runs one unrolled SHA-256 block over the result,
// passes the digest plus the first four candidate words to aes256_scrt_format_VV ()
// and feeds the four result words to COMPARE_M_SIMD.
// NOTE(review): COMPARE_M_SIMD is presumably the multi-digest compare macro - it is defined outside this file.
KERNEL_FQ void m31400_m04 (KERN_ATTR_ESALT (scrtv2_t))
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
const u64 gid = get_global_id (0);
const u64 lsz = get_local_size (0);
/**
* aes shared
*/
#ifdef REAL_SHM
// copy the five 256-entry AES tables into LOCAL_VK arrays; the work is
// strided over the work-items of the group (start at lid, step lsz)
LOCAL_VK u32 s_te0[256];
LOCAL_VK u32 s_te1[256];
LOCAL_VK u32 s_te2[256];
LOCAL_VK u32 s_te3[256];
LOCAL_VK u32 s_te4[256];
for (u32 i = lid; i < 256; i += lsz)
{
s_te0[i] = te0[i];
s_te1[i] = te1[i];
s_te2[i] = te2[i];
s_te3[i] = te3[i];
s_te4[i] = te4[i];
}
SYNC_THREADS();
#else
// no copy: use the te0..te4 tables directly
CONSTANT_AS u32a *s_te0 = te0;
CONSTANT_AS u32a *s_te1 = te1;
CONSTANT_AS u32a *s_te2 = te2;
CONSTANT_AS u32a *s_te3 = te3;
CONSTANT_AS u32a *s_te4 = te4;
#endif
// the bounds check comes after the table copy, so out-of-range work-items still take part in the copy and the sync
if (gid >= GID_CNT) return;
/**
* base
*/
// AES key-schedule scratch space handed to aes256_scrt_format_VV ()
u32 ks[60];
// first eight words of the left-hand (base) word of this work-item
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
// length masked to 6 bits (0..63)
const u32 pw_l_len = pws[gid].pw_len & 63;
// first four candidate words, saved before the byte swap (filled inside the loop)
u32x wt[4];
/**
* loop
*/
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
{
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63;
const u32x pw_len = (pw_l_len + pw_r_len) & 63;
/**
* concat password candidate
*/
u32x wordl0[4] = { 0 };
u32x wordl1[4] = { 0 };
u32x wordl2[4] = { 0 };
u32x wordl3[4] = { 0 };
wordl0[0] = pw_buf0[0];
wordl0[1] = pw_buf0[1];
wordl0[2] = pw_buf0[2];
wordl0[3] = pw_buf0[3];
wordl1[0] = pw_buf1[0];
wordl1[1] = pw_buf1[1];
wordl1[2] = pw_buf1[2];
wordl1[3] = pw_buf1[3];
u32x wordr0[4] = { 0 };
u32x wordr1[4] = { 0 };
u32x wordr2[4] = { 0 };
u32x wordr3[4] = { 0 };
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
// move whichever word comes second behind the first one, so the two can simply be OR-ed together below
if (COMBS_MODE == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
else
{
switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
}
u32x w0[4];
u32x w1[4];
u32x w2[4];
u32x w3[4];
w0[0] = wordl0[0] | wordr0[0];
w0[1] = wordl0[1] | wordr0[1];
w0[2] = wordl0[2] | wordr0[2];
w0[3] = wordl0[3] | wordr0[3];
w1[0] = wordl1[0] | wordr1[0];
w1[1] = wordl1[1] | wordr1[1];
w1[2] = wordl1[2] | wordr1[2];
w1[3] = wordl1[3] | wordr1[3];
w2[0] = wordl2[0] | wordr2[0];
w2[1] = wordl2[1] | wordr2[1];
w2[2] = wordl2[2] | wordr2[2];
w2[3] = wordl2[3] | wordr2[3];
w3[0] = wordl3[0] | wordr3[0];
w3[1] = wordl3[1] | wordr3[1];
w3[2] = wordl3[2] | wordr3[2];
w3[3] = wordl3[3] | wordr3[3];
// keep the unswapped first four words; aes256_scrt_format () byte-swaps them itself
wt[0] = w0[0];
wt[1] = w0[1];
wt[2] = w0[2];
wt[3] = w0[3];
// byte-swap all sixteen words for the SHA-256 rounds below
w0[0] = hc_swap32 (w0[0]);
w0[1] = hc_swap32 (w0[1]);
w0[2] = hc_swap32 (w0[2]);
w0[3] = hc_swap32 (w0[3]);
w1[0] = hc_swap32 (w1[0]);
w1[1] = hc_swap32 (w1[1]);
w1[2] = hc_swap32 (w1[2]);
w1[3] = hc_swap32 (w1[3]);
w2[0] = hc_swap32 (w2[0]);
w2[1] = hc_swap32 (w2[1]);
w2[2] = hc_swap32 (w2[2]);
w2[3] = hc_swap32 (w2[3]);
w3[0] = hc_swap32 (w3[0]);
w3[1] = hc_swap32 (w3[1]);
w3[2] = hc_swap32 (w3[2]);
w3[3] = hc_swap32 (w3[3]);
// add the 0x80 padding byte after the candidate; presumably the "^ 3" mirrors the
// byte index inside each word because the words were just byte-swapped - TODO confirm
append_0x80_4x4_VV (w0, w1, w2, w3, pw_len ^ 3);
/**
* sha256
*/
u32x w0_t = w0[0];
u32x w1_t = w0[1];
u32x w2_t = w0[2];
u32x w3_t = w0[3];
u32x w4_t = w1[0];
u32x w5_t = w1[1];
u32x w6_t = w1[2];
u32x w7_t = w1[3];
u32x w8_t = w2[0];
u32x w9_t = w2[1];
u32x wa_t = w2[2];
u32x wb_t = w2[3];
u32x wc_t = w3[0];
u32x wd_t = w3[1];
// the last two words carry the message length in bits; w3[2] and w3[3] are not used
u32x we_t = 0;
u32x wf_t = pw_len * 8;
// start from the SHA256M_A..H constants; the same constants are added back after the rounds
u32x a = SHA256M_A;
u32x b = SHA256M_B;
u32x c = SHA256M_C;
u32x d = SHA256M_D;
u32x e = SHA256M_E;
u32x f = SHA256M_F;
u32x g = SHA256M_G;
u32x h = SHA256M_H;
// steps 0x00..0x0f: message words are used as they are
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00);
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01);
SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02);
SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03);
SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04);
SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05);
SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06);
SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07);
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08);
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09);
SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a);
SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b);
SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c);
SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d);
SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e);
SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f);
// steps 0x10..0x3f: each word is first recomputed in place by SHA256_EXPAND, so the statement order must not change
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18);
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28);
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38);
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f);
// final digest: working variables plus the initial constants
u32x digest[8];
digest[0] = a + make_u32x (SHA256M_A);
digest[1] = b + make_u32x (SHA256M_B);
digest[2] = c + make_u32x (SHA256M_C);
digest[3] = d + make_u32x (SHA256M_D);
digest[4] = e + make_u32x (SHA256M_E);
digest[5] = f + make_u32x (SHA256M_F);
digest[6] = g + make_u32x (SHA256M_G);
digest[7] = h + make_u32x (SHA256M_H);
// AES stage: digest is the key, the block is built from pw_len, wt and the digest
u32x out[4] = { 0 };
aes256_scrt_format_VV (ks, wt, pw_len, digest, out, s_te0, s_te1, s_te2, s_te3, s_te4);
const u32x r0 = out[DGST_R0];
const u32x r1 = out[DGST_R1];
const u32x r2 = out[DGST_R2];
const u32x r3 = out[DGST_R3];
COMPARE_M_SIMD (r0, r1, r2, r3);
}
}
// Stub kernel: the body is empty, so this entry point performs no work.
// NOTE(review): presumably only the _m04 variant is needed for this mode - confirm.
KERNEL_FQ void m31400_m08 (KERN_ATTR_BASIC ())
{
}
// Stub kernel: the body is empty, so this entry point performs no work.
// NOTE(review): presumably only the _m04 variant is needed for this mode - confirm.
KERNEL_FQ void m31400_m16 (KERN_ATTR_BASIC ())
{
}
// m31400_s04: combinator kernel (left word from pws[], right word from combs_buf[]).
// Per candidate it joins both words, runs one unrolled SHA-256 block over the result,
// passes the digest plus the first four candidate words to aes256_scrt_format_VV ()
// and feeds the four result words to COMPARE_S_SIMD.
// NOTE(review): COMPARE_S_SIMD is presumably the single-digest compare macro that reads the
// local search[] array - it is defined outside this file.
// NOTE(review): declared with KERN_ATTR_BASIC () while m31400_m04 uses KERN_ATTR_ESALT (scrtv2_t);
// no esalt data is read here, but confirm the difference is intended.
KERNEL_FQ void m31400_s04 (KERN_ATTR_BASIC ())
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
const u64 gid = get_global_id (0);
const u64 lsz = get_local_size (0);
/**
* aes shared
*/
#ifdef REAL_SHM
// copy the five 256-entry AES tables into LOCAL_VK arrays; the work is
// strided over the work-items of the group (start at lid, step lsz)
LOCAL_VK u32 s_te0[256];
LOCAL_VK u32 s_te1[256];
LOCAL_VK u32 s_te2[256];
LOCAL_VK u32 s_te3[256];
LOCAL_VK u32 s_te4[256];
for (u32 i = lid; i < 256; i += lsz)
{
s_te0[i] = te0[i];
s_te1[i] = te1[i];
s_te2[i] = te2[i];
s_te3[i] = te3[i];
s_te4[i] = te4[i];
}
SYNC_THREADS();
#else
// no copy: use the te0..te4 tables directly
CONSTANT_AS u32a *s_te0 = te0;
CONSTANT_AS u32a *s_te1 = te1;
CONSTANT_AS u32a *s_te2 = te2;
CONSTANT_AS u32a *s_te3 = te3;
CONSTANT_AS u32a *s_te4 = te4;
#endif
// the bounds check comes after the table copy, so out-of-range work-items still take part in the copy and the sync
if (gid >= GID_CNT) return;
/**
* base
*/
// AES key-schedule scratch space handed to aes256_scrt_format_VV ()
u32 ks[60];
// first eight words of the left-hand (base) word of this work-item
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
// length masked to 6 bits (0..63)
const u32 pw_l_len = pws[gid].pw_len & 63;
// first four candidate words, saved before the byte swap (filled inside the loop)
u32x wt[4];
/**
* digest
*/
// the four target words of the digest at DIGESTS_OFFSET_HOST; not referenced by name
// below, so presumably consumed inside COMPARE_S_SIMD
const u32 search[4] =
{
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
};
/**
* loop
*/
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
{
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63;
const u32x pw_len = (pw_l_len + pw_r_len) & 63;
/**
* concat password candidate
*/
u32x wordl0[4] = { 0 };
u32x wordl1[4] = { 0 };
u32x wordl2[4] = { 0 };
u32x wordl3[4] = { 0 };
wordl0[0] = pw_buf0[0];
wordl0[1] = pw_buf0[1];
wordl0[2] = pw_buf0[2];
wordl0[3] = pw_buf0[3];
wordl1[0] = pw_buf1[0];
wordl1[1] = pw_buf1[1];
wordl1[2] = pw_buf1[2];
wordl1[3] = pw_buf1[3];
u32x wordr0[4] = { 0 };
u32x wordr1[4] = { 0 };
u32x wordr2[4] = { 0 };
u32x wordr3[4] = { 0 };
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
// move whichever word comes second behind the first one, so the two can simply be OR-ed together below
if (COMBS_MODE == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
else
{
switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
}
u32x w0[4];
u32x w1[4];
u32x w2[4];
u32x w3[4];
w0[0] = wordl0[0] | wordr0[0];
w0[1] = wordl0[1] | wordr0[1];
w0[2] = wordl0[2] | wordr0[2];
w0[3] = wordl0[3] | wordr0[3];
w1[0] = wordl1[0] | wordr1[0];
w1[1] = wordl1[1] | wordr1[1];
w1[2] = wordl1[2] | wordr1[2];
w1[3] = wordl1[3] | wordr1[3];
w2[0] = wordl2[0] | wordr2[0];
w2[1] = wordl2[1] | wordr2[1];
w2[2] = wordl2[2] | wordr2[2];
w2[3] = wordl2[3] | wordr2[3];
w3[0] = wordl3[0] | wordr3[0];
w3[1] = wordl3[1] | wordr3[1];
w3[2] = wordl3[2] | wordr3[2];
w3[3] = wordl3[3] | wordr3[3];
// keep the unswapped first four words; aes256_scrt_format () byte-swaps them itself
wt[0] = w0[0];
wt[1] = w0[1];
wt[2] = w0[2];
wt[3] = w0[3];
// byte-swap all sixteen words for the SHA-256 rounds below
w0[0] = hc_swap32 (w0[0]);
w0[1] = hc_swap32 (w0[1]);
w0[2] = hc_swap32 (w0[2]);
w0[3] = hc_swap32 (w0[3]);
w1[0] = hc_swap32 (w1[0]);
w1[1] = hc_swap32 (w1[1]);
w1[2] = hc_swap32 (w1[2]);
w1[3] = hc_swap32 (w1[3]);
w2[0] = hc_swap32 (w2[0]);
w2[1] = hc_swap32 (w2[1]);
w2[2] = hc_swap32 (w2[2]);
w2[3] = hc_swap32 (w2[3]);
w3[0] = hc_swap32 (w3[0]);
w3[1] = hc_swap32 (w3[1]);
w3[2] = hc_swap32 (w3[2]);
w3[3] = hc_swap32 (w3[3]);
// add the 0x80 padding byte after the candidate; presumably the "^ 3" mirrors the
// byte index inside each word because the words were just byte-swapped - TODO confirm
append_0x80_4x4_VV (w0, w1, w2, w3, pw_len ^ 3);
/**
* sha256
*/
u32x w0_t = w0[0];
u32x w1_t = w0[1];
u32x w2_t = w0[2];
u32x w3_t = w0[3];
u32x w4_t = w1[0];
u32x w5_t = w1[1];
u32x w6_t = w1[2];
u32x w7_t = w1[3];
u32x w8_t = w2[0];
u32x w9_t = w2[1];
u32x wa_t = w2[2];
u32x wb_t = w2[3];
u32x wc_t = w3[0];
u32x wd_t = w3[1];
// the last two words carry the message length in bits; w3[2] and w3[3] are not used
u32x we_t = 0;
u32x wf_t = pw_len * 8;
// start from the SHA256M_A..H constants; the same constants are added back after the rounds
u32x a = SHA256M_A;
u32x b = SHA256M_B;
u32x c = SHA256M_C;
u32x d = SHA256M_D;
u32x e = SHA256M_E;
u32x f = SHA256M_F;
u32x g = SHA256M_G;
u32x h = SHA256M_H;
// steps 0x00..0x0f: message words are used as they are
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00);
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01);
SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02);
SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03);
SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04);
SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05);
SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06);
SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07);
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08);
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09);
SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a);
SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b);
SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c);
SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d);
SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e);
SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f);
// steps 0x10..0x3f: each word is first recomputed in place by SHA256_EXPAND, so the statement order must not change
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18);
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28);
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38);
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f);
// final digest: working variables plus the initial constants
u32x digest[8];
digest[0] = a + make_u32x (SHA256M_A);
digest[1] = b + make_u32x (SHA256M_B);
digest[2] = c + make_u32x (SHA256M_C);
digest[3] = d + make_u32x (SHA256M_D);
digest[4] = e + make_u32x (SHA256M_E);
digest[5] = f + make_u32x (SHA256M_F);
digest[6] = g + make_u32x (SHA256M_G);
digest[7] = h + make_u32x (SHA256M_H);
// AES stage: digest is the key, the block is built from pw_len, wt and the digest
u32x out[4] = { 0 };
aes256_scrt_format_VV (ks, wt, pw_len, digest, out, s_te0, s_te1, s_te2, s_te3, s_te4);
const u32x r0 = out[DGST_R0];
const u32x r1 = out[DGST_R1];
const u32x r2 = out[DGST_R2];
const u32x r3 = out[DGST_R3];
COMPARE_S_SIMD (r0, r1, r2, r3);
}
}
// Empty kernel entry point: the body performs no work.
// NOTE(review): presumably kept only so that this entry point exists alongside
// the other m31400 kernels in this file - confirm against the host-side loader.
KERNEL_FQ void m31400_s08 (KERN_ATTR_BASIC ())
{
}
// Empty kernel entry point: the body performs no work.
// NOTE(review): presumably kept only so that this entry point exists alongside
// the other m31400 kernels in this file - confirm against the host-side loader.
KERNEL_FQ void m31400_s16 (KERN_ATTR_BASIC ())
{
}

@ -184,14 +184,16 @@ KERNEL_FQ void m31400_mxx (KERN_ATTR_ESALT (scrtv2_t))
* base
*/
u32 wt[3];
u32 ks[60];
sha256_ctx_t ctx0;
sha256_init (&ctx0);
sha256_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len);
u32 ks[60];
/**
* loop
*/
@ -202,19 +204,17 @@ KERNEL_FQ void m31400_mxx (KERN_ATTR_ESALT (scrtv2_t))
sha256_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);
u32 pw_candidate[3];
pw_candidate[0] = hc_swap32_S (ctx.w0[0]);
pw_candidate[1] = hc_swap32_S (ctx.w0[1]);
pw_candidate[2] = hc_swap32_S (ctx.w0[2]);
wt[0] = hc_swap32_S (ctx.w0[0]);
wt[1] = hc_swap32_S (ctx.w0[1]);
wt[2] = hc_swap32_S (ctx.w0[2]);
u32 pw_len=ctx.len;
u32 pw_len = ctx.len;
sha256_final (&ctx);
u32 out[4] = { 0 };
aes256_scrt_format (ks, pw_candidate, pw_len, ctx.h, out, s_te0, s_te1, s_te2, s_te3, s_te4);
aes256_scrt_format (ks, wt, pw_len, ctx.h, out, s_te0, s_te1, s_te2, s_te3, s_te4);
const u32 r0 = out[DGST_R0];
const u32 r1 = out[DGST_R1];
@ -286,14 +286,16 @@ KERNEL_FQ void m31400_sxx (KERN_ATTR_ESALT (scrtv2_t))
* base
*/
u32 wt[3];
u32 ks[60];
sha256_ctx_t ctx0;
sha256_init (&ctx0);
sha256_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len);
u32 ks[60];
/**
* loop
*/
@ -304,19 +306,17 @@ KERNEL_FQ void m31400_sxx (KERN_ATTR_ESALT (scrtv2_t))
sha256_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);
u32 pw_candidate[3];
pw_candidate[0] = hc_swap32_S (ctx.w0[0]);
pw_candidate[1] = hc_swap32_S (ctx.w0[1]);
pw_candidate[2] = hc_swap32_S (ctx.w0[2]);
wt[0] = hc_swap32_S (ctx.w0[0]);
wt[1] = hc_swap32_S (ctx.w0[1]);
wt[2] = hc_swap32_S (ctx.w0[2]);
u32 pw_len=ctx.len;
u32 pw_len = ctx.len;
sha256_final (&ctx);
u32 out[4] = { 0 };
aes256_scrt_format (ks, pw_candidate, pw_len, ctx.h, out, s_te0, s_te1, s_te2, s_te3, s_te4);
aes256_scrt_format (ks, wt, pw_len, ctx.h, out, s_te0, s_te1, s_te2, s_te3, s_te4);
const u32 r0 = out[DGST_R0];
const u32 r1 = out[DGST_R1];

File diff suppressed because it is too large Load Diff

@ -1,7 +1,7 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
@ -142,27 +142,27 @@ DECLSPEC void aes256_scrt_format_VV (PRIVATE_AS u32 *aes_ks, PRIVATE_AS u32x *w,
#endif
#if VECT_SIZE >= 2
u32 tmp_w[64];
u32 tmp_w[4];
u32 tmp_h[8];
u32 tmp_out[4];
//s0
for (u32 i = 0; i < 64; i++) tmp_w[i] = w[i].s0;
for (u32 i = 0; i < 8; i++) tmp_h[i] = h[i].s0;
for (u32 i = 0; i < 4; i++) tmp_w[i] = w[i].s0;
for (u32 i = 0; i < 8; i++) tmp_h[i] = h[i].s0;
aes256_scrt_format (aes_ks, tmp_w, pw_len, tmp_h, tmp_out, s_te0, s_te1, s_te2, s_te3, s_te4);
for (u32 i = 0; i < 4; i++) out[i].s0 = tmp_out[i];
for (u32 i = 0; i < 4; i++) out[i].s0 = tmp_out[i];
//s1
for (u32 i = 0; i < 64; i++) tmp_w[i] = w[i].s1;
for (u32 i = 0; i < 8; i++) tmp_h[i] = h[i].s1;
for (u32 i = 0; i < 4; i++) tmp_w[i] = w[i].s1;
for (u32 i = 0; i < 8; i++) tmp_h[i] = h[i].s1;
aes256_scrt_format (aes_ks, tmp_w, pw_len, tmp_h, tmp_out, s_te0, s_te1, s_te2, s_te3, s_te4);
for (u32 i = 0; i < 4; i++) out[i].s1 = tmp_out[i];
for (u32 i = 0; i < 4; i++) out[i].s1 = tmp_out[i];
#endif
@ -170,21 +170,21 @@ DECLSPEC void aes256_scrt_format_VV (PRIVATE_AS u32 *aes_ks, PRIVATE_AS u32x *w,
//s2
for (u32 i = 0; i < 64; i++) tmp_w[i] = w[i].s2;
for (u32 i = 0; i < 8; i++) tmp_h[i] = h[i].s2;
for (u32 i = 0; i < 4; i++) tmp_w[i] = w[i].s2;
for (u32 i = 0; i < 8; i++) tmp_h[i] = h[i].s2;
aes256_scrt_format (aes_ks, tmp_w, pw_len, tmp_h, tmp_out, s_te0, s_te1, s_te2, s_te3, s_te4);
for (u32 i = 0; i < 4; i++) out[i].s2 = tmp_out[i];
for (u32 i = 0; i < 4; i++) out[i].s2 = tmp_out[i];
//s3
for (u32 i = 0; i < 64; i++) tmp_w[i] = w[i].s3;
for (u32 i = 0; i < 8; i++) tmp_h[i] = h[i].s3;
for (u32 i = 0; i < 4; i++) tmp_w[i] = w[i].s3;
for (u32 i = 0; i < 8; i++) tmp_h[i] = h[i].s3;
aes256_scrt_format (aes_ks, tmp_w, pw_len, tmp_h, tmp_out, s_te0, s_te1, s_te2, s_te3, s_te4);
for (u32 i = 0; i < 4; i++) out[i].s3 = tmp_out[i];
for (u32 i = 0; i < 4; i++) out[i].s3 = tmp_out[i];
#endif
@ -192,39 +192,39 @@ DECLSPEC void aes256_scrt_format_VV (PRIVATE_AS u32 *aes_ks, PRIVATE_AS u32x *w,
//s4
for (u32 i = 0; i < 64; i++) tmp_w[i] = w[i].s4;
for (u32 i = 0; i < 8; i++) tmp_h[i] = h[i].s4;
for (u32 i = 0; i < 4; i++) tmp_w[i] = w[i].s4;
for (u32 i = 0; i < 8; i++) tmp_h[i] = h[i].s4;
aes256_scrt_format (aes_ks, tmp_w, pw_len, tmp_h, tmp_out, s_te0, s_te1, s_te2, s_te3, s_te4);
for (u32 i = 0; i < 4; i++) out[i].s4 = tmp_out[i];
for (u32 i = 0; i < 4; i++) out[i].s4 = tmp_out[i];
//s5
for (u32 i = 0; i < 64; i++) tmp_w[i] = w[i].s5;
for (u32 i = 0; i < 8; i++) tmp_h[i] = h[i].s5;
for (u32 i = 0; i < 4; i++) tmp_w[i] = w[i].s5;
for (u32 i = 0; i < 8; i++) tmp_h[i] = h[i].s5;
aes256_scrt_format (aes_ks, tmp_w, pw_len, tmp_h, tmp_out, s_te0, s_te1, s_te2, s_te3, s_te4);
for (u32 i = 0; i < 4; i++) out[i].s5 = tmp_out[i];
for (u32 i = 0; i < 4; i++) out[i].s5 = tmp_out[i];
//s6
for (u32 i = 0; i < 64; i++) tmp_w[i] = w[i].s6;
for (u32 i = 0; i < 8; i++) tmp_h[i] = h[i].s6;
for (u32 i = 0; i < 4; i++) tmp_w[i] = w[i].s6;
for (u32 i = 0; i < 8; i++) tmp_h[i] = h[i].s6;
aes256_scrt_format (aes_ks, tmp_w, pw_len, tmp_h, tmp_out, s_te0, s_te1, s_te2, s_te3, s_te4);
for (u32 i = 0; i < 4; i++) out[i].s6 = tmp_out[i];
for (u32 i = 0; i < 4; i++) out[i].s6 = tmp_out[i];
//s7
for (u32 i = 0; i < 64; i++) tmp_w[i] = w[i].s7;
for (u32 i = 0; i < 8; i++) tmp_h[i] = h[i].s7;
for (u32 i = 0; i < 4; i++) tmp_w[i] = w[i].s7;
for (u32 i = 0; i < 8; i++) tmp_h[i] = h[i].s7;
aes256_scrt_format (aes_ks, tmp_w, pw_len, tmp_h, tmp_out, s_te0, s_te1, s_te2, s_te3, s_te4);
for (u32 i = 0; i < 4; i++) out[i].s7 = tmp_out[i];
for (u32 i = 0; i < 4; i++) out[i].s7 = tmp_out[i];
#endif
@ -232,75 +232,75 @@ DECLSPEC void aes256_scrt_format_VV (PRIVATE_AS u32 *aes_ks, PRIVATE_AS u32x *w,
//s8
for (u32 i = 0; i < 64; i++) tmp_w[i] = w[i].s8;
for (u32 i = 0; i < 8; i++) tmp_h[i] = h[i].s8;
for (u32 i = 0; i < 4; i++) tmp_w[i] = w[i].s8;
for (u32 i = 0; i < 8; i++) tmp_h[i] = h[i].s8;
aes256_scrt_format (aes_ks, tmp_w, pw_len, tmp_h, tmp_out, s_te0, s_te1, s_te2, s_te3, s_te4);
for (u32 i = 0; i < 4; i++) out[i].s8 = tmp_out[i];
for (u32 i = 0; i < 4; i++) out[i].s8 = tmp_out[i];
//s9
for (u32 i = 0; i < 64; i++) tmp_w[i] = w[i].s9;
for (u32 i = 0; i < 8; i++) tmp_h[i] = h[i].s9;
for (u32 i = 0; i < 4; i++) tmp_w[i] = w[i].s9;
for (u32 i = 0; i < 8; i++) tmp_h[i] = h[i].s9;
aes256_scrt_format (aes_ks, tmp_w, pw_len, tmp_h, tmp_out, s_te0, s_te1, s_te2, s_te3, s_te4);
for (u32 i = 0; i < 4; i++) out[i].s9 = tmp_out[i];
for (u32 i = 0; i < 4; i++) out[i].s9 = tmp_out[i];
//sa
for (u32 i = 0; i < 64; i++) tmp_w[i] = w[i].sa;
for (u32 i = 0; i < 8; i++) tmp_h[i] = h[i].sa;
for (u32 i = 0; i < 4; i++) tmp_w[i] = w[i].sa;
for (u32 i = 0; i < 8; i++) tmp_h[i] = h[i].sa;
aes256_scrt_format (aes_ks, tmp_w, pw_len, tmp_h, tmp_out, s_te0, s_te1, s_te2, s_te3, s_te4);
for (u32 i = 0; i < 4; i++) out[i].sa = tmp_out[i];
for (u32 i = 0; i < 4; i++) out[i].sa = tmp_out[i];
//sb
for (u32 i = 0; i < 64; i++) tmp_w[i] = w[i].sb;
for (u32 i = 0; i < 8; i++) tmp_h[i] = h[i].sb;
for (u32 i = 0; i < 4; i++) tmp_w[i] = w[i].sb;
for (u32 i = 0; i < 8; i++) tmp_h[i] = h[i].sb;
aes256_scrt_format (aes_ks, tmp_w, pw_len, tmp_h, tmp_out, s_te0, s_te1, s_te2, s_te3, s_te4);
for (u32 i = 0; i < 4; i++) out[i].sb = tmp_out[i];
for (u32 i = 0; i < 4; i++) out[i].sb = tmp_out[i];
//sc
for (u32 i = 0; i < 64; i++) tmp_w[i] = w[i].sc;
for (u32 i = 0; i < 8; i++) tmp_h[i] = h[i].sc;
for (u32 i = 0; i < 4; i++) tmp_w[i] = w[i].sc;
for (u32 i = 0; i < 8; i++) tmp_h[i] = h[i].sc;
aes256_scrt_format (aes_ks, tmp_w, pw_len, tmp_h, tmp_out, s_te0, s_te1, s_te2, s_te3, s_te4);
for (u32 i = 0; i < 4; i++) out[i].sc = tmp_out[i];
for (u32 i = 0; i < 4; i++) out[i].sc = tmp_out[i];
//sd
for (u32 i = 0; i < 64; i++) tmp_w[i] = w[i].sd;
for (u32 i = 0; i < 8; i++) tmp_h[i] = h[i].sd;
for (u32 i = 0; i < 4; i++) tmp_w[i] = w[i].sd;
for (u32 i = 0; i < 8; i++) tmp_h[i] = h[i].sd;
aes256_scrt_format (aes_ks, tmp_w, pw_len, tmp_h, tmp_out, s_te0, s_te1, s_te2, s_te3, s_te4);
for (u32 i = 0; i < 4; i++) out[i].sd = tmp_out[i];
for (u32 i = 0; i < 4; i++) out[i].sd = tmp_out[i];
//se
for (u32 i = 0; i < 64; i++) tmp_w[i] = w[i].se;
for (u32 i = 0; i < 8; i++) tmp_h[i] = h[i].se;
for (u32 i = 0; i < 4; i++) tmp_w[i] = w[i].se;
for (u32 i = 0; i < 8; i++) tmp_h[i] = h[i].se;
aes256_scrt_format (aes_ks, tmp_w, pw_len, tmp_h, tmp_out, s_te0, s_te1, s_te2, s_te3, s_te4);
for (u32 i = 0; i < 4; i++) out[i].se = tmp_out[i];
for (u32 i = 0; i < 4; i++) out[i].se = tmp_out[i];
//sf
for (u32 i = 0; i < 64; i++) tmp_w[i] = w[i].sf;
for (u32 i = 0; i < 8; i++) tmp_h[i] = h[i].sf;
for (u32 i = 0; i < 4; i++) tmp_w[i] = w[i].sf;
for (u32 i = 0; i < 8; i++) tmp_h[i] = h[i].sf;
aes256_scrt_format (aes_ks, tmp_w, pw_len, tmp_h, tmp_out, s_te0, s_te1, s_te2, s_te3, s_te4);
for (u32 i = 0; i < 4; i++) out[i].sf = tmp_out[i];
for (u32 i = 0; i < 4; i++) out[i].sf = tmp_out[i];
#endif
}
@ -354,10 +354,12 @@ KERNEL_FQ void m31400_mxx (KERN_ATTR_VECTOR_ESALT (scrtv2_t))
* base
*/
const u32 pw_len = pws[gid].pw_len;
u32 ks[60];
u32x w[64] = {0};
const u32 pw_len = pws[gid].pw_len;
for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
{
w[idx] = pws[gid].i[idx];
@ -385,11 +387,9 @@ KERNEL_FQ void m31400_mxx (KERN_ATTR_VECTOR_ESALT (scrtv2_t))
sha256_final_vector (&ctx);
u32x out[4] = {0};
u32x out[4] = { 0 };
u32 aes_ks[60];
aes256_scrt_format_VV (aes_ks, w, pw_len, ctx.h, out, s_te0, s_te1, s_te2, s_te3, s_te4);
aes256_scrt_format_VV (ks, w, pw_len, ctx.h, out, s_te0, s_te1, s_te2, s_te3, s_te4);
const u32x r0 = out[DGST_R0];
const u32x r1 = out[DGST_R1];
@ -461,9 +461,11 @@ KERNEL_FQ void m31400_sxx (KERN_ATTR_VECTOR_ESALT (scrtv2_t))
* base
*/
const u32 pw_len = pws[gid].pw_len;
u32 ks[60];
u32x w[64] = {0};
u32x w[64] = { 0 };
const u32 pw_len = pws[gid].pw_len;
for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
{
@ -492,11 +494,9 @@ KERNEL_FQ void m31400_sxx (KERN_ATTR_VECTOR_ESALT (scrtv2_t))
sha256_final_vector (&ctx);
u32x out[4] = {0};
u32 aes_ks[60];
u32x out[4] = { 0 };
aes256_scrt_format_VV (aes_ks, w, pw_len, ctx.h, out, s_te0, s_te1, s_te2, s_te3, s_te4);
aes256_scrt_format_VV (ks, w, pw_len, ctx.h, out, s_te0, s_te1, s_te2, s_te3, s_te4);
const u32x r0 = out[DGST_R0];
const u32x r1 = out[DGST_R1];

@ -0,0 +1,141 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
//#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include M2S(INCLUDE_PATH/inc_vendor.h)
#include M2S(INCLUDE_PATH/inc_types.h)
#include M2S(INCLUDE_PATH/inc_platform.cl)
#include M2S(INCLUDE_PATH/inc_common.cl)
#include M2S(INCLUDE_PATH/inc_scalar.cl)
#include M2S(INCLUDE_PATH/inc_hash_md4.cl)
#endif
#define COMPARE_S M2S(INCLUDE_PATH/inc_comp_single.cl)
#define COMPARE_M M2S(INCLUDE_PATH/inc_comp_multi.cl)
#ifdef KERNEL_STATIC
/**
 * Map one ASCII hex character to its 4-bit value.
 *
 * For '0'..'9' the low nibble already is the value; for 'A'..'F' and
 * 'a'..'f' the low nibble is 1..6 and bit 6 is set, so 9 is added.
 * No validation is performed: other input bytes yield an arbitrary result.
 */
DECLSPEC u8 hex_convert (const u8 c)
{
  const u8 low_nibble = c & 15;

  // 0 for the digit characters, 1 for the hex letters (upper or lower case)
  const u8 top_bits = c >> 6;

  return low_nibble + top_bits * 9;
}
/**
 * Decode two consecutive hex characters into a single byte.
 *
 * hex[0] supplies the high nibble and hex[1] the low nibble; both are
 * converted with hex_convert ().
 */
DECLSPEC u8 hex_to_u8 (PRIVATE_AS const u8 *hex)
{
  const u8 hi = hex_convert (hex[0]);
  const u8 lo = hex_convert (hex[1]);

  return (u8) ((hi << 4) | lo);
}
#endif
// Per-candidate temporary state shared between the m31500 kernels:
// m31500_init stores 16 hex-decoded bytes here and m31500_comp reads them back.
typedef struct dcc_tmp
{
// 4 x u32 = the 16 bytes produced by decoding 32 hex characters of the candidate
u32 digest_buf[4];
} dcc_tmp_t;
/**
 * Init stage: treats the first 32 bytes of the candidate as hex characters,
 * decodes them into 16 raw bytes and stores those in tmps[gid].digest_buf.
 */
KERNEL_FQ void m31500_init (KERN_ATTR_TMPS (dcc_tmp_t))
{
  /**
   * modifier
   */

  const u64 lid = get_local_id (0);
  const u64 gid = get_global_id (0);

  if (gid >= GID_CNT) return;

  /**
   * fetch the hex-encoded candidate (8 words = 32 characters)
   */

  u32 hex_words[16];

  for (int k = 0; k < 8; k++)
  {
    hex_words[k] = pws[gid].i[k];
  }

  /**
   * decode two characters per output byte
   */

  u32 raw_words[4];

  PRIVATE_AS u8 *hex_bytes = (PRIVATE_AS u8 *) hex_words;
  PRIVATE_AS u8 *raw_bytes = (PRIVATE_AS u8 *) raw_words;

  for (int pos = 0; pos < 16; pos++)
  {
    raw_bytes[pos] = hex_to_u8 (hex_bytes + 2 * pos);
  }

  /**
   * hand the decoded bytes over to the comp stage
   */

  for (int k = 0; k < 4; k++)
  {
    tmps[gid].digest_buf[k] = raw_words[k];
  }
}
// Loop stage: intentionally empty, no per-iteration work is done for this mode.
// NOTE(review): presumably present only because an init/loop/comp kernel
// triple is expected by the host side - confirm.
KERNEL_FQ void m31500_loop (KERN_ATTR_TMPS (dcc_tmp_t))
{
}
// Comparison stage: computes MD4 over the 16 bytes kept in
// tmps[gid].digest_buf followed by the salt (fed through
// md4_update_utf16le), then runs the multi-hash comparison include on the
// resulting digest words.
KERNEL_FQ void m31500_comp (KERN_ATTR_TMPS (dcc_tmp_t))
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
const u64 gid = get_global_id (0);
if (gid >= GID_CNT) return;
/**
* salt
*/
const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;
// private, zero-initialised copy of the salt; copied one u32 word per 4 bytes of salt_len
u32 s[64] = { 0 };
for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
{
s[idx] = salt_bufs[SALT_POS_HOST].salt_buf[idx];
}
// the 16 bytes stored in the tmps buffer for this candidate
const u32 a = tmps[gid].digest_buf[0];
const u32 b = tmps[gid].digest_buf[1];
const u32 c = tmps[gid].digest_buf[2];
const u32 d = tmps[gid].digest_buf[3];
md4_ctx_t ctx;
md4_init (&ctx);
// place the 16 bytes directly into the first block words and set len to 16,
// presumably so that the following update appends the salt right after them
ctx.w0[0] = a;
ctx.w0[1] = b;
ctx.w0[2] = c;
ctx.w0[3] = d;
ctx.len = 16;
md4_update_utf16le (&ctx, s, salt_len);
md4_final (&ctx);
const u32 r0 = ctx.h[DGST_R0];
const u32 r1 = ctx.h[DGST_R1];
const u32 r2 = ctx.h[DGST_R2];
const u32 r3 = ctx.h[DGST_R3];
// NOTE(review): il_pos is not used in this kernel body itself; it is
// presumably referenced by the included comparison code - confirm.
#define il_pos 0
#ifdef KERNEL_STATIC
#include COMPARE_M
#endif
}

@ -0,0 +1,335 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
//#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include M2S(INCLUDE_PATH/inc_vendor.h)
#include M2S(INCLUDE_PATH/inc_types.h)
#include M2S(INCLUDE_PATH/inc_platform.cl)
#include M2S(INCLUDE_PATH/inc_common.cl)
#include M2S(INCLUDE_PATH/inc_rp.h)
#include M2S(INCLUDE_PATH/inc_rp.cl)
#include M2S(INCLUDE_PATH/inc_scalar.cl)
#include M2S(INCLUDE_PATH/inc_hash_md5.cl)
#endif
// uint_to_hex_lower8 (i): looks up l_bin2asc[] for i (per vector lane when
// VECT_SIZE > 1) and packs the results with make_u32x; one variant per
// supported VECT_SIZE.
// l_bin2asc is not a macro parameter: an array of that name must be in scope
// wherever the macro is expanded.
#if VECT_SIZE == 1
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
#elif VECT_SIZE == 2
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
#elif VECT_SIZE == 4
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
#elif VECT_SIZE == 8
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
#elif VECT_SIZE == 16
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
#endif
// Extra salt data for mode 31700: two independent salts, each appended after
// the hex form of an intermediate MD5 digest by the m31700 kernels.
// NOTE(review): the lengths appear to be byte counts (the kernels copy one
// u32 word per 4 units of length) - confirm against the host-side parser.
typedef struct md5_double_salt
{
// salt appended in the second MD5 round
u32 salt1_buf[64];
int salt1_len;
// salt appended in the third (final) MD5 round
u32 salt2_buf[64];
int salt2_len;
} md5_double_salt_t;
// Multi-hash kernel for mode 31700. For every rule-modified candidate it
// computes md5 (hex (md5 (hex (md5 (pw)) . salt1)) . salt2), where hex () is
// the 32-character lowercase hex form of a digest, and passes the final
// digest words to COMPARE_M_SCALAR.
KERNEL_FQ void m31700_mxx (KERN_ATTR_RULES_ESALT (md5_double_salt_t))
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc lowercase table
*
* entry i holds the two lowercase hex characters of byte value i:
* high-nibble character in bits 0-7, low-nibble character in bits 8-15.
* The fill is spread over the work-group (each work-item starts at lid and
* strides by lsz).
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
}
SYNC_THREADS ();
// the bounds check comes after the table fill and SYNC_THREADS, so every
// work-item in the group reaches the barrier before any of them returns
if (gid >= GID_CNT) return;
/**
* base
*/
// COPY_PW / PASTE_PW are macros defined outside this file; PASTE_PW is used
// below to obtain a fresh pw_t per rule iteration
COPY_PW (pws[gid]);
// private, zero-initialised copies of both salts (one u32 word per 4 units of length)
const u32 salt1_len = esalt_bufs[DIGESTS_OFFSET_HOST].salt1_len;
u32 salt1_buf[64] = { 0 };
for (u32 i = 0, idx = 0; i < salt1_len; i += 4, idx += 1)
{
salt1_buf[idx] = esalt_bufs[DIGESTS_OFFSET_HOST].salt1_buf[idx];
}
const u32 salt2_len = esalt_bufs[DIGESTS_OFFSET_HOST].salt2_len;
u32 salt2_buf[64] = { 0 };
for (u32 i = 0, idx = 0; i < salt2_len; i += 4, idx += 1)
{
salt2_buf[idx] = esalt_bufs[DIGESTS_OFFSET_HOST].salt2_buf[idx];
}
/**
* loop
*/
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
{
pw_t tmp = PASTE_PW;
tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
// round 1: md5 of the rule-modified candidate
md5_ctx_t ctx0;
md5_init (&ctx0);
md5_update (&ctx0, tmp.i, tmp.pw_len);
md5_final (&ctx0);
u32 a = ctx0.h[0];
u32 b = ctx0.h[1];
u32 c = ctx0.h[2];
u32 d = ctx0.h[3];
// round 2: md5 over the 32 hex characters of the round-1 digest followed by
// salt1; each table lookup yields 2 characters, two lookups fill one word
md5_ctx_t ctx;
md5_init (&ctx);
ctx.w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0
| uint_to_hex_lower8 ((a >> 8) & 255) << 16;
ctx.w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0
| uint_to_hex_lower8 ((a >> 24) & 255) << 16;
ctx.w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0
| uint_to_hex_lower8 ((b >> 8) & 255) << 16;
ctx.w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0
| uint_to_hex_lower8 ((b >> 24) & 255) << 16;
ctx.w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0
| uint_to_hex_lower8 ((c >> 8) & 255) << 16;
ctx.w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0
| uint_to_hex_lower8 ((c >> 24) & 255) << 16;
ctx.w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0
| uint_to_hex_lower8 ((d >> 8) & 255) << 16;
ctx.w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0
| uint_to_hex_lower8 ((d >> 24) & 255) << 16;
// 32 bytes are already placed in w0/w1, so the update below continues from there
ctx.len = 32;
md5_update (&ctx, salt1_buf, salt1_len);
md5_final (&ctx);
a = ctx.h[0];
b = ctx.h[1];
c = ctx.h[2];
d = ctx.h[3];
// round 3: same construction with the round-2 digest and salt2
md5_init (&ctx);
ctx.w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0
| uint_to_hex_lower8 ((a >> 8) & 255) << 16;
ctx.w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0
| uint_to_hex_lower8 ((a >> 24) & 255) << 16;
ctx.w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0
| uint_to_hex_lower8 ((b >> 8) & 255) << 16;
ctx.w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0
| uint_to_hex_lower8 ((b >> 24) & 255) << 16;
ctx.w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0
| uint_to_hex_lower8 ((c >> 8) & 255) << 16;
ctx.w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0
| uint_to_hex_lower8 ((c >> 24) & 255) << 16;
ctx.w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0
| uint_to_hex_lower8 ((d >> 8) & 255) << 16;
ctx.w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0
| uint_to_hex_lower8 ((d >> 24) & 255) << 16;
ctx.len = 32;
md5_update (&ctx, salt2_buf, salt2_len);
md5_final (&ctx);
const u32 r0 = ctx.h[DGST_R0];
const u32 r1 = ctx.h[DGST_R1];
const u32 r2 = ctx.h[DGST_R2];
const u32 r3 = ctx.h[DGST_R3];
COMPARE_M_SCALAR (r0, r1, r2, r3);
}
}
// Single-hash kernel for mode 31700. For every rule-modified candidate it
// computes md5 (hex (md5 (hex (md5 (pw)) . salt1)) . salt2), where hex () is
// the 32-character lowercase hex form of a digest, and passes the final
// digest words to COMPARE_S_SCALAR.
KERNEL_FQ void m31700_sxx (KERN_ATTR_RULES_ESALT (md5_double_salt_t))
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc lowercase table
*
* entry i holds the two lowercase hex characters of byte value i:
* high-nibble character in bits 0-7, low-nibble character in bits 8-15.
* The fill is spread over the work-group (each work-item starts at lid and
* strides by lsz).
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
}
SYNC_THREADS ();
// the bounds check comes after the table fill and SYNC_THREADS, so every
// work-item in the group reaches the barrier before any of them returns
if (gid >= GID_CNT) return;
/**
* digest
*/
// target digest words; not referenced by name elsewhere in this body,
// presumably consumed by COMPARE_S_SCALAR - confirm
const u32 search[4] =
{
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
};
/**
* base
*/
// COPY_PW / PASTE_PW are macros defined outside this file; PASTE_PW is used
// below to obtain a fresh pw_t per rule iteration
COPY_PW (pws[gid]);
// private, zero-initialised copies of both salts (one u32 word per 4 units of length)
const u32 salt1_len = esalt_bufs[DIGESTS_OFFSET_HOST].salt1_len;
u32 salt1_buf[64] = { 0 };
for (u32 i = 0, idx = 0; i < salt1_len; i += 4, idx += 1)
{
salt1_buf[idx] = esalt_bufs[DIGESTS_OFFSET_HOST].salt1_buf[idx];
}
const u32 salt2_len = esalt_bufs[DIGESTS_OFFSET_HOST].salt2_len;
u32 salt2_buf[64] = { 0 };
for (u32 i = 0, idx = 0; i < salt2_len; i += 4, idx += 1)
{
salt2_buf[idx] = esalt_bufs[DIGESTS_OFFSET_HOST].salt2_buf[idx];
}
/**
* loop
*/
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
{
pw_t tmp = PASTE_PW;
tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
// round 1: md5 of the rule-modified candidate
md5_ctx_t ctx0;
md5_init (&ctx0);
md5_update (&ctx0, tmp.i, tmp.pw_len);
md5_final (&ctx0);
u32 a = ctx0.h[0];
u32 b = ctx0.h[1];
u32 c = ctx0.h[2];
u32 d = ctx0.h[3];
// round 2: md5 over the 32 hex characters of the round-1 digest followed by
// salt1; each table lookup yields 2 characters, two lookups fill one word
md5_ctx_t ctx;
md5_init (&ctx);
ctx.w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0
| uint_to_hex_lower8 ((a >> 8) & 255) << 16;
ctx.w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0
| uint_to_hex_lower8 ((a >> 24) & 255) << 16;
ctx.w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0
| uint_to_hex_lower8 ((b >> 8) & 255) << 16;
ctx.w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0
| uint_to_hex_lower8 ((b >> 24) & 255) << 16;
ctx.w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0
| uint_to_hex_lower8 ((c >> 8) & 255) << 16;
ctx.w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0
| uint_to_hex_lower8 ((c >> 24) & 255) << 16;
ctx.w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0
| uint_to_hex_lower8 ((d >> 8) & 255) << 16;
ctx.w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0
| uint_to_hex_lower8 ((d >> 24) & 255) << 16;
// 32 bytes are already placed in w0/w1, so the update below continues from there
ctx.len = 32;
md5_update (&ctx, salt1_buf, salt1_len);
md5_final (&ctx);
a = ctx.h[0];
b = ctx.h[1];
c = ctx.h[2];
d = ctx.h[3];
// round 3: same construction with the round-2 digest and salt2
md5_init (&ctx);
ctx.w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0
| uint_to_hex_lower8 ((a >> 8) & 255) << 16;
ctx.w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0
| uint_to_hex_lower8 ((a >> 24) & 255) << 16;
ctx.w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0
| uint_to_hex_lower8 ((b >> 8) & 255) << 16;
ctx.w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0
| uint_to_hex_lower8 ((b >> 24) & 255) << 16;
ctx.w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0
| uint_to_hex_lower8 ((c >> 8) & 255) << 16;
ctx.w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0
| uint_to_hex_lower8 ((c >> 24) & 255) << 16;
ctx.w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0
| uint_to_hex_lower8 ((d >> 8) & 255) << 16;
ctx.w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0
| uint_to_hex_lower8 ((d >> 24) & 255) << 16;
ctx.len = 32;
md5_update (&ctx, salt2_buf, salt2_len);
md5_final (&ctx);
const u32 r0 = ctx.h[DGST_R0];
const u32 r1 = ctx.h[DGST_R1];
const u32 r2 = ctx.h[DGST_R2];
const u32 r3 = ctx.h[DGST_R3];
COMPARE_S_SCALAR (r0, r1, r2, r3);
}
}

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save