diff --git a/OpenCL/inc_hash_blake2s.cl b/OpenCL/inc_hash_blake2s.cl new file mode 100644 index 000000000..c10006068 --- /dev/null +++ b/OpenCL/inc_hash_blake2s.cl @@ -0,0 +1,702 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.h" +#include "inc_common.h" +#include "inc_hash_blake2s.h" + +DECLSPEC u32 blake2s_rot16_S (const u32 a) +{ + vconv32_t in; + + in.v32 = a; + + vconv32_t out; + + out.v16.a = in.v16.b; + out.v16.b = in.v16.a; + + return out.v32; +} + +DECLSPEC u32x blake2s_rot16 (const u32x a) +{ + u32x r; + + #if VECT_SIZE == 1 + r = blake2s_rot16_S (a); + #endif + + #if VECT_SIZE >= 2 + r.s0 = blake2s_rot16_S (a.s0); + r.s1 = blake2s_rot16_S (a.s1); + #endif + + #if VECT_SIZE >= 4 + r.s2 = blake2s_rot16_S (a.s2); + r.s3 = blake2s_rot16_S (a.s3); + #endif + + #if VECT_SIZE >= 8 + r.s4 = blake2s_rot16_S (a.s4); + r.s5 = blake2s_rot16_S (a.s5); + r.s6 = blake2s_rot16_S (a.s6); + r.s7 = blake2s_rot16_S (a.s7); + #endif + + #if VECT_SIZE >= 16 + r.s8 = blake2s_rot16_S (a.s8); + r.s9 = blake2s_rot16_S (a.s9); + r.sa = blake2s_rot16_S (a.sa); + r.sb = blake2s_rot16_S (a.sb); + r.sc = blake2s_rot16_S (a.sc); + r.sd = blake2s_rot16_S (a.sd); + r.se = blake2s_rot16_S (a.se); + r.sf = blake2s_rot16_S (a.sf); + #endif + + return r; +} + +DECLSPEC u32 blake2s_rot08_S (const u32 a) +{ + #if defined IS_NV + + vconv32_t in; + + in.v32 = a; + + vconv32_t out; + + out.v32 = hc_byte_perm_S (in.v32, in.v32, 0x0321); + + return out.v32; + + #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1 + + vconv32_t in; + + in.v32 = a; + + vconv32_t out; + + out.v32 = hc_byte_perm_S (in.v32, in.v32, 0x00030201); + + return out.v32; + + #else + + return hc_rotr32_S (a, 8); + + #endif +} + +DECLSPEC u32x blake2s_rot08 (const u32x a) +{ + u32x r; + + #if VECT_SIZE == 1 + r = blake2s_rot08_S (a); + #endif + + #if VECT_SIZE >= 2 + r.s0 = blake2s_rot08_S (a.s0); + r.s1 = blake2s_rot08_S 
(a.s1); + #endif + + #if VECT_SIZE >= 4 + r.s2 = blake2s_rot08_S (a.s2); + r.s3 = blake2s_rot08_S (a.s3); + #endif + + #if VECT_SIZE >= 8 + r.s4 = blake2s_rot08_S (a.s4); + r.s5 = blake2s_rot08_S (a.s5); + r.s6 = blake2s_rot08_S (a.s6); + r.s7 = blake2s_rot08_S (a.s7); + #endif + + #if VECT_SIZE >= 16 + r.s8 = blake2s_rot08_S (a.s8); + r.s9 = blake2s_rot08_S (a.s9); + r.sa = blake2s_rot08_S (a.sa); + r.sb = blake2s_rot08_S (a.sb); + r.sc = blake2s_rot08_S (a.sc); + r.sd = blake2s_rot08_S (a.sd); + r.se = blake2s_rot08_S (a.se); + r.sf = blake2s_rot08_S (a.sf); + #endif + + return r; +} + +DECLSPEC void blake2s_transform (PRIVATE_AS u32 *h, PRIVATE_AS const u32 *m, const int len, const u32 f0) +{ + const u32 t0 = len; + + u32 v[16]; + + v[ 0] = h[0]; + v[ 1] = h[1]; + v[ 2] = h[2]; + v[ 3] = h[3]; + v[ 4] = h[4]; + v[ 5] = h[5]; + v[ 6] = h[6]; + v[ 7] = h[7]; + v[ 8] = BLAKE2S_IV_00; + v[ 9] = BLAKE2S_IV_01; + v[10] = BLAKE2S_IV_02; + v[11] = BLAKE2S_IV_03; + v[12] = BLAKE2S_IV_04 ^ t0; + v[13] = BLAKE2S_IV_05; // ^ t1; + v[14] = BLAKE2S_IV_06 ^ f0; + v[15] = BLAKE2S_IV_07; // ^ f1; + + BLAKE2S_ROUND ( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + BLAKE2S_ROUND (14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3); + BLAKE2S_ROUND (11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4); + BLAKE2S_ROUND ( 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8); + BLAKE2S_ROUND ( 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13); + BLAKE2S_ROUND ( 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9); + BLAKE2S_ROUND (12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11); + BLAKE2S_ROUND (13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10); + BLAKE2S_ROUND ( 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5); + BLAKE2S_ROUND (10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0); + + h[0] = h[0] ^ v[0] ^ v[ 8]; + h[1] = h[1] ^ v[1] ^ v[ 9]; + h[2] = h[2] ^ v[2] ^ v[10]; + h[3] = h[3] ^ v[3] ^ v[11]; + h[4] = h[4] ^ v[4] ^ v[12]; + h[5] = h[5] ^ 
v[5] ^ v[13]; + h[6] = h[6] ^ v[6] ^ v[14]; + h[7] = h[7] ^ v[7] ^ v[15]; +} + +DECLSPEC void blake2s_init (PRIVATE_AS blake2s_ctx_t *ctx) +{ + ctx->h[0] = BLAKE2S_IV_00 ^ 0x01010020; // default output length: 0x20 = 32 bytes + ctx->h[1] = BLAKE2S_IV_01; + ctx->h[2] = BLAKE2S_IV_02; + ctx->h[3] = BLAKE2S_IV_03; + ctx->h[4] = BLAKE2S_IV_04; + ctx->h[5] = BLAKE2S_IV_05; + ctx->h[6] = BLAKE2S_IV_06; + ctx->h[7] = BLAKE2S_IV_07; + + ctx->m[ 0] = 0; + ctx->m[ 1] = 0; + ctx->m[ 2] = 0; + ctx->m[ 3] = 0; + ctx->m[ 4] = 0; + ctx->m[ 5] = 0; + ctx->m[ 6] = 0; + ctx->m[ 7] = 0; + ctx->m[ 8] = 0; + ctx->m[ 9] = 0; + ctx->m[10] = 0; + ctx->m[11] = 0; + ctx->m[12] = 0; + ctx->m[13] = 0; + ctx->m[14] = 0; + ctx->m[15] = 0; + + ctx->len = 0; +} + +DECLSPEC void blake2s_update_64 (PRIVATE_AS blake2s_ctx_t *ctx, PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w2, PRIVATE_AS u32 *w3, const int len) +{ + if (len == 0) return; + + const int pos = ctx->len & 63; + + if (pos == 0) + { + if (ctx->len > 0) // if new block (pos == 0) AND the (old) len is not zero => transform + { + blake2s_transform (ctx->h, ctx->m, ctx->len, BLAKE2S_UPDATE); + } + + ctx->m[ 0] = w0[0]; + ctx->m[ 1] = w0[1]; + ctx->m[ 2] = w0[2]; + ctx->m[ 3] = w0[3]; + ctx->m[ 4] = w1[0]; + ctx->m[ 5] = w1[1]; + ctx->m[ 6] = w1[2]; + ctx->m[ 7] = w1[3]; + ctx->m[ 8] = w2[0]; + ctx->m[ 9] = w2[1]; + ctx->m[10] = w2[2]; + ctx->m[11] = w2[3]; + ctx->m[12] = w3[0]; + ctx->m[13] = w3[1]; + ctx->m[14] = w3[2]; + ctx->m[15] = w3[3]; + } + else + { + if ((pos + len) <= 64) + { + switch_buffer_by_offset_le_S (w0, w1, w2, w3, pos); + + ctx->m[ 0] |= w0[0]; + ctx->m[ 1] |= w0[1]; + ctx->m[ 2] |= w0[2]; + ctx->m[ 3] |= w0[3]; + ctx->m[ 4] |= w1[0]; + ctx->m[ 5] |= w1[1]; + ctx->m[ 6] |= w1[2]; + ctx->m[ 7] |= w1[3]; + ctx->m[ 8] |= w2[0]; + ctx->m[ 9] |= w2[1]; + ctx->m[10] |= w2[2]; + ctx->m[11] |= w2[3]; + ctx->m[12] |= w3[0]; + ctx->m[13] |= w3[1]; + ctx->m[14] |= w3[2]; + ctx->m[15] |= w3[3]; + } + else + { + u32 c0[4] = 
{ 0 }; + u32 c1[4] = { 0 }; + u32 c2[4] = { 0 }; + u32 c3[4] = { 0 }; + + switch_buffer_by_offset_carry_le_S (w0, w1, w2, w3, c0, c1, c2, c3, pos); + + ctx->m[ 0] |= w0[0]; + ctx->m[ 1] |= w0[1]; + ctx->m[ 2] |= w0[2]; + ctx->m[ 3] |= w0[3]; + ctx->m[ 4] |= w1[0]; + ctx->m[ 5] |= w1[1]; + ctx->m[ 6] |= w1[2]; + ctx->m[ 7] |= w1[3]; + ctx->m[ 8] |= w2[0]; + ctx->m[ 9] |= w2[1]; + ctx->m[10] |= w2[2]; + ctx->m[11] |= w2[3]; + ctx->m[12] |= w3[0]; + ctx->m[13] |= w3[1]; + ctx->m[14] |= w3[2]; + ctx->m[15] |= w3[3]; + + // len must be a multiple of 64 (not ctx->len) for BLAKE2S_UPDATE: + + const u32 cur_len = ((ctx->len + len) / 64) * 64; + + blake2s_transform (ctx->h, ctx->m, cur_len, BLAKE2S_UPDATE); + + ctx->m[ 0] = c0[0]; + ctx->m[ 1] = c0[1]; + ctx->m[ 2] = c0[2]; + ctx->m[ 3] = c0[3]; + ctx->m[ 4] = c1[0]; + ctx->m[ 5] = c1[1]; + ctx->m[ 6] = c1[2]; + ctx->m[ 7] = c1[3]; + ctx->m[ 8] = c2[0]; + ctx->m[ 9] = c2[1]; + ctx->m[10] = c2[2]; + ctx->m[11] = c2[3]; + ctx->m[12] = c3[0]; + ctx->m[13] = c3[1]; + ctx->m[14] = c3[2]; + ctx->m[15] = c3[3]; + } + } + + ctx->len += len; +} + +DECLSPEC void blake2s_update (PRIVATE_AS blake2s_ctx_t *ctx, PRIVATE_AS const u32 *w, const int len) +{ + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + const int limit = (const int) len - 64; // int type needed, could be negative + + int pos1; + int pos4; + + for (pos1 = 0, pos4 = 0; pos1 < limit; pos1 += 64, pos4 += 16) + { + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + w2[0] = w[pos4 + 8]; + w2[1] = w[pos4 + 9]; + w2[2] = w[pos4 + 10]; + w2[3] = w[pos4 + 11]; + w3[0] = w[pos4 + 12]; + w3[1] = w[pos4 + 13]; + w3[2] = w[pos4 + 14]; + w3[3] = w[pos4 + 15]; + + blake2s_update_64 (ctx, w0, w1, w2, w3, 64); + } + + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; 
+ w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + w2[0] = w[pos4 + 8]; + w2[1] = w[pos4 + 9]; + w2[2] = w[pos4 + 10]; + w2[3] = w[pos4 + 11]; + w3[0] = w[pos4 + 12]; + w3[1] = w[pos4 + 13]; + w3[2] = w[pos4 + 14]; + w3[3] = w[pos4 + 15]; + + blake2s_update_64 (ctx, w0, w1, w2, w3, len - (u32) pos1); +} + +DECLSPEC void blake2s_update_global (PRIVATE_AS blake2s_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) +{ + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + const int limit = (const int) len - 64; // int type needed, could be negative + + int pos1; + int pos4; + + for (pos1 = 0, pos4 = 0; pos1 < limit; pos1 += 64, pos4 += 16) + { + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + w2[0] = w[pos4 + 8]; + w2[1] = w[pos4 + 9]; + w2[2] = w[pos4 + 10]; + w2[3] = w[pos4 + 11]; + w3[0] = w[pos4 + 12]; + w3[1] = w[pos4 + 13]; + w3[2] = w[pos4 + 14]; + w3[3] = w[pos4 + 15]; + + blake2s_update_64 (ctx, w0, w1, w2, w3, 64); + } + + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + w2[0] = w[pos4 + 8]; + w2[1] = w[pos4 + 9]; + w2[2] = w[pos4 + 10]; + w2[3] = w[pos4 + 11]; + w3[0] = w[pos4 + 12]; + w3[1] = w[pos4 + 13]; + w3[2] = w[pos4 + 14]; + w3[3] = w[pos4 + 15]; + + blake2s_update_64 (ctx, w0, w1, w2, w3, len - (u32) pos1); +} + +DECLSPEC void blake2s_final (PRIVATE_AS blake2s_ctx_t *ctx) +{ + blake2s_transform (ctx->h, ctx->m, ctx->len, BLAKE2S_FINAL); +} + +DECLSPEC void blake2s_transform_vector (PRIVATE_AS u32x *h, PRIVATE_AS const u32x *m, const u32x len, const u32 f0) +{ + const u32x t0 = len; + + u32x v[16]; + + v[ 0] = h[0]; + v[ 1] = h[1]; + v[ 2] = h[2]; + v[ 3] = h[3]; + v[ 4] = h[4]; + v[ 5] = h[5]; + v[ 6] = h[6]; + v[ 7] = h[7]; + v[ 8] = BLAKE2S_IV_00; + v[ 9] = BLAKE2S_IV_01; + v[10] = 
BLAKE2S_IV_02; + v[11] = BLAKE2S_IV_03; + v[12] = BLAKE2S_IV_04 ^ t0; + v[13] = BLAKE2S_IV_05; // ^ t1; + v[14] = BLAKE2S_IV_06 ^ f0; + v[15] = BLAKE2S_IV_07; // ^ f1; + + BLAKE2S_ROUND_VECTOR ( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + BLAKE2S_ROUND_VECTOR (14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3); + BLAKE2S_ROUND_VECTOR (11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4); + BLAKE2S_ROUND_VECTOR ( 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8); + BLAKE2S_ROUND_VECTOR ( 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13); + BLAKE2S_ROUND_VECTOR ( 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9); + BLAKE2S_ROUND_VECTOR (12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11); + BLAKE2S_ROUND_VECTOR (13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10); + BLAKE2S_ROUND_VECTOR ( 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5); + BLAKE2S_ROUND_VECTOR (10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0); + + h[0] = h[0] ^ v[0] ^ v[ 8]; + h[1] = h[1] ^ v[1] ^ v[ 9]; + h[2] = h[2] ^ v[2] ^ v[10]; + h[3] = h[3] ^ v[3] ^ v[11]; + h[4] = h[4] ^ v[4] ^ v[12]; + h[5] = h[5] ^ v[5] ^ v[13]; + h[6] = h[6] ^ v[6] ^ v[14]; + h[7] = h[7] ^ v[7] ^ v[15]; +} + +DECLSPEC void blake2s_init_vector (PRIVATE_AS blake2s_ctx_vector_t *ctx) +{ + ctx->h[0] = BLAKE2S_IV_00 ^ 0x01010020; // default output length: 0x20 = 32 bytes + ctx->h[1] = BLAKE2S_IV_01; + ctx->h[2] = BLAKE2S_IV_02; + ctx->h[3] = BLAKE2S_IV_03; + ctx->h[4] = BLAKE2S_IV_04; + ctx->h[5] = BLAKE2S_IV_05; + ctx->h[6] = BLAKE2S_IV_06; + ctx->h[7] = BLAKE2S_IV_07; + + ctx->m[ 0] = 0; + ctx->m[ 1] = 0; + ctx->m[ 2] = 0; + ctx->m[ 3] = 0; + ctx->m[ 4] = 0; + ctx->m[ 5] = 0; + ctx->m[ 6] = 0; + ctx->m[ 7] = 0; + ctx->m[ 8] = 0; + ctx->m[ 9] = 0; + ctx->m[10] = 0; + ctx->m[11] = 0; + ctx->m[12] = 0; + ctx->m[13] = 0; + ctx->m[14] = 0; + ctx->m[15] = 0; + + ctx->len = 0; +} + +DECLSPEC void blake2s_init_vector_from_scalar (PRIVATE_AS blake2s_ctx_vector_t *ctx, PRIVATE_AS 
blake2s_ctx_t *ctx0) +{ + ctx->h[0] = ctx0->h[0]; + ctx->h[1] = ctx0->h[1]; + ctx->h[2] = ctx0->h[2]; + ctx->h[3] = ctx0->h[3]; + ctx->h[4] = ctx0->h[4]; + ctx->h[5] = ctx0->h[5]; + ctx->h[6] = ctx0->h[6]; + ctx->h[7] = ctx0->h[7]; + + ctx->m[ 0] = ctx0->m[ 0]; + ctx->m[ 1] = ctx0->m[ 1]; + ctx->m[ 2] = ctx0->m[ 2]; + ctx->m[ 3] = ctx0->m[ 3]; + ctx->m[ 4] = ctx0->m[ 4]; + ctx->m[ 5] = ctx0->m[ 5]; + ctx->m[ 6] = ctx0->m[ 6]; + ctx->m[ 7] = ctx0->m[ 7]; + ctx->m[ 8] = ctx0->m[ 8]; + ctx->m[ 9] = ctx0->m[ 9]; + ctx->m[10] = ctx0->m[10]; + ctx->m[11] = ctx0->m[11]; + ctx->m[12] = ctx0->m[12]; + ctx->m[13] = ctx0->m[13]; + ctx->m[14] = ctx0->m[14]; + ctx->m[15] = ctx0->m[15]; + + ctx->len = ctx0->len; +} + +DECLSPEC void blake2s_update_vector_64 (PRIVATE_AS blake2s_ctx_vector_t *ctx, PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, PRIVATE_AS u32x *w2, PRIVATE_AS u32x *w3, const int len) +{ + if (len == 0) return; + + const int pos = ctx->len & 63; + + if (pos == 0) + { + if (ctx->len > 0) // if new block (pos == 0) AND the (old) len is not zero => transform + { + blake2s_transform_vector (ctx->h, ctx->m, (u32x) ctx->len, BLAKE2S_UPDATE); + } + + ctx->m[ 0] = w0[0]; + ctx->m[ 1] = w0[1]; + ctx->m[ 2] = w0[2]; + ctx->m[ 3] = w0[3]; + ctx->m[ 4] = w1[0]; + ctx->m[ 5] = w1[1]; + ctx->m[ 6] = w1[2]; + ctx->m[ 7] = w1[3]; + ctx->m[ 8] = w2[0]; + ctx->m[ 9] = w2[1]; + ctx->m[10] = w2[2]; + ctx->m[11] = w2[3]; + ctx->m[12] = w3[0]; + ctx->m[13] = w3[1]; + ctx->m[14] = w3[2]; + ctx->m[15] = w3[3]; + } + else + { + if ((pos + len) <= 64) + { + switch_buffer_by_offset_le (w0, w1, w2, w3, pos); + + ctx->m[ 0] |= w0[0]; + ctx->m[ 1] |= w0[1]; + ctx->m[ 2] |= w0[2]; + ctx->m[ 3] |= w0[3]; + ctx->m[ 4] |= w1[0]; + ctx->m[ 5] |= w1[1]; + ctx->m[ 6] |= w1[2]; + ctx->m[ 7] |= w1[3]; + ctx->m[ 8] |= w2[0]; + ctx->m[ 9] |= w2[1]; + ctx->m[10] |= w2[2]; + ctx->m[11] |= w2[3]; + ctx->m[12] |= w3[0]; + ctx->m[13] |= w3[1]; + ctx->m[14] |= w3[2]; + ctx->m[15] |= w3[3]; + } + else + { + u32x c0[4] 
= { 0 }; + u32x c1[4] = { 0 }; + u32x c2[4] = { 0 }; + u32x c3[4] = { 0 }; + + switch_buffer_by_offset_carry_le (w0, w1, w2, w3, c0, c1, c2, c3, pos); + + ctx->m[ 0] |= w0[0]; + ctx->m[ 1] |= w0[1]; + ctx->m[ 2] |= w0[2]; + ctx->m[ 3] |= w0[3]; + ctx->m[ 4] |= w1[0]; + ctx->m[ 5] |= w1[1]; + ctx->m[ 6] |= w1[2]; + ctx->m[ 7] |= w1[3]; + ctx->m[ 8] |= w2[0]; + ctx->m[ 9] |= w2[1]; + ctx->m[10] |= w2[2]; + ctx->m[11] |= w2[3]; + ctx->m[12] |= w3[0]; + ctx->m[13] |= w3[1]; + ctx->m[14] |= w3[2]; + ctx->m[15] |= w3[3]; + + // len must be a multiple of 64 (not ctx->len) for BLAKE2S_UPDATE: + + const u32x cur_len = ((ctx->len + len) / 64) * 64; + + blake2s_transform_vector (ctx->h, ctx->m, cur_len, BLAKE2S_UPDATE); + + ctx->m[ 0] = c0[0]; + ctx->m[ 1] = c0[1]; + ctx->m[ 2] = c0[2]; + ctx->m[ 3] = c0[3]; + ctx->m[ 4] = c1[0]; + ctx->m[ 5] = c1[1]; + ctx->m[ 6] = c1[2]; + ctx->m[ 7] = c1[3]; + ctx->m[ 8] = c2[0]; + ctx->m[ 9] = c2[1]; + ctx->m[10] = c2[2]; + ctx->m[11] = c2[3]; + ctx->m[12] = c3[0]; + ctx->m[13] = c3[1]; + ctx->m[14] = c3[2]; + ctx->m[15] = c3[3]; + } + } + + ctx->len += len; +} + +DECLSPEC void blake2s_update_vector (PRIVATE_AS blake2s_ctx_vector_t *ctx, PRIVATE_AS const u32x *w, const int len) +{ + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + const int limit = (const int) len - 64; // int type needed, could be negative + + int pos1; + int pos4; + + for (pos1 = 0, pos4 = 0; pos1 < limit; pos1 += 64, pos4 += 16) + { + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + w2[0] = w[pos4 + 8]; + w2[1] = w[pos4 + 9]; + w2[2] = w[pos4 + 10]; + w2[3] = w[pos4 + 11]; + w3[0] = w[pos4 + 12]; + w3[1] = w[pos4 + 13]; + w3[2] = w[pos4 + 14]; + w3[3] = w[pos4 + 15]; + + blake2s_update_vector_64 (ctx, w0, w1, w2, w3, 64); + } + + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = 
w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + w2[0] = w[pos4 + 8]; + w2[1] = w[pos4 + 9]; + w2[2] = w[pos4 + 10]; + w2[3] = w[pos4 + 11]; + w3[0] = w[pos4 + 12]; + w3[1] = w[pos4 + 13]; + w3[2] = w[pos4 + 14]; + w3[3] = w[pos4 + 15]; + + blake2s_update_vector_64 (ctx, w0, w1, w2, w3, len - (u32) pos1); +} + +DECLSPEC void blake2s_final_vector (PRIVATE_AS blake2s_ctx_vector_t *ctx) +{ + blake2s_transform_vector (ctx->h, ctx->m, (u32x) ctx->len, BLAKE2S_FINAL); +} diff --git a/OpenCL/inc_hash_blake2s.h b/OpenCL/inc_hash_blake2s.h new file mode 100644 index 000000000..63f2942f1 --- /dev/null +++ b/OpenCL/inc_hash_blake2s.h @@ -0,0 +1,96 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#ifndef INC_HASH_BLAKE2S_H +#define INC_HASH_BLAKE2S_H + +#define BLAKE2S_UPDATE 0 +#define BLAKE2S_FINAL -1 + +DECLSPEC u32 blake2s_rot16_S (const u32 a); +DECLSPEC u32x blake2s_rot16 (const u32x a); + +DECLSPEC u32 blake2s_rot08_S (const u32 a); +DECLSPEC u32x blake2s_rot08 (const u32x a); + +#define BLAKE2S_G(k0,k1,a,b,c,d) \ +{ \ + a = a + b + m[k0]; \ + d = blake2s_rot16_S (d ^ a); \ + c = c + d; \ + b = hc_rotr32_S (b ^ c, 12); \ + a = a + b + m[k1]; \ + d = blake2s_rot08_S (d ^ a); \ + c = c + d; \ + b = hc_rotr32_S (b ^ c, 7); \ +} + +#define BLAKE2S_ROUND(c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,ca,cb,cc,cd,ce,cf) \ +{ \ + BLAKE2S_G (c0, c1, v[0], v[4], v[ 8], v[12]); \ + BLAKE2S_G (c2, c3, v[1], v[5], v[ 9], v[13]); \ + BLAKE2S_G (c4, c5, v[2], v[6], v[10], v[14]); \ + BLAKE2S_G (c6, c7, v[3], v[7], v[11], v[15]); \ + BLAKE2S_G (c8, c9, v[0], v[5], v[10], v[15]); \ + BLAKE2S_G (ca, cb, v[1], v[6], v[11], v[12]); \ + BLAKE2S_G (cc, cd, v[2], v[7], v[ 8], v[13]); \ + BLAKE2S_G (ce, cf, v[3], v[4], v[ 9], v[14]); \ +} + +#define BLAKE2S_G_VECTOR(k0,k1,a,b,c,d) \ +{ \ + a = a + b + m[k0]; \ + d = blake2s_rot16 (d ^ a); \ + c = c + d; \ + b = hc_rotr32 (b ^ c, 12); \ + a = a + b + m[k1]; \ + d = blake2s_rot08 (d ^ a); \ + c = c 
+ d; \ + b = hc_rotr32 (b ^ c, 7); \ +} + +#define BLAKE2S_ROUND_VECTOR(c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,ca,cb,cc,cd,ce,cf) \ +{ \ + BLAKE2S_G_VECTOR (c0, c1, v[0], v[4], v[ 8], v[12]); \ + BLAKE2S_G_VECTOR (c2, c3, v[1], v[5], v[ 9], v[13]); \ + BLAKE2S_G_VECTOR (c4, c5, v[2], v[6], v[10], v[14]); \ + BLAKE2S_G_VECTOR (c6, c7, v[3], v[7], v[11], v[15]); \ + BLAKE2S_G_VECTOR (c8, c9, v[0], v[5], v[10], v[15]); \ + BLAKE2S_G_VECTOR (ca, cb, v[1], v[6], v[11], v[12]); \ + BLAKE2S_G_VECTOR (cc, cd, v[2], v[7], v[ 8], v[13]); \ + BLAKE2S_G_VECTOR (ce, cf, v[3], v[4], v[ 9], v[14]); \ +} + +typedef struct blake2s_ctx +{ + u32 m[16]; // buffer + u32 h[ 8]; // digest + + int len; + +} blake2s_ctx_t; + +typedef struct blake2s_ctx_vector +{ + u32x m[16]; // buffer + u32x h[ 8]; // digest + + int len; + +} blake2s_ctx_vector_t; + +DECLSPEC void blake2s_transform (PRIVATE_AS u32 *h, PRIVATE_AS const u32 *m, const int len, const u32 f0); +DECLSPEC void blake2s_init (PRIVATE_AS blake2s_ctx_t *ctx); +DECLSPEC void blake2s_update (PRIVATE_AS blake2s_ctx_t *ctx, PRIVATE_AS const u32 *w, const int len); +DECLSPEC void blake2s_update_global (PRIVATE_AS blake2s_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len); +DECLSPEC void blake2s_final (PRIVATE_AS blake2s_ctx_t *ctx); + +DECLSPEC void blake2s_transform_vector (PRIVATE_AS u32x *h, PRIVATE_AS const u32x *m, const u32x len, const u32 f0); +DECLSPEC void blake2s_init_vector (PRIVATE_AS blake2s_ctx_vector_t *ctx); +DECLSPEC void blake2s_init_vector_from_scalar (PRIVATE_AS blake2s_ctx_vector_t *ctx, PRIVATE_AS blake2s_ctx_t *ctx0); +DECLSPEC void blake2s_update_vector (PRIVATE_AS blake2s_ctx_vector_t *ctx, PRIVATE_AS const u32x *w, const int len); +DECLSPEC void blake2s_final_vector (PRIVATE_AS blake2s_ctx_vector_t *ctx); + +#endif // INC_HASH_BLAKE2S_H diff --git a/OpenCL/inc_types.h b/OpenCL/inc_types.h index 0adcbc5df..575b1538f 100644 --- a/OpenCL/inc_types.h +++ b/OpenCL/inc_types.h @@ -1666,6 +1666,19 @@ typedef enum 
blake2b_constants } blake2b_constants_t; +typedef enum blake2s_constants +{ + BLAKE2S_IV_00=0x6a09e667, + BLAKE2S_IV_01=0xbb67ae85, + BLAKE2S_IV_02=0x3c6ef372, + BLAKE2S_IV_03=0xa54ff53a, + BLAKE2S_IV_04=0x510e527f, + BLAKE2S_IV_05=0x9b05688c, + BLAKE2S_IV_06=0x1f83d9ab, + BLAKE2S_IV_07=0x5be0cd19 + +} blake2s_constants_t; + typedef enum combinator_mode { COMBINATOR_MODE_BASE_LEFT = 10001, diff --git a/OpenCL/m31000_a0-optimized.cl b/OpenCL/m31000_a0-optimized.cl new file mode 100644 index 000000000..fbebe24f4 --- /dev/null +++ b/OpenCL/m31000_a0-optimized.cl @@ -0,0 +1,199 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include M2S(INCLUDE_PATH/inc_vendor.h) +#include M2S(INCLUDE_PATH/inc_types.h) +#include M2S(INCLUDE_PATH/inc_platform.cl) +#include M2S(INCLUDE_PATH/inc_common.cl) +#include M2S(INCLUDE_PATH/inc_rp_optimized.h) +#include M2S(INCLUDE_PATH/inc_rp_optimized.cl) +#include M2S(INCLUDE_PATH/inc_simd.cl) +#include M2S(INCLUDE_PATH/inc_hash_blake2s.cl) +#endif + +KERNEL_FQ void m31000_m04 (KERN_ATTR_RULES ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= GID_CNT) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + u32x m[16]; + + m[ 0] = w0[0]; + m[ 1] = w0[1]; + m[ 2] = w0[2]; + m[ 3] = w0[3]; + m[ 4] = w1[0]; + m[ 5] = w1[1]; + m[ 6] = w1[2]; + m[ 7] = w1[3]; + m[ 8] = 0; + m[ 9] = 0; + 
m[10] = 0; + m[11] = 0; + m[12] = 0; + m[13] = 0; + m[14] = 0; + m[15] = 0; + + u32x h[8]; + + h[0] = BLAKE2S_IV_00 ^ 0x01010020; + h[1] = BLAKE2S_IV_01; + h[2] = BLAKE2S_IV_02; + h[3] = BLAKE2S_IV_03; + h[4] = BLAKE2S_IV_04; + h[5] = BLAKE2S_IV_05; + h[6] = BLAKE2S_IV_06; + h[7] = BLAKE2S_IV_07; + + blake2s_transform_vector (h, m, out_len, BLAKE2S_FINAL); + + const u32x r0 = h[DGST_R0]; + const u32x r1 = h[DGST_R1]; + const u32x r2 = h[DGST_R2]; + const u32x r3 = h[DGST_R3]; + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m31000_m08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m31000_m16 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m31000_s04 (KERN_ATTR_RULES ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= GID_CNT) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + u32x m[16]; + + m[ 0] = w0[0]; + m[ 1] = w0[1]; + m[ 2] = w0[2]; + m[ 3] = w0[3]; + m[ 4] = w1[0]; + m[ 5] = w1[1]; + m[ 6] = w1[2]; + m[ 7] = w1[3]; + m[ 8] = 0; + m[ 9] = 0; + m[10] = 0; + m[11] = 0; + m[12] = 0; + m[13] = 0; + m[14] = 0; + m[15] = 0; + + u32x h[8]; + + h[0] = BLAKE2S_IV_00 ^ 0x01010020; + h[1] = BLAKE2S_IV_01; + h[2] = BLAKE2S_IV_02; + h[3] = 
BLAKE2S_IV_03; + h[4] = BLAKE2S_IV_04; + h[5] = BLAKE2S_IV_05; + h[6] = BLAKE2S_IV_06; + h[7] = BLAKE2S_IV_07; + + blake2s_transform_vector (h, m, out_len, BLAKE2S_FINAL); + + const u32x r0 = h[DGST_R0]; + const u32x r1 = h[DGST_R1]; + const u32x r2 = h[DGST_R2]; + const u32x r3 = h[DGST_R3]; + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m31000_s08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m31000_s16 (KERN_ATTR_RULES ()) +{ +} diff --git a/OpenCL/m31000_a0-pure.cl b/OpenCL/m31000_a0-pure.cl new file mode 100644 index 000000000..d6fdb0a51 --- /dev/null +++ b/OpenCL/m31000_a0-pure.cl @@ -0,0 +1,111 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include M2S(INCLUDE_PATH/inc_vendor.h) +#include M2S(INCLUDE_PATH/inc_types.h) +#include M2S(INCLUDE_PATH/inc_platform.cl) +#include M2S(INCLUDE_PATH/inc_common.cl) +#include M2S(INCLUDE_PATH/inc_rp.h) +#include M2S(INCLUDE_PATH/inc_rp.cl) +#include M2S(INCLUDE_PATH/inc_scalar.cl) +#include M2S(INCLUDE_PATH/inc_hash_blake2s.cl) +#endif + +KERNEL_FQ void m31000_mxx (KERN_ATTR_RULES ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= GID_CNT) return; + + /** + * base + */ + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + blake2s_ctx_t ctx; + + blake2s_init (&ctx); + blake2s_update (&ctx, tmp.i, tmp.pw_len); + blake2s_final (&ctx); + + const u32 r0 = ctx.h[DGST_R0]; + const u32 r1 = ctx.h[DGST_R1]; + const u32 r2 = ctx.h[DGST_R2]; + const u32 r3 = ctx.h[DGST_R3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m31000_sxx (KERN_ATTR_RULES ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= GID_CNT) return; + + /** + * digest + */ + + const u32 search[4] = + { + 
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + blake2s_ctx_t ctx; + + blake2s_init (&ctx); + blake2s_update (&ctx, tmp.i, tmp.pw_len); + blake2s_final (&ctx); + + const u32 r0 = ctx.h[DGST_R0]; + const u32 r1 = ctx.h[DGST_R1]; + const u32 r2 = ctx.h[DGST_R2]; + const u32 r3 = ctx.h[DGST_R3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m31000_a1-optimized.cl b/OpenCL/m31000_a1-optimized.cl new file mode 100644 index 000000000..edc4580de --- /dev/null +++ b/OpenCL/m31000_a1-optimized.cl @@ -0,0 +1,299 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include M2S(INCLUDE_PATH/inc_vendor.h) +#include M2S(INCLUDE_PATH/inc_types.h) +#include M2S(INCLUDE_PATH/inc_platform.cl) +#include M2S(INCLUDE_PATH/inc_common.cl) +#include M2S(INCLUDE_PATH/inc_simd.cl) +#include M2S(INCLUDE_PATH/inc_hash_blake2s.cl) +#endif + +KERNEL_FQ void m31000_m04 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= GID_CNT) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x out_len = pw_l_len + pw_r_len; + + 
/** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (COMBS_MODE == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + + u32x m[16]; + + m[ 0] = w0[0]; + m[ 1] = w0[1]; + m[ 2] = w0[2]; + m[ 3] = w0[3]; + m[ 4] = w1[0]; + m[ 5] = w1[1]; + m[ 6] = w1[2]; + m[ 7] = w1[3]; + m[ 8] = 0; + m[ 9] = 0; + m[10] = 0; + m[11] = 0; + m[12] = 0; + m[13] = 0; + m[14] = 0; + m[15] = 0; + + u32x h[8]; + + h[0] = BLAKE2S_IV_00 ^ 0x01010020; + h[1] = BLAKE2S_IV_01; + h[2] = BLAKE2S_IV_02; + h[3] = BLAKE2S_IV_03; + h[4] = BLAKE2S_IV_04; + h[5] = BLAKE2S_IV_05; + h[6] = BLAKE2S_IV_06; + h[7] = BLAKE2S_IV_07; + + blake2s_transform_vector (h, m, out_len, BLAKE2S_FINAL); + + const u32x r0 = h[DGST_R0]; + const 
u32x r1 = h[DGST_R1]; + const u32x r2 = h[DGST_R2]; + const u32x r3 = h[DGST_R3]; + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m31000_m08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m31000_m16 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m31000_s04 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= GID_CNT) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x out_len = pw_l_len + pw_r_len; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] 
= ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (COMBS_MODE == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + + u32x m[16]; + + m[ 0] = w0[0]; + m[ 1] = w0[1]; + m[ 2] = w0[2]; + m[ 3] = w0[3]; + m[ 4] = w1[0]; + m[ 5] = w1[1]; + m[ 6] = w1[2]; + m[ 7] = w1[3]; + m[ 8] = 0; + m[ 9] = 0; + m[10] = 0; + m[11] = 0; + m[12] = 0; + m[13] = 0; + m[14] = 0; + m[15] = 0; + + u32x h[8]; + + h[0] = BLAKE2S_IV_00 ^ 0x01010020; + h[1] = BLAKE2S_IV_01; + h[2] = BLAKE2S_IV_02; + h[3] = BLAKE2S_IV_03; + h[4] = BLAKE2S_IV_04; + h[5] = BLAKE2S_IV_05; + h[6] = BLAKE2S_IV_06; + h[7] = BLAKE2S_IV_07; + + blake2s_transform_vector (h, m, out_len, BLAKE2S_FINAL); + + const u32x r0 = h[DGST_R0]; + const u32x r1 = h[DGST_R1]; + const u32x r2 = h[DGST_R2]; + const u32x r3 = h[DGST_R3]; + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m31000_s08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m31000_s16 (KERN_ATTR_BASIC ()) +{ +} diff --git a/OpenCL/m31000_a1-pure.cl b/OpenCL/m31000_a1-pure.cl new file mode 100644 index 000000000..e90aa0d46 --- /dev/null +++ b/OpenCL/m31000_a1-pure.cl @@ -0,0 +1,109 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include M2S(INCLUDE_PATH/inc_vendor.h) +#include M2S(INCLUDE_PATH/inc_types.h) +#include M2S(INCLUDE_PATH/inc_platform.cl) +#include M2S(INCLUDE_PATH/inc_common.cl) +#include M2S(INCLUDE_PATH/inc_scalar.cl) +#include M2S(INCLUDE_PATH/inc_hash_blake2s.cl) +#endif + 
+KERNEL_FQ void m31000_mxx (KERN_ATTR_BASIC ())
+{
+  /**
+   * base: one work-item per pws[] entry; ids at or beyond GID_CNT return immediately
+   */
+
+  const u64 gid = get_global_id (0);
+
+  if (gid >= GID_CNT) return;
+
+  /**
+   * base: absorb the pws[gid] word once; every loop iteration continues from a copy of ctx0
+   */
+
+  blake2s_ctx_t ctx0;
+
+  blake2s_init (&ctx0);
+
+  blake2s_update_global (&ctx0, pws[gid].i, pws[gid].pw_len);
+
+  /**
+   * loop: append combs_buf[il_pos] to the shared prefix, finalize, compare four digest words
+   */
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
+  {
+    blake2s_ctx_t ctx = ctx0; // private copy, ctx0 itself is never finalized
+
+    blake2s_update_global (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);
+
+    blake2s_final (&ctx);
+
+    const u32 r0 = ctx.h[DGST_R0];
+    const u32 r1 = ctx.h[DGST_R1];
+    const u32 r2 = ctx.h[DGST_R2];
+    const u32 r3 = ctx.h[DGST_R3];
+
+    COMPARE_M_SCALAR (r0, r1, r2, r3);
+  }
+}
+// Same flow as m31000_mxx, but a search[] digest is loaded first and COMPARE_S_SCALAR is used.
+KERNEL_FQ void m31000_sxx (KERN_ATTR_BASIC ())
+{
+  /**
+   * base: one work-item per pws[] entry; ids at or beyond GID_CNT return immediately
+   */
+
+  const u64 gid = get_global_id (0);
+
+  if (gid >= GID_CNT) return;
+
+  /**
+   * digest: four words of the digest at DIGESTS_OFFSET_HOST (presumably read by COMPARE_S_SCALAR - confirm in the macro)
+   */
+
+  const u32 search[4] =
+  {
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
+  };
+
+  /**
+   * base: absorb the pws[gid] word once; every loop iteration continues from a copy of ctx0
+   */
+
+  blake2s_ctx_t ctx0;
+
+  blake2s_init (&ctx0);
+
+  blake2s_update_global (&ctx0, pws[gid].i, pws[gid].pw_len);
+
+  /**
+   * loop: append combs_buf[il_pos] to the shared prefix, finalize, compare four digest words
+   */
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
+  {
+    blake2s_ctx_t ctx = ctx0; // private copy, ctx0 itself is never finalized
+
+    blake2s_update_global (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);
+
+    blake2s_final (&ctx);
+
+    const u32 r0 = ctx.h[DGST_R0];
+    const u32 r1 = ctx.h[DGST_R1];
+    const u32 r2 = ctx.h[DGST_R2];
+    const u32 r3 = ctx.h[DGST_R3];
+
+    COMPARE_S_SCALAR (r0, r1, r2, r3);
+  }
+}
diff --git a/OpenCL/m31000_a3-optimized.cl b/OpenCL/m31000_a3-optimized.cl
new file mode 100644
index 000000000..876542c05
--- /dev/null
+++ b/OpenCL/m31000_a3-optimized.cl
@@ -0,0 +1,407 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+#define NEW_SIMD_CODE
+
+#ifdef KERNEL_STATIC
+#include M2S(INCLUDE_PATH/inc_vendor.h)
+#include 
M2S(INCLUDE_PATH/inc_types.h) +#include M2S(INCLUDE_PATH/inc_platform.cl) +#include M2S(INCLUDE_PATH/inc_common.cl) +#include M2S(INCLUDE_PATH/inc_simd.cl) +#include M2S(INCLUDE_PATH/inc_hash_blake2s.cl) +#endif + +DECLSPEC void m31000m (PRIVATE_AS u32 *w, const u32 pw_len, KERN_ATTR_FUNC_VECTOR ()) +{ + /** + * modifiers are taken from args + */ + + /** + * loop + */ + + u32 w0l = w[0]; + + for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + const u32x w0x = w0l | w0r; + + u32x w0[4]; + u32x w1[4]; + + w0[0] = w0x; + w0[1] = w[ 1]; + w0[2] = w[ 2]; + w0[3] = w[ 3]; + w1[0] = w[ 4]; + w1[1] = w[ 5]; + w1[2] = w[ 6]; + w1[3] = w[ 7]; + + u32x m[16]; + + m[ 0] = w0[0]; + m[ 1] = w0[1]; + m[ 2] = w0[2]; + m[ 3] = w0[3]; + m[ 4] = w1[0]; + m[ 5] = w1[1]; + m[ 6] = w1[2]; + m[ 7] = w1[3]; + m[ 8] = 0; + m[ 9] = 0; + m[10] = 0; + m[11] = 0; + m[12] = 0; + m[13] = 0; + m[14] = 0; + m[15] = 0; + + u32x h[8]; + + h[0] = BLAKE2S_IV_00 ^ 0x01010020; + h[1] = BLAKE2S_IV_01; + h[2] = BLAKE2S_IV_02; + h[3] = BLAKE2S_IV_03; + h[4] = BLAKE2S_IV_04; + h[5] = BLAKE2S_IV_05; + h[6] = BLAKE2S_IV_06; + h[7] = BLAKE2S_IV_07; + + blake2s_transform_vector (h, m, pw_len, BLAKE2S_FINAL); + + const u32x r0 = h[DGST_R0]; + const u32x r1 = h[DGST_R1]; + const u32x r2 = h[DGST_R2]; + const u32x r3 = h[DGST_R3]; + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +DECLSPEC void m31000s (PRIVATE_AS u32 *w, const u32 pw_len, KERN_ATTR_FUNC_VECTOR ()) +{ + /** + * modifiers are taken from args + */ + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + u32 w0l = w[0]; + + for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + const 
u32x w0x = w0l | w0r;
+
+    u32x w0[4];
+    u32x w1[4];
+
+    w0[0] = w0x;
+    w0[1] = w[ 1];
+    w0[2] = w[ 2];
+    w0[3] = w[ 3];
+    w1[0] = w[ 4];
+    w1[1] = w[ 5];
+    w1[2] = w[ 6];
+    w1[3] = w[ 7];
+
+    u32x m[16]; // one 64-byte BLAKE2s message block
+
+    m[ 0] = w0[0];
+    m[ 1] = w0[1];
+    m[ 2] = w0[2];
+    m[ 3] = w0[3];
+    m[ 4] = w1[0];
+    m[ 5] = w1[1];
+    m[ 6] = w1[2];
+    m[ 7] = w1[3];
+    m[ 8] = w[ 8]; // words 8..15 carry candidate bytes 32..63; zero for callers that
+    m[ 9] = w[ 9]; // clear them, real data when all 16 words were loaded
+    m[10] = w[10];
+    m[11] = w[11];
+    m[12] = w[12];
+    m[13] = w[13];
+    m[14] = w[14];
+    m[15] = w[15];
+
+    u32x h[8];
+
+    h[0] = BLAKE2S_IV_00 ^ 0x01010020; // parameter word per RFC 7693: depth 1, fanout 1, no key, 0x20-byte digest
+    h[1] = BLAKE2S_IV_01;
+    h[2] = BLAKE2S_IV_02;
+    h[3] = BLAKE2S_IV_03;
+    h[4] = BLAKE2S_IV_04;
+    h[5] = BLAKE2S_IV_05;
+    h[6] = BLAKE2S_IV_06;
+    h[7] = BLAKE2S_IV_07;
+
+    blake2s_transform_vector (h, m, pw_len, BLAKE2S_FINAL);
+
+    const u32x r0 = h[DGST_R0];
+    const u32x r1 = h[DGST_R1];
+    const u32x r2 = h[DGST_R2];
+    const u32x r3 = h[DGST_R3];
+
+    COMPARE_S_SIMD (r0, r1, r2, r3);
+  }
+}
+// Entry point that loads only the first 4 candidate words (16 bytes) and zero-fills the rest.
+KERNEL_FQ void m31000_m04 (KERN_ATTR_VECTOR ())
+{
+  /**
+   * base
+   */
+
+  const u64 lid = get_local_id (0);
+  const u64 gid = get_global_id (0);
+  const u64 lsz = get_local_size (0);
+
+  if (gid >= GID_CNT) return;
+
+  u32 w[16];
+
+  w[ 0] = pws[gid].i[ 0];
+  w[ 1] = pws[gid].i[ 1];
+  w[ 2] = pws[gid].i[ 2];
+  w[ 3] = pws[gid].i[ 3];
+  w[ 4] = 0;
+  w[ 5] = 0;
+  w[ 6] = 0;
+  w[ 7] = 0;
+  w[ 8] = 0;
+  w[ 9] = 0;
+  w[10] = 0;
+  w[11] = 0;
+  w[12] = 0;
+  w[13] = 0;
+  w[14] = 0;
+  w[15] = 0;
+
+  const u32 pw_len = pws[gid].pw_len & 63; // length is reduced to the range 0..63
+
+  /**
+   * main
+   */
+
+  m31000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, kernel_param, gid, lid, lsz);
+}
+// Entry point that loads the first 8 candidate words (32 bytes) and zero-fills the rest.
+KERNEL_FQ void m31000_m08 (KERN_ATTR_VECTOR ())
+{
+  /**
+   * base
+   */
+
+  const u64 lid = get_local_id (0);
+  const u64 gid = get_global_id (0);
+  const 
u64 lsz = get_local_size (0);
+
+  if (gid >= GID_CNT) return;
+
+  u32 w[16];
+
+  w[ 0] = pws[gid].i[ 0];
+  w[ 1] = pws[gid].i[ 1];
+  w[ 2] = pws[gid].i[ 2];
+  w[ 3] = pws[gid].i[ 3];
+  w[ 4] = pws[gid].i[ 4];
+  w[ 5] = pws[gid].i[ 5];
+  w[ 6] = pws[gid].i[ 6];
+  w[ 7] = pws[gid].i[ 7];
+  w[ 8] = 0;
+  w[ 9] = 0;
+  w[10] = 0;
+  w[11] = 0;
+  w[12] = 0;
+  w[13] = 0;
+  w[14] = 0;
+  w[15] = 0;
+
+  const u32 pw_len = pws[gid].pw_len & 63; // length is reduced to the range 0..63
+
+  /**
+   * main
+   */
+
+  m31000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, kernel_param, gid, lid, lsz);
+}
+// Entry point that loads all 16 candidate words (64 bytes) before calling m31000m.
+KERNEL_FQ void m31000_m16 (KERN_ATTR_VECTOR ())
+{
+  /**
+   * base
+   */
+
+  const u64 lid = get_local_id (0);
+  const u64 gid = get_global_id (0);
+  const u64 lsz = get_local_size (0);
+
+  if (gid >= GID_CNT) return;
+
+  u32 w[16];
+
+  w[ 0] = pws[gid].i[ 0];
+  w[ 1] = pws[gid].i[ 1];
+  w[ 2] = pws[gid].i[ 2];
+  w[ 3] = pws[gid].i[ 3];
+  w[ 4] = pws[gid].i[ 4];
+  w[ 5] = pws[gid].i[ 5];
+  w[ 6] = pws[gid].i[ 6];
+  w[ 7] = pws[gid].i[ 7];
+  w[ 8] = pws[gid].i[ 8];
+  w[ 9] = pws[gid].i[ 9];
+  w[10] = pws[gid].i[10];
+  w[11] = pws[gid].i[11];
+  w[12] = pws[gid].i[12];
+  w[13] = pws[gid].i[13];
+  w[14] = pws[gid].i[14];
+  w[15] = pws[gid].i[15];
+
+  const u32 pw_len = pws[gid].pw_len & 63; // length is reduced to the range 0..63
+
+  /**
+   * main
+   */
+
+  m31000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, kernel_param, gid, lid, lsz);
+}
+// Single-hash counterpart of m31000_m04: first 4 words loaded, rest zeroed, then m31000s.
+KERNEL_FQ void m31000_s04 (KERN_ATTR_VECTOR ()) 
+{
+  /**
+   * base
+   */
+
+  const u64 lid = get_local_id (0);
+  const u64 gid = get_global_id (0);
+  const u64 lsz = get_local_size (0);
+
+  if (gid >= GID_CNT) return;
+
+  u32 w[16];
+
+  w[ 0] = pws[gid].i[ 0];
+  w[ 1] = pws[gid].i[ 1];
+  w[ 2] = pws[gid].i[ 2];
+  w[ 3] = pws[gid].i[ 3];
+  w[ 4] = 0;
+  w[ 5] = 0;
+  w[ 6] = 0;
+  w[ 7] = 0;
+  w[ 8] = 0;
+  w[ 9] = 0;
+  w[10] = 0;
+  w[11] = 0;
+  w[12] = 0;
+  w[13] = 0;
+  w[14] = 0;
+  w[15] = 0;
+
+  const u32 pw_len = pws[gid].pw_len & 63; // length is reduced to the range 0..63
+
+  /**
+   * main
+   */
+
+  m31000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, kernel_param, gid, lid, lsz);
+}
+// Entry point that loads the first 8 candidate words (32 bytes), zero-fills the rest, then calls m31000s.
+KERNEL_FQ void m31000_s08 (KERN_ATTR_VECTOR ())
+{
+  /**
+   * base
+   */
+
+  const u64 lid = get_local_id (0);
+  const u64 gid = get_global_id (0);
+  const u64 lsz = get_local_size (0);
+
+  if (gid >= GID_CNT) return;
+
+  u32 w[16];
+
+  w[ 0] = pws[gid].i[ 0];
+  w[ 1] = pws[gid].i[ 1];
+  w[ 2] = pws[gid].i[ 2];
+  w[ 3] = pws[gid].i[ 3];
+  w[ 4] = pws[gid].i[ 4];
+  w[ 5] = pws[gid].i[ 5];
+  w[ 6] = pws[gid].i[ 6];
+  w[ 7] = pws[gid].i[ 7];
+  w[ 8] = 0;
+  w[ 9] = 0;
+  w[10] = 0;
+  w[11] = 0;
+  w[12] = 0;
+  w[13] = 0;
+  w[14] = 0;
+  w[15] = 0;
+
+  const u32 pw_len = pws[gid].pw_len & 63; // length is reduced to the range 0..63
+
+  /**
+   * main
+   */
+
+  m31000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, kernel_param, gid, lid, lsz);
+}
+// Entry point that loads all 16 candidate words (64 bytes) before calling m31000s.
+KERNEL_FQ void m31000_s16 (KERN_ATTR_VECTOR ())
+{
+  /**
+   * base
+   */
+
+  const u64 lid = 
get_local_id (0);
+  const u64 gid = get_global_id (0);
+  const u64 lsz = get_local_size (0);
+
+  if (gid >= GID_CNT) return;
+
+  u32 w[16];
+
+  w[ 0] = pws[gid].i[ 0];
+  w[ 1] = pws[gid].i[ 1];
+  w[ 2] = pws[gid].i[ 2];
+  w[ 3] = pws[gid].i[ 3];
+  w[ 4] = pws[gid].i[ 4];
+  w[ 5] = pws[gid].i[ 5];
+  w[ 6] = pws[gid].i[ 6];
+  w[ 7] = pws[gid].i[ 7];
+  w[ 8] = pws[gid].i[ 8];
+  w[ 9] = pws[gid].i[ 9];
+  w[10] = pws[gid].i[10];
+  w[11] = pws[gid].i[11];
+  w[12] = pws[gid].i[12];
+  w[13] = pws[gid].i[13];
+  w[14] = pws[gid].i[14];
+  w[15] = pws[gid].i[15];
+
+  const u32 pw_len = pws[gid].pw_len & 63; // length is reduced to the range 0..63
+
+  /**
+   * main
+   */
+
+  m31000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, kernel_param, gid, lid, lsz);
+}
diff --git a/OpenCL/m31000_a3-pure.cl b/OpenCL/m31000_a3-pure.cl
new file mode 100644
index 000000000..24ce691db
--- /dev/null
+++ b/OpenCL/m31000_a3-pure.cl
@@ -0,0 +1,131 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+#define NEW_SIMD_CODE
+
+#ifdef KERNEL_STATIC
+#include M2S(INCLUDE_PATH/inc_vendor.h)
+#include M2S(INCLUDE_PATH/inc_types.h)
+#include M2S(INCLUDE_PATH/inc_platform.cl)
+#include M2S(INCLUDE_PATH/inc_common.cl)
+#include M2S(INCLUDE_PATH/inc_simd.cl)
+#include M2S(INCLUDE_PATH/inc_hash_blake2s.cl)
+#endif
+// Context-based kernel: hashes the whole w[] buffer through blake2s_*_vector, compares with COMPARE_M_SIMD.
+KERNEL_FQ void m31000_mxx (KERN_ATTR_VECTOR ())
+{
+  /**
+   * modifier
+   */
+
+  const u64 gid = get_global_id (0);
+
+  if (gid >= GID_CNT) return;
+
+  /**
+   * base: copy one 32-bit word per started group of 4 bytes of the candidate into a zeroed buffer
+   */
+
+  const u32 pw_len = pws[gid].pw_len;
+
+  u32x w[64] = { 0 };
+
+  for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
+  {
+    w[idx] = pws[gid].i[idx];
+  }
+
+  /**
+   * loop: only word 0 changes per iteration (base word OR words_buf_r vector)
+   */
+
+  u32x w0l = w[0];
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
+  {
+    const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
+
+    const u32x w0 = w0l | w0r;
+
+    w[0] = w0;
+
+    blake2s_ctx_vector_t ctx;
+
+    blake2s_init_vector (&ctx);
+    blake2s_update_vector (&ctx, w, pw_len);
+    blake2s_final_vector (&ctx);
+
+    const u32x r0 = ctx.h[DGST_R0];
+    const u32x r1 = ctx.h[DGST_R1];
+    const u32x r2 = ctx.h[DGST_R2];
+    const u32x r3 = ctx.h[DGST_R3];
+
+    COMPARE_M_SIMD (r0, r1, r2, r3);
+  }
+}
+// Same flow as the kernel above, plus a search[] digest loaded from digests_buf.
+KERNEL_FQ void m31000_sxx (KERN_ATTR_VECTOR ())
+{
+  /**
+   * modifier
+   */
+
+  const u64 gid = get_global_id (0);
+
+  if (gid >= GID_CNT) return;
+
+  /**
+   * digest
+   */
+
+  const u32 search[4] =
+  {
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
+  };
+
+  /**
+   * base: copy one 32-bit word per started group of 4 bytes of the candidate into a zeroed buffer
+   */
+
+  const u32 pw_len = pws[gid].pw_len;
+
+  u32x w[64] = { 0 };
+
+  for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
+  {
+    w[idx] = pws[gid].i[idx];
+  }
+
+  /**
+   * loop
+   */
+
+  u32x w0l = w[0];
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += 
VECT_SIZE)
+  {
+    const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
+
+    const u32x w0 = w0l | w0r;
+
+    w[0] = w0; // only word 0 changes between iterations
+
+    blake2s_ctx_vector_t ctx;
+
+    blake2s_init_vector (&ctx);
+    blake2s_update_vector (&ctx, w, pw_len);
+    blake2s_final_vector (&ctx);
+
+    const u32x r0 = ctx.h[DGST_R0];
+    const u32x r1 = ctx.h[DGST_R1];
+    const u32x r2 = ctx.h[DGST_R2];
+    const u32x r3 = ctx.h[DGST_R3];
+
+    COMPARE_S_SIMD (r0, r1, r2, r3);
+  }
+}
diff --git a/docs/readme.txt b/docs/readme.txt
index 289d3e935..d2b57eb6e 100644
--- a/docs/readme.txt
+++ b/docs/readme.txt
@@ -58,6 +58,7 @@ NVIDIA GPUs require "NVIDIA Driver" (440.64 or later) and "CUDA Toolkit" (9.0 or
 - SHA3-512
 - RIPEMD-160
 - BLAKE2b-512
+- BLAKE2s-256
 - GOST R 34.11-2012 (Streebog) 256-bit, big-endian
 - GOST R 34.11-2012 (Streebog) 512-bit, big-endian
 - GOST R 34.11-94
diff --git 
a/src/modules/module_31000.c b/src/modules/module_31000.c
new file mode 100644
index 000000000..cf35180ac
--- /dev/null
+++ b/src/modules/module_31000.c
@@ -0,0 +1,247 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+#include "common.h"
+#include "types.h"
+#include "modules.h"
+#include "bitops.h"
+#include "convert.h"
+#include "shared.h"
+// Module constants; each one is handed back unchanged by the module_* getter of the matching name.
+static const u32 ATTACK_EXEC = ATTACK_EXEC_INSIDE_KERNEL;
+static const u32 DGST_POS0 = 1; // DGST_POS0..3: digest word indices reported to the caller
+static const u32 DGST_POS1 = 0;
+static const u32 DGST_POS2 = 3;
+static const u32 DGST_POS3 = 2;
+static const u32 DGST_SIZE = DGST_SIZE_4_8;
+static const u32 HASH_CATEGORY = HASH_CATEGORY_RAW_HASH;
+static const char *HASH_NAME = "BLAKE2s-256";
+static const u64 KERN_TYPE = 31000;
+static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_USES_BITS_32 | OPTI_TYPE_RAW_HASH;
+static const u64 OPTS_TYPE = OPTS_TYPE_STOCK_MODULE | OPTS_TYPE_PT_GENERATE_LE;
+static const u32 SALT_TYPE = SALT_TYPE_NONE;
+static const char *ST_PASS = "hashcat"; // presumably the self-test plaintext for ST_HASH - confirm
+static const char *ST_HASH = "$BLAKE2$2c719b484789ad5f6fc1739012182169b25484af156adc91d4f64f72400e574a";
+// The getters below ignore all three arguments and return the constant named after them.
+u32 module_attack_exec (MAYBE_UNUSED const hashconfig_t * hashconfig, MAYBE_UNUSED const user_options_t * user_options, MAYBE_UNUSED const user_options_extra_t * user_options_extra)
+{
+  return ATTACK_EXEC;
+}
+
+u32 module_dgst_pos0 (MAYBE_UNUSED const hashconfig_t * hashconfig, MAYBE_UNUSED const user_options_t * user_options, MAYBE_UNUSED const user_options_extra_t * user_options_extra)
+{
+  return DGST_POS0;
+}
+
+u32 module_dgst_pos1 (MAYBE_UNUSED const hashconfig_t * hashconfig, MAYBE_UNUSED const user_options_t * user_options, MAYBE_UNUSED const user_options_extra_t * user_options_extra)
+{
+  return DGST_POS1;
+}
+
+u32 module_dgst_pos2 (MAYBE_UNUSED const hashconfig_t * hashconfig, MAYBE_UNUSED const user_options_t * user_options, MAYBE_UNUSED const user_options_extra_t * user_options_extra)
+{
+  return DGST_POS2;
+}
+
+u32 module_dgst_pos3 (MAYBE_UNUSED 
const hashconfig_t * hashconfig, MAYBE_UNUSED const user_options_t * user_options, MAYBE_UNUSED const user_options_extra_t * user_options_extra)
+{
+  return DGST_POS3;
+}
+// Each getter from here on ignores its arguments and returns the file-level constant it is named after.
+u32 module_dgst_size (MAYBE_UNUSED const hashconfig_t * hashconfig, MAYBE_UNUSED const user_options_t * user_options, MAYBE_UNUSED const user_options_extra_t * user_options_extra)
+{
+  return DGST_SIZE;
+}
+
+u32 module_hash_category (MAYBE_UNUSED const hashconfig_t * hashconfig, MAYBE_UNUSED const user_options_t * user_options, MAYBE_UNUSED const user_options_extra_t * user_options_extra)
+{
+  return HASH_CATEGORY;
+}
+
+const char *module_hash_name (MAYBE_UNUSED const hashconfig_t * hashconfig, MAYBE_UNUSED const user_options_t * user_options, MAYBE_UNUSED const user_options_extra_t * user_options_extra)
+{
+  return HASH_NAME;
+}
+
+u64 module_kern_type (MAYBE_UNUSED const hashconfig_t * hashconfig, MAYBE_UNUSED const user_options_t * user_options, MAYBE_UNUSED const user_options_extra_t * user_options_extra)
+{
+  return KERN_TYPE;
+}
+
+u32 module_opti_type (MAYBE_UNUSED const hashconfig_t * hashconfig, MAYBE_UNUSED const user_options_t * user_options, MAYBE_UNUSED const user_options_extra_t * user_options_extra)
+{
+  return OPTI_TYPE;
+}
+
+u64 module_opts_type (MAYBE_UNUSED const hashconfig_t * hashconfig, MAYBE_UNUSED const user_options_t * user_options, MAYBE_UNUSED const user_options_extra_t * user_options_extra)
+{
+  return OPTS_TYPE;
+}
+
+u32 module_salt_type (MAYBE_UNUSED const hashconfig_t * hashconfig, MAYBE_UNUSED const user_options_t * user_options, MAYBE_UNUSED const user_options_extra_t * user_options_extra)
+{
+  return SALT_TYPE;
+}
+
+const char *module_st_hash (MAYBE_UNUSED const hashconfig_t * hashconfig, MAYBE_UNUSED const user_options_t * user_options, MAYBE_UNUSED const user_options_extra_t * user_options_extra)
+{
+  return ST_HASH;
+}
+
+const char *module_st_pass (MAYBE_UNUSED const hashconfig_t * hashconfig, MAYBE_UNUSED const user_options_t * user_options, 
MAYBE_UNUSED const user_options_extra_t * user_options_extra) +{ + return ST_PASS; +} + +static const char *SIGNATURE_BLAKE2S = "$BLAKE2$"; + +int module_hash_decode (MAYBE_UNUSED const hashconfig_t * hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t * salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t * hash_info, const char *line_buf, MAYBE_UNUSED const int line_len) +{ + u32 *digest = (u32 *) digest_buf; + + hc_token_t token; + + token.token_cnt = 2; + + token.signatures_cnt = 1; + token.signatures_buf[0] = SIGNATURE_BLAKE2S; + + token.len[0] = 8; + token.attr[0] = TOKEN_ATTR_FIXED_LENGTH | TOKEN_ATTR_VERIFY_SIGNATURE; + + token.len[1] = 64; + token.attr[1] = TOKEN_ATTR_FIXED_LENGTH | TOKEN_ATTR_VERIFY_HEX; + + const int rc_tokenizer = input_tokenizer ((const u8 *) line_buf, line_len, &token); + + if (rc_tokenizer != PARSER_OK) + return (rc_tokenizer); + + const u8 *hash_pos = token.buf[1]; + + digest[0] = hex_to_u32 (hash_pos + 0); + digest[1] = hex_to_u32 (hash_pos + 8); + digest[2] = hex_to_u32 (hash_pos + 16); + digest[3] = hex_to_u32 (hash_pos + 24); + digest[4] = hex_to_u32 (hash_pos + 32); + digest[5] = hex_to_u32 (hash_pos + 40); + digest[6] = hex_to_u32 (hash_pos + 48); + digest[7] = hex_to_u32 (hash_pos + 56); + + return (PARSER_OK); +} + +int module_hash_encode (MAYBE_UNUSED const hashconfig_t * hashconfig, MAYBE_UNUSED const void *digest_buf, MAYBE_UNUSED const salt_t * salt, MAYBE_UNUSED const void *esalt_buf, MAYBE_UNUSED const void *hook_salt_buf, MAYBE_UNUSED const hashinfo_t * hash_info, char *line_buf, MAYBE_UNUSED const int line_size) +{ + const u32 *digest = (const u32 *) digest_buf; + + // we can not change anything in the original buffer, otherwise destroying sorting + // therefore create some local buffer + + u8 *out_buf = (u8 *) line_buf; + + int out_len = strlen (SIGNATURE_BLAKE2S); + + memcpy (out_buf, SIGNATURE_BLAKE2S, out_len); + + u32_to_hex (digest[0], out_buf + out_len); + 
out_len += 8;
+  u32_to_hex (digest[1], out_buf + out_len);
+  out_len += 8;
+  u32_to_hex (digest[2], out_buf + out_len);
+  out_len += 8;
+  u32_to_hex (digest[3], out_buf + out_len);
+  out_len += 8;
+  u32_to_hex (digest[4], out_buf + out_len);
+  out_len += 8;
+  u32_to_hex (digest[5], out_buf + out_len);
+  out_len += 8;
+  u32_to_hex (digest[6], out_buf + out_len);
+  out_len += 8;
+  u32_to_hex (digest[7], out_buf + out_len);
+  out_len += 8;
+
+  return out_len; // signature length plus 64 hex characters
+}
+// Fills the module_ctx callback table: functions defined in this file, MODULE_DEFAULT for everything else.
+void module_init (module_ctx_t * module_ctx)
+{
+  module_ctx->module_context_size = MODULE_CONTEXT_SIZE_CURRENT;
+  module_ctx->module_interface_version = MODULE_INTERFACE_VERSION_CURRENT;
+
+  module_ctx->module_attack_exec = module_attack_exec;
+  module_ctx->module_benchmark_esalt = MODULE_DEFAULT;
+  module_ctx->module_benchmark_hook_salt = MODULE_DEFAULT;
+  module_ctx->module_benchmark_mask = MODULE_DEFAULT;
+  module_ctx->module_benchmark_charset = MODULE_DEFAULT;
+  module_ctx->module_benchmark_salt = MODULE_DEFAULT;
+  module_ctx->module_build_plain_postprocess = MODULE_DEFAULT;
+  module_ctx->module_deep_comp_kernel = MODULE_DEFAULT;
+  module_ctx->module_deprecated_notice = MODULE_DEFAULT;
+  module_ctx->module_dgst_pos0 = module_dgst_pos0;
+  module_ctx->module_dgst_pos1 = module_dgst_pos1;
+  module_ctx->module_dgst_pos2 = module_dgst_pos2;
+  module_ctx->module_dgst_pos3 = module_dgst_pos3;
+  module_ctx->module_dgst_size = module_dgst_size;
+  module_ctx->module_dictstat_disable = MODULE_DEFAULT;
+  module_ctx->module_esalt_size = MODULE_DEFAULT;
+  module_ctx->module_extra_buffer_size = MODULE_DEFAULT;
+  module_ctx->module_extra_tmp_size = MODULE_DEFAULT;
+  module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT;
+  module_ctx->module_forced_outfile_format = MODULE_DEFAULT;
+  module_ctx->module_hash_binary_count = MODULE_DEFAULT;
+  module_ctx->module_hash_binary_parse = MODULE_DEFAULT;
+  module_ctx->module_hash_binary_save = MODULE_DEFAULT;
+  module_ctx->module_hash_decode_postprocess = MODULE_DEFAULT;
+  
module_ctx->module_hash_decode_potfile = MODULE_DEFAULT;
+  module_ctx->module_hash_decode_zero_hash = MODULE_DEFAULT;
+  module_ctx->module_hash_decode = module_hash_decode; // parser defined in this file
+  module_ctx->module_hash_encode_status = MODULE_DEFAULT;
+  module_ctx->module_hash_encode_potfile = MODULE_DEFAULT;
+  module_ctx->module_hash_encode = module_hash_encode; // formatter defined in this file
+  module_ctx->module_hash_init_selftest = MODULE_DEFAULT;
+  module_ctx->module_hash_mode = MODULE_DEFAULT;
+  module_ctx->module_hash_category = module_hash_category;
+  module_ctx->module_hash_name = module_hash_name;
+  module_ctx->module_hashes_count_min = MODULE_DEFAULT;
+  module_ctx->module_hashes_count_max = MODULE_DEFAULT;
+  module_ctx->module_hlfmt_disable = MODULE_DEFAULT;
+  module_ctx->module_hook_extra_param_size = MODULE_DEFAULT;
+  module_ctx->module_hook_extra_param_init = MODULE_DEFAULT;
+  module_ctx->module_hook_extra_param_term = MODULE_DEFAULT;
+  module_ctx->module_hook12 = MODULE_DEFAULT;
+  module_ctx->module_hook23 = MODULE_DEFAULT;
+  module_ctx->module_hook_salt_size = MODULE_DEFAULT;
+  module_ctx->module_hook_size = MODULE_DEFAULT;
+  module_ctx->module_jit_build_options = MODULE_DEFAULT;
+  module_ctx->module_jit_cache_disable = MODULE_DEFAULT;
+  module_ctx->module_kernel_accel_max = MODULE_DEFAULT;
+  module_ctx->module_kernel_accel_min = MODULE_DEFAULT;
+  module_ctx->module_kernel_loops_max = MODULE_DEFAULT;
+  module_ctx->module_kernel_loops_min = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_max = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_min = MODULE_DEFAULT;
+  module_ctx->module_kern_type = module_kern_type; // returns KERN_TYPE (31000)
+  module_ctx->module_kern_type_dynamic = MODULE_DEFAULT;
+  module_ctx->module_opti_type = module_opti_type;
+  module_ctx->module_opts_type = module_opts_type;
+  module_ctx->module_outfile_check_disable = MODULE_DEFAULT;
+  module_ctx->module_outfile_check_nocomp = MODULE_DEFAULT;
+  module_ctx->module_potfile_custom_check = MODULE_DEFAULT;
+  module_ctx->module_potfile_disable = 
MODULE_DEFAULT;
+  module_ctx->module_potfile_keep_all_hashes = MODULE_DEFAULT;
+  module_ctx->module_pwdump_column = MODULE_DEFAULT;
+  module_ctx->module_pw_max = MODULE_DEFAULT; // no module-specific password length limits
+  module_ctx->module_pw_min = MODULE_DEFAULT;
+  module_ctx->module_salt_max = MODULE_DEFAULT;
+  module_ctx->module_salt_min = MODULE_DEFAULT;
+  module_ctx->module_salt_type = module_salt_type;
+  module_ctx->module_separator = MODULE_DEFAULT;
+  module_ctx->module_st_hash = module_st_hash;
+  module_ctx->module_st_pass = module_st_pass;
+  module_ctx->module_tmp_size = MODULE_DEFAULT;
+  module_ctx->module_unstable_warning = MODULE_DEFAULT;
+  module_ctx->module_warmup_disable = MODULE_DEFAULT;
+}
diff --git a/tools/test_modules/m31000.pm b/tools/test_modules/m31000.pm
new file mode 100644
index 000000000..4c7ef55b2
--- /dev/null
+++ b/tools/test_modules/m31000.pm
@@ -0,0 +1,42 @@
+#!/usr/bin/env perl
+
+##
+## Author......: See docs/credits.txt
+## License.....: MIT
+##
+
+use strict;
+use warnings;
+
+use Crypt::Digest::BLAKE2s_256 qw (blake2s_256_hex);
+
+sub module_constraints { [[0, 128], [-1, -1], [0, 64], [-1, -1], [-1, -1]] }
+# Returns "$BLAKE2$" followed by the lower-case hex BLAKE2s-256 digest of the given word.
+sub module_generate_hash
+{
+  my $word = shift;
+
+  my $digest = blake2s_256_hex ($word);
+  # explicit %s: the digest is data and must never be interpreted as a format string
+  my $hash = sprintf ("\$BLAKE2\$%s", lc ($digest));
+
+  return $hash;
+}
+# Splits a "hash:word" line, re-hashes the word and returns (recomputed hash, word); returns nothing on a malformed line.
+sub module_verify_hash
+{
+  my $line = shift;
+  # limit 2: the hash contains no ':', so everything after the first ':' is the word, even if it contains ':'
+  my ($hash, $word) = split (':', $line, 2);
+
+  return unless defined $hash;
+  return unless defined $word;
+
+  my $word_packed = pack_if_HEX_notation ($word);
+
+  my $new_hash = module_generate_hash ($word_packed);
+
+  return ($new_hash, $word);
+}
+
+1;