diff --git a/OpenCL/inc_hash_md4.cl b/OpenCL/inc_hash_md4.cl
index 3fc0220b7..af0a14052 100644
--- a/OpenCL/inc_hash_md4.cl
+++ b/OpenCL/inc_hash_md4.cl
@@ -286,6 +286,92 @@ void md4_update_global (md4_ctx_t *ctx, const __global u32 *w, const int len)
   md4_update_64 (ctx, w0, w1, w2, w3, len - pos1);
 }
 
+void md4_update_global_swap (md4_ctx_t *ctx, const __global u32 *w, const int len) // feeds len bytes read from w into ctx, running every 32-bit word through swap32_S first
+{
+  u32 w0[4];
+  u32 w1[4];
+  u32 w2[4];
+  u32 w3[4];
+
+  int pos1; // byte offset into the input, advances by 64 per chunk
+  int pos4; // u32 index into w, advances by 16 per chunk
+
+  for (pos1 = 0, pos4 = 0; pos1 < len - 64; pos1 += 64, pos4 += 16) // strict '<': the last chunk, even a full 64-byte one, is left to the tail code below
+  {
+    w0[0] = w[pos4 + 0];
+    w0[1] = w[pos4 + 1];
+    w0[2] = w[pos4 + 2];
+    w0[3] = w[pos4 + 3];
+    w1[0] = w[pos4 + 4];
+    w1[1] = w[pos4 + 5];
+    w1[2] = w[pos4 + 6];
+    w1[3] = w[pos4 + 7];
+    w2[0] = w[pos4 + 8];
+    w2[1] = w[pos4 + 9];
+    w2[2] = w[pos4 + 10];
+    w2[3] = w[pos4 + 11];
+    w3[0] = w[pos4 + 12];
+    w3[1] = w[pos4 + 13];
+    w3[2] = w[pos4 + 14];
+    w3[3] = w[pos4 + 15];
+
+    w0[0] = swap32_S (w0[0]);
+    w0[1] = swap32_S (w0[1]);
+    w0[2] = swap32_S (w0[2]);
+    w0[3] = swap32_S (w0[3]);
+    w1[0] = swap32_S (w1[0]);
+    w1[1] = swap32_S (w1[1]);
+    w1[2] = swap32_S (w1[2]);
+    w1[3] = swap32_S (w1[3]);
+    w2[0] = swap32_S (w2[0]);
+    w2[1] = swap32_S (w2[1]);
+    w2[2] = swap32_S (w2[2]);
+    w2[3] = swap32_S (w2[3]);
+    w3[0] = swap32_S (w3[0]);
+    w3[1] = swap32_S (w3[1]);
+    w3[2] = swap32_S (w3[2]);
+    w3[3] = swap32_S (w3[3]);
+
+    md4_update_64 (ctx, w0, w1, w2, w3, 64);
+  }
+
+  w0[0] = w[pos4 + 0]; // tail: always loads 16 words, even when fewer than 64 bytes remain - NOTE(review): assumes w is readable that far, confirm with callers
+  w0[1] = w[pos4 + 1];
+  w0[2] = w[pos4 + 2];
+  w0[3] = w[pos4 + 3];
+  w1[0] = w[pos4 + 4];
+  w1[1] = w[pos4 + 5];
+  w1[2] = w[pos4 + 6];
+  w1[3] = w[pos4 + 7];
+  w2[0] = w[pos4 + 8];
+  w2[1] = w[pos4 + 9];
+  w2[2] = w[pos4 + 10];
+  w2[3] = w[pos4 + 11];
+  w3[0] = w[pos4 + 12];
+  w3[1] = w[pos4 + 13];
+  w3[2] = w[pos4 + 14];
+  w3[3] = w[pos4 + 15];
+
+  w0[0] = swap32_S (w0[0]);
+  w0[1] = swap32_S (w0[1]);
+  w0[2] = swap32_S (w0[2]);
+  w0[3] = swap32_S (w0[3]);
+  w1[0] = swap32_S (w1[0]);
+  w1[1] = swap32_S (w1[1]);
+  w1[2] = swap32_S (w1[2]);
+  w1[3] = swap32_S (w1[3]);
+  w2[0] = swap32_S (w2[0]);
+  w2[1] = swap32_S (w2[1]);
+  w2[2] = swap32_S (w2[2]);
+  w2[3] = swap32_S (w2[3]);
+  w3[0] = swap32_S (w3[0]);
+  w3[1] = swap32_S (w3[1]);
+  w3[2] = swap32_S (w3[2]);
+  w3[3] = swap32_S (w3[3]);
+
+  md4_update_64 (ctx, w0, w1, w2, w3, len - pos1); // only the remaining len - pos1 bytes are counted
+}
+
 void md4_update_global_utf16le (md4_ctx_t *ctx, const __global u32 *w, const int len)
 {
   u32 w0[4];
@@ -328,6 +414,82 @@ void md4_update_global_utf16le (md4_ctx_t *ctx, const __global u32 *w, const int
   md4_update_64 (ctx, w0, w1, w2, w3, (len - pos1) * 2);
 }
 
+void md4_update_global_utf16le_swap (md4_ctx_t *ctx, const __global u32 *w, const int len) // feeds w into ctx in 32-byte input chunks: make_utf16le_S first, then swap32_S on every word; len counts input bytes
+{
+  u32 w0[4];
+  u32 w1[4];
+  u32 w2[4];
+  u32 w3[4];
+
+  int pos1; // input byte offset, advances by 32 per chunk
+  int pos4; // u32 index into w, advances by 8 per chunk
+
+  for (pos1 = 0, pos4 = 0; pos1 < len - 32; pos1 += 32, pos4 += 8) // strict '<': the last chunk is left to the tail code below
+  {
+    w0[0] = w[pos4 + 0];
+    w0[1] = w[pos4 + 1];
+    w0[2] = w[pos4 + 2];
+    w0[3] = w[pos4 + 3];
+    w1[0] = w[pos4 + 4];
+    w1[1] = w[pos4 + 5];
+    w1[2] = w[pos4 + 6];
+    w1[3] = w[pos4 + 7];
+
+    make_utf16le_S (w1, w2, w3); // w1 must go first because the next call overwrites it - presumably make_utf16le_S (in, out_a, out_b) widens 4 words into 8, TODO confirm
+    make_utf16le_S (w0, w0, w1);
+
+    w0[0] = swap32_S (w0[0]);
+    w0[1] = swap32_S (w0[1]);
+    w0[2] = swap32_S (w0[2]);
+    w0[3] = swap32_S (w0[3]);
+    w1[0] = swap32_S (w1[0]);
+    w1[1] = swap32_S (w1[1]);
+    w1[2] = swap32_S (w1[2]);
+    w1[3] = swap32_S (w1[3]);
+    w2[0] = swap32_S (w2[0]);
+    w2[1] = swap32_S (w2[1]);
+    w2[2] = swap32_S (w2[2]);
+    w2[3] = swap32_S (w2[3]);
+    w3[0] = swap32_S (w3[0]);
+    w3[1] = swap32_S (w3[1]);
+    w3[2] = swap32_S (w3[2]);
+    w3[3] = swap32_S (w3[3]);
+
+    md4_update_64 (ctx, w0, w1, w2, w3, 32 * 2); // 64 bytes are hashed for every 32 input bytes
+  }
+
+  w0[0] = w[pos4 + 0]; // tail: always loads 8 words, even when fewer than 32 bytes remain - NOTE(review): assumes w is readable that far, confirm with callers
+  w0[1] = w[pos4 + 1];
+  w0[2] = w[pos4 + 2];
+  w0[3] = w[pos4 + 3];
+  w1[0] = w[pos4 + 4];
+  w1[1] = w[pos4 + 5];
+  w1[2] = w[pos4 + 6];
+  w1[3] = w[pos4 + 7];
+
+  make_utf16le_S (w1, w2, w3); // same ordering constraint as inside the loop
+  make_utf16le_S (w0, w0, w1);
+
+  w0[0] = swap32_S (w0[0]);
+  w0[1] = swap32_S (w0[1]);
+  w0[2] = swap32_S (w0[2]);
+  w0[3] = swap32_S (w0[3]);
+  w1[0] = swap32_S (w1[0]);
+  w1[1] = swap32_S (w1[1]);
+  w1[2] = swap32_S (w1[2]);
+  w1[3] = swap32_S (w1[3]);
+  w2[0] = swap32_S (w2[0]);
+  w2[1] = swap32_S (w2[1]);
+  w2[2] = swap32_S (w2[2]);
+  w2[3] = swap32_S (w2[3]);
+  w3[0] = swap32_S (w3[0]);
+  w3[1] = swap32_S (w3[1]);
+  w3[2] = swap32_S (w3[2]);
+  w3[3] = swap32_S (w3[3]);
+
+  md4_update_64 (ctx, w0, w1, w2, w3, (len - pos1) * 2); // hashed length is twice the remaining input length
+}
+
 void md4_final (md4_ctx_t *ctx)
 {
   const int pos = ctx->len & 63;
@@ -362,6 +524,130 @@ void md4_final (md4_ctx_t *ctx)
   md4_transform (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->h);
 }
 
+// md4_hmac: HMAC construction built from two md4 contexts
+
+typedef struct md4_hmac_ctx
+{
+  md4_ctx_t ipad; // inner hash: key words ^ 0x36363636, then the message
+  md4_ctx_t opad; // outer hash: key words ^ 0x5c5c5c5c, then the inner digest
+
+} md4_hmac_ctx_t;
+
+void md4_hmac_init (md4_hmac_ctx_t *ctx, const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4]) // starts both contexts from the 16 key words w0..w3; no key length is taken, the key is used as one full 64-byte block
+{
+  u32 t0[4];
+  u32 t1[4];
+  u32 t2[4];
+  u32 t3[4];
+
+  // ipad
+
+  t0[0] = w0[0] ^ 0x36363636;
+  t0[1] = w0[1] ^ 0x36363636;
+  t0[2] = w0[2] ^ 0x36363636;
+  t0[3] = w0[3] ^ 0x36363636;
+  t1[0] = w1[0] ^ 0x36363636;
+  t1[1] = w1[1] ^ 0x36363636;
+  t1[2] = w1[2] ^ 0x36363636;
+  t1[3] = w1[3] ^ 0x36363636;
+  t2[0] = w2[0] ^ 0x36363636;
+  t2[1] = w2[1] ^ 0x36363636;
+  t2[2] = w2[2] ^ 0x36363636;
+  t2[3] = w2[3] ^ 0x36363636;
+  t3[0] = w3[0] ^ 0x36363636;
+  t3[1] = w3[1] ^ 0x36363636;
+  t3[2] = w3[2] ^ 0x36363636;
+  t3[3] = w3[3] ^ 0x36363636;
+
+  md4_init (&ctx->ipad);
+
+  md4_update_64 (&ctx->ipad, t0, t1, t2, t3, 64); // the masked key is the first 64 bytes of the inner hash
+
+  // opad
+
+  t0[0] = w0[0] ^ 0x5c5c5c5c; // recomputed from the untouched key words, not from the ipad values in t0..t3
+  t0[1] = w0[1] ^ 0x5c5c5c5c;
+  t0[2] = w0[2] ^ 0x5c5c5c5c;
+  t0[3] = w0[3] ^ 0x5c5c5c5c;
+  t1[0] = w1[0] ^ 0x5c5c5c5c;
+  t1[1] = w1[1] ^ 0x5c5c5c5c;
+  t1[2] = w1[2] ^ 0x5c5c5c5c;
+  t1[3] = w1[3] ^ 0x5c5c5c5c;
+  t2[0] = w2[0] ^ 0x5c5c5c5c;
+  t2[1] = w2[1] ^ 0x5c5c5c5c;
+  t2[2] = w2[2] ^ 0x5c5c5c5c;
+  t2[3] = w2[3] ^ 0x5c5c5c5c;
+  t3[0] = w3[0] ^ 0x5c5c5c5c;
+  t3[1] = w3[1] ^ 0x5c5c5c5c;
+  t3[2] = w3[2] ^ 0x5c5c5c5c;
+  t3[3] = w3[3] ^ 0x5c5c5c5c;
+
+  md4_init (&ctx->opad);
+
+  md4_update_64 (&ctx->opad, t0, t1, t2, t3, 64); // the masked key is the first 64 bytes of the outer hash
+}
+
+void md4_hmac_update_64 (md4_hmac_ctx_t *ctx, u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const int len) // message data goes to the inner context only
+{
+  md4_update_64 (&ctx->ipad, w0, w1, w2, w3, len);
+}
+
+void md4_hmac_update (md4_hmac_ctx_t *ctx, const u32 *w, const int len) // forwards to md4_update on the inner context
+{
+  md4_update (&ctx->ipad, w, len);
+}
+
+void md4_hmac_update_global (md4_hmac_ctx_t *ctx, const __global u32 *w, const int len) // forwards to md4_update_global on the inner context
+{
+  md4_update_global (&ctx->ipad, w, len);
+}
+
+void md4_hmac_update_global_swap (md4_hmac_ctx_t *ctx, const __global u32 *w, const int len) // forwards to md4_update_global_swap on the inner context
+{
+  md4_update_global_swap (&ctx->ipad, w, len);
+}
+
+void md4_hmac_update_global_utf16le (md4_hmac_ctx_t *ctx, const __global u32 *w, const int len) // forwards to md4_update_global_utf16le on the inner context
+{
+  md4_update_global_utf16le (&ctx->ipad, w, len);
+}
+
+void md4_hmac_update_global_utf16le_swap (md4_hmac_ctx_t *ctx, const __global u32 *w, const int len) // forwards to md4_update_global_utf16le_swap on the inner context
+{
+  md4_update_global_utf16le_swap (&ctx->ipad, w, len);
+}
+
+void md4_hmac_final (md4_hmac_ctx_t *ctx) // finishes the inner hash, appends its four state words to the outer hash, then finishes the outer hash
+{
+  md4_final (&ctx->ipad);
+
+  u32 t0[4];
+  u32 t1[4];
+  u32 t2[4];
+  u32 t3[4];
+
+  t0[0] = ctx->ipad.h[0]; // inner digest: the four words of ipad.h
+  t0[1] = ctx->ipad.h[1];
+  t0[2] = ctx->ipad.h[2];
+  t0[3] = ctx->ipad.h[3];
+  t1[0] = 0; // the other 12 words are zeroed; only 16 bytes are counted below
+  t1[1] = 0;
+  t1[2] = 0;
+  t1[3] = 0;
+  t2[0] = 0;
+  t2[1] = 0;
+  t2[2] = 0;
+  t2[3] = 0;
+  t3[0] = 0;
+  t3[1] = 0;
+  t3[2] = 0;
+  t3[3] = 0;
+
+  md4_update_64 (&ctx->opad, t0, t1, t2, t3, 16); // 16 = size of the four-word inner digest
+
+  md4_final (&ctx->opad); // NOTE(review): the result is presumably left in ctx->opad.h - confirm against md4_final
+}
+
 // while input buf can be a vector datatype, the length of the different elements can not
 
 typedef struct md4_ctx_vector