From b7dffd92590d28784a38568a47d3be45fc9eb500 Mon Sep 17 00:00:00 2001 From: Jens Steube Date: Fri, 30 Apr 2021 16:55:30 +0200 Subject: [PATCH] Improve performance for UTF8->UTF16 conversion Reverted https://github.com/hashcat/hashcat/commit/d343e2c4a0699bb8a6fd8d47b06fe4f5a157c5e8 and https://github.com/hashcat/hashcat/commit/ee26805138dcb4627e94a9b7ca7125d47d8b7297 Adds a test to decide whatever conversion technique to use. If all UTF8 characters are 7 bit, there's no need for regular conversion and we can stick to naive conversion. --- OpenCL/inc_common.cl | 14 + OpenCL/inc_common.h | 1 + OpenCL/inc_hash_md4.cl | 312 +++++++++++++++++- OpenCL/inc_hash_md4.h | 4 + OpenCL/inc_hash_md5.cl | 312 +++++++++++++++++- OpenCL/inc_hash_md5.h | 4 + OpenCL/inc_hash_ripemd160.cl | 312 +++++++++++++++++- OpenCL/inc_hash_ripemd160.h | 4 + OpenCL/inc_hash_sha1.cl | 312 +++++++++++++++++- OpenCL/inc_hash_sha1.h | 4 + OpenCL/inc_hash_sha224.cl | 312 +++++++++++++++++- OpenCL/inc_hash_sha224.h | 4 + OpenCL/inc_hash_sha256.cl | 312 +++++++++++++++++- OpenCL/inc_hash_sha256.h | 4 + OpenCL/inc_hash_sha384.cl | 472 ++++++++++++++++++++++++++- OpenCL/inc_hash_sha384.h | 4 + OpenCL/inc_hash_sha512.cl | 608 +++++++++++++++++++++++++++++++++-- OpenCL/inc_hash_sha512.h | 5 +- OpenCL/inc_hash_whirlpool.cl | 312 +++++++++++++++++- OpenCL/inc_hash_whirlpool.h | 4 + OpenCL/m02100-pure.cl | 5 - 21 files changed, 3197 insertions(+), 124 deletions(-) diff --git a/OpenCL/inc_common.cl b/OpenCL/inc_common.cl index eb87059cf..5bb5596b5 100644 --- a/OpenCL/inc_common.cl +++ b/OpenCL/inc_common.cl @@ -1981,6 +1981,20 @@ DECLSPEC int find_hash (const u32 *digest, const u32 digests_cnt, GLOBAL_AS cons } #endif +// Input has to be zero padded and buffer size has to be multiple of 4 + +DECLSPEC int test_any_8th_bit (const u32 *buf, const int len) +{ + for (int i = 0, j = 0; i < len; i += 4, j += 1) + { + const u32 v = buf[j]; + + if (v & 0x80808080) return 1; + } + + return 0; +} + // Constants and some code snippets from unicode.org's ConvertUTF.c // Compiler can perfectly translate some of the branches and switch cases this into MOVC // which is faster than lookup tables diff --git a/OpenCL/inc_common.h b/OpenCL/inc_common.h index 86e3b7e7f..e40914603 100644 --- a/OpenCL/inc_common.h +++ b/OpenCL/inc_common.h @@ -236,6 +236,7 @@ DECLSPEC int hash_comp (const u32 *d1, GLOBAL_AS const u32 *d2); DECLSPEC int find_hash (const u32 *digest, const u32 digests_cnt, GLOBAL_AS const digest_t *digests_buf); #endif +DECLSPEC int test_any_8th_bit (const u32 *buf, const int len); DECLSPEC int utf8_to_utf16le (const u32 *src_buf, int src_len, int src_size, u32 *dst_buf, int dst_size); DECLSPEC int utf8_to_utf16le_global (GLOBAL_AS const u32 *src_buf, int src_len, int src_size, u32 *dst_buf, int dst_size); DECLSPEC int pkcs_padding_bs8 (const u32 *data_buf, const int data_len); diff --git a/OpenCL/inc_hash_md4.cl b/OpenCL/inc_hash_md4.cl index 149e7002f..addf03945 100644 --- a/OpenCL/inc_hash_md4.cl +++ b/OpenCL/inc_hash_md4.cl @@ -363,20 +363,154 @@ DECLSPEC void md4_update_swap (md4_ctx_t *ctx, const u32 *w, const int len) DECLSPEC void md4_update_utf16le (md4_ctx_t *ctx, const u32 *w, const int len) { - u32 w_utf16_buf[256] = { 0 }; + if (test_any_8th_bit (w, len) == 1) + { + u32 w_utf16_buf[256]; + + const int w_utf16_len = utf8_to_utf16le (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + + const int blkoff = (w_utf16_len / 64) * 16; + + u32 *w_ptr = w_utf16_buf + blkoff; + + truncate_block_16x4_le_S (w_ptr + 0, w_ptr + 4, w_ptr + 8, w_ptr + 12, w_utf16_len & 63); + + md4_update (ctx, w_utf16_buf, w_utf16_len); + + return; + } + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + int pos1; + int pos4; - const int w_utf16_len = utf8_to_utf16le (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + for (pos1 = 0, pos4 = 0; pos1 < len - 32; pos1 += 32, pos4 += 8) + { + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); - md4_update (ctx, w_utf16_buf, w_utf16_len); + md4_update_64 (ctx, w0, w1, w2, w3, 32 * 2); + } + + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); + + md4_update_64 (ctx, w0, w1, w2, w3, (len - pos1) * 2); } DECLSPEC void md4_update_utf16le_swap (md4_ctx_t *ctx, const u32 *w, const int len) { - u32 w_utf16_buf[256] = { 0 }; + if (test_any_8th_bit (w, len) == 1) + { + u32 w_utf16_buf[256]; + + const int w_utf16_len = utf8_to_utf16le (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + + const int blkoff = (w_utf16_len / 64) * 16; + + u32 *w_ptr = w_utf16_buf + blkoff; + + truncate_block_16x4_le_S (w_ptr + 0, w_ptr + 4, w_ptr + 8, w_ptr + 12, w_utf16_len & 63); + + md4_update_swap (ctx, w_utf16_buf, w_utf16_len); + + return; + } + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + int pos1; + int pos4; + + for (pos1 = 0, pos4 = 0; pos1 < len - 32; pos1 += 32, pos4 += 8) + { + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); + + w0[0] = hc_swap32_S (w0[0]); + w0[1] = hc_swap32_S (w0[1]); + w0[2] = hc_swap32_S (w0[2]); + w0[3] = hc_swap32_S (w0[3]); + w1[0] = hc_swap32_S (w1[0]); + w1[1] = hc_swap32_S (w1[1]); + w1[2] = hc_swap32_S (w1[2]); + w1[3] = hc_swap32_S (w1[3]); + w2[0] = hc_swap32_S (w2[0]); + w2[1] = hc_swap32_S (w2[1]); + w2[2] = hc_swap32_S (w2[2]); + w2[3] = hc_swap32_S (w2[3]); + w3[0] = hc_swap32_S (w3[0]); + w3[1] = hc_swap32_S (w3[1]); + w3[2] = hc_swap32_S (w3[2]); + w3[3] = hc_swap32_S (w3[3]); - const int w_utf16_len = utf8_to_utf16le (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + md4_update_64 (ctx, w0, w1, w2, w3, 32 * 2); + } - md4_update_swap (ctx, w_utf16_buf, w_utf16_len); + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); + + w0[0] = hc_swap32_S (w0[0]); + w0[1] = hc_swap32_S (w0[1]); + w0[2] = hc_swap32_S (w0[2]); + w0[3] = hc_swap32_S (w0[3]); + w1[0] = hc_swap32_S (w1[0]); + w1[1] = hc_swap32_S (w1[1]); + w1[2] = hc_swap32_S (w1[2]); + w1[3] = hc_swap32_S (w1[3]); + w2[0] = hc_swap32_S (w2[0]); + w2[1] = hc_swap32_S (w2[1]); + w2[2] = hc_swap32_S (w2[2]); + w2[3] = hc_swap32_S (w2[3]); + w3[0] = hc_swap32_S (w3[0]); + w3[1] = hc_swap32_S (w3[1]); + w3[2] = hc_swap32_S (w3[2]); + w3[3] = hc_swap32_S (w3[3]); + + md4_update_64 (ctx, w0, w1, w2, w3, (len - pos1) * 2); } DECLSPEC void md4_update_global (md4_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) @@ -519,20 +653,154 @@ DECLSPEC void md4_update_global_swap (md4_ctx_t *ctx, GLOBAL_AS const u32 *w, co DECLSPEC void md4_update_global_utf16le (md4_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { - u32 w_utf16_buf[256] = { 0 }; + if (test_any_8th_bit (w, len) == 1) + { + u32 w_utf16_buf[256]; + + const int w_utf16_len = utf8_to_utf16le_global (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + + const int blkoff = (w_utf16_len / 64) * 16; + + u32 *w_ptr = w_utf16_buf + blkoff; + + truncate_block_16x4_le_S (w_ptr + 0, w_ptr + 4, w_ptr + 8, w_ptr + 12, w_utf16_len & 63); + + md4_update (ctx, w_utf16_buf, w_utf16_len); - const int w_utf16_len = utf8_to_utf16le_global (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + return; + } + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + int pos1; + int pos4; + + for (pos1 = 0, pos4 = 0; pos1 < len - 32; pos1 += 32, pos4 += 8) + { + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); + + md4_update_64 (ctx, w0, w1, w2, w3, 32 * 2); + } - md4_update (ctx, w_utf16_buf, w_utf16_len); + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); + + md4_update_64 (ctx, w0, w1, w2, w3, (len - pos1) * 2); } DECLSPEC void md4_update_global_utf16le_swap (md4_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { - u32 w_utf16_buf[256] = { 0 }; + if (test_any_8th_bit (w, len) == 1) + { + u32 w_utf16_buf[256]; + + const int w_utf16_len = utf8_to_utf16le_global (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + + const int blkoff = (w_utf16_len / 64) * 16; + + u32 *w_ptr = w_utf16_buf + blkoff; + + truncate_block_16x4_le_S (w_ptr + 0, w_ptr + 4, w_ptr + 8, w_ptr + 12, w_utf16_len & 63); + + md4_update_swap (ctx, w_utf16_buf, w_utf16_len); + + return; + } + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + int pos1; + int pos4; + + for (pos1 = 0, pos4 = 0; pos1 < len - 32; pos1 += 32, pos4 += 8) + { + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); + + w0[0] = hc_swap32_S (w0[0]); + w0[1] = hc_swap32_S (w0[1]); + w0[2] = hc_swap32_S (w0[2]); + w0[3] = hc_swap32_S (w0[3]); + w1[0] = hc_swap32_S (w1[0]); + w1[1] = hc_swap32_S (w1[1]); + w1[2] = hc_swap32_S (w1[2]); + w1[3] = hc_swap32_S (w1[3]); + w2[0] = hc_swap32_S (w2[0]); + w2[1] = hc_swap32_S (w2[1]); + w2[2] = hc_swap32_S (w2[2]); + w2[3] = hc_swap32_S (w2[3]); + w3[0] = hc_swap32_S (w3[0]); + w3[1] = hc_swap32_S (w3[1]); + w3[2] = hc_swap32_S (w3[2]); + w3[3] = hc_swap32_S (w3[3]); + + md4_update_64 (ctx, w0, w1, w2, w3, 32 * 2); + } + + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; - const int w_utf16_len = utf8_to_utf16le_global (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); - md4_update_swap (ctx, w_utf16_buf, w_utf16_len); + w0[0] = hc_swap32_S (w0[0]); + w0[1] = hc_swap32_S (w0[1]); + w0[2] = hc_swap32_S (w0[2]); + w0[3] = hc_swap32_S (w0[3]); + w1[0] = hc_swap32_S (w1[0]); + w1[1] = hc_swap32_S (w1[1]); + w1[2] = hc_swap32_S (w1[2]); + w1[3] = hc_swap32_S (w1[3]); + w2[0] = hc_swap32_S (w2[0]); + w2[1] = hc_swap32_S (w2[1]); + w2[2] = hc_swap32_S (w2[2]); + w2[3] = hc_swap32_S (w2[3]); + w3[0] = hc_swap32_S (w3[0]); + w3[1] = hc_swap32_S (w3[1]); + w3[2] = hc_swap32_S (w3[2]); + w3[3] = hc_swap32_S (w3[3]); + + md4_update_64 (ctx, w0, w1, w2, w3, (len - pos1) * 2); } DECLSPEC void md4_final (md4_ctx_t *ctx) @@ -868,6 +1136,16 @@ DECLSPEC void md4_hmac_update_swap (md4_hmac_ctx_t *ctx, const u32 *w, const int md4_update_swap (&ctx->ipad, w, len); } +DECLSPEC void md4_hmac_update_utf16le (md4_hmac_ctx_t *ctx, const u32 *w, const int len) +{ + md4_update_utf16le (&ctx->ipad, w, len); +} + +DECLSPEC void md4_hmac_update_utf16le_swap (md4_hmac_ctx_t *ctx, const u32 *w, const int len) +{ + md4_update_utf16le_swap (&ctx->ipad, w, len); +} + DECLSPEC void md4_hmac_update_global (md4_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { md4_update_global (&ctx->ipad, w, len); @@ -878,6 +1156,16 @@ DECLSPEC void md4_hmac_update_global_swap (md4_hmac_ctx_t *ctx, GLOBAL_AS const md4_update_global_swap (&ctx->ipad, w, len); } +DECLSPEC void md4_hmac_update_global_utf16le (md4_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) +{ + md4_update_global_utf16le (&ctx->ipad, w, len); +} + +DECLSPEC void md4_hmac_update_global_utf16le_swap (md4_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) +{ + md4_update_global_utf16le_swap (&ctx->ipad, w, len); +} + DECLSPEC void md4_hmac_final (md4_hmac_ctx_t *ctx) { md4_final (&ctx->ipad); diff --git a/OpenCL/inc_hash_md4.h b/OpenCL/inc_hash_md4.h index c8b3351a1..7c3b31894 100644 --- a/OpenCL/inc_hash_md4.h +++ b/OpenCL/inc_hash_md4.h @@ -102,8 +102,12 @@ DECLSPEC void md4_hmac_init_global_swap (md4_hmac_ctx_t *ctx, GLOBAL_AS const u3 DECLSPEC void md4_hmac_update_64 (md4_hmac_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int len); DECLSPEC void md4_hmac_update (md4_hmac_ctx_t *ctx, const u32 *w, const int len); DECLSPEC void md4_hmac_update_swap (md4_hmac_ctx_t *ctx, const u32 *w, const int len); +DECLSPEC void md4_hmac_update_utf16le (md4_hmac_ctx_t *ctx, const u32 *w, const int len); +DECLSPEC void md4_hmac_update_utf16le_swap (md4_hmac_ctx_t *ctx, const u32 *w, const int len); DECLSPEC void md4_hmac_update_global (md4_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len); DECLSPEC void md4_hmac_update_global_swap (md4_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len); +DECLSPEC void md4_hmac_update_global_utf16le (md4_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len); +DECLSPEC void md4_hmac_update_global_utf16le_swap (md4_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len); DECLSPEC void md4_hmac_final (md4_hmac_ctx_t *ctx); DECLSPEC void md4_transform_vector (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u32x *digest); DECLSPEC void md4_init_vector (md4_ctx_vector_t *ctx); diff --git a/OpenCL/inc_hash_md5.cl b/OpenCL/inc_hash_md5.cl index d268cd3c3..e3f374645 100644 --- a/OpenCL/inc_hash_md5.cl +++ b/OpenCL/inc_hash_md5.cl @@ -399,20 +399,154 @@ DECLSPEC void md5_update_swap (md5_ctx_t *ctx, const u32 *w, const int len) DECLSPEC void md5_update_utf16le (md5_ctx_t *ctx, const u32 *w, const int len) { - u32 w_utf16_buf[256] = { 0 }; + if (test_any_8th_bit (w, len) == 1) + { + u32 w_utf16_buf[256]; + + const int w_utf16_len = utf8_to_utf16le (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + + const int blkoff = (w_utf16_len / 64) * 16; + + u32 *w_ptr = w_utf16_buf + blkoff; + + truncate_block_16x4_le_S (w_ptr + 0, w_ptr + 4, w_ptr + 8, w_ptr + 12, w_utf16_len & 63); + + md5_update (ctx, w_utf16_buf, w_utf16_len); + + return; + } + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + int pos1; + int pos4; - const int w_utf16_len = utf8_to_utf16le (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + for (pos1 = 0, pos4 = 0; pos1 < len - 32; pos1 += 32, pos4 += 8) + { + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); - md5_update (ctx, w_utf16_buf, w_utf16_len); + md5_update_64 (ctx, w0, w1, w2, w3, 32 * 2); + } + + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); + + md5_update_64 (ctx, w0, w1, w2, w3, (len - pos1) * 2); } DECLSPEC void md5_update_utf16le_swap (md5_ctx_t *ctx, const u32 *w, const int len) { - u32 w_utf16_buf[256] = { 0 }; + if (test_any_8th_bit (w, len) == 1) + { + u32 w_utf16_buf[256]; + + const int w_utf16_len = utf8_to_utf16le (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + + const int blkoff = (w_utf16_len / 64) * 16; + + u32 *w_ptr = w_utf16_buf + blkoff; + + truncate_block_16x4_le_S (w_ptr + 0, w_ptr + 4, w_ptr + 8, w_ptr + 12, w_utf16_len & 63); + + md5_update_swap (ctx, w_utf16_buf, w_utf16_len); + + return; + } + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + int pos1; + int pos4; + + for (pos1 = 0, pos4 = 0; pos1 < len - 32; pos1 += 32, pos4 += 8) + { + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); + + w0[0] = hc_swap32_S (w0[0]); + w0[1] = hc_swap32_S (w0[1]); + w0[2] = hc_swap32_S (w0[2]); + w0[3] = hc_swap32_S (w0[3]); + w1[0] = hc_swap32_S (w1[0]); + w1[1] = hc_swap32_S (w1[1]); + w1[2] = hc_swap32_S (w1[2]); + w1[3] = hc_swap32_S (w1[3]); + w2[0] = hc_swap32_S (w2[0]); + w2[1] = hc_swap32_S (w2[1]); + w2[2] = hc_swap32_S (w2[2]); + w2[3] = hc_swap32_S (w2[3]); + w3[0] = hc_swap32_S (w3[0]); + w3[1] = hc_swap32_S (w3[1]); + w3[2] = hc_swap32_S (w3[2]); + w3[3] = hc_swap32_S (w3[3]); - const int w_utf16_len = utf8_to_utf16le (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + md5_update_64 (ctx, w0, w1, w2, w3, 32 * 2); + } - md5_update_swap (ctx, w_utf16_buf, w_utf16_len); + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); + + w0[0] = hc_swap32_S (w0[0]); + w0[1] = hc_swap32_S (w0[1]); + w0[2] = hc_swap32_S (w0[2]); + w0[3] = hc_swap32_S (w0[3]); + w1[0] = hc_swap32_S (w1[0]); + w1[1] = hc_swap32_S (w1[1]); + w1[2] = hc_swap32_S (w1[2]); + w1[3] = hc_swap32_S (w1[3]); + w2[0] = hc_swap32_S (w2[0]); + w2[1] = hc_swap32_S (w2[1]); + w2[2] = hc_swap32_S (w2[2]); + w2[3] = hc_swap32_S (w2[3]); + w3[0] = hc_swap32_S (w3[0]); + w3[1] = hc_swap32_S (w3[1]); + w3[2] = hc_swap32_S (w3[2]); + w3[3] = hc_swap32_S (w3[3]); + + md5_update_64 (ctx, w0, w1, w2, w3, (len - pos1) * 2); } DECLSPEC void md5_update_global (md5_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) @@ -555,20 +689,154 @@ DECLSPEC void md5_update_global_swap (md5_ctx_t *ctx, GLOBAL_AS const u32 *w, co DECLSPEC void md5_update_global_utf16le (md5_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { - u32 w_utf16_buf[256] = { 0 }; + if (test_any_8th_bit (w, len) == 1) + { + u32 w_utf16_buf[256]; + + const int w_utf16_len = utf8_to_utf16le_global (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + + const int blkoff = (w_utf16_len / 64) * 16; + + u32 *w_ptr = w_utf16_buf + blkoff; + + truncate_block_16x4_le_S (w_ptr + 0, w_ptr + 4, w_ptr + 8, w_ptr + 12, w_utf16_len & 63); + + md5_update (ctx, w_utf16_buf, w_utf16_len); - const int w_utf16_len = utf8_to_utf16le_global (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + return; + } + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + int pos1; + int pos4; + + for (pos1 = 0, pos4 = 0; pos1 < len - 32; pos1 += 32, pos4 += 8) + { + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); + + md5_update_64 (ctx, w0, w1, w2, w3, 32 * 2); + } - md5_update (ctx, w_utf16_buf, w_utf16_len); + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); + + md5_update_64 (ctx, w0, w1, w2, w3, (len - pos1) * 2); } DECLSPEC void md5_update_global_utf16le_swap (md5_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { - u32 w_utf16_buf[256] = { 0 }; + if (test_any_8th_bit (w, len) == 1) + { + u32 w_utf16_buf[256]; + + const int w_utf16_len = utf8_to_utf16le_global (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + + const int blkoff = (w_utf16_len / 64) * 16; + + u32 *w_ptr = w_utf16_buf + blkoff; + + truncate_block_16x4_le_S (w_ptr + 0, w_ptr + 4, w_ptr + 8, w_ptr + 12, w_utf16_len & 63); + + md5_update_swap (ctx, w_utf16_buf, w_utf16_len); + + return; + } + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + int pos1; + int pos4; + + for (pos1 = 0, pos4 = 0; pos1 < len - 32; pos1 += 32, pos4 += 8) + { + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); + + w0[0] = hc_swap32_S (w0[0]); + w0[1] = hc_swap32_S (w0[1]); + w0[2] = hc_swap32_S (w0[2]); + w0[3] = hc_swap32_S (w0[3]); + w1[0] = hc_swap32_S (w1[0]); + w1[1] = hc_swap32_S (w1[1]); + w1[2] = hc_swap32_S (w1[2]); + w1[3] = hc_swap32_S (w1[3]); + w2[0] = hc_swap32_S (w2[0]); + w2[1] = hc_swap32_S (w2[1]); + w2[2] = hc_swap32_S (w2[2]); + w2[3] = hc_swap32_S (w2[3]); + w3[0] = hc_swap32_S (w3[0]); + w3[1] = hc_swap32_S (w3[1]); + w3[2] = hc_swap32_S (w3[2]); + w3[3] = hc_swap32_S (w3[3]); + + md5_update_64 (ctx, w0, w1, w2, w3, 32 * 2); + } + + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; - const int w_utf16_len = utf8_to_utf16le_global (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); - md5_update_swap (ctx, w_utf16_buf, w_utf16_len); + w0[0] = hc_swap32_S (w0[0]); + w0[1] = hc_swap32_S (w0[1]); + w0[2] = hc_swap32_S (w0[2]); + w0[3] = hc_swap32_S (w0[3]); + w1[0] = hc_swap32_S (w1[0]); + w1[1] = hc_swap32_S (w1[1]); + w1[2] = hc_swap32_S (w1[2]); + w1[3] = hc_swap32_S (w1[3]); + w2[0] = hc_swap32_S (w2[0]); + w2[1] = hc_swap32_S (w2[1]); + w2[2] = hc_swap32_S (w2[2]); + w2[3] = hc_swap32_S (w2[3]); + w3[0] = hc_swap32_S (w3[0]); + w3[1] = hc_swap32_S (w3[1]); + w3[2] = hc_swap32_S (w3[2]); + w3[3] = hc_swap32_S (w3[3]); + + md5_update_64 (ctx, w0, w1, w2, w3, (len - pos1) * 2); } DECLSPEC void md5_final (md5_ctx_t *ctx) @@ -904,6 +1172,16 @@ DECLSPEC void md5_hmac_update_swap (md5_hmac_ctx_t *ctx, const u32 *w, const int md5_update_swap (&ctx->ipad, w, len); } +DECLSPEC void md5_hmac_update_utf16le (md5_hmac_ctx_t *ctx, const u32 *w, const int len) +{ + md5_update_utf16le (&ctx->ipad, w, len); +} + +DECLSPEC void md5_hmac_update_utf16le_swap (md5_hmac_ctx_t *ctx, const u32 *w, const int len) +{ + md5_update_utf16le_swap (&ctx->ipad, w, len); +} + DECLSPEC void md5_hmac_update_global (md5_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { md5_update_global (&ctx->ipad, w, len); @@ -914,6 +1192,16 @@ DECLSPEC void md5_hmac_update_global_swap (md5_hmac_ctx_t *ctx, GLOBAL_AS const md5_update_global_swap (&ctx->ipad, w, len); } +DECLSPEC void md5_hmac_update_global_utf16le (md5_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) +{ + md5_update_global_utf16le (&ctx->ipad, w, len); +} + +DECLSPEC void md5_hmac_update_global_utf16le_swap (md5_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) +{ + md5_update_global_utf16le_swap (&ctx->ipad, w, len); +} + DECLSPEC void md5_hmac_final (md5_hmac_ctx_t *ctx) { md5_final (&ctx->ipad); diff --git a/OpenCL/inc_hash_md5.h b/OpenCL/inc_hash_md5.h index 273a35bb3..1e6eaaf93 100644 --- a/OpenCL/inc_hash_md5.h +++ b/OpenCL/inc_hash_md5.h @@ -109,8 +109,12 @@ DECLSPEC void md5_hmac_init_global_swap (md5_hmac_ctx_t *ctx, GLOBAL_AS const u3 DECLSPEC void md5_hmac_update_64 (md5_hmac_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int len); DECLSPEC void md5_hmac_update (md5_hmac_ctx_t *ctx, const u32 *w, const int len); DECLSPEC void md5_hmac_update_swap (md5_hmac_ctx_t *ctx, const u32 *w, const int len); +DECLSPEC void md5_hmac_update_utf16le (md5_hmac_ctx_t *ctx, const u32 *w, const int len); +DECLSPEC void md5_hmac_update_utf16le_swap (md5_hmac_ctx_t *ctx, const u32 *w, const int len); DECLSPEC void md5_hmac_update_global (md5_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len); DECLSPEC void md5_hmac_update_global_swap (md5_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len); +DECLSPEC void md5_hmac_update_global_utf16le (md5_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len); +DECLSPEC void md5_hmac_update_global_utf16le_swap (md5_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len); DECLSPEC void md5_hmac_final (md5_hmac_ctx_t *ctx); DECLSPEC void md5_transform_vector (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u32x *digest); DECLSPEC void md5_init_vector (md5_ctx_vector_t *ctx); diff --git a/OpenCL/inc_hash_ripemd160.cl b/OpenCL/inc_hash_ripemd160.cl index d100ebf70..c222a5f6b 100644 --- a/OpenCL/inc_hash_ripemd160.cl +++ b/OpenCL/inc_hash_ripemd160.cl @@ -497,20 +497,154 @@ DECLSPEC void ripemd160_update_swap (ripemd160_ctx_t *ctx, const u32 *w, const i DECLSPEC void ripemd160_update_utf16le (ripemd160_ctx_t *ctx, const u32 *w, const int len) { - u32 w_utf16_buf[256] = { 0 }; + if (test_any_8th_bit (w, len) == 1) + { + u32 w_utf16_buf[256]; + + const int w_utf16_len = utf8_to_utf16le (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + + const int blkoff = (w_utf16_len / 64) * 16; + + u32 *w_ptr = w_utf16_buf + blkoff; + + truncate_block_16x4_le_S (w_ptr + 0, w_ptr + 4, w_ptr + 8, w_ptr + 12, w_utf16_len & 63); + + ripemd160_update (ctx, w_utf16_buf, w_utf16_len); + + return; + } + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + int pos1; + int pos4; - const int w_utf16_len = utf8_to_utf16le (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + for (pos1 = 0, pos4 = 0; pos1 < len - 32; pos1 += 32, pos4 += 8) + { + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); - ripemd160_update (ctx, w_utf16_buf, w_utf16_len); + ripemd160_update_64 (ctx, w0, w1, w2, w3, 32 * 2); + } + + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); + + ripemd160_update_64 (ctx, w0, w1, w2, w3, (len - pos1) * 2); } DECLSPEC void ripemd160_update_utf16le_swap (ripemd160_ctx_t *ctx, const u32 *w, const int len) { - u32 w_utf16_buf[256] = { 0 }; + if (test_any_8th_bit (w, len) == 1) + { + u32 w_utf16_buf[256]; + + const int w_utf16_len = utf8_to_utf16le (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + + const int blkoff = (w_utf16_len / 64) * 16; + + u32 *w_ptr = w_utf16_buf + blkoff; + + truncate_block_16x4_le_S (w_ptr + 0, w_ptr + 4, w_ptr + 8, w_ptr + 12, w_utf16_len & 63); + + ripemd160_update_swap (ctx, w_utf16_buf, w_utf16_len); + + return; + } + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + int pos1; + int pos4; + + for (pos1 = 0, pos4 = 0; pos1 < len - 32; pos1 += 32, pos4 += 8) + { + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); + + w0[0] = hc_swap32_S (w0[0]); + w0[1] = hc_swap32_S (w0[1]); + w0[2] = hc_swap32_S (w0[2]); + w0[3] = hc_swap32_S (w0[3]); + w1[0] = hc_swap32_S (w1[0]); + w1[1] = hc_swap32_S (w1[1]); + w1[2] = hc_swap32_S (w1[2]); + w1[3] = hc_swap32_S (w1[3]); + w2[0] = hc_swap32_S (w2[0]); + w2[1] = hc_swap32_S (w2[1]); + w2[2] = hc_swap32_S (w2[2]); + w2[3] = hc_swap32_S (w2[3]); + w3[0] = hc_swap32_S (w3[0]); + w3[1] = hc_swap32_S (w3[1]); + w3[2] = hc_swap32_S (w3[2]); + w3[3] = hc_swap32_S (w3[3]); - const int w_utf16_len = utf8_to_utf16le (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + ripemd160_update_64 (ctx, w0, w1, w2, w3, 32 * 2); + } - ripemd160_update_swap (ctx, w_utf16_buf, w_utf16_len); + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); + + w0[0] = hc_swap32_S (w0[0]); + w0[1] = hc_swap32_S (w0[1]); + w0[2] = hc_swap32_S (w0[2]); + w0[3] = hc_swap32_S (w0[3]); + w1[0] = hc_swap32_S (w1[0]); + w1[1] = hc_swap32_S (w1[1]); + w1[2] = hc_swap32_S (w1[2]); + w1[3] = hc_swap32_S (w1[3]); + w2[0] = hc_swap32_S (w2[0]); + w2[1] = hc_swap32_S (w2[1]); + w2[2] = hc_swap32_S (w2[2]); + w2[3] = hc_swap32_S (w2[3]); + w3[0] = hc_swap32_S (w3[0]); + w3[1] = hc_swap32_S (w3[1]); + w3[2] = hc_swap32_S (w3[2]); + w3[3] = hc_swap32_S (w3[3]); + + ripemd160_update_64 (ctx, w0, w1, w2, w3, (len - pos1) * 2); } DECLSPEC void ripemd160_update_global (ripemd160_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) @@ -653,20 +787,154 @@ DECLSPEC void ripemd160_update_global_swap (ripemd160_ctx_t *ctx, GLOBAL_AS cons DECLSPEC void ripemd160_update_global_utf16le (ripemd160_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { - u32 w_utf16_buf[256] = { 0 }; + if (test_any_8th_bit (w, len) == 1) + { + u32 w_utf16_buf[256]; + + const int w_utf16_len = utf8_to_utf16le_global (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + + const int blkoff = (w_utf16_len / 64) * 16; + + u32 *w_ptr = w_utf16_buf + blkoff; + + truncate_block_16x4_le_S (w_ptr + 0, w_ptr + 4, w_ptr + 8, w_ptr + 12, w_utf16_len & 63); + + ripemd160_update (ctx, w_utf16_buf, w_utf16_len); - const int w_utf16_len = utf8_to_utf16le_global (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + return; + } + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + int pos1; + int pos4; + + for (pos1 = 0, pos4 = 0; pos1 < len - 32; pos1 += 32, pos4 += 8) + { + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); + + ripemd160_update_64 (ctx, w0, w1, w2, w3, 32 * 2); + } - ripemd160_update (ctx, w_utf16_buf, w_utf16_len); + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); + + ripemd160_update_64 (ctx, w0, w1, w2, w3, (len - pos1) * 2); } DECLSPEC void ripemd160_update_global_utf16le_swap (ripemd160_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { - u32 w_utf16_buf[256] = { 0 }; + if (test_any_8th_bit (w, len) == 1) + { + u32 w_utf16_buf[256]; + + const int w_utf16_len = utf8_to_utf16le_global (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + + const int blkoff = (w_utf16_len / 64) * 16; + + u32 *w_ptr = w_utf16_buf + blkoff; + + truncate_block_16x4_le_S (w_ptr + 0, w_ptr + 4, w_ptr + 8, w_ptr + 12, w_utf16_len & 63); + + ripemd160_update_swap (ctx, w_utf16_buf, w_utf16_len); + + return; + } + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + int pos1; + int pos4; + + for (pos1 = 0, pos4 = 0; pos1 < len - 32; pos1 += 32, pos4 += 8) + { + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); + + w0[0] = hc_swap32_S (w0[0]); + w0[1] = hc_swap32_S (w0[1]); + w0[2] = hc_swap32_S (w0[2]); + w0[3] = hc_swap32_S (w0[3]); + w1[0] = hc_swap32_S (w1[0]); + w1[1] = hc_swap32_S (w1[1]); + w1[2] = hc_swap32_S (w1[2]); + w1[3] = hc_swap32_S (w1[3]); + w2[0] = hc_swap32_S (w2[0]); + w2[1] = hc_swap32_S (w2[1]); + w2[2] = hc_swap32_S (w2[2]); + w2[3] = hc_swap32_S (w2[3]); + w3[0] = hc_swap32_S (w3[0]); + w3[1] = hc_swap32_S (w3[1]); + w3[2] = hc_swap32_S (w3[2]); + w3[3] = hc_swap32_S (w3[3]); + + ripemd160_update_64 (ctx, w0, w1, w2, w3, 32 * 2); + } + + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; - const int w_utf16_len = utf8_to_utf16le_global (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); - ripemd160_update_swap (ctx, w_utf16_buf, w_utf16_len); + w0[0] = hc_swap32_S (w0[0]); + w0[1] = hc_swap32_S (w0[1]); + w0[2] = hc_swap32_S (w0[2]); + w0[3] = hc_swap32_S (w0[3]); + w1[0] = hc_swap32_S (w1[0]); + w1[1] = hc_swap32_S (w1[1]); + w1[2] = hc_swap32_S (w1[2]); + w1[3] = hc_swap32_S (w1[3]); + w2[0] = hc_swap32_S (w2[0]); + w2[1] = hc_swap32_S (w2[1]); + w2[2] = hc_swap32_S (w2[2]); + w2[3] = hc_swap32_S (w2[3]); + w3[0] = hc_swap32_S (w3[0]); + w3[1] = hc_swap32_S (w3[1]); + w3[2] = hc_swap32_S (w3[2]); + w3[3] = hc_swap32_S (w3[3]); + + ripemd160_update_64 (ctx, w0, w1, w2, w3, (len - pos1) * 2); } DECLSPEC void ripemd160_final (ripemd160_ctx_t *ctx) @@ -1002,6 +1270,16 @@ DECLSPEC void ripemd160_hmac_update_swap (ripemd160_hmac_ctx_t *ctx, const u32 * ripemd160_update_swap (&ctx->ipad, w, len); } +DECLSPEC void ripemd160_hmac_update_utf16le (ripemd160_hmac_ctx_t *ctx, const u32 *w, const int len) +{ + ripemd160_update_utf16le (&ctx->ipad, w, len); +} + +DECLSPEC void ripemd160_hmac_update_utf16le_swap (ripemd160_hmac_ctx_t *ctx, const u32 *w, const int len) +{ + ripemd160_update_utf16le_swap (&ctx->ipad, w, len); +} + DECLSPEC void ripemd160_hmac_update_global (ripemd160_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { ripemd160_update_global (&ctx->ipad, w, len); @@ -1012,6 +1290,16 @@ DECLSPEC void ripemd160_hmac_update_global_swap (ripemd160_hmac_ctx_t *ctx, GLOB ripemd160_update_global_swap (&ctx->ipad, w, len); } +DECLSPEC void ripemd160_hmac_update_global_utf16le (ripemd160_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) +{ + ripemd160_update_global_utf16le (&ctx->ipad, w, len); +} + +DECLSPEC void ripemd160_hmac_update_global_utf16le_swap (ripemd160_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) +{ + ripemd160_update_global_utf16le_swap (&ctx->ipad, w, len); +} + DECLSPEC void ripemd160_hmac_final (ripemd160_hmac_ctx_t *ctx) { ripemd160_final (&ctx->ipad); diff --git a/OpenCL/inc_hash_ripemd160.h b/OpenCL/inc_hash_ripemd160.h index 70fa3f60f..25a69ed56 100644 --- a/OpenCL/inc_hash_ripemd160.h +++ b/OpenCL/inc_hash_ripemd160.h @@ -122,8 +122,12 @@ DECLSPEC void ripemd160_hmac_init_global_swap (ripemd160_hmac_ctx_t *ctx, GLOBAL DECLSPEC void ripemd160_hmac_update_64 (ripemd160_hmac_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int len); DECLSPEC void ripemd160_hmac_update (ripemd160_hmac_ctx_t *ctx, const u32 *w, const int len); DECLSPEC void ripemd160_hmac_update_swap (ripemd160_hmac_ctx_t *ctx, const u32 *w, const int len); +DECLSPEC void ripemd160_hmac_update_utf16le (ripemd160_hmac_ctx_t *ctx, const u32 *w, const int len); +DECLSPEC void ripemd160_hmac_update_utf16le_swap (ripemd160_hmac_ctx_t *ctx, const u32 *w, const int len); DECLSPEC void ripemd160_hmac_update_global (ripemd160_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len); DECLSPEC void ripemd160_hmac_update_global_swap (ripemd160_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len); +DECLSPEC void ripemd160_hmac_update_global_utf16le (ripemd160_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len); +DECLSPEC void ripemd160_hmac_update_global_utf16le_swap (ripemd160_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len); DECLSPEC void ripemd160_hmac_final (ripemd160_hmac_ctx_t *ctx); DECLSPEC void ripemd160_transform_vector (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u32x *digest); DECLSPEC void ripemd160_init_vector (ripemd160_ctx_vector_t *ctx); diff --git a/OpenCL/inc_hash_sha1.cl b/OpenCL/inc_hash_sha1.cl index 3ea263178..81dcfb278 100644 --- a/OpenCL/inc_hash_sha1.cl +++ b/OpenCL/inc_hash_sha1.cl @@ -612,20 +612,154 @@ DECLSPEC void sha1_update_swap (sha1_ctx_t *ctx, const u32 *w, const int len) DECLSPEC void sha1_update_utf16le (sha1_ctx_t *ctx, const u32 *w, const int len) { - u32 w_utf16_buf[256] = { 0 }; + if (test_any_8th_bit (w, len) == 1) + { + u32 w_utf16_buf[256]; + + const int w_utf16_len = utf8_to_utf16le (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + + const int blkoff = (w_utf16_len / 64) * 16; + + u32 *w_ptr = w_utf16_buf + blkoff; + + truncate_block_16x4_le_S (w_ptr + 0, w_ptr + 4, w_ptr + 8, w_ptr + 12, w_utf16_len & 63); + + sha1_update (ctx, w_utf16_buf, w_utf16_len); + + return; + } - const int w_utf16_len = utf8_to_utf16le (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; - sha1_update (ctx, w_utf16_buf, w_utf16_len); + int pos1; + int pos4; + + for (pos1 = 0, pos4 = 0; pos1 < len - 32; pos1 += 32, pos4 += 8) + { + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); + + sha1_update_64 (ctx, w0, w1, w2, w3, 32 * 2); + } + + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); + + sha1_update_64 (ctx, w0, w1, w2, w3, (len - pos1) * 2); } DECLSPEC void sha1_update_utf16le_swap (sha1_ctx_t *ctx, const u32 *w, const int len) { - u32 w_utf16_buf[256] = { 0 }; + if (test_any_8th_bit (w, len) == 1) + { + u32 w_utf16_buf[256]; + + const int w_utf16_len = utf8_to_utf16le (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + + const int blkoff = (w_utf16_len / 64) * 16; + + u32 *w_ptr = w_utf16_buf + blkoff; + + truncate_block_16x4_le_S (w_ptr + 0, w_ptr + 4, w_ptr + 8, w_ptr + 12, w_utf16_len & 63); + + sha1_update_swap (ctx, w_utf16_buf, w_utf16_len); + + return; + } + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + int pos1; + int pos4; + + for (pos1 = 0, pos4 = 0; pos1 < len - 32; pos1 += 32, pos4 += 8) + { + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; - const int w_utf16_len = utf8_to_utf16le (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); - sha1_update_swap (ctx, w_utf16_buf, w_utf16_len); + w0[0] = hc_swap32_S (w0[0]); + w0[1] = hc_swap32_S (w0[1]); + w0[2] = hc_swap32_S (w0[2]); + w0[3] = hc_swap32_S (w0[3]); + w1[0] = hc_swap32_S (w1[0]); + w1[1] = hc_swap32_S (w1[1]); + w1[2] = hc_swap32_S (w1[2]); + w1[3] = hc_swap32_S (w1[3]); + w2[0] = hc_swap32_S (w2[0]); + w2[1] = hc_swap32_S (w2[1]); + w2[2] = hc_swap32_S (w2[2]); + w2[3] = hc_swap32_S (w2[3]); + w3[0] = hc_swap32_S (w3[0]); + w3[1] = hc_swap32_S (w3[1]); + w3[2] = hc_swap32_S (w3[2]); + w3[3] = hc_swap32_S (w3[3]); + + sha1_update_64 (ctx, w0, w1, w2, w3, 32 * 2); + } + + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); + + w0[0] = hc_swap32_S (w0[0]); + w0[1] = hc_swap32_S (w0[1]); + w0[2] = hc_swap32_S (w0[2]); + w0[3] = hc_swap32_S (w0[3]); + w1[0] = hc_swap32_S (w1[0]); + w1[1] = hc_swap32_S (w1[1]); + w1[2] = hc_swap32_S (w1[2]); + w1[3] = hc_swap32_S (w1[3]); + w2[0] = hc_swap32_S (w2[0]); + w2[1] = hc_swap32_S (w2[1]); + w2[2] = hc_swap32_S (w2[2]); + w2[3] = hc_swap32_S (w2[3]); + w3[0] = hc_swap32_S (w3[0]); + w3[1] = hc_swap32_S (w3[1]); + w3[2] = hc_swap32_S (w3[2]); + w3[3] = hc_swap32_S (w3[3]); + + sha1_update_64 (ctx, w0, w1, w2, w3, (len - pos1) * 2); } DECLSPEC void sha1_update_utf16be (sha1_ctx_t *ctx, const u32 *w, const int len) @@ -886,20 +1020,154 @@ DECLSPEC void sha1_update_global_swap (sha1_ctx_t *ctx, GLOBAL_AS const u32 *w, DECLSPEC void sha1_update_global_utf16le (sha1_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { - u32 w_utf16_buf[256] = { 0 }; + if (test_any_8th_bit (w, len) == 1) + { + u32 w_utf16_buf[256]; + + const int w_utf16_len = utf8_to_utf16le_global (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + + const int blkoff = (w_utf16_len / 64) * 16; + + u32 *w_ptr = w_utf16_buf + blkoff; + + truncate_block_16x4_le_S (w_ptr + 0, w_ptr + 4, w_ptr + 8, w_ptr + 12, w_utf16_len & 63); + + sha1_update (ctx, w_utf16_buf, w_utf16_len); - const int w_utf16_len = utf8_to_utf16le_global (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + return; + } + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + int pos1; + int pos4; - sha1_update (ctx, w_utf16_buf, w_utf16_len); + for (pos1 = 0, pos4 = 0; pos1 < len - 32; pos1 += 32, pos4 += 8) + { + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); + + sha1_update_64 (ctx, w0, w1, w2, w3, 32 * 2); + } + + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); + + sha1_update_64 (ctx, w0, w1, w2, w3, (len - pos1) * 2); } DECLSPEC void sha1_update_global_utf16le_swap (sha1_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { - u32 w_utf16_buf[256] = { 0 }; + if (test_any_8th_bit (w, len) == 1) + { + u32 w_utf16_buf[256]; + + const int w_utf16_len = utf8_to_utf16le_global (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + + const int blkoff = (w_utf16_len / 64) * 16; + + u32 *w_ptr = w_utf16_buf + blkoff; + + truncate_block_16x4_le_S (w_ptr + 0, w_ptr + 4, w_ptr + 8, w_ptr + 12, w_utf16_len & 63); + + sha1_update_swap (ctx, w_utf16_buf, w_utf16_len); + + return; + } + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + int pos1; + int pos4; + + for (pos1 = 0, pos4 = 0; pos1 < len - 32; pos1 += 32, pos4 += 8) + { + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); + + w0[0] = hc_swap32_S (w0[0]); + w0[1] = hc_swap32_S (w0[1]); + w0[2] = hc_swap32_S (w0[2]); + w0[3] = hc_swap32_S (w0[3]); + w1[0] = hc_swap32_S (w1[0]); + w1[1] = hc_swap32_S (w1[1]); + w1[2] = hc_swap32_S (w1[2]); + w1[3] = hc_swap32_S (w1[3]); + w2[0] = hc_swap32_S (w2[0]); + w2[1] = hc_swap32_S (w2[1]); + w2[2] = hc_swap32_S (w2[2]); + w2[3] = hc_swap32_S (w2[3]); + w3[0] = hc_swap32_S (w3[0]); + w3[1] = hc_swap32_S (w3[1]); + w3[2] = hc_swap32_S (w3[2]); + w3[3] = hc_swap32_S (w3[3]); + + sha1_update_64 (ctx, w0, w1, w2, w3, 32 * 2); + } + + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; - const int w_utf16_len = utf8_to_utf16le_global (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); - sha1_update_swap (ctx, w_utf16_buf, w_utf16_len); + w0[0] = hc_swap32_S (w0[0]); + w0[1] = hc_swap32_S (w0[1]); + w0[2] = hc_swap32_S (w0[2]); + w0[3] = hc_swap32_S (w0[3]); + w1[0] = hc_swap32_S (w1[0]); + w1[1] = hc_swap32_S (w1[1]); + w1[2] = hc_swap32_S (w1[2]); + w1[3] = hc_swap32_S (w1[3]); + w2[0] = hc_swap32_S (w2[0]); + w2[1] = hc_swap32_S (w2[1]); + w2[2] = hc_swap32_S (w2[2]); + w2[3] = hc_swap32_S (w2[3]); + w3[0] = hc_swap32_S (w3[0]); + w3[1] = hc_swap32_S (w3[1]); + w3[2] = hc_swap32_S (w3[2]); + w3[3] = hc_swap32_S (w3[3]); + + sha1_update_64 (ctx, w0, w1, w2, w3, (len - pos1) * 2); } DECLSPEC void sha1_update_global_utf16be (sha1_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) @@ -1353,6 +1621,16 @@ DECLSPEC void sha1_hmac_update_swap (sha1_hmac_ctx_t *ctx, const u32 *w, const i sha1_update_swap (&ctx->ipad, w, len); } +DECLSPEC void sha1_hmac_update_utf16le (sha1_hmac_ctx_t *ctx, const u32 *w, const int len) +{ + sha1_update_utf16le (&ctx->ipad, w, len); +} + +DECLSPEC void sha1_hmac_update_utf16le_swap (sha1_hmac_ctx_t *ctx, const u32 *w, const int len) +{ + sha1_update_utf16le_swap (&ctx->ipad, w, len); +} + DECLSPEC void sha1_hmac_update_global (sha1_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { sha1_update_global (&ctx->ipad, w, len); @@ -1363,6 +1641,16 @@ DECLSPEC void sha1_hmac_update_global_swap (sha1_hmac_ctx_t *ctx, GLOBAL_AS cons sha1_update_global_swap (&ctx->ipad, w, len); } +DECLSPEC void sha1_hmac_update_global_utf16le (sha1_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) +{ + sha1_update_global_utf16le (&ctx->ipad, w, len); +} + +DECLSPEC void sha1_hmac_update_global_utf16le_swap (sha1_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) +{ + sha1_update_global_utf16le_swap (&ctx->ipad, w, len); +} + DECLSPEC void sha1_hmac_final (sha1_hmac_ctx_t *ctx) { sha1_final (&ctx->ipad); diff --git a/OpenCL/inc_hash_sha1.h b/OpenCL/inc_hash_sha1.h index 69f6b58d4..2ff36fdad 100644 --- a/OpenCL/inc_hash_sha1.h +++ b/OpenCL/inc_hash_sha1.h @@ -114,8 +114,12 @@ DECLSPEC void sha1_hmac_init_global_swap (sha1_hmac_ctx_t *ctx, GLOBAL_AS const DECLSPEC void sha1_hmac_update_64 (sha1_hmac_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int len); DECLSPEC void sha1_hmac_update (sha1_hmac_ctx_t *ctx, const u32 *w, const int len); DECLSPEC void sha1_hmac_update_swap (sha1_hmac_ctx_t *ctx, const u32 *w, const int len); +DECLSPEC void sha1_hmac_update_utf16le (sha1_hmac_ctx_t *ctx, const u32 *w, const int len); +DECLSPEC void sha1_hmac_update_utf16le_swap (sha1_hmac_ctx_t *ctx, const u32 *w, const int len); DECLSPEC void sha1_hmac_update_global (sha1_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len); DECLSPEC void sha1_hmac_update_global_swap (sha1_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len); +DECLSPEC void sha1_hmac_update_global_utf16le (sha1_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len); +DECLSPEC void sha1_hmac_update_global_utf16le_swap (sha1_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len); DECLSPEC void sha1_hmac_final (sha1_hmac_ctx_t *ctx); DECLSPEC void sha1_transform_vector (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u32x *digest); DECLSPEC void sha1_init_vector (sha1_ctx_vector_t *ctx); diff --git a/OpenCL/inc_hash_sha224.cl b/OpenCL/inc_hash_sha224.cl index 2e970a10a..f2a7de668 100644 --- a/OpenCL/inc_hash_sha224.cl +++ b/OpenCL/inc_hash_sha224.cl @@ -414,20 +414,154 @@ DECLSPEC void sha224_update_swap (sha224_ctx_t *ctx, const u32 *w, const int len DECLSPEC void sha224_update_utf16le (sha224_ctx_t *ctx, const u32 *w, const int len) { - u32 w_utf16_buf[256] = { 0 }; + if (test_any_8th_bit (w, len) == 1) + { + u32 w_utf16_buf[256]; + + const int w_utf16_len = utf8_to_utf16le (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + + const int blkoff = (w_utf16_len / 64) * 16; + + u32 *w_ptr = w_utf16_buf + blkoff; + + truncate_block_16x4_le_S (w_ptr + 0, w_ptr + 4, w_ptr + 8, w_ptr + 12, w_utf16_len & 63); + + sha224_update (ctx, w_utf16_buf, w_utf16_len); + + return; + } + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + int pos1; + int pos4; - const int w_utf16_len = utf8_to_utf16le (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + for (pos1 = 0, pos4 = 0; pos1 < len - 32; pos1 += 32, pos4 += 8) + { + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); - sha224_update (ctx, w_utf16_buf, w_utf16_len); + sha224_update_64 (ctx, w0, w1, w2, w3, 32 * 2); + } + + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); + + sha224_update_64 (ctx, w0, w1, w2, w3, (len - pos1) * 2); } DECLSPEC void sha224_update_utf16le_swap (sha224_ctx_t *ctx, const u32 *w, const int len) { - u32 w_utf16_buf[256] = { 0 }; + if (test_any_8th_bit (w, len) == 1) + { + u32 w_utf16_buf[256]; + + const int w_utf16_len = utf8_to_utf16le (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + + const int blkoff = (w_utf16_len / 64) * 16; + + u32 *w_ptr = w_utf16_buf + blkoff; + + truncate_block_16x4_le_S (w_ptr + 0, w_ptr + 4, w_ptr + 8, w_ptr + 12, w_utf16_len & 63); + + sha224_update_swap (ctx, w_utf16_buf, w_utf16_len); + + return; + } + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + int pos1; + int pos4; + + for (pos1 = 0, pos4 = 0; pos1 < len - 32; pos1 += 32, pos4 += 8) + { + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); + + w0[0] = hc_swap32_S (w0[0]); + w0[1] = hc_swap32_S (w0[1]); + w0[2] = hc_swap32_S (w0[2]); + w0[3] = hc_swap32_S (w0[3]); + w1[0] = hc_swap32_S (w1[0]); + w1[1] = hc_swap32_S (w1[1]); + w1[2] = hc_swap32_S (w1[2]); + w1[3] = hc_swap32_S (w1[3]); + w2[0] = hc_swap32_S (w2[0]); + w2[1] = hc_swap32_S (w2[1]); + w2[2] = hc_swap32_S (w2[2]); + w2[3] = hc_swap32_S (w2[3]); + w3[0] = hc_swap32_S (w3[0]); + w3[1] = hc_swap32_S (w3[1]); + w3[2] = hc_swap32_S (w3[2]); + w3[3] = hc_swap32_S (w3[3]); - const int w_utf16_len = utf8_to_utf16le (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + sha224_update_64 (ctx, w0, w1, w2, w3, 32 * 2); + } - sha224_update_swap (ctx, w_utf16_buf, w_utf16_len); + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); + + w0[0] = hc_swap32_S (w0[0]); + w0[1] = hc_swap32_S (w0[1]); + w0[2] = hc_swap32_S (w0[2]); + w0[3] = hc_swap32_S (w0[3]); + w1[0] = hc_swap32_S (w1[0]); + w1[1] = hc_swap32_S (w1[1]); + w1[2] = hc_swap32_S (w1[2]); + w1[3] = hc_swap32_S (w1[3]); + w2[0] = hc_swap32_S (w2[0]); + w2[1] = hc_swap32_S (w2[1]); + w2[2] = hc_swap32_S (w2[2]); + w2[3] = hc_swap32_S (w2[3]); + w3[0] = hc_swap32_S (w3[0]); + w3[1] = hc_swap32_S (w3[1]); + w3[2] = hc_swap32_S (w3[2]); + w3[3] = hc_swap32_S (w3[3]); + + sha224_update_64 (ctx, w0, w1, w2, w3, (len - pos1) * 2); } DECLSPEC void sha224_update_global (sha224_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) @@ -570,20 +704,154 @@ DECLSPEC void sha224_update_global_swap (sha224_ctx_t *ctx, GLOBAL_AS const u32 DECLSPEC void sha224_update_global_utf16le (sha224_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { - u32 w_utf16_buf[256] = { 0 }; + if (test_any_8th_bit (w, len) == 1) + { + u32 w_utf16_buf[256]; + + const int w_utf16_len = utf8_to_utf16le_global (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + + const int blkoff = (w_utf16_len / 64) * 16; + + u32 *w_ptr = w_utf16_buf + blkoff; + + truncate_block_16x4_le_S (w_ptr + 0, w_ptr + 4, w_ptr + 8, w_ptr + 12, w_utf16_len & 63); + + sha224_update (ctx, w_utf16_buf, w_utf16_len); - const int w_utf16_len = utf8_to_utf16le_global (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + return; + } + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + int pos1; + int pos4; + + for (pos1 = 0, pos4 = 0; pos1 < len - 32; pos1 += 32, pos4 += 8) + { + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); + + sha224_update_64 (ctx, w0, w1, w2, w3, 32 * 2); + } - sha224_update (ctx, w_utf16_buf, w_utf16_len); + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); + + sha224_update_64 (ctx, w0, w1, w2, w3, (len - pos1) * 2); } DECLSPEC void sha224_update_global_utf16le_swap (sha224_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { - u32 w_utf16_buf[256] = { 0 }; + if (test_any_8th_bit (w, len) == 1) + { + u32 w_utf16_buf[256]; + + const int w_utf16_len = utf8_to_utf16le_global (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + + const int blkoff = (w_utf16_len / 64) * 16; + + u32 *w_ptr = w_utf16_buf + blkoff; + + truncate_block_16x4_le_S (w_ptr + 0, w_ptr + 4, w_ptr + 8, w_ptr + 12, w_utf16_len & 63); + + sha224_update_swap (ctx, w_utf16_buf, w_utf16_len); + + return; + } + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + int pos1; + int pos4; + + for (pos1 = 0, pos4 = 0; pos1 < len - 32; pos1 += 32, pos4 += 8) + { + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); + + w0[0] = hc_swap32_S (w0[0]); + w0[1] = hc_swap32_S (w0[1]); + w0[2] = hc_swap32_S (w0[2]); + w0[3] = hc_swap32_S (w0[3]); + w1[0] = hc_swap32_S (w1[0]); + w1[1] = hc_swap32_S (w1[1]); + w1[2] = hc_swap32_S (w1[2]); + w1[3] = hc_swap32_S (w1[3]); + w2[0] = hc_swap32_S (w2[0]); + w2[1] = hc_swap32_S (w2[1]); + w2[2] = hc_swap32_S (w2[2]); + w2[3] = hc_swap32_S (w2[3]); + w3[0] = hc_swap32_S (w3[0]); + w3[1] = hc_swap32_S (w3[1]); + w3[2] = hc_swap32_S (w3[2]); + w3[3] = hc_swap32_S (w3[3]); + + sha224_update_64 (ctx, w0, w1, w2, w3, 32 * 2); + } + + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; - const int w_utf16_len = utf8_to_utf16le_global (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); - sha224_update_swap (ctx, w_utf16_buf, w_utf16_len); + w0[0] = hc_swap32_S (w0[0]); + w0[1] = hc_swap32_S (w0[1]); + w0[2] = hc_swap32_S (w0[2]); + w0[3] = hc_swap32_S (w0[3]); + w1[0] = hc_swap32_S (w1[0]); + w1[1] = hc_swap32_S (w1[1]); + w1[2] = hc_swap32_S (w1[2]); + w1[3] = hc_swap32_S (w1[3]); + w2[0] = hc_swap32_S (w2[0]); + w2[1] = hc_swap32_S (w2[1]); + w2[2] = hc_swap32_S (w2[2]); + w2[3] = hc_swap32_S (w2[3]); + w3[0] = hc_swap32_S (w3[0]); + w3[1] = hc_swap32_S (w3[1]); + w3[2] = hc_swap32_S (w3[2]); + w3[3] = hc_swap32_S (w3[3]); + + sha224_update_64 (ctx, w0, w1, w2, w3, (len - pos1) * 2); } DECLSPEC void sha224_final (sha224_ctx_t *ctx) @@ -919,6 +1187,16 @@ DECLSPEC void sha224_hmac_update_swap (sha224_hmac_ctx_t *ctx, const u32 *w, con sha224_update_swap (&ctx->ipad, w, len); } +DECLSPEC void sha224_hmac_update_utf16le (sha224_hmac_ctx_t *ctx, const u32 *w, const int len) +{ + sha224_update_utf16le (&ctx->ipad, w, len); +} + +DECLSPEC void sha224_hmac_update_utf16le_swap (sha224_hmac_ctx_t *ctx, const u32 *w, const int len) +{ + sha224_update_utf16le_swap (&ctx->ipad, w, len); +} + DECLSPEC void sha224_hmac_update_global (sha224_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { sha224_update_global (&ctx->ipad, w, len); @@ -929,6 +1207,16 @@ DECLSPEC void sha224_hmac_update_global_swap (sha224_hmac_ctx_t *ctx, GLOBAL_AS sha224_update_global_swap (&ctx->ipad, w, len); } +DECLSPEC void sha224_hmac_update_global_utf16le (sha224_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) +{ + sha224_update_global_utf16le (&ctx->ipad, w, len); +} + +DECLSPEC void sha224_hmac_update_global_utf16le_swap (sha224_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) +{ + sha224_update_global_utf16le_swap (&ctx->ipad, w, len); +} + DECLSPEC void sha224_hmac_final (sha224_hmac_ctx_t *ctx) { sha224_final (&ctx->ipad); diff --git a/OpenCL/inc_hash_sha224.h b/OpenCL/inc_hash_sha224.h index 46f03a35d..d68c79d65 100644 --- a/OpenCL/inc_hash_sha224.h +++ b/OpenCL/inc_hash_sha224.h @@ -109,8 +109,12 @@ DECLSPEC void sha224_hmac_init_global_swap (sha224_hmac_ctx_t *ctx, GLOBAL_AS co DECLSPEC void sha224_hmac_update_64 (sha224_hmac_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int len); DECLSPEC void sha224_hmac_update (sha224_hmac_ctx_t *ctx, const u32 *w, const int len); DECLSPEC void sha224_hmac_update_swap (sha224_hmac_ctx_t *ctx, const u32 *w, const int len); +DECLSPEC void sha224_hmac_update_utf16le (sha224_hmac_ctx_t *ctx, const u32 *w, const int len); +DECLSPEC void sha224_hmac_update_utf16le_swap (sha224_hmac_ctx_t *ctx, const u32 *w, const int len); DECLSPEC void sha224_hmac_update_global (sha224_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len); DECLSPEC void sha224_hmac_update_global_swap (sha224_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len); +DECLSPEC void sha224_hmac_update_global_utf16le (sha224_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len); +DECLSPEC void sha224_hmac_update_global_utf16le_swap (sha224_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len); DECLSPEC void sha224_hmac_final (sha224_hmac_ctx_t *ctx); DECLSPEC void sha224_transform_vector (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u32x *digest); DECLSPEC void sha224_init_vector (sha224_ctx_vector_t *ctx); diff --git a/OpenCL/inc_hash_sha256.cl b/OpenCL/inc_hash_sha256.cl index 93208b060..961d55b1e 100644 --- a/OpenCL/inc_hash_sha256.cl +++ b/OpenCL/inc_hash_sha256.cl @@ -414,20 +414,154 @@ DECLSPEC void sha256_update_swap (sha256_ctx_t *ctx, const u32 *w, const int len DECLSPEC void sha256_update_utf16le (sha256_ctx_t *ctx, const u32 *w, const int len) { - u32 w_utf16_buf[256] = { 0 }; + if (test_any_8th_bit (w, len) == 1) + { + u32 w_utf16_buf[256]; + + const int w_utf16_len = utf8_to_utf16le (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + + const int blkoff = (w_utf16_len / 64) * 16; + + u32 *w_ptr = w_utf16_buf + blkoff; + + truncate_block_16x4_le_S (w_ptr + 0, w_ptr + 4, w_ptr + 8, w_ptr + 12, w_utf16_len & 63); + + sha256_update (ctx, w_utf16_buf, w_utf16_len); + + return; + } + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + int pos1; + int pos4; - const int w_utf16_len = utf8_to_utf16le (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + for (pos1 = 0, pos4 = 0; pos1 < len - 32; pos1 += 32, pos4 += 8) + { + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); - sha256_update (ctx, w_utf16_buf, w_utf16_len); + sha256_update_64 (ctx, w0, w1, w2, w3, 32 * 2); + } + + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); + + sha256_update_64 (ctx, w0, w1, w2, w3, (len - pos1) * 2); } DECLSPEC void sha256_update_utf16le_swap (sha256_ctx_t *ctx, const u32 *w, const int len) { - u32 w_utf16_buf[256] = { 0 }; + if (test_any_8th_bit (w, len) == 1) + { + u32 w_utf16_buf[256]; + + const int w_utf16_len = utf8_to_utf16le (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + + const int blkoff = (w_utf16_len / 64) * 16; + + u32 *w_ptr = w_utf16_buf + blkoff; + + truncate_block_16x4_le_S (w_ptr + 0, w_ptr + 4, w_ptr + 8, w_ptr + 12, w_utf16_len & 63); + + sha256_update_swap (ctx, w_utf16_buf, w_utf16_len); + + return; + } + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + int pos1; + int pos4; + + for (pos1 = 0, pos4 = 0; pos1 < len - 32; pos1 += 32, pos4 += 8) + { + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); + + w0[0] = hc_swap32_S (w0[0]); + w0[1] = hc_swap32_S (w0[1]); + w0[2] = hc_swap32_S (w0[2]); + w0[3] = hc_swap32_S (w0[3]); + w1[0] = hc_swap32_S (w1[0]); + w1[1] = hc_swap32_S (w1[1]); + w1[2] = hc_swap32_S (w1[2]); + w1[3] = hc_swap32_S (w1[3]); + w2[0] = hc_swap32_S (w2[0]); + w2[1] = hc_swap32_S (w2[1]); + w2[2] = hc_swap32_S (w2[2]); + w2[3] = hc_swap32_S (w2[3]); + w3[0] = hc_swap32_S (w3[0]); + w3[1] = hc_swap32_S (w3[1]); + w3[2] = hc_swap32_S (w3[2]); + w3[3] = hc_swap32_S (w3[3]); - const int w_utf16_len = utf8_to_utf16le (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + sha256_update_64 (ctx, w0, w1, w2, w3, 32 * 2); + } - sha256_update_swap (ctx, w_utf16_buf, w_utf16_len); + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); + + w0[0] = hc_swap32_S (w0[0]); + w0[1] = hc_swap32_S (w0[1]); + w0[2] = hc_swap32_S (w0[2]); + w0[3] = hc_swap32_S (w0[3]); + w1[0] = hc_swap32_S (w1[0]); + w1[1] = hc_swap32_S (w1[1]); + w1[2] = hc_swap32_S (w1[2]); + w1[3] = hc_swap32_S (w1[3]); + w2[0] = hc_swap32_S (w2[0]); + w2[1] = hc_swap32_S (w2[1]); + w2[2] = hc_swap32_S (w2[2]); + w2[3] = hc_swap32_S (w2[3]); + w3[0] = hc_swap32_S (w3[0]); + w3[1] = hc_swap32_S (w3[1]); + w3[2] = hc_swap32_S (w3[2]); + w3[3] = hc_swap32_S (w3[3]); + + sha256_update_64 (ctx, w0, w1, w2, w3, (len - pos1) * 2); } DECLSPEC void sha256_update_global (sha256_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) @@ -570,20 +704,154 @@ DECLSPEC void sha256_update_global_swap (sha256_ctx_t *ctx, GLOBAL_AS const u32 DECLSPEC void sha256_update_global_utf16le (sha256_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { - u32 w_utf16_buf[256] = { 0 }; + if (test_any_8th_bit (w, len) == 1) + { + u32 w_utf16_buf[256]; + + const int w_utf16_len = utf8_to_utf16le_global (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + + const int blkoff = (w_utf16_len / 64) * 16; + + u32 *w_ptr = w_utf16_buf + blkoff; + + truncate_block_16x4_le_S (w_ptr + 0, w_ptr + 4, w_ptr + 8, w_ptr + 12, w_utf16_len & 63); + + sha256_update (ctx, w_utf16_buf, w_utf16_len); - const int w_utf16_len = utf8_to_utf16le_global (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + return; + } + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + int pos1; + int pos4; + + for (pos1 = 0, pos4 = 0; pos1 < len - 32; pos1 += 32, pos4 += 8) + { + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); + + sha256_update_64 (ctx, w0, w1, w2, w3, 32 * 2); + } - sha256_update (ctx, w_utf16_buf, w_utf16_len); + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); + + sha256_update_64 (ctx, w0, w1, w2, w3, (len - pos1) * 2); } DECLSPEC void sha256_update_global_utf16le_swap (sha256_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { - u32 w_utf16_buf[256] = { 0 }; + if (test_any_8th_bit (w, len) == 1) + { + u32 w_utf16_buf[256]; + + const int w_utf16_len = utf8_to_utf16le_global (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + + const int blkoff = (w_utf16_len / 64) * 16; + + u32 *w_ptr = w_utf16_buf + blkoff; + + truncate_block_16x4_le_S (w_ptr + 0, w_ptr + 4, w_ptr + 8, w_ptr + 12, w_utf16_len & 63); + + sha256_update_swap (ctx, w_utf16_buf, w_utf16_len); + + return; + } + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + int pos1; + int pos4; + + for (pos1 = 0, pos4 = 0; pos1 < len - 32; pos1 += 32, pos4 += 8) + { + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); + + w0[0] = hc_swap32_S (w0[0]); + w0[1] = hc_swap32_S (w0[1]); + w0[2] = hc_swap32_S (w0[2]); + w0[3] = hc_swap32_S (w0[3]); + w1[0] = hc_swap32_S (w1[0]); + w1[1] = hc_swap32_S (w1[1]); + w1[2] = hc_swap32_S (w1[2]); + w1[3] = hc_swap32_S (w1[3]); + w2[0] = hc_swap32_S (w2[0]); + w2[1] = hc_swap32_S (w2[1]); + w2[2] = hc_swap32_S (w2[2]); + w2[3] = hc_swap32_S (w2[3]); + w3[0] = hc_swap32_S (w3[0]); + w3[1] = hc_swap32_S (w3[1]); + w3[2] = hc_swap32_S (w3[2]); + w3[3] = hc_swap32_S (w3[3]); + + sha256_update_64 (ctx, w0, w1, w2, w3, 32 * 2); + } + + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; - const int w_utf16_len = utf8_to_utf16le_global (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); - sha256_update_swap (ctx, w_utf16_buf, w_utf16_len); + w0[0] = hc_swap32_S (w0[0]); + w0[1] = hc_swap32_S (w0[1]); + w0[2] = hc_swap32_S (w0[2]); + w0[3] = hc_swap32_S (w0[3]); + w1[0] = hc_swap32_S (w1[0]); + w1[1] = hc_swap32_S (w1[1]); + w1[2] = hc_swap32_S (w1[2]); + w1[3] = hc_swap32_S (w1[3]); + w2[0] = hc_swap32_S (w2[0]); + w2[1] = hc_swap32_S (w2[1]); + w2[2] = hc_swap32_S (w2[2]); + w2[3] = hc_swap32_S (w2[3]); + w3[0] = hc_swap32_S (w3[0]); + w3[1] = hc_swap32_S (w3[1]); + w3[2] = hc_swap32_S (w3[2]); + w3[3] = hc_swap32_S (w3[3]); + + sha256_update_64 (ctx, w0, w1, w2, w3, (len - pos1) * 2); } DECLSPEC void sha256_final (sha256_ctx_t *ctx) @@ -919,6 +1187,16 @@ DECLSPEC void sha256_hmac_update_swap (sha256_hmac_ctx_t *ctx, const u32 *w, con sha256_update_swap (&ctx->ipad, w, len); } +DECLSPEC void sha256_hmac_update_utf16le (sha256_hmac_ctx_t *ctx, const u32 *w, const int len) +{ + sha256_update_utf16le (&ctx->ipad, w, len); +} + +DECLSPEC void sha256_hmac_update_utf16le_swap (sha256_hmac_ctx_t *ctx, const u32 *w, const int len) +{ + sha256_update_utf16le_swap (&ctx->ipad, w, len); +} + DECLSPEC void sha256_hmac_update_global (sha256_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { sha256_update_global (&ctx->ipad, w, len); @@ -929,6 +1207,16 @@ DECLSPEC void sha256_hmac_update_global_swap (sha256_hmac_ctx_t *ctx, GLOBAL_AS sha256_update_global_swap (&ctx->ipad, w, len); } +DECLSPEC void sha256_hmac_update_global_utf16le (sha256_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) +{ + sha256_update_global_utf16le (&ctx->ipad, w, len); +} + +DECLSPEC void sha256_hmac_update_global_utf16le_swap (sha256_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) +{ + sha256_update_global_utf16le_swap (&ctx->ipad, w, len); +} + DECLSPEC void sha256_hmac_final (sha256_hmac_ctx_t *ctx) { sha256_final (&ctx->ipad); diff --git a/OpenCL/inc_hash_sha256.h b/OpenCL/inc_hash_sha256.h index bc655d80b..ccf5a79f8 100644 --- a/OpenCL/inc_hash_sha256.h +++ b/OpenCL/inc_hash_sha256.h @@ -109,8 +109,12 @@ DECLSPEC void sha256_hmac_init_global_swap (sha256_hmac_ctx_t *ctx, GLOBAL_AS co DECLSPEC void sha256_hmac_update_64 (sha256_hmac_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int len); DECLSPEC void sha256_hmac_update (sha256_hmac_ctx_t *ctx, const u32 *w, const int len); DECLSPEC void sha256_hmac_update_swap (sha256_hmac_ctx_t *ctx, const u32 *w, const int len); +DECLSPEC void sha256_hmac_update_utf16le (sha256_hmac_ctx_t *ctx, const u32 *w, const int len); +DECLSPEC void sha256_hmac_update_utf16le_swap (sha256_hmac_ctx_t *ctx, const u32 *w, const int len); DECLSPEC void sha256_hmac_update_global (sha256_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len); DECLSPEC void sha256_hmac_update_global_swap (sha256_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len); +DECLSPEC void sha256_hmac_update_global_utf16le (sha256_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len); +DECLSPEC void sha256_hmac_update_global_utf16le_swap (sha256_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len); DECLSPEC void sha256_hmac_final (sha256_hmac_ctx_t *ctx); DECLSPEC void sha256_transform_vector (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u32x *digest); DECLSPEC void sha256_init_vector (sha256_ctx_vector_t *ctx); diff --git a/OpenCL/inc_hash_sha384.cl b/OpenCL/inc_hash_sha384.cl index 37db1f676..f4dbb1c85 100644 --- a/OpenCL/inc_hash_sha384.cl +++ b/OpenCL/inc_hash_sha384.cl @@ -622,20 +622,234 @@ DECLSPEC void sha384_update_swap (sha384_ctx_t *ctx, const u32 *w, const int len DECLSPEC void sha384_update_utf16le (sha384_ctx_t *ctx, const u32 *w, const int len) { - u32 w_utf16_buf[256] = { 0 }; + if (test_any_8th_bit (w, len) == 1) + { + u32 w_utf16_buf[256]; + + const int w_utf16_len = utf8_to_utf16le (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + + const int blkoff = (w_utf16_len / 64) * 16; + + u32 *w_ptr = w_utf16_buf + blkoff; + + truncate_block_16x4_le_S (w_ptr + 0, w_ptr + 4, w_ptr + 8, w_ptr + 12, w_utf16_len & 63); + + sha384_update (ctx, w_utf16_buf, w_utf16_len); + + return; + } + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + u32 w4[4]; + u32 w5[4]; + u32 w6[4]; + u32 w7[4]; + + int pos1; + int pos4; + + for (pos1 = 0, pos4 = 0; pos1 < len - 64; pos1 += 64, pos4 += 16) + { + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + w2[0] = w[pos4 + 8]; + w2[1] = w[pos4 + 9]; + w2[2] = w[pos4 + 10]; + w2[3] = w[pos4 + 11]; + w3[0] = w[pos4 + 12]; + w3[1] = w[pos4 + 13]; + w3[2] = w[pos4 + 14]; + w3[3] = w[pos4 + 15]; - const int w_utf16_len = utf8_to_utf16le (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + make_utf16le_S (w3, w6, w7); + make_utf16le_S (w2, w4, w5); + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); - sha384_update (ctx, w_utf16_buf, w_utf16_len); + sha384_update_128 (ctx, w0, w1, w2, w3, w4, w5, w6, w7, 64 * 2); + } + + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + w2[0] = w[pos4 + 8]; + w2[1] = w[pos4 + 9]; + w2[2] = w[pos4 + 10]; + w2[3] = w[pos4 + 11]; + w3[0] = w[pos4 + 12]; + w3[1] = w[pos4 + 13]; + w3[2] = w[pos4 + 14]; + w3[3] = w[pos4 + 15]; + + make_utf16le_S (w3, w6, w7); + make_utf16le_S (w2, w4, w5); + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); + + sha384_update_128 (ctx, w0, w1, w2, w3, w4, w5, w6, w7, (len - pos1) * 2); } DECLSPEC void sha384_update_utf16le_swap (sha384_ctx_t *ctx, const u32 *w, const int len) { - u32 w_utf16_buf[256] = { 0 }; + if (test_any_8th_bit (w, len) == 1) + { + u32 w_utf16_buf[256]; + + const int w_utf16_len = utf8_to_utf16le (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + + const int blkoff = (w_utf16_len / 64) * 16; + + u32 *w_ptr = w_utf16_buf + blkoff; + + truncate_block_16x4_le_S (w_ptr + 0, w_ptr + 4, w_ptr + 8, w_ptr + 12, w_utf16_len & 63); + + sha384_update_swap (ctx, w_utf16_buf, w_utf16_len); + + return; + } + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + u32 w4[4]; + u32 w5[4]; + u32 w6[4]; + u32 w7[4]; + + int pos1; + int pos4; + + for (pos1 = 0, pos4 = 0; pos1 < len - 64; pos1 += 64, pos4 += 16) + { + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + w2[0] = w[pos4 + 8]; + w2[1] = w[pos4 + 9]; + w2[2] = w[pos4 + 10]; + w2[3] = w[pos4 + 11]; + w3[0] = w[pos4 + 12]; + w3[1] = w[pos4 + 13]; + w3[2] = w[pos4 + 14]; + w3[3] = w[pos4 + 15]; + + make_utf16le_S (w3, w6, w7); + make_utf16le_S (w2, w4, w5); + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); + + w0[0] = hc_swap32_S (w0[0]); + w0[1] = hc_swap32_S (w0[1]); + w0[2] = hc_swap32_S (w0[2]); + w0[3] = hc_swap32_S (w0[3]); + w1[0] = hc_swap32_S (w1[0]); + w1[1] = hc_swap32_S (w1[1]); + w1[2] = hc_swap32_S (w1[2]); + w1[3] = hc_swap32_S (w1[3]); + w2[0] = hc_swap32_S (w2[0]); + w2[1] = hc_swap32_S (w2[1]); + w2[2] = hc_swap32_S (w2[2]); + w2[3] = hc_swap32_S (w2[3]); + w3[0] = hc_swap32_S (w3[0]); + w3[1] = hc_swap32_S (w3[1]); + w3[2] = hc_swap32_S (w3[2]); + w3[3] = hc_swap32_S (w3[3]); + w4[0] = hc_swap32_S (w4[0]); + w4[1] = hc_swap32_S (w4[1]); + w4[2] = hc_swap32_S (w4[2]); + w4[3] = hc_swap32_S (w4[3]); + w5[0] = hc_swap32_S (w5[0]); + w5[1] = hc_swap32_S (w5[1]); + w5[2] = hc_swap32_S (w5[2]); + w5[3] = hc_swap32_S (w5[3]); + w6[0] = hc_swap32_S (w6[0]); + w6[1] = hc_swap32_S (w6[1]); + w6[2] = hc_swap32_S (w6[2]); + w6[3] = hc_swap32_S (w6[3]); + w7[0] = hc_swap32_S (w7[0]); + w7[1] = hc_swap32_S (w7[1]); + w7[2] = hc_swap32_S (w7[2]); + w7[3] = hc_swap32_S (w7[3]); - const int w_utf16_len = utf8_to_utf16le (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + sha384_update_128 (ctx, w0, w1, w2, w3, w4, w5, w6, w7, 64 * 2); + } - sha384_update_swap (ctx, w_utf16_buf, w_utf16_len); + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + w2[0] = w[pos4 + 8]; + w2[1] = w[pos4 + 9]; + w2[2] = w[pos4 + 10]; + w2[3] = w[pos4 + 11]; + w3[0] = w[pos4 + 12]; + w3[1] = w[pos4 + 13]; + w3[2] = w[pos4 + 14]; + w3[3] = w[pos4 + 15]; + + make_utf16le_S (w3, w6, w7); + make_utf16le_S (w2, w4, w5); + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); + + w0[0] = hc_swap32_S (w0[0]); + w0[1] = hc_swap32_S (w0[1]); + w0[2] = hc_swap32_S (w0[2]); + w0[3] = hc_swap32_S (w0[3]); + w1[0] = hc_swap32_S (w1[0]); + w1[1] = hc_swap32_S (w1[1]); + w1[2] = hc_swap32_S (w1[2]); + w1[3] = hc_swap32_S (w1[3]); + w2[0] = hc_swap32_S (w2[0]); + w2[1] = hc_swap32_S (w2[1]); + w2[2] = hc_swap32_S (w2[2]); + w2[3] = hc_swap32_S (w2[3]); + w3[0] = hc_swap32_S (w3[0]); + w3[1] = hc_swap32_S (w3[1]); + w3[2] = hc_swap32_S (w3[2]); + w3[3] = hc_swap32_S (w3[3]); + w4[0] = hc_swap32_S (w4[0]); + w4[1] = hc_swap32_S (w4[1]); + w4[2] = hc_swap32_S (w4[2]); + w4[3] = hc_swap32_S (w4[3]); + w5[0] = hc_swap32_S (w5[0]); + w5[1] = hc_swap32_S (w5[1]); + w5[2] = hc_swap32_S (w5[2]); + w5[3] = hc_swap32_S (w5[3]); + w6[0] = hc_swap32_S (w6[0]); + w6[1] = hc_swap32_S (w6[1]); + w6[2] = hc_swap32_S (w6[2]); + w6[3] = hc_swap32_S (w6[3]); + w7[0] = hc_swap32_S (w7[0]); + w7[1] = hc_swap32_S (w7[1]); + w7[2] = hc_swap32_S (w7[2]); + w7[3] = hc_swap32_S (w7[3]); + + sha384_update_128 (ctx, w0, w1, w2, w3, w4, w5, w6, w7, (len - pos1) * 2); } DECLSPEC void sha384_update_global (sha384_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) @@ -882,20 +1096,234 @@ DECLSPEC void sha384_update_global_swap (sha384_ctx_t *ctx, GLOBAL_AS const u32 DECLSPEC void sha384_update_global_utf16le (sha384_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { - u32 w_utf16_buf[256] = { 0 }; + if (test_any_8th_bit (w, len) == 1) + { + u32 w_utf16_buf[256]; + + const int w_utf16_len = utf8_to_utf16le_global (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + + const int blkoff = (w_utf16_len / 64) * 16; + + u32 *w_ptr = w_utf16_buf + blkoff; + + truncate_block_16x4_le_S (w_ptr + 0, w_ptr + 4, w_ptr + 8, w_ptr + 12, w_utf16_len & 63); + + sha384_update (ctx, w_utf16_buf, w_utf16_len); - const int w_utf16_len = utf8_to_utf16le_global (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + return; + } + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + u32 w4[4]; + u32 w5[4]; + u32 w6[4]; + u32 w7[4]; + + int pos1; + int pos4; + + for (pos1 = 0, pos4 = 0; pos1 < len - 64; pos1 += 64, pos4 += 16) + { + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + w2[0] = w[pos4 + 8]; + w2[1] = w[pos4 + 9]; + w2[2] = w[pos4 + 10]; + w2[3] = w[pos4 + 11]; + w3[0] = w[pos4 + 12]; + w3[1] = w[pos4 + 13]; + w3[2] = w[pos4 + 14]; + w3[3] = w[pos4 + 15]; + + make_utf16le_S (w3, w6, w7); + make_utf16le_S (w2, w4, w5); + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); - sha384_update (ctx, w_utf16_buf, w_utf16_len); + sha384_update_128 (ctx, w0, w1, w2, w3, w4, w5, w6, w7, 64 * 2); + } + + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + w2[0] = w[pos4 + 8]; + w2[1] = w[pos4 + 9]; + w2[2] = w[pos4 + 10]; + w2[3] = w[pos4 + 11]; + w3[0] = w[pos4 + 12]; + w3[1] = w[pos4 + 13]; + w3[2] = w[pos4 + 14]; + w3[3] = w[pos4 + 15]; + + make_utf16le_S (w3, w6, w7); + make_utf16le_S (w2, w4, w5); + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); + + sha384_update_128 (ctx, w0, w1, w2, w3, w4, w5, w6, w7, (len - pos1) * 2); } DECLSPEC void sha384_update_global_utf16le_swap (sha384_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { - u32 w_utf16_buf[256] = { 0 }; + if (test_any_8th_bit (w, len) == 1) + { + u32 w_utf16_buf[256]; + + const int w_utf16_len = utf8_to_utf16le_global (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + + const int blkoff = (w_utf16_len / 64) * 16; + + u32 *w_ptr = w_utf16_buf + blkoff; + + truncate_block_16x4_le_S (w_ptr + 0, w_ptr + 4, w_ptr + 8, w_ptr + 12, w_utf16_len & 63); + + sha384_update_swap (ctx, w_utf16_buf, w_utf16_len); + + return; + } + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + u32 w4[4]; + u32 w5[4]; + u32 w6[4]; + u32 w7[4]; + + int pos1; + int pos4; + + for (pos1 = 0, pos4 = 0; pos1 < len - 64; pos1 += 64, pos4 += 16) + { + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + w2[0] = w[pos4 + 8]; + w2[1] = w[pos4 + 9]; + w2[2] = w[pos4 + 10]; + w2[3] = w[pos4 + 11]; + w3[0] = w[pos4 + 12]; + w3[1] = w[pos4 + 13]; + w3[2] = w[pos4 + 14]; + w3[3] = w[pos4 + 15]; + + make_utf16le_S (w3, w6, w7); + make_utf16le_S (w2, w4, w5); + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); + + w0[0] = hc_swap32_S (w0[0]); + w0[1] = hc_swap32_S (w0[1]); + w0[2] = hc_swap32_S (w0[2]); + w0[3] = hc_swap32_S (w0[3]); + w1[0] = hc_swap32_S (w1[0]); + w1[1] = hc_swap32_S (w1[1]); + w1[2] = hc_swap32_S (w1[2]); + w1[3] = hc_swap32_S (w1[3]); + w2[0] = hc_swap32_S (w2[0]); + w2[1] = hc_swap32_S (w2[1]); + w2[2] = hc_swap32_S (w2[2]); + w2[3] = hc_swap32_S (w2[3]); + w3[0] = hc_swap32_S (w3[0]); + w3[1] = hc_swap32_S (w3[1]); + w3[2] = hc_swap32_S (w3[2]); + w3[3] = hc_swap32_S (w3[3]); + w4[0] = hc_swap32_S (w4[0]); + w4[1] = hc_swap32_S (w4[1]); + w4[2] = hc_swap32_S (w4[2]); + w4[3] = hc_swap32_S (w4[3]); + w5[0] = hc_swap32_S (w5[0]); + w5[1] = hc_swap32_S (w5[1]); + w5[2] = hc_swap32_S (w5[2]); + w5[3] = hc_swap32_S (w5[3]); + w6[0] = hc_swap32_S (w6[0]); + w6[1] = hc_swap32_S (w6[1]); + w6[2] = hc_swap32_S (w6[2]); + w6[3] = hc_swap32_S (w6[3]); + w7[0] = hc_swap32_S (w7[0]); + w7[1] = hc_swap32_S (w7[1]); + w7[2] = hc_swap32_S (w7[2]); + w7[3] = hc_swap32_S (w7[3]); - const int w_utf16_len = utf8_to_utf16le_global (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + sha384_update_128 (ctx, w0, w1, w2, w3, w4, w5, w6, w7, 64 * 2); + } - sha384_update_swap (ctx, w_utf16_buf, w_utf16_len); + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + w2[0] = w[pos4 + 8]; + w2[1] = w[pos4 + 9]; + w2[2] = w[pos4 + 10]; + w2[3] = w[pos4 + 11]; + w3[0] = w[pos4 + 12]; + w3[1] = w[pos4 + 13]; + w3[2] = w[pos4 + 14]; + w3[3] = w[pos4 + 15]; + + make_utf16le_S (w3, w6, w7); + make_utf16le_S (w2, w4, w5); + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); + + w0[0] = hc_swap32_S (w0[0]); + w0[1] = hc_swap32_S (w0[1]); + w0[2] = hc_swap32_S (w0[2]); + w0[3] = hc_swap32_S (w0[3]); + w1[0] = hc_swap32_S (w1[0]); + w1[1] = hc_swap32_S (w1[1]); + w1[2] = hc_swap32_S (w1[2]); + w1[3] = hc_swap32_S (w1[3]); + w2[0] = hc_swap32_S (w2[0]); + w2[1] = hc_swap32_S (w2[1]); + w2[2] = hc_swap32_S (w2[2]); + w2[3] = hc_swap32_S (w2[3]); + w3[0] = hc_swap32_S (w3[0]); + w3[1] = hc_swap32_S (w3[1]); + w3[2] = hc_swap32_S (w3[2]); + w3[3] = hc_swap32_S (w3[3]); + w4[0] = hc_swap32_S (w4[0]); + w4[1] = hc_swap32_S (w4[1]); + w4[2] = hc_swap32_S (w4[2]); + w4[3] = hc_swap32_S (w4[3]); + w5[0] = hc_swap32_S (w5[0]); + w5[1] = hc_swap32_S (w5[1]); + w5[2] = hc_swap32_S (w5[2]); + w5[3] = hc_swap32_S (w5[3]); + w6[0] = hc_swap32_S (w6[0]); + w6[1] = hc_swap32_S (w6[1]); + w6[2] = hc_swap32_S (w6[2]); + w6[3] = hc_swap32_S (w6[3]); + w7[0] = hc_swap32_S (w7[0]); + w7[1] = hc_swap32_S (w7[1]); + w7[2] = hc_swap32_S (w7[2]); + w7[3] = hc_swap32_S (w7[3]); + + sha384_update_128 (ctx, w0, w1, w2, w3, w4, w5, w6, w7, (len - pos1) * 2); } DECLSPEC void sha384_final (sha384_ctx_t *ctx) @@ -1427,6 +1855,16 @@ DECLSPEC void sha384_hmac_update_swap (sha384_hmac_ctx_t *ctx, const u32 *w, con sha384_update_swap (&ctx->ipad, w, len); } +DECLSPEC void sha384_hmac_update_utf16le (sha384_hmac_ctx_t *ctx, const u32 *w, const int len) +{ + sha384_update_utf16le (&ctx->ipad, w, len); +} + +DECLSPEC void sha384_hmac_update_utf16le_swap (sha384_hmac_ctx_t *ctx, const u32 *w, const int len) +{ + sha384_update_utf16le_swap (&ctx->ipad, w, len); +} + DECLSPEC void sha384_hmac_update_global (sha384_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { sha384_update_global (&ctx->ipad, w, len); @@ -1437,6 +1875,16 @@ DECLSPEC void sha384_hmac_update_global_swap (sha384_hmac_ctx_t *ctx, GLOBAL_AS sha384_update_global_swap (&ctx->ipad, w, len); } +DECLSPEC void sha384_hmac_update_global_utf16le (sha384_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) +{ + sha384_update_global_utf16le (&ctx->ipad, w, len); +} + +DECLSPEC void sha384_hmac_update_global_utf16le_swap (sha384_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) +{ + sha384_update_global_utf16le_swap (&ctx->ipad, w, len); +} + DECLSPEC void sha384_hmac_final (sha384_hmac_ctx_t *ctx) { sha384_final (&ctx->ipad); diff --git a/OpenCL/inc_hash_sha384.h b/OpenCL/inc_hash_sha384.h index e3705c206..92266b24a 100644 --- a/OpenCL/inc_hash_sha384.h +++ b/OpenCL/inc_hash_sha384.h @@ -123,8 +123,12 @@ DECLSPEC void sha384_hmac_init_global_swap (sha384_hmac_ctx_t *ctx, GLOBAL_AS co DECLSPEC void sha384_hmac_update_128 (sha384_hmac_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *w4, u32 *w5, u32 *w6, u32 *w7, const int len); DECLSPEC void sha384_hmac_update (sha384_hmac_ctx_t *ctx, const u32 *w, const int len); DECLSPEC void sha384_hmac_update_swap (sha384_hmac_ctx_t *ctx, const u32 *w, const int len); +DECLSPEC void sha384_hmac_update_utf16le (sha384_hmac_ctx_t *ctx, const u32 *w, const int len); +DECLSPEC void sha384_hmac_update_utf16le_swap (sha384_hmac_ctx_t *ctx, const u32 *w, const int len); DECLSPEC void sha384_hmac_update_global (sha384_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len); DECLSPEC void sha384_hmac_update_global_swap (sha384_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len); +DECLSPEC void sha384_hmac_update_global_utf16le (sha384_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len); +DECLSPEC void sha384_hmac_update_global_utf16le_swap (sha384_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len); DECLSPEC void sha384_hmac_final (sha384_hmac_ctx_t *ctx); DECLSPEC void sha384_transform_vector (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, const u32x *w4, const u32x *w5, const u32x *w6, const u32x *w7, u64x *digest); DECLSPEC void sha384_init_vector (sha384_ctx_vector_t *ctx); diff --git a/OpenCL/inc_hash_sha512.cl b/OpenCL/inc_hash_sha512.cl index a00780271..0afa8d452 100644 --- a/OpenCL/inc_hash_sha512.cl +++ b/OpenCL/inc_hash_sha512.cl @@ -622,20 +622,234 @@ DECLSPEC void sha512_update_swap (sha512_ctx_t *ctx, const u32 *w, const int len DECLSPEC void sha512_update_utf16le (sha512_ctx_t *ctx, const u32 *w, const int len) { - u32 w_utf16_buf[256] = { 0 }; + if (test_any_8th_bit (w, len) == 1) + { + u32 w_utf16_buf[256]; + + const int w_utf16_len = utf8_to_utf16le (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + + const int blkoff = (w_utf16_len / 64) * 16; + + u32 *w_ptr = w_utf16_buf + blkoff; + + truncate_block_16x4_le_S (w_ptr + 0, w_ptr + 4, w_ptr + 8, w_ptr + 12, w_utf16_len & 63); + + sha512_update (ctx, w_utf16_buf, w_utf16_len); + + return; + } + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + u32 w4[4]; + u32 w5[4]; + u32 w6[4]; + u32 w7[4]; + + int pos1; + int pos4; + + for (pos1 = 0, pos4 = 0; pos1 < len - 64; pos1 += 64, pos4 += 16) + { + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + w2[0] = w[pos4 + 8]; + w2[1] = w[pos4 + 9]; + w2[2] = w[pos4 + 10]; + w2[3] = w[pos4 + 11]; + w3[0] = w[pos4 + 12]; + w3[1] = w[pos4 + 13]; + w3[2] = w[pos4 + 14]; + w3[3] = w[pos4 + 15]; - const int w_utf16_len = utf8_to_utf16le (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + make_utf16le_S (w3, w6, w7); + make_utf16le_S (w2, w4, w5); + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); - sha512_update (ctx, w_utf16_buf, w_utf16_len); + sha512_update_128 (ctx, w0, w1, w2, w3, w4, w5, w6, w7, 64 * 2); + } + + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + w2[0] = w[pos4 + 8]; + w2[1] = w[pos4 + 9]; + w2[2] = w[pos4 + 10]; + w2[3] = w[pos4 + 11]; + w3[0] = w[pos4 + 12]; + w3[1] = w[pos4 + 13]; + w3[2] = w[pos4 + 14]; + w3[3] = w[pos4 + 15]; + + make_utf16le_S (w3, w6, w7); + make_utf16le_S (w2, w4, w5); + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); + + sha512_update_128 (ctx, w0, w1, w2, w3, w4, w5, w6, w7, (len - pos1) * 2); } DECLSPEC void sha512_update_utf16le_swap (sha512_ctx_t *ctx, const u32 *w, const int len) { - u32 w_utf16_buf[256] = { 0 }; + if (test_any_8th_bit (w, len) == 1) + { + u32 w_utf16_buf[256]; + + const int w_utf16_len = utf8_to_utf16le (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + + const int blkoff = (w_utf16_len / 64) * 16; - const int w_utf16_len = utf8_to_utf16le (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + u32 *w_ptr = w_utf16_buf + blkoff; - sha512_update_swap (ctx, w_utf16_buf, w_utf16_len); + truncate_block_16x4_le_S (w_ptr + 0, w_ptr + 4, w_ptr + 8, w_ptr + 12, w_utf16_len & 63); + + sha512_update_swap (ctx, w_utf16_buf, w_utf16_len); + + return; + } + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + u32 w4[4]; + u32 w5[4]; + u32 w6[4]; + u32 w7[4]; + + int pos1; + int pos4; + + for (pos1 = 0, pos4 = 0; pos1 < len - 64; pos1 += 64, pos4 += 16) + { + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + w2[0] = w[pos4 + 8]; + w2[1] = w[pos4 + 9]; + w2[2] = w[pos4 + 10]; + w2[3] = w[pos4 + 11]; + w3[0] = w[pos4 + 12]; + w3[1] = w[pos4 + 13]; + w3[2] = w[pos4 + 14]; + w3[3] = w[pos4 + 15]; + + make_utf16le_S (w3, w6, w7); + make_utf16le_S (w2, w4, w5); + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); + + w0[0] = hc_swap32_S (w0[0]); + w0[1] = hc_swap32_S (w0[1]); + w0[2] = hc_swap32_S (w0[2]); + w0[3] = hc_swap32_S (w0[3]); + w1[0] = hc_swap32_S (w1[0]); + w1[1] = hc_swap32_S (w1[1]); + w1[2] = hc_swap32_S (w1[2]); + w1[3] = hc_swap32_S (w1[3]); + w2[0] = hc_swap32_S (w2[0]); + w2[1] = hc_swap32_S (w2[1]); + w2[2] = hc_swap32_S (w2[2]); + w2[3] = hc_swap32_S (w2[3]); + w3[0] = hc_swap32_S (w3[0]); + w3[1] = hc_swap32_S (w3[1]); + w3[2] = hc_swap32_S (w3[2]); + w3[3] = hc_swap32_S (w3[3]); + w4[0] = hc_swap32_S (w4[0]); + w4[1] = hc_swap32_S (w4[1]); + w4[2] = hc_swap32_S (w4[2]); + w4[3] = hc_swap32_S (w4[3]); + w5[0] = hc_swap32_S (w5[0]); + w5[1] = hc_swap32_S (w5[1]); + w5[2] = hc_swap32_S (w5[2]); + w5[3] = hc_swap32_S (w5[3]); + w6[0] = hc_swap32_S (w6[0]); + w6[1] = hc_swap32_S (w6[1]); + w6[2] = hc_swap32_S (w6[2]); + w6[3] = hc_swap32_S (w6[3]); + w7[0] = hc_swap32_S (w7[0]); + w7[1] = hc_swap32_S (w7[1]); + w7[2] = hc_swap32_S (w7[2]); + w7[3] = hc_swap32_S (w7[3]); + + sha512_update_128 (ctx, w0, w1, w2, w3, w4, w5, w6, w7, 64 * 2); + } + + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + w2[0] = w[pos4 + 8]; + w2[1] = w[pos4 + 9]; + w2[2] = w[pos4 + 10]; + w2[3] = w[pos4 + 11]; + w3[0] = w[pos4 + 12]; + w3[1] = w[pos4 + 13]; + w3[2] = w[pos4 + 14]; + w3[3] = w[pos4 + 15]; + + make_utf16le_S (w3, w6, w7); + make_utf16le_S (w2, w4, w5); + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); + + w0[0] = hc_swap32_S (w0[0]); + w0[1] = hc_swap32_S (w0[1]); + w0[2] = hc_swap32_S (w0[2]); + w0[3] = hc_swap32_S (w0[3]); + w1[0] = hc_swap32_S (w1[0]); + w1[1] = hc_swap32_S (w1[1]); + w1[2] = hc_swap32_S (w1[2]); + w1[3] = hc_swap32_S (w1[3]); + w2[0] = hc_swap32_S (w2[0]); + w2[1] = hc_swap32_S (w2[1]); + w2[2] = hc_swap32_S (w2[2]); + w2[3] = hc_swap32_S (w2[3]); + w3[0] = hc_swap32_S (w3[0]); + w3[1] = hc_swap32_S (w3[1]); + w3[2] = hc_swap32_S (w3[2]); + w3[3] = hc_swap32_S (w3[3]); + w4[0] = hc_swap32_S (w4[0]); + w4[1] = hc_swap32_S (w4[1]); + w4[2] = hc_swap32_S (w4[2]); + w4[3] = hc_swap32_S (w4[3]); + w5[0] = hc_swap32_S (w5[0]); + w5[1] = hc_swap32_S (w5[1]); + w5[2] = hc_swap32_S (w5[2]); + w5[3] = hc_swap32_S (w5[3]); + w6[0] = hc_swap32_S (w6[0]); + w6[1] = hc_swap32_S (w6[1]); + w6[2] = hc_swap32_S (w6[2]); + w6[3] = hc_swap32_S (w6[3]); + w7[0] = hc_swap32_S (w7[0]); + w7[1] = hc_swap32_S (w7[1]); + w7[2] = hc_swap32_S (w7[2]); + w7[3] = hc_swap32_S (w7[3]); + + sha512_update_128 (ctx, w0, w1, w2, w3, w4, w5, w6, w7, (len - pos1) * 2); } DECLSPEC void sha512_update_global (sha512_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) @@ -882,20 +1096,234 @@ DECLSPEC void sha512_update_global_swap (sha512_ctx_t *ctx, GLOBAL_AS const u32 DECLSPEC void sha512_update_global_utf16le (sha512_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { - u32 w_utf16_buf[256] = { 0 }; + if (test_any_8th_bit (w, len) == 1) + { + u32 w_utf16_buf[256]; + + const int w_utf16_len = utf8_to_utf16le_global (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); - const int w_utf16_len = utf8_to_utf16le_global (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + const int blkoff = (w_utf16_len / 64) * 16; - sha512_update (ctx, w_utf16_buf, w_utf16_len); + u32 *w_ptr = w_utf16_buf + blkoff; + + truncate_block_16x4_le_S (w_ptr + 0, w_ptr + 4, w_ptr + 8, w_ptr + 12, w_utf16_len & 63); + + sha512_update (ctx, w_utf16_buf, w_utf16_len); + + return; + } + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + u32 w4[4]; + u32 w5[4]; + u32 w6[4]; + u32 w7[4]; + + int pos1; + int pos4; + + for (pos1 = 0, pos4 = 0; pos1 < len - 64; pos1 += 64, pos4 += 16) + { + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + w2[0] = w[pos4 + 8]; + w2[1] = w[pos4 + 9]; + w2[2] = w[pos4 + 10]; + w2[3] = w[pos4 + 11]; + w3[0] = w[pos4 + 12]; + w3[1] = w[pos4 + 13]; + w3[2] = w[pos4 + 14]; + w3[3] = w[pos4 + 15]; + + make_utf16le_S (w3, w6, w7); + make_utf16le_S (w2, w4, w5); + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); + + sha512_update_128 (ctx, w0, w1, w2, w3, w4, w5, w6, w7, 64 * 2); + } + + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + w2[0] = w[pos4 + 8]; + w2[1] = w[pos4 + 9]; + w2[2] = w[pos4 + 10]; + w2[3] = w[pos4 + 11]; + w3[0] = w[pos4 + 12]; + w3[1] = w[pos4 + 13]; + w3[2] = w[pos4 + 14]; + w3[3] = w[pos4 + 15]; + + make_utf16le_S (w3, w6, w7); + make_utf16le_S (w2, w4, w5); + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); + + sha512_update_128 (ctx, w0, w1, w2, w3, w4, w5, w6, w7, (len - pos1) * 2); } DECLSPEC void sha512_update_global_utf16le_swap (sha512_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { - u32 w_utf16_buf[256] = { 0 }; + if (test_any_8th_bit (w, len) == 1) + { + u32 w_utf16_buf[256]; + + const int w_utf16_len = utf8_to_utf16le_global (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + + const int blkoff = (w_utf16_len / 64) * 16; + + u32 *w_ptr = w_utf16_buf + blkoff; - const int w_utf16_len = utf8_to_utf16le_global (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + truncate_block_16x4_le_S (w_ptr + 0, w_ptr + 4, w_ptr + 8, w_ptr + 12, w_utf16_len & 63); - sha512_update_swap (ctx, w_utf16_buf, w_utf16_len); + sha512_update_swap (ctx, w_utf16_buf, w_utf16_len); + + return; + } + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + u32 w4[4]; + u32 w5[4]; + u32 w6[4]; + u32 w7[4]; + + int pos1; + int pos4; + + for (pos1 = 0, pos4 = 0; pos1 < len - 64; pos1 += 64, pos4 += 16) + { + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + w2[0] = w[pos4 + 8]; + w2[1] = w[pos4 + 9]; + w2[2] = w[pos4 + 10]; + w2[3] = w[pos4 + 11]; + w3[0] = w[pos4 + 12]; + w3[1] = w[pos4 + 13]; + w3[2] = w[pos4 + 14]; + w3[3] = w[pos4 + 15]; + + make_utf16le_S (w3, w6, w7); + make_utf16le_S (w2, w4, w5); + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); + + w0[0] = hc_swap32_S (w0[0]); + w0[1] = hc_swap32_S (w0[1]); + w0[2] = hc_swap32_S (w0[2]); + w0[3] = hc_swap32_S (w0[3]); + w1[0] = hc_swap32_S (w1[0]); + w1[1] = hc_swap32_S (w1[1]); + w1[2] = hc_swap32_S (w1[2]); + w1[3] = hc_swap32_S (w1[3]); + w2[0] = hc_swap32_S (w2[0]); + w2[1] = hc_swap32_S (w2[1]); + w2[2] = hc_swap32_S (w2[2]); + w2[3] = hc_swap32_S (w2[3]); + w3[0] = hc_swap32_S (w3[0]); + w3[1] = hc_swap32_S (w3[1]); + w3[2] = hc_swap32_S (w3[2]); + w3[3] = hc_swap32_S (w3[3]); + w4[0] = hc_swap32_S (w4[0]); + w4[1] = hc_swap32_S (w4[1]); + w4[2] = hc_swap32_S (w4[2]); + w4[3] = hc_swap32_S (w4[3]); + w5[0] = hc_swap32_S (w5[0]); + w5[1] = hc_swap32_S (w5[1]); + w5[2] = hc_swap32_S (w5[2]); + w5[3] = hc_swap32_S (w5[3]); + w6[0] = hc_swap32_S (w6[0]); + w6[1] = hc_swap32_S (w6[1]); + w6[2] = hc_swap32_S (w6[2]); + w6[3] = hc_swap32_S (w6[3]); + w7[0] = hc_swap32_S (w7[0]); + w7[1] = hc_swap32_S (w7[1]); + w7[2] = hc_swap32_S (w7[2]); + w7[3] = hc_swap32_S (w7[3]); + + sha512_update_128 (ctx, w0, w1, w2, w3, w4, w5, w6, w7, 64 * 2); + } + + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + w2[0] = w[pos4 + 8]; + w2[1] = w[pos4 + 9]; + w2[2] = w[pos4 + 10]; + w2[3] = w[pos4 + 11]; + w3[0] = w[pos4 + 12]; + w3[1] = w[pos4 + 13]; + w3[2] = w[pos4 + 14]; + w3[3] = w[pos4 + 15]; + + make_utf16le_S (w3, w6, w7); + make_utf16le_S (w2, w4, w5); + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); + + w0[0] = hc_swap32_S (w0[0]); + w0[1] = hc_swap32_S (w0[1]); + w0[2] = hc_swap32_S (w0[2]); + w0[3] = hc_swap32_S (w0[3]); + w1[0] = hc_swap32_S (w1[0]); + w1[1] = hc_swap32_S (w1[1]); + w1[2] = hc_swap32_S (w1[2]); + w1[3] = hc_swap32_S (w1[3]); + w2[0] = hc_swap32_S (w2[0]); + w2[1] = hc_swap32_S (w2[1]); + w2[2] = hc_swap32_S (w2[2]); + w2[3] = hc_swap32_S (w2[3]); + w3[0] = hc_swap32_S (w3[0]); + w3[1] = hc_swap32_S (w3[1]); + w3[2] = hc_swap32_S (w3[2]); + w3[3] = hc_swap32_S (w3[3]); + w4[0] = hc_swap32_S (w4[0]); + w4[1] = hc_swap32_S (w4[1]); + w4[2] = hc_swap32_S (w4[2]); + w4[3] = hc_swap32_S (w4[3]); + w5[0] = hc_swap32_S (w5[0]); + w5[1] = hc_swap32_S (w5[1]); + w5[2] = hc_swap32_S (w5[2]); + w5[3] = hc_swap32_S (w5[3]); + w6[0] = hc_swap32_S (w6[0]); + w6[1] = hc_swap32_S (w6[1]); + w6[2] = hc_swap32_S (w6[2]); + w6[3] = hc_swap32_S (w6[3]); + w7[0] = hc_swap32_S (w7[0]); + w7[1] = hc_swap32_S (w7[1]); + w7[2] = hc_swap32_S (w7[2]); + w7[3] = hc_swap32_S (w7[3]); + + sha512_update_128 (ctx, w0, w1, w2, w3, w4, w5, w6, w7, (len - pos1) * 2); } DECLSPEC void sha512_final (sha512_ctx_t *ctx) @@ -1412,22 +1840,138 @@ DECLSPEC void sha512_hmac_init_global_swap (sha512_hmac_ctx_t *ctx, GLOBAL_AS co sha512_hmac_init_128 (ctx, w0, w1, w2, w3, w4, w5, w6, w7); } -DECLSPEC void sha512_hmac_init_global_ut16le (sha512_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) +DECLSPEC void sha512_hmac_init_global_utf16le_swap (sha512_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { - u32 w_utf16_buf[256] = { 0 }; + if (test_any_8th_bit (w, len) == 1) + { + u32 w_utf16_buf[256]; - const int w_utf16_len = utf8_to_utf16le_global (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + const int w_utf16_len = utf8_to_utf16le_global (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); - sha512_hmac_init (ctx, w_utf16_buf, w_utf16_len); -} + const int blkoff = (w_utf16_len / 64) * 16; -DECLSPEC void sha512_hmac_init_global_utf16le_swap (sha512_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) -{ - u32 w_utf16_buf[256] = { 0 }; + u32 *w_ptr = w_utf16_buf + blkoff; + + truncate_block_16x4_le_S (w_ptr + 0, w_ptr + 4, w_ptr + 8, w_ptr + 12, w_utf16_len & 63); + + sha512_hmac_init_swap (ctx, w_utf16_buf, w_utf16_len); + + return; + } + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + u32 w4[4]; + u32 w5[4]; + u32 w6[4]; + u32 w7[4]; - const int w_utf16_len = utf8_to_utf16le_global (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + const int len_new = len * 2; - sha512_hmac_init_swap (ctx, w_utf16_buf, w_utf16_len); + if (len_new > 128) + { + sha512_ctx_t tmp; + + sha512_init (&tmp); + + sha512_update_global_utf16le_swap (&tmp, w, len); + + sha512_final (&tmp); + + w0[0] = h32_from_64_S (tmp.h[0]); + w0[1] = l32_from_64_S (tmp.h[0]); + w0[2] = h32_from_64_S (tmp.h[1]); + w0[3] = l32_from_64_S (tmp.h[1]); + w1[0] = h32_from_64_S (tmp.h[2]); + w1[1] = l32_from_64_S (tmp.h[2]); + w1[2] = h32_from_64_S (tmp.h[3]); + w1[3] = l32_from_64_S (tmp.h[3]); + w2[0] = h32_from_64_S (tmp.h[4]); + w2[1] = l32_from_64_S (tmp.h[4]); + w2[2] = h32_from_64_S (tmp.h[5]); + w2[3] = l32_from_64_S (tmp.h[5]); + w3[0] = h32_from_64_S (tmp.h[6]); + w3[1] = l32_from_64_S (tmp.h[6]); + w3[2] = h32_from_64_S (tmp.h[7]); + w3[3] = l32_from_64_S (tmp.h[7]); + w4[0] = 0; + w4[1] = 0; + w4[2] = 0; + w4[3] = 0; + w5[0] = 0; + w5[1] = 0; + w5[2] = 0; + w5[3] = 0; + w6[0] = 0; + w6[1] = 0; + w6[2] = 0; + w6[3] = 0; + w7[0] = 0; + w7[1] = 0; + w7[2] = 0; + w7[3] = 0; + } + else + { + w0[0] = w[ 0]; + w0[1] = w[ 1]; + w0[2] = w[ 2]; + w0[3] = w[ 3]; + w1[0] = w[ 4]; + w1[1] = w[ 5]; + w1[2] = w[ 6]; + w1[3] = w[ 7]; + w2[0] = w[ 8]; + w2[1] = w[ 9]; + w2[2] = w[10]; + w2[3] = w[11]; + w3[0] = w[12]; + w3[1] = w[13]; + w3[2] = w[14]; + w3[3] = w[15]; + + make_utf16le_S (w3, w6, w7); + make_utf16le_S (w2, w4, w5); + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); + + w0[0] = hc_swap32_S (w0[0]); + w0[1] = hc_swap32_S (w0[1]); + w0[2] = hc_swap32_S (w0[2]); + w0[3] = hc_swap32_S (w0[3]); + w1[0] = hc_swap32_S (w1[0]); + w1[1] = hc_swap32_S (w1[1]); + w1[2] = hc_swap32_S (w1[2]); + w1[3] = hc_swap32_S (w1[3]); + w2[0] = hc_swap32_S (w2[0]); + w2[1] = hc_swap32_S (w2[1]); + w2[2] = hc_swap32_S (w2[2]); + w2[3] = hc_swap32_S (w2[3]); + w3[0] = hc_swap32_S (w3[0]); + w3[1] = hc_swap32_S (w3[1]); + w3[2] = hc_swap32_S (w3[2]); + w3[3] = hc_swap32_S (w3[3]); + w4[0] = hc_swap32_S (w4[0]); + w4[1] = hc_swap32_S (w4[1]); + w4[2] = hc_swap32_S (w4[2]); + w4[3] = hc_swap32_S (w4[3]); + w5[0] = hc_swap32_S (w5[0]); + w5[1] = hc_swap32_S (w5[1]); + w5[2] = hc_swap32_S (w5[2]); + w5[3] = hc_swap32_S (w5[3]); + w6[0] = hc_swap32_S (w6[0]); + w6[1] = hc_swap32_S (w6[1]); + w6[2] = hc_swap32_S (w6[2]); + w6[3] = hc_swap32_S (w6[3]); + w7[0] = hc_swap32_S (w7[0]); + w7[1] = hc_swap32_S (w7[1]); + w7[2] = hc_swap32_S (w7[2]); + w7[3] = hc_swap32_S (w7[3]); + } + + sha512_hmac_init_128 (ctx, w0, w1, w2, w3, w4, w5, w6, w7); } DECLSPEC void sha512_hmac_update_128 (sha512_hmac_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *w4, u32 *w5, u32 *w6, u32 *w7, const int len) @@ -1445,6 +1989,16 @@ DECLSPEC void sha512_hmac_update_swap (sha512_hmac_ctx_t *ctx, const u32 *w, con sha512_update_swap (&ctx->ipad, w, len); } +DECLSPEC void sha512_hmac_update_utf16le (sha512_hmac_ctx_t *ctx, const u32 *w, const int len) +{ + sha512_update_utf16le (&ctx->ipad, w, len); +} + +DECLSPEC void sha512_hmac_update_utf16le_swap (sha512_hmac_ctx_t *ctx, const u32 *w, const int len) +{ + sha512_update_utf16le_swap (&ctx->ipad, w, len); +} + DECLSPEC void sha512_hmac_update_global (sha512_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { sha512_update_global (&ctx->ipad, w, len); @@ -1455,6 +2009,16 @@ DECLSPEC void sha512_hmac_update_global_swap (sha512_hmac_ctx_t *ctx, GLOBAL_AS sha512_update_global_swap (&ctx->ipad, w, len); } +DECLSPEC void sha512_hmac_update_global_utf16le (sha512_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) +{ + sha512_update_global_utf16le (&ctx->ipad, w, len); +} + +DECLSPEC void sha512_hmac_update_global_utf16le_swap (sha512_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) +{ + sha512_update_global_utf16le_swap (&ctx->ipad, w, len); +} + DECLSPEC void sha512_hmac_final (sha512_hmac_ctx_t *ctx) { sha512_final (&ctx->ipad); diff --git a/OpenCL/inc_hash_sha512.h b/OpenCL/inc_hash_sha512.h index 7009b7d4a..c66aa1fb9 100644 --- a/OpenCL/inc_hash_sha512.h +++ b/OpenCL/inc_hash_sha512.h @@ -120,13 +120,16 @@ DECLSPEC void sha512_hmac_init (sha512_hmac_ctx_t *ctx, const u32 *w, const int DECLSPEC void sha512_hmac_init_swap (sha512_hmac_ctx_t *ctx, const u32 *w, const int len); DECLSPEC void sha512_hmac_init_global (sha512_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len); DECLSPEC void sha512_hmac_init_global_swap (sha512_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len); -DECLSPEC void sha512_hmac_init_global_ut16le (sha512_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len); DECLSPEC void sha512_hmac_init_global_utf16le_swap (sha512_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len); DECLSPEC void sha512_hmac_update_128 (sha512_hmac_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *w4, u32 *w5, u32 *w6, u32 *w7, const int len); DECLSPEC void sha512_hmac_update (sha512_hmac_ctx_t *ctx, const u32 *w, const int len); DECLSPEC void sha512_hmac_update_swap (sha512_hmac_ctx_t *ctx, const u32 *w, const int len); +DECLSPEC void sha512_hmac_update_utf16le (sha512_hmac_ctx_t *ctx, const u32 *w, const int len); +DECLSPEC void sha512_hmac_update_utf16le_swap (sha512_hmac_ctx_t *ctx, const u32 *w, const int len); DECLSPEC void sha512_hmac_update_global (sha512_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len); DECLSPEC void sha512_hmac_update_global_swap (sha512_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len); +DECLSPEC void sha512_hmac_update_global_utf16le (sha512_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len); +DECLSPEC void sha512_hmac_update_global_utf16le_swap (sha512_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len); DECLSPEC void sha512_hmac_final (sha512_hmac_ctx_t *ctx); DECLSPEC void sha512_transform_vector (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, const u32x *w4, const u32x *w5, const u32x *w6, const u32x *w7, u64x *digest); DECLSPEC void sha512_init_vector (sha512_ctx_vector_t *ctx); diff --git a/OpenCL/inc_hash_whirlpool.cl b/OpenCL/inc_hash_whirlpool.cl index d2853e4f9..3e2181ed2 100644 --- a/OpenCL/inc_hash_whirlpool.cl +++ b/OpenCL/inc_hash_whirlpool.cl @@ -1018,20 +1018,154 @@ DECLSPEC void whirlpool_update_swap (whirlpool_ctx_t *ctx, const u32 *w, const i DECLSPEC void whirlpool_update_utf16le (whirlpool_ctx_t *ctx, const u32 *w, const int len) { - u32 w_utf16_buf[256] = { 0 }; + if (test_any_8th_bit (w, len) == 1) + { + u32 w_utf16_buf[256]; + + const int w_utf16_len = utf8_to_utf16le (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + + const int blkoff = (w_utf16_len / 64) * 16; + + u32 *w_ptr = w_utf16_buf + blkoff; + + truncate_block_16x4_le_S (w_ptr + 0, w_ptr + 4, w_ptr + 8, w_ptr + 12, w_utf16_len & 63); + + whirlpool_update (ctx, w_utf16_buf, w_utf16_len); + + return; + } + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + int pos1; + int pos4; - const int w_utf16_len = utf8_to_utf16le (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + for (pos1 = 0, pos4 = 0; pos1 < len - 32; pos1 += 32, pos4 += 8) + { + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); - whirlpool_update (ctx, w_utf16_buf, w_utf16_len); + whirlpool_update_64 (ctx, w0, w1, w2, w3, 32 * 2); + } + + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); + + whirlpool_update_64 (ctx, w0, w1, w2, w3, (len - pos1) * 2); } DECLSPEC void whirlpool_update_utf16le_swap (whirlpool_ctx_t *ctx, const u32 *w, const int len) { - u32 w_utf16_buf[256] = { 0 }; + if (test_any_8th_bit (w, len) == 1) + { + u32 w_utf16_buf[256]; + + const int w_utf16_len = utf8_to_utf16le (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + + const int blkoff = (w_utf16_len / 64) * 16; + + u32 *w_ptr = w_utf16_buf + blkoff; + + truncate_block_16x4_le_S (w_ptr + 0, w_ptr + 4, w_ptr + 8, w_ptr + 12, w_utf16_len & 63); + + whirlpool_update_swap (ctx, w_utf16_buf, w_utf16_len); + + return; + } + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + int pos1; + int pos4; + + for (pos1 = 0, pos4 = 0; pos1 < len - 32; pos1 += 32, pos4 += 8) + { + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); + + w0[0] = hc_swap32_S (w0[0]); + w0[1] = hc_swap32_S (w0[1]); + w0[2] = hc_swap32_S (w0[2]); + w0[3] = hc_swap32_S (w0[3]); + w1[0] = hc_swap32_S (w1[0]); + w1[1] = hc_swap32_S (w1[1]); + w1[2] = hc_swap32_S (w1[2]); + w1[3] = hc_swap32_S (w1[3]); + w2[0] = hc_swap32_S (w2[0]); + w2[1] = hc_swap32_S (w2[1]); + w2[2] = hc_swap32_S (w2[2]); + w2[3] = hc_swap32_S (w2[3]); + w3[0] = hc_swap32_S (w3[0]); + w3[1] = hc_swap32_S (w3[1]); + w3[2] = hc_swap32_S (w3[2]); + w3[3] = hc_swap32_S (w3[3]); - const int w_utf16_len = utf8_to_utf16le (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + whirlpool_update_64 (ctx, w0, w1, w2, w3, 32 * 2); + } - whirlpool_update_swap (ctx, w_utf16_buf, w_utf16_len); + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); + + w0[0] = hc_swap32_S (w0[0]); + w0[1] = hc_swap32_S (w0[1]); + w0[2] = hc_swap32_S (w0[2]); + w0[3] = hc_swap32_S (w0[3]); + w1[0] = hc_swap32_S (w1[0]); + w1[1] = hc_swap32_S (w1[1]); + w1[2] = hc_swap32_S (w1[2]); + w1[3] = hc_swap32_S (w1[3]); + w2[0] = hc_swap32_S (w2[0]); + w2[1] = hc_swap32_S (w2[1]); + w2[2] = hc_swap32_S (w2[2]); + w2[3] = hc_swap32_S (w2[3]); + w3[0] = hc_swap32_S (w3[0]); + w3[1] = hc_swap32_S (w3[1]); + w3[2] = hc_swap32_S (w3[2]); + w3[3] = hc_swap32_S (w3[3]); + + whirlpool_update_64 (ctx, w0, w1, w2, w3, (len - pos1) * 2); } DECLSPEC void whirlpool_update_global (whirlpool_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) @@ -1174,20 +1308,154 @@ DECLSPEC void whirlpool_update_global_swap (whirlpool_ctx_t *ctx, GLOBAL_AS cons DECLSPEC void whirlpool_update_global_utf16le (whirlpool_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { - u32 w_utf16_buf[256] = { 0 }; + if (test_any_8th_bit (w, len) == 1) + { + u32 w_utf16_buf[256]; + + const int w_utf16_len = utf8_to_utf16le_global (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + + const int blkoff = (w_utf16_len / 64) * 16; + + u32 *w_ptr = w_utf16_buf + blkoff; + + truncate_block_16x4_le_S (w_ptr + 0, w_ptr + 4, w_ptr + 8, w_ptr + 12, w_utf16_len & 63); + + whirlpool_update (ctx, w_utf16_buf, w_utf16_len); - const int w_utf16_len = utf8_to_utf16le_global (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + return; + } + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + int pos1; + int pos4; + + for (pos1 = 0, pos4 = 0; pos1 < len - 32; pos1 += 32, pos4 += 8) + { + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); + + whirlpool_update_64 (ctx, w0, w1, w2, w3, 32 * 2); + } - whirlpool_update (ctx, w_utf16_buf, w_utf16_len); + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); + + whirlpool_update_64 (ctx, w0, w1, w2, w3, (len - pos1) * 2); } DECLSPEC void whirlpool_update_global_utf16le_swap (whirlpool_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { - u32 w_utf16_buf[256] = { 0 }; + if (test_any_8th_bit (w, len) == 1) + { + u32 w_utf16_buf[256]; + + const int w_utf16_len = utf8_to_utf16le_global (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + + const int blkoff = (w_utf16_len / 64) * 16; + + u32 *w_ptr = w_utf16_buf + blkoff; + + truncate_block_16x4_le_S (w_ptr + 0, w_ptr + 4, w_ptr + 8, w_ptr + 12, w_utf16_len & 63); + + whirlpool_update_swap (ctx, w_utf16_buf, w_utf16_len); + + return; + } + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + int pos1; + int pos4; + + for (pos1 = 0, pos4 = 0; pos1 < len - 32; pos1 += 32, pos4 += 8) + { + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); + + w0[0] = hc_swap32_S (w0[0]); + w0[1] = hc_swap32_S (w0[1]); + w0[2] = hc_swap32_S (w0[2]); + w0[3] = hc_swap32_S (w0[3]); + w1[0] = hc_swap32_S (w1[0]); + w1[1] = hc_swap32_S (w1[1]); + w1[2] = hc_swap32_S (w1[2]); + w1[3] = hc_swap32_S (w1[3]); + w2[0] = hc_swap32_S (w2[0]); + w2[1] = hc_swap32_S (w2[1]); + w2[2] = hc_swap32_S (w2[2]); + w2[3] = hc_swap32_S (w2[3]); + w3[0] = hc_swap32_S (w3[0]); + w3[1] = hc_swap32_S (w3[1]); + w3[2] = hc_swap32_S (w3[2]); + w3[3] = hc_swap32_S (w3[3]); + + whirlpool_update_64 (ctx, w0, w1, w2, w3, 32 * 2); + } + + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; - const int w_utf16_len = utf8_to_utf16le_global (w, len, 256, w_utf16_buf, sizeof (w_utf16_buf)); + make_utf16le_S (w1, w2, w3); + make_utf16le_S (w0, w0, w1); - whirlpool_update_swap (ctx, w_utf16_buf, w_utf16_len); + w0[0] = hc_swap32_S (w0[0]); + w0[1] = hc_swap32_S (w0[1]); + w0[2] = hc_swap32_S (w0[2]); + w0[3] = hc_swap32_S (w0[3]); + w1[0] = hc_swap32_S (w1[0]); + w1[1] = hc_swap32_S (w1[1]); + w1[2] = hc_swap32_S (w1[2]); + w1[3] = hc_swap32_S (w1[3]); + w2[0] = hc_swap32_S (w2[0]); + w2[1] = hc_swap32_S (w2[1]); + w2[2] = hc_swap32_S (w2[2]); + w2[3] = hc_swap32_S (w2[3]); + w3[0] = hc_swap32_S (w3[0]); + w3[1] = hc_swap32_S (w3[1]); + w3[2] = hc_swap32_S (w3[2]); + w3[3] = hc_swap32_S (w3[3]); + + whirlpool_update_64 (ctx, w0, w1, w2, w3, (len - pos1) * 2); } DECLSPEC void whirlpool_final (whirlpool_ctx_t *ctx) @@ -1523,6 +1791,16 @@ DECLSPEC void whirlpool_hmac_update_swap (whirlpool_hmac_ctx_t *ctx, const u32 * whirlpool_update_swap (&ctx->ipad, w, len); } +DECLSPEC void whirlpool_hmac_update_utf16le (whirlpool_hmac_ctx_t *ctx, const u32 *w, const int len) +{ + whirlpool_update_utf16le (&ctx->ipad, w, len); +} + +DECLSPEC void whirlpool_hmac_update_utf16le_swap (whirlpool_hmac_ctx_t *ctx, const u32 *w, const int len) +{ + whirlpool_update_utf16le_swap (&ctx->ipad, w, len); +} + DECLSPEC void whirlpool_hmac_update_global (whirlpool_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { whirlpool_update_global (&ctx->ipad, w, len); @@ -1533,6 +1811,16 @@ DECLSPEC void whirlpool_hmac_update_global_swap (whirlpool_hmac_ctx_t *ctx, GLOB whirlpool_update_global_swap (&ctx->ipad, w, len); } +DECLSPEC void whirlpool_hmac_update_global_utf16le (whirlpool_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) +{ + whirlpool_update_global_utf16le (&ctx->ipad, w, len); +} + +DECLSPEC void whirlpool_hmac_update_global_utf16le_swap (whirlpool_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) +{ + whirlpool_update_global_utf16le_swap (&ctx->ipad, w, len); +} + DECLSPEC void whirlpool_hmac_final (whirlpool_hmac_ctx_t *ctx) { whirlpool_final (&ctx->ipad); diff --git a/OpenCL/inc_hash_whirlpool.h b/OpenCL/inc_hash_whirlpool.h index b7600feca..e13ec9960 100644 --- a/OpenCL/inc_hash_whirlpool.h +++ b/OpenCL/inc_hash_whirlpool.h @@ -104,8 +104,12 @@ DECLSPEC void whirlpool_hmac_init_global_swap (whirlpool_hmac_ctx_t *ctx, GLOBAL DECLSPEC void whirlpool_hmac_update_64 (whirlpool_hmac_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int len); DECLSPEC void whirlpool_hmac_update (whirlpool_hmac_ctx_t *ctx, const u32 *w, const int len); DECLSPEC void whirlpool_hmac_update_swap (whirlpool_hmac_ctx_t *ctx, const u32 *w, const int len); +DECLSPEC void whirlpool_hmac_update_utf16le (whirlpool_hmac_ctx_t *ctx, const u32 *w, const int len); +DECLSPEC void whirlpool_hmac_update_utf16le_swap (whirlpool_hmac_ctx_t *ctx, const u32 *w, const int len); DECLSPEC void whirlpool_hmac_update_global (whirlpool_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len); DECLSPEC void whirlpool_hmac_update_global_swap (whirlpool_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len); +DECLSPEC void whirlpool_hmac_update_global_utf16le (whirlpool_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len); +DECLSPEC void whirlpool_hmac_update_global_utf16le_swap (whirlpool_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len); DECLSPEC void whirlpool_hmac_final (whirlpool_hmac_ctx_t *ctx); DECLSPEC void whirlpool_transform_vector (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u32x *digest, SHM_TYPE u64 *s_MT0, SHM_TYPE u64 *s_MT1, SHM_TYPE u64 *s_MT2, SHM_TYPE u64 *s_MT3, SHM_TYPE u64 *s_MT4, SHM_TYPE u64 *s_MT5, SHM_TYPE u64 *s_MT6, SHM_TYPE u64 *s_MT7); DECLSPEC void whirlpool_init_vector (whirlpool_ctx_vector_t *ctx, SHM_TYPE u64 *s_MT0, SHM_TYPE u64 *s_MT1, SHM_TYPE u64 *s_MT2, SHM_TYPE u64 *s_MT3, SHM_TYPE u64 *s_MT4, SHM_TYPE u64 *s_MT5, SHM_TYPE u64 *s_MT6, SHM_TYPE u64 *s_MT7); diff --git a/OpenCL/m02100-pure.cl b/OpenCL/m02100-pure.cl index e9a3bd04e..9242bf6e6 100644 --- a/OpenCL/m02100-pure.cl +++ b/OpenCL/m02100-pure.cl @@ -28,11 +28,6 @@ typedef struct dcc2_tmp } dcc2_tmp_t; -DECLSPEC void sha1_hmac_update_global_utf16le_swap (sha1_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) -{ - sha1_update_global_utf16le_swap (&ctx->ipad, w, len); -} - DECLSPEC void hmac_sha1_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest) { digest[0] = ipad[0];