1
0
mirror of https://github.com/hashcat/hashcat.git synced 2025-01-08 23:01:14 +00:00

Update UTF8 to UTF16 conversion to match RFC 3629

This commit is contained in:
Jens Steube 2023-02-10 23:28:23 +01:00
parent 19b3ca87bc
commit 730b1cd5cc

View File

@ -2240,6 +2240,7 @@ DECLSPEC int hc_enc_next (PRIVATE_AS hc_enc_t *hc_enc, PRIVATE_AS const u32 *src
int extraBytesToRead = 0;
/* old version, doesn't work with https://github.com/hashcat/hashcat/issues/3592
if (c >= 0xfc)
{
extraBytesToRead = 5;
@ -2260,6 +2261,50 @@ DECLSPEC int hc_enc_next (PRIVATE_AS hc_enc_t *hc_enc, PRIVATE_AS const u32 *src
{
extraBytesToRead = 1;
}
*/
if (c <= 0x7f)
{
extraBytesToRead = 0;
}
else if ((c >= 0xc2) && (c <= 0xdf))
{
extraBytesToRead = 1;
}
else if (c == 0xe0)
{
extraBytesToRead = 2;
}
else if (c == 0xec)
{
extraBytesToRead = 2;
}
else if (c == 0xed)
{
extraBytesToRead = 2;
}
else if (c == 0xef)
{
extraBytesToRead = 2;
}
else if (c == 0xf0)
{
extraBytesToRead = 3;
}
else if (c == 0xf3)
{
extraBytesToRead = 3;
}
else if (c == 0xf4)
{
extraBytesToRead = 3;
}
else
{
hc_enc->pos = src_len;
return -1;
}
if ((src_pos + extraBytesToRead) >= src_sz)
{
@ -2283,9 +2328,11 @@ DECLSPEC int hc_enc_next (PRIVATE_AS hc_enc_t *hc_enc, PRIVATE_AS const u32 *src
switch (extraBytesToRead)
{
/* old version, doesn't work with https://github.com/hashcat/hashcat/issues/3592
/*
case 5:
ch += src_ptr[src_pos++]; ch <<= 6; /* remember, illegal UTF-8 */
ch += src_ptr[src_pos++]; ch <<= 6; /* remember, illegal UTF-8 */
ch += src_ptr[src_pos++]; ch <<= 6; // remember, illegal UTF-8
ch += src_ptr[src_pos++]; ch <<= 6; // remember, illegal UTF-8
ch += src_ptr[src_pos++]; ch <<= 6;
ch += src_ptr[src_pos++]; ch <<= 6;
ch += src_ptr[src_pos++]; ch <<= 6;
@ -2293,13 +2340,14 @@ DECLSPEC int hc_enc_next (PRIVATE_AS hc_enc_t *hc_enc, PRIVATE_AS const u32 *src
ch -= offsetsFromUTF8_5;
break;
case 4:
ch += src_ptr[src_pos++]; ch <<= 6; /* remember, illegal UTF-8 */
ch += src_ptr[src_pos++]; ch <<= 6; // remember, illegal UTF-8
ch += src_ptr[src_pos++]; ch <<= 6;
ch += src_ptr[src_pos++]; ch <<= 6;
ch += src_ptr[src_pos++]; ch <<= 6;
ch += src_ptr[src_pos++];
ch -= offsetsFromUTF8_4;
break;
*/
case 3:
ch += src_ptr[src_pos++]; ch <<= 6;
ch += src_ptr[src_pos++]; ch <<= 6;
@ -2386,6 +2434,7 @@ DECLSPEC int hc_enc_next_global (PRIVATE_AS hc_enc_t *hc_enc, GLOBAL_AS const u3
int extraBytesToRead = 0;
/* old version, doesn't work with https://github.com/hashcat/hashcat/issues/3592
if (c >= 0xfc)
{
extraBytesToRead = 5;
@ -2406,6 +2455,50 @@ DECLSPEC int hc_enc_next_global (PRIVATE_AS hc_enc_t *hc_enc, GLOBAL_AS const u3
{
extraBytesToRead = 1;
}
*/
if (c <= 0x7f)
{
extraBytesToRead = 0;
}
else if ((c >= 0xc2) && (c <= 0xdf))
{
extraBytesToRead = 1;
}
else if (c == 0xe0)
{
extraBytesToRead = 2;
}
else if (c == 0xec)
{
extraBytesToRead = 2;
}
else if (c == 0xed)
{
extraBytesToRead = 2;
}
else if (c == 0xef)
{
extraBytesToRead = 2;
}
else if (c == 0xf0)
{
extraBytesToRead = 3;
}
else if (c == 0xf3)
{
extraBytesToRead = 3;
}
else if (c == 0xf4)
{
extraBytesToRead = 3;
}
else
{
hc_enc->pos = src_len;
return -1;
}
if ((src_pos + extraBytesToRead) >= src_sz)
{
@ -2429,9 +2522,11 @@ DECLSPEC int hc_enc_next_global (PRIVATE_AS hc_enc_t *hc_enc, GLOBAL_AS const u3
switch (extraBytesToRead)
{
/* old version, doesn't work with https://github.com/hashcat/hashcat/issues/3592
/*
case 5:
ch += src_ptr[src_pos++]; ch <<= 6; /* remember, illegal UTF-8 */
ch += src_ptr[src_pos++]; ch <<= 6; /* remember, illegal UTF-8 */
ch += src_ptr[src_pos++]; ch <<= 6; // remember, illegal UTF-8
ch += src_ptr[src_pos++]; ch <<= 6; // remember, illegal UTF-8
ch += src_ptr[src_pos++]; ch <<= 6;
ch += src_ptr[src_pos++]; ch <<= 6;
ch += src_ptr[src_pos++]; ch <<= 6;
@ -2439,13 +2534,14 @@ DECLSPEC int hc_enc_next_global (PRIVATE_AS hc_enc_t *hc_enc, GLOBAL_AS const u3
ch -= offsetsFromUTF8_5;
break;
case 4:
ch += src_ptr[src_pos++]; ch <<= 6; /* remember, illegal UTF-8 */
ch += src_ptr[src_pos++]; ch <<= 6; // remember, illegal UTF-8
ch += src_ptr[src_pos++]; ch <<= 6;
ch += src_ptr[src_pos++]; ch <<= 6;
ch += src_ptr[src_pos++]; ch <<= 6;
ch += src_ptr[src_pos++];
ch -= offsetsFromUTF8_4;
break;
*/
case 3:
ch += src_ptr[src_pos++]; ch <<= 6;
ch += src_ptr[src_pos++]; ch <<= 6;