mirror of
https://github.com/hashcat/hashcat.git
synced 2025-01-23 14:10:57 +00:00
Rewritten printable_utf8() because the original one throws too many warnings with GCC 8
This commit is contained in:
parent
469fece141
commit
ce4858f7a3
118
src/convert.c
118
src/convert.c
@ -7,64 +7,96 @@
|
|||||||
#include "types.h"
|
#include "types.h"
|
||||||
#include "convert.h"
|
#include "convert.h"
|
||||||
|
|
||||||
#if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) >= 70101
|
/*
|
||||||
#define FALLTHROUGH __attribute__ ((fallthrough))
|
Source:
|
||||||
#else
|
http://www.unicode.org/versions/Unicode7.0.0/UnicodeStandard-7.0.pdf
|
||||||
#define FALLTHROUGH
|
page 124, 3.9 "Unicode Encoding Forms", "UTF-8"
|
||||||
#endif
|
|
||||||
|
Table 3-7. Well-Formed UTF-8 Byte Sequences
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
| Code Points | First Byte | Second Byte | Third Byte | Fourth Byte |
|
||||||
|
| U+0000..U+007F | 00..7F | | | |
|
||||||
|
| U+0080..U+07FF | C2..DF | 80..BF | | |
|
||||||
|
| U+0800..U+0FFF | E0 | A0..BF | 80..BF | |
|
||||||
|
| U+1000..U+CFFF | E1..EC | 80..BF | 80..BF | |
|
||||||
|
| U+D000..U+D7FF | ED | 80..9F | 80..BF | |
|
||||||
|
| U+E000..U+FFFF | EE..EF | 80..BF | 80..BF | |
|
||||||
|
| U+10000..U+3FFFF | F0 | 90..BF | 80..BF | 80..BF |
|
||||||
|
| U+40000..U+FFFFF | F1..F3 | 80..BF | 80..BF | 80..BF |
|
||||||
|
| U+100000..U+10FFFF | F4 | 80..8F | 80..BF | 80..BF |
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
static bool printable_utf8 (const u8 *buf, const size_t len)
|
static bool printable_utf8 (const u8 *buf, const size_t len)
|
||||||
{
|
{
|
||||||
u8 a;
|
// there's 9 different code point types for utf8 and ...
|
||||||
int length;
|
|
||||||
const u8 *buf_end = buf + len;
|
const int cp_types[256] =
|
||||||
const u8 *srcptr;
|
{
|
||||||
const char trailingBytesUTF8[64] = {
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||||
|
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||||
|
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||||
|
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||||
|
-1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||||
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||||
|
2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 5, 5,
|
||||||
|
6, 7, 7, 7, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
|
||||||
};
|
};
|
||||||
|
|
||||||
while (buf < buf_end) {
|
// ... they can be directly translated into a fixed length sequence of bytes
|
||||||
|
|
||||||
// This line rejects unprintables. The rest of the function
|
const size_t cp_lens[9] = { 1, 2, 3, 3, 3, 3, 4, 4, 4 };
|
||||||
// reliably rejects invalid UTF-8 sequences.
|
|
||||||
if (*buf < 0x20 || *buf == 0x7f) return false;
|
|
||||||
|
|
||||||
if (*buf < 0x80) {
|
for (size_t pos = 0; pos < len; pos++)
|
||||||
buf++;
|
{
|
||||||
continue;
|
const u8 c0 = buf[pos];
|
||||||
}
|
|
||||||
|
|
||||||
length = trailingBytesUTF8[*buf & 0x3f] + 1;
|
const int cp_type = cp_types[c0];
|
||||||
srcptr = buf + length;
|
|
||||||
|
|
||||||
if (srcptr > buf_end) return false;
|
if (cp_type == -1) return false;
|
||||||
|
|
||||||
switch (length) {
|
// make sure to not read outside the buffer
|
||||||
default:
|
|
||||||
return false;
|
|
||||||
case 4:
|
|
||||||
if ((a = (*--srcptr)) < 0x80 || a > 0xbf) return false; FALLTHROUGH;
|
|
||||||
case 3:
|
|
||||||
if ((a = (*--srcptr)) < 0x80 || a > 0xbf) return false; FALLTHROUGH;
|
|
||||||
case 2:
|
|
||||||
if ((a = (*--srcptr)) < 0x80 || a > 0xbf) return false;
|
|
||||||
|
|
||||||
switch (*buf) {
|
const size_t cp_len = cp_lens[cp_type];
|
||||||
case 0xE0: if (a < 0xa0) return false; break;
|
|
||||||
case 0xED: if (a > 0x9f) return false; break;
|
if ((pos + cp_len) > len) return false;
|
||||||
case 0xF0: if (a < 0x90) return false; break;
|
|
||||||
case 0xF4: if (a > 0x8f) return false; FALLTHROUGH;
|
// multibyte from here
|
||||||
|
|
||||||
|
if (cp_len >= 2)
|
||||||
|
{
|
||||||
|
pos++;
|
||||||
|
|
||||||
|
const u8 c1 = buf[pos];
|
||||||
|
|
||||||
|
switch (cp_type)
|
||||||
|
{
|
||||||
|
case 2: if ((c1 < 0xa0) || (c1 > 0xbf)) return false; break;
|
||||||
|
case 4: if ((c1 < 0x80) || (c1 > 0x9f)) return false; break;
|
||||||
|
case 6: if ((c1 < 0x90) || (c1 > 0xbf)) return false; break;
|
||||||
|
case 8: if ((c1 < 0x80) || (c1 > 0x8f)) return false; break;
|
||||||
|
default: if ((c1 < 0x80) || (c1 > 0xbf)) return false; break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case 1:
|
for (size_t j = 2; j < cp_len; j++)
|
||||||
if (*buf >= 0x80 && *buf < 0xc2) return false;
|
{
|
||||||
}
|
pos++;
|
||||||
if (*buf > 0xf4)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
buf += length;
|
const u8 cx = buf[pos];
|
||||||
|
|
||||||
|
if ((cx < 0x80) || (cx > 0xbf)) return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user