Change need_hexify so it optionally tests for printable UTF-8 as opposed to printable ASCII.
2025-08-03 12:28:07 +00:00 · 2016-11-05 13:27:08 +01:00 · 2016-11-05 13:27:08 +01:00 · 7d7ca48704
commit 7d7ca48704
parent fb8fb6b21d
5 changed files with 82 additions and 9 deletions
--- a/include/convert.h
+++ b/include/convert.h
@@ -8,7 +8,7 @@

 #include <ctype.h>

-bool need_hexify (const u8 *buf, const int len);
+bool need_hexify (const u8 *buf, const int len, bool accept_utf8);
 void exec_hexify (const u8 *buf, const int len, u8 *out);

 bool is_valid_hex_char (const u8 c);
--- a/src/convert.c
+++ b/src/convert.c
@@ -7,17 +7,84 @@
 #include "types.h"
 #include "convert.h"

-bool need_hexify (const u8 *buf, const int len)
+static bool printable_utf8 (const u8 *buf, const int len) // true iff buf[0..len) holds no ASCII control byte/DEL and every multi-byte sequence passes the UTF-8 checks below
+{
+  u8 a; // most recently checked continuation byte; after case 2 it is the sequence's second byte
+  int length; // total byte length of the current sequence, lead byte included
+  const u8 *buf_end = buf + len; // one past the last input byte
+  const u8 *srcptr; // walks backwards over the continuation bytes of the current sequence
+  const char trailingBytesUTF8[64] = { // number of continuation bytes, indexed by the low 6 bits of the lead byte
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // index 0x00-0x1f: one continuation byte
+    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 // 0x20-0x2f: two, 0x30-0x37: three, 0x38-0x3b: four, 0x3c-0x3f: five
+  };
+
+  while (buf < buf_end) { // one iteration per character (single byte or multi-byte sequence)
+
+    // Reject ASCII control bytes (0x00-0x1f) and DEL (0x7f). Everything below
+    // only validates UTF-8 structure; it does no further printability test.
+    if (*buf < 0x20 || *buf == 0x7f) return false;
+
+    if (*buf < 0x80) { // remaining single-byte (ASCII) character: accept and move on
+      buf++;
+      continue;
+    }
+
+    length = trailingBytesUTF8[*buf & 0x3f] + 1; // 2..6; bytes 0x80-0xbf index the table as well and are rejected further down
+    srcptr = buf + length; // one past the end of this sequence
+
+    if (srcptr > buf_end) return false; // sequence is cut off by the end of the input
+
+    switch (length) { // cases deliberately fall through so each continuation byte is checked, last one first
+    default: // lengths 5 and 6 are not accepted
+      return false;
+    case 4:
+      if ((a = (*--srcptr)) < 0x80 || a > 0xbf) return false; // fourth byte must be 0x80-0xbf; falls through
+    case 3:
+      if ((a = (*--srcptr)) < 0x80 || a > 0xbf) return false; // third byte must be 0x80-0xbf; falls through
+    case 2:
+      if ((a = (*--srcptr)) < 0x80 || a > 0xbf) return false; // second byte; stays in `a` for the lead-specific checks
+
+      switch (*buf) { // tighter second-byte ranges required after particular lead bytes
+      case 0xE0: if (a < 0xa0) return false; break; // overlong 3-byte encoding
+      case 0xED: if (a > 0x9f) return false; break; // UTF-16 surrogate range U+D800-U+DFFF
+      case 0xF0: if (a < 0x90) return false; break; // overlong 4-byte encoding
+      case 0xF4: if (a > 0x8f) return false; // code point above U+10FFFF
+      }
+
+    case 1: // only reached by fall-through, because length is always at least 2 here
+      if (*buf >= 0x80 && *buf < 0xc2) return false; // stray continuation byte, or overlong lead byte 0xc0/0xc1
+    }
+    if (*buf > 0xf4) // lead bytes 0xf5-0xf7 pass the switch with length 4 and are rejected here
+      return false;
+
+    buf += length; // advance to the next character
+  }
+  return true;
+}
+
+static bool printable_ascii (const u8 *buf, const int len) // true iff every one of the len bytes lies in 0x20..0x7e
 {
  for (int i = 0; i < len; i++)
  {
    const u8 c = buf[i];

-    if (c < 0x20) return true;
-    if (c > 0x7f) return true;
+    if (c < 0x20) return false; // control bytes 0x00-0x1f
+    if (c > 0x7e) return false; // DEL (0x7f) and every byte >= 0x80
  }

-  return false;
+  return true; // also the result for len <= 0, since the loop body never runs
+}
+
+bool need_hexify (const u8 *buf, const int len, bool accept_utf8) // true when buf fails the selected printability test
+{
+  if (accept_utf8) // caller allows UTF-8: only hexify what printable_utf8 rejects
+  {
+    return !printable_utf8 (buf, len);
+  }
+  else // otherwise anything outside what printable_ascii accepts needs hexifying
+  {
+    return !printable_ascii (buf, len);
+  }
 }

 void exec_hexify (const u8 *buf, const int len, u8 *out)
--- a/src/outfile.c
+++ b/src/outfile.c
@@ -367,7 +367,9 @@ int outfile_write (hashcat_ctx_t *hashcat_ctx, const char *out_buf, const unsign

  if (outfile_ctx->outfile_format & OUTFILE_FMT_PLAIN)
  {
-    if ((user_options->outfile_autohex == true) && (need_hexify (plain_ptr, plain_len) == true))
+    bool accept_utf8 = hashcat_ctx->hashconfig->hash_type != HASH_TYPE_LM;
+
+    if ((user_options->outfile_autohex == true) && (need_hexify (plain_ptr, plain_len, accept_utf8) == true))
    {
      tmp_buf[tmp_len++] = '$';
      tmp_buf[tmp_len++] = 'H';
--- a/src/potfile.c
+++ b/src/potfile.c
@@ -291,7 +291,9 @@ void potfile_write_append (hashcat_ctx_t *hashcat_ctx, const char *out_buf, u8 *

  if (1)
  {
-    if ((user_options->outfile_autohex == true) && (need_hexify (plain_ptr, plain_len) == true))
+    bool accept_utf8 = hashcat_ctx->hashconfig->hash_type != HASH_TYPE_LM;
+
+    if ((user_options->outfile_autohex == true) && (need_hexify (plain_ptr, plain_len, accept_utf8) == true))
    {
      tmp_buf[tmp_len++] = '$';
      tmp_buf[tmp_len++] = 'H';
--- a/src/status.c
+++ b/src/status.c
@@ -582,8 +582,10 @@ char *status_get_input_candidates_dev (const hashcat_ctx_t *hashcat_ctx, const i
  build_plain ((hashcat_ctx_t *) hashcat_ctx, device_param, &plain1, plain_buf1, &plain_len1);
  build_plain ((hashcat_ctx_t *) hashcat_ctx, device_param, &plain2, plain_buf2, &plain_len2);

-  const bool need_hex1 = need_hexify (plain_ptr1, plain_len1);
-  const bool need_hex2 = need_hexify (plain_ptr2, plain_len2);
+  bool accept_utf8 = hashcat_ctx->hashconfig->hash_type != HASH_TYPE_LM;
+
+  const bool need_hex1 = need_hexify (plain_ptr1, plain_len1, accept_utf8);
+  const bool need_hex2 = need_hexify (plain_ptr2, plain_len2, accept_utf8);

  if ((need_hex1 == true) || (need_hex2 == true))
  {