hashcat/deps/unrar/unicode.cpp

#include "rar.hpp"
#define MBFUNCTIONS

#if defined(_UNIX) && defined(MBFUNCTIONS)

// Helpers defined later in this file. They convert strings containing
// characters mapped to the private use Unicode area described below.
static bool WideToCharMap(const wchar *Src,char *Dest,size_t DestSize,bool &Success);
static void CharToWideMap(const char *Src,wchar *Dest,size_t DestSize,bool &Success);

// In Unix we map high ASCII characters which cannot be converted to Unicode
// to 0xE000 - 0xE0FF private use Unicode area.
static const uint MapAreaStart=0xE000;

// Mapped string marker. Initially we used 0xFFFF for this purpose,
// but it causes MSVC2008 swprintf to fail (it treats 0xFFFF as error marker).
// While we could workaround it, it is safer to use another character.
static const uint MappedStringMark=0xFFFE;

#endif

// Convert the zero terminated wide string 'Src' to a multibyte string stored
// in 'Dest', which has room for 'DestSize' chars. The conversion routine is
// selected by platform macros: WideCharToMultiByte with CP_ACP for _WIN_ALL,
// WideToUtf for _APPLE, WideToCharMap with wcsrtombs fallback if MBFUNCTIONS
// is defined and the plain per character cast otherwise.
// Returns false if the selected routine reported a failure, 'Dest' can hold
// a partially converted string in this case. The _APPLE and plain cast
// branches never change the result from 'true'.
// If DestSize>0, the last element of 'Dest' is always set to zero.
// NOTE(review): Dest[0] is written before DestSize is examined, so a zero
// sized buffer is not handled here - confirm callers never pass DestSize==0.
bool WideToChar(const wchar *Src,char *Dest,size_t DestSize)
{
  bool RetCode=true;
  *Dest=0; // Set 'Dest' to zero just in case the conversion will fail.

#ifdef _WIN_ALL
  if (WideCharToMultiByte(CP_ACP,0,Src,-1,Dest,(int)DestSize,NULL,NULL)==0)
    RetCode=false;

// wcstombs is broken in Android NDK r9.
// NOTE(review): the remark above mentions Android, but the branch below
// is selected by _APPLE - confirm which platform it refers to.
#elif defined(_APPLE)
  WideToUtf(Src,Dest,DestSize);

#elif defined(MBFUNCTIONS)
  // WideToCharMap returns false if 'Src' has no mapped string mark.
  // Only then we use the generic C library conversion.
  if (!WideToCharMap(Src,Dest,DestSize,RetCode))
  {
    mbstate_t ps; // Use thread safe external state based functions.
    memset (&ps, 0, sizeof(ps));
    const wchar *SrcParam=Src; // wcsrtombs can change the pointer.

    // Some implementations of wcsrtombs can cause memory analyzing tools
    // like valgrind to report uninitialized data access. It happens because
    // internally these implementations call SSE4 based wcslen function,
    // which reads 16 bytes at once including those beyond of trailing 0.
    size_t ResultingSize=wcsrtombs(Dest,&SrcParam,DestSize,&ps);

    if (ResultingSize==(size_t)-1 && errno==EILSEQ)
    {
      // Aborted on inconvertible character not zero terminating the result.
      // EILSEQ helps to distinguish it from small output buffer abort.
      // We want to convert as much as we can, so we clean the output buffer
      // and repeat conversion.
      memset (&ps, 0, sizeof(ps));
      SrcParam=Src; // wcsrtombs can change the pointer.
      memset(Dest,0,DestSize);
      ResultingSize=wcsrtombs(Dest,&SrcParam,DestSize,&ps);
    }

    if (ResultingSize==(size_t)-1)
      RetCode=false;
    // Nothing is converted though the source string is not empty.
    if (ResultingSize==0 && *Src!=0)
      RetCode=false;
  }
#else
  // No conversion functions are available, so every wide character
  // is truncated to char.
  for (int I=0;I<DestSize;I++)
  {
    Dest[I]=(char)Src[I];
    if (Src[I]==0)
      break;
  }
#endif
  // Guarantee the zero terminated result regardless of the branch above.
  if (DestSize>0)
    Dest[DestSize-1]=0;

  // We tried to return the empty string if conversion is failed,
  // but it does not work well. WideCharToMultiByte returns 'failed' code
  // and partially converted string even if we wanted to convert only a part
  // of string and passed DestSize smaller than required for fully converted
  // string. Such call is the valid behavior in RAR code and we do not expect
  // the empty string in this case.

  return RetCode;
}


// Convert the zero terminated multibyte string 'Src' to a wide string stored
// in 'Dest', which has room for 'DestSize' wide characters. The conversion
// routine is selected by platform macros: MultiByteToWideChar with CP_ACP
// for _WIN_ALL, UtfToWide for _APPLE, mbsrtowcs with CharToWideMap fallback
// if MBFUNCTIONS is defined and the plain per character cast otherwise.
// Returns false if the selected routine reported a failure. The _APPLE
// and plain cast branches never change the result from 'true'.
// If DestSize>0, the last element of 'Dest' is always set to zero.
// NOTE(review): Dest[0] is written before DestSize is examined, so a zero
// sized buffer is not handled here - confirm callers never pass DestSize==0.
bool CharToWide(const char *Src,wchar *Dest,size_t DestSize)
{
  bool RetCode=true;
  *Dest=0; // Set 'Dest' to zero just in case the conversion will fail.

#ifdef _WIN_ALL
  if (MultiByteToWideChar(CP_ACP,0,Src,-1,Dest,(int)DestSize)==0)
    RetCode=false;

// mbstowcs is broken in Android NDK r9.
// NOTE(review): the remark above mentions Android, but the branch below
// is selected by _APPLE - confirm which platform it refers to.
#elif defined(_APPLE)
  // The UtfToWide result is not examined here.
  UtfToWide(Src,Dest,DestSize);

#elif defined(MBFUNCTIONS)
  mbstate_t ps;
  memset (&ps, 0, sizeof(ps));
  const char *SrcParam=Src; // mbsrtowcs can change the pointer.
  size_t ResultingSize=mbsrtowcs(Dest,&SrcParam,DestSize,&ps);
  if (ResultingSize==(size_t)-1)
    RetCode=false;
  // Nothing is converted though the source string is not empty.
  if (ResultingSize==0 && *Src!=0)
    RetCode=false;

  // The generic conversion failed, so retry with mapping of inconvertible
  // characters. CharToWideMap sets RetCode to its own result.
  if (RetCode==false && DestSize>1)
    CharToWideMap(Src,Dest,DestSize,RetCode);
#else
  // No conversion functions are available, so every char is cast
  // to wide character.
  for (int I=0;I<DestSize;I++)
  {
    Dest[I]=(wchar_t)Src[I];
    if (Src[I]==0)
      break;
  }
#endif
  // Guarantee the zero terminated result regardless of the branch above.
  if (DestSize>0)
    Dest[DestSize-1]=0;

  // We tried to return the empty string if conversion is failed,
  // but it does not work well. MultiByteToWideChar returns 'failed' code
  // even if we wanted to convert only a part of string and passed DestSize
  // smaller than required for fully converted string. Such call is the valid
  // behavior in RAR code and we do not expect the empty string in this case.

  return RetCode;
}


#if defined(_UNIX) && defined(MBFUNCTIONS)
// Convert and restore mapped inconvertible Unicode characters.
// We use it for extended ASCII names in Unix.
//
// Src      - zero terminated wide string.
// Dest     - output buffer for the multibyte string.
// DestSize - size of 'Dest' in chars.
// Success  - set to false if at least one character could not be converted
//            and was replaced by '_', set to true otherwise. It is left
//            untouched if the function returns false.
//
// Returns false without touching 'Dest' if 'Src' does not contain
// the mapped string mark, so the caller shall use another conversion.
// Returns true if the string has been processed here.
bool WideToCharMap(const wchar *Src,char *Dest,size_t DestSize,bool &Success)
{
  // String with inconvertible characters mapped to private use Unicode area
  // must have the mark code somewhere.
  if (wcschr(Src,(wchar)MappedStringMark)==NULL)
    return false;

  // Seems to be that wcrtomb in some memory analyzing libraries
  // can produce uninitialized output while reporting success on garbage input.
  // So we clean the destination to calm analyzers.
  memset(Dest,0,DestSize);
  
  Success=true;
  uint SrcPos=0,DestPos=0;

  // Every iteration can store up to MB_CUR_MAX bytes, so we continue only
  // while such amount plus the trailing zero fits into the buffer.
  // We compare 'DestPos+MB_CUR_MAX' with DestSize instead of subtracting
  // MB_CUR_MAX from DestSize, because the unsigned subtraction wraps around
  // for DestSize smaller than MB_CUR_MAX and allows to write beyond 'Dest'.
  while (Src[SrcPos]!=0 && DestPos+MB_CUR_MAX<DestSize)
  {
    if (uint(Src[SrcPos])==MappedStringMark)
    {
      // The mark itself is not a part of the original name.
      SrcPos++;
      continue;
    }
    // For security reasons do not restore low ASCII codes, so mapping cannot
    // be used to hide control codes like path separators.
    if (uint(Src[SrcPos])>=MapAreaStart+0x80 && uint(Src[SrcPos])<MapAreaStart+0x100)
      Dest[DestPos++]=char(uint(Src[SrcPos++])-MapAreaStart);
    else
    {
      mbstate_t ps;
      memset(&ps,0,sizeof(ps));
      if (wcrtomb(Dest+DestPos,Src[SrcPos],&ps)==(size_t)-1)
      {
        Dest[DestPos]='_';
        Success=false;
      }
      SrcPos++;
      // Measure the sequence just stored to advance the output position.
      // Invalid or zero length is treated as a single byte.
      memset(&ps,0,sizeof(ps));
      int Length=mbrlen(Dest+DestPos,MB_CUR_MAX,&ps);
      DestPos+=Max(Length,1);
    }
  }
  // Do not touch the zero sized buffer, where even Dest[0] is not available.
  if (DestSize>0)
    Dest[Min(DestPos,DestSize-1)]=0;
  return true;
}
#endif


#if defined(_UNIX) && defined(MBFUNCTIONS)
// Convert and map inconvertible Unicode characters.
// We use it for extended ASCII names in Unix.
//
// Src      - zero terminated multibyte string.
// Dest     - output buffer for the wide string.
// DestSize - size of 'Dest' in wide characters.
// Success  - set to true only if the end of 'Src' is reached before
//            the output buffer is exhausted, set to false otherwise.
// NOTE(review): the trailing zero is stored to Dest[0] even if DestSize
// is 0. The only caller visible in this file passes DestSize>1.
void CharToWideMap(const char *Src,wchar *Dest,size_t DestSize,bool &Success)
{
  // Map inconvertible characters to private use Unicode area 0xE000.
  // Mark such string by placing special non-character code before
  // first inconvertible character.
  Success=false;
  bool MarkAdded=false;
  uint SrcPos=0,DestPos=0;
  while (DestPos<DestSize)
  {
    if (Src[SrcPos]==0)
    {
      Success=true;
      break;
    }
    // Every character is converted with a fresh conversion state.
    mbstate_t ps;
    memset(&ps,0,sizeof(ps));
    size_t res=mbrtowc(Dest+DestPos,Src+SrcPos,MB_CUR_MAX,&ps);
    if (res==(size_t)-1 || res==(size_t)-2)
    {
      // For security reasons we do not want to map low ASCII characters,
      // so we do not have additional .. and path separator codes.
      if (byte(Src[SrcPos])>=0x80)
      {
        if (!MarkAdded)
        {
          Dest[DestPos++]=MappedStringMark;
          MarkAdded=true;
          // No room for the mapped character after the mark.
          if (DestPos>=DestSize)
            break;
        }
        Dest[DestPos++]=byte(Src[SrcPos++])+MapAreaStart;
      }
      else
        break; // Inconvertible low ASCII, 'Success' remains false.
    }
    else
    {
      // Measure the source sequence just converted to advance the input
      // position. Invalid or zero length is treated as a single byte.
      memset(&ps,0,sizeof(ps));
      int Length=mbrlen(Src+SrcPos,MB_CUR_MAX,&ps);
      SrcPos+=Max(Length,1);
      DestPos++;
    }
  }
  // Zero terminate, overwriting the last character if the buffer is full.
  Dest[Min(DestPos,DestSize-1)]=0;
}
#endif


// Store wide characters as pairs of bytes, low byte first, keeping only
// 16 lower bits of every character. Stop after the zero character is stored
// or after SrcSize characters are processed, whichever happens first.
// SrcSize is in wide characters, not in bytes. Returns 'Dest'.
byte* WideToRaw(const wchar *Src,byte *Dest,size_t SrcSize)
{
  byte *Out=Dest;
  for (size_t Left=SrcSize;Left>0;Left--)
  {
    const wchar Ch=*Src++;
    *Out++=(byte)Ch;      // Low byte.
    *Out++=(byte)(Ch>>8); // High byte.
    if (Ch==0)
      break;
  }
  return Dest;
}


// Build wide characters from pairs of bytes, low byte first. Stop after
// the zero character is stored or after DestSize characters are stored,
// whichever happens first. Returns 'Dest'.
wchar* RawToWide(const byte *Src,wchar *Dest,size_t DestSize)
{
  size_t Pos=0;
  while (Pos<DestSize)
  {
    const byte Low=Src[Pos*2];
    const byte High=Src[Pos*2+1];
    const wchar Ch=Low+(High<<8);
    Dest[Pos]=Ch;
    if (Ch==0)
      break;
    Pos++;
  }
  return Dest;
}


// Convert the zero terminated wide string 'Src' to UTF-8 stored in 'Dest',
// which has room for 'DestSize' bytes including the trailing zero.
// A character is stored only if all its bytes fit into the remaining space.
// Codes of 0x200000 and above produce no output.
// NOTE(review): the trailing zero is stored even if DestSize is 0 -
// confirm callers never pass a zero sized buffer.
void WideToUtf(const wchar *Src,char *Dest,size_t DestSize)
{
  // 'dsize' is the number of bytes still available, trailing zero excluded.
  long dsize=(long)DestSize;
  dsize--;
  // The loop condition reserves one byte for every character. Additional
  // bytes of longer sequences are reserved in conditions below.
  while (*Src!=0 && --dsize>=0)
  {
    uint c=*(Src++);
    if (c<0x80)
      *(Dest++)=c;
    else
      // If the space for a sequence is not enough, 'dsize' becomes negative,
      // the following branches fail too and the loop condition stops
      // the conversion.
      if (c<0x800 && --dsize>=0)
      {
        *(Dest++)=(0xc0|(c>>6));
        *(Dest++)=(0x80|(c&0x3f));
      }
      else
      {
        if (c>=0xd800 && c<=0xdbff && *Src>=0xdc00 && *Src<=0xdfff) // Surrogate pair.
        {
          // Combine high and low surrogates to a single code point.
          c=((c-0xd800)<<10)+(*Src-0xdc00)+0x10000;
          Src++;
        }
        if (c<0x10000 && (dsize-=2)>=0)
        {
          *(Dest++)=(0xe0|(c>>12));
          *(Dest++)=(0x80|((c>>6)&0x3f));
          *(Dest++)=(0x80|(c&0x3f));
        }
        else
          if (c < 0x200000 && (dsize-=3)>=0)
          {
            *(Dest++)=(0xf0|(c>>18));
            *(Dest++)=(0x80|((c>>12)&0x3f));
            *(Dest++)=(0x80|((c>>6)&0x3f));
            *(Dest++)=(0x80|(c&0x3f));
          }
      }
  }
  *Dest=0;
}


size_t WideToUtfSize(const wchar *Src)
{
  size_t Size=0;
  for (;*Src!=0;Src++)
    if (*Src<0x80)
      Size++;
    else
      if (*Src<0x800)
        Size+=2;
      else
        if ((uint)*Src<0x10000) //(uint) to avoid Clang/win "always true" warning for 16-bit wchar_t.
        {
          if (Src[0]>=0xd800 && Src[0]<=0xdbff && Src[1]>=0xdc00 && Src[1]<=0xdfff)
          {
            Size+=4; // 4 output bytes for Unicode surrogate pair.
            Src++;
          }
          else
            Size+=3;
        }
        else
          if ((uint)*Src<0x200000) //(uint) to avoid Clang/win "always true" warning for 16-bit wchar_t.
            Size+=4;
  return Size+1; // Include terminating zero.
}


// Convert the zero terminated UTF-8 string 'Src' to a wide string stored
// in 'Dest', which has room for 'DestSize' wide characters including
// the trailing zero.
// Returns false if a malformed sequence is found, the conversion stops
// at such sequence, or if a code above 0x10ffff is found, such code
// is skipped and the conversion continues. Running out of the output space
// stops the conversion without changing the result.
// NOTE(review): the trailing zero is stored even if DestSize is 0 -
// confirm callers never pass a zero sized buffer.
bool UtfToWide(const char *Src,wchar *Dest,size_t DestSize)
{
  bool Success=true;
  // 'dsize' is the number of wide characters still available,
  // trailing zero excluded.
  long dsize=(long)DestSize;
  dsize--;
  while (*Src!=0)
  {
    // 'c' is the lead byte, 'd' is the decoded code point.
    uint c=byte(*(Src++)),d;
    if (c<0x80)
      d=c;
    else
      if ((c>>5)==6) // 110xxxxx, two byte sequence.
      {
        if ((*Src&0xc0)!=0x80)
        {
          Success=false;
          break;
        }
        d=((c&0x1f)<<6)|(*Src&0x3f);
        Src++;
      }
      else
        if ((c>>4)==14) // 1110xxxx, three byte sequence.
        {
          // Trail bytes are tested left to right, so the trailing zero
          // fails the test before bytes after it are accessed.
          if ((Src[0]&0xc0)!=0x80 || (Src[1]&0xc0)!=0x80)
          {
            Success=false;
            break;
          }
          d=((c&0xf)<<12)|((Src[0]&0x3f)<<6)|(Src[1]&0x3f);
          Src+=2;
        }
        else
          if ((c>>3)==30) // 11110xxx, four byte sequence.
          {
            if ((Src[0]&0xc0)!=0x80 || (Src[1]&0xc0)!=0x80 || (Src[2]&0xc0)!=0x80)
            {
              Success=false;
              break;
            }
            d=((c&7)<<18)|((Src[0]&0x3f)<<12)|((Src[1]&0x3f)<<6)|(Src[2]&0x3f);
            Src+=3;
          }
          else
          {
            // Neither ASCII nor a valid lead byte.
            Success=false;
            break;
          }
    if (--dsize<0)
      break;
    if (d>0xffff)
    {
      // The second output position is reserved for every code above 0xffff,
      // whether the surrogate pair is used below or not.
      if (--dsize<0)
        break;
      if (d>0x10ffff) // UTF-8 must end at 0x10ffff according to RFC 3629.
      {
        Success=false;
        continue;
      }
      if (sizeof(*Dest)==2) // Use the surrogate pair.
      {
        *(Dest++)=((d-0x10000)>>10)+0xd800;
        *(Dest++)=(d&0x3ff)+0xdc00;
      }
      else
        *(Dest++)=d;
    }
    else
      *(Dest++)=d;
  }
  *Dest=0;
  return Success;
}


// Version for zero terminated strings. Measure the string length and pass
// the job to the version with explicitly specified data size.
bool IsTextUtf8(const byte *Src)
{
  const size_t Length=strlen((const char *)Src);
  return IsTextUtf8(Src,Length);
}


// Source data can be both with and without UTF-8 BOM.
bool IsTextUtf8(const byte *Src,size_t SrcSize)
{
  while (SrcSize-- > 0)
  {
    byte C=*(Src++);
    int HighOne=0; // Number of leftmost '1' bits.
    for (byte Mask=0x80;Mask!=0 && (C & Mask)!=0;Mask>>=1)
      HighOne++;
    if (HighOne==1 || HighOne>6)
      return false;
    while (--HighOne > 0)
      if (SrcSize-- <= 0 || (*(Src++) & 0xc0)!=0x80)
        return false;
  }
  return true;
}


int wcsicomp(const wchar *s1,const wchar *s2)
{
#ifdef _WIN_ALL
  return CompareStringW(LOCALE_USER_DEFAULT,NORM_IGNORECASE|SORT_STRINGSORT,s1,-1,s2,-1)-2;
#else
  while (true)
  {
    wchar u1 = towupper(*s1);
    wchar u2 = towupper(*s2);
    if (u1 != u2)
      return u1 < u2 ? -1 : 1;
    if (*s1==0)
      break;
    s1++;
    s2++;
  }
  return 0;
#endif
}


int wcsnicomp(const wchar *s1,const wchar *s2,size_t n)
{
#ifdef _WIN_ALL
  // If we specify 'n' exceeding the actual string length, CompareString goes
  // beyond the trailing zero and compares garbage. So we need to limit 'n'
  // to real string length.
  size_t l1=Min(wcslen(s1)+1,n);
  size_t l2=Min(wcslen(s2)+1,n);
  return CompareStringW(LOCALE_USER_DEFAULT,NORM_IGNORECASE|SORT_STRINGSORT,s1,(int)l1,s2,(int)l2)-2;
#else
  if (n==0)
    return 0;
  while (true)
  {
    wchar u1 = towupper(*s1);
    wchar u2 = towupper(*s2);
    if (u1 != u2)
      return u1 < u2 ? -1 : 1;
    if (*s1==0 || --n==0)
      break;
    s1++;
    s2++;
  }
  return 0;
#endif
}


// Case insensitive wcsstr(). Returns the pointer to first occurrence
// of 'search' in 'str' or NULL if there is no such occurrence.
// NULL is also returned for empty 'str', even if 'search' is empty too.
const wchar_t* wcscasestr(const wchar_t *str, const wchar_t *search)
{
  for (const wchar_t *Start=str;*Start!=0;Start++)
  {
    const wchar_t *Hay=Start;
    const wchar_t *Needle=search;
    // Advance while 'search' is not finished and characters match.
    while (*Needle!=0 && tolowerw(*Hay)==tolowerw(*Needle))
    {
      Hay++;
      Needle++;
    }
    if (*Needle==0) // Entire 'search' is matched.
      return Start;
  }
  return NULL;
}


#ifndef SFX_MODULE
// Convert the zero terminated wide string to lower case in place.
// Returns the same pointer as passed in 's'.
wchar* wcslower(wchar *s)
{
#ifdef _WIN_ALL
  // _wcslwr requires setlocale and we do not want to depend on setlocale
  // in Windows. Also CharLower involves less overhead.
  CharLower(s);
#else
  wchar *Cur=s;
  while (*Cur!=0)
  {
    *Cur=towlower(*Cur);
    Cur++;
  }
#endif
  return s;
}
#endif


#ifndef SFX_MODULE
// Convert the zero terminated wide string to upper case in place.
// Returns the same pointer as passed in 's'.
wchar* wcsupper(wchar *s)
{
#ifdef _WIN_ALL
  // _wcsupr requires setlocale and we do not want to depend on setlocale
  // in Windows. Also CharUpper involves less overhead.
  CharUpper(s);
#else
  wchar *Cur=s;
  while (*Cur!=0)
  {
    *Cur=towupper(*Cur);
    Cur++;
  }
#endif
  return s;
}
#endif


// Convert a single wide character code to upper case.
int toupperw(int ch)
{
#if !defined(_WIN_ALL)
  // Outside of Windows we rely on the C library case mapping.
  return towupper(ch);
#else
  // CharUpper is more reliable than towupper in Windows, which seems to be
  // C locale dependent even in Unicode version. For example, towupper failed
  // to convert lowercase Russian characters. Use 0xffff mask to prevent crash
  // if value larger than 0xffff is passed to this function.
  const INT_PTR Masked=(INT_PTR)(ch&0xffff);
  return (int)(INT_PTR)CharUpper((wchar *)Masked);
#endif
}


// Convert a single wide character code to lower case.
int tolowerw(int ch)
{
#if !defined(_WIN_ALL)
  // Outside of Windows we rely on the C library case mapping.
  return towlower(ch);
#else
  // CharLower is more reliable than towlower in Windows.
  // See comment for towupper above. Use 0xffff mask to prevent crash
  // if value larger than 0xffff is passed to this function.
  const INT_PTR Masked=(INT_PTR)(ch&0xffff);
  return (int)(INT_PTR)CharLower((wchar *)Masked);
#endif
}


int atoiw(const wchar *s)
{
  return (int)atoilw(s);
}


// Convert the decimal number in the wide string to 64-bit integer.
// The optional leading '-' is recognized. Parsing stops at first character,
// which is not a decimal digit.
int64 atoilw(const wchar *s)
{
  // We do use signed integers here, for example, in GUI SFX.
  const bool Negative=(*s=='-');
  if (Negative)
    s++;

  // Use unsigned type here, since long string can overflow the variable
  // and signed integer overflow is undefined behavior in C++.
  uint64 Value=0;
  for (;*s>='0' && *s<='9';s++)
    Value=Value*10+(*s-'0');

  // Check Signed>=0 to avoid the signed overflow with undefined behavior
  // when negating 0x8000000000000000.
  const int64 Signed=int64(Value);
  if (Negative && Signed>=0)
    return -Signed;
  return Signed;
}


#ifdef DBCS_SUPPORTED
// Global object providing double byte character set helpers.
SupportDBCS gdbcs;

// Fill the lead byte table and DBCS mode flag when the object is created.
SupportDBCS::SupportDBCS()
{
  Init();
}


// Read parameters of the CP_ACP code page: set DBCSMode if its characters
// can occupy more than one byte and fill the IsLeadByte table with
// IsDBCSLeadByte results for every table index.
// NOTE(review): GetCPInfo result is not checked, so CPInfo contents
// are used even if the call fails.
void SupportDBCS::Init()
{
  CPINFO CPInfo;
  GetCPInfo(CP_ACP,&CPInfo);
  DBCSMode=CPInfo.MaxCharSize > 1;
  for (uint I=0;I<ASIZE(IsLeadByte);I++)
    IsLeadByte[I]=IsDBCSLeadByte(I)!=0;
}


// Return the pointer to character following the character at 's',
// skipping two bytes for a lead byte with a trail byte and one byte otherwise.
char* SupportDBCS::charnext(const char *s)
{
  // Zero cannot be the trail byte. So if next byte after the lead byte
  // is 0, the string is corrupt and we'll better return the pointer to 0,
  // to break string processing loops.
  const bool DoubleByte=IsLeadByte[(byte)*s] && s[1]!=0;
  return (char *)(s+(DoubleByte ? 2:1));
}


// Return the length of zero terminated string 's' in characters,
// counting a lead byte with its trail byte as a single character.
size_t SupportDBCS::strlend(const char *s)
{
  size_t Length=0;
  while (*s!=0)
  {
    // Zero cannot be the trail byte. If the lead byte is followed by 0,
    // the string is corrupt and we advance by one byte only, so we stop
    // at the trailing zero instead of reading beyond the string end.
    if (IsLeadByte[(byte)*s] && s[1]!=0)
      s+=2;
    else
      s++;
    Length++;
  }
  return(Length);
}


// Return the pointer to first single byte character 'c' in zero terminated
// string 's' or NULL if it is not found. Lead and trail bytes of double byte
// characters are never compared with 'c'.
char* SupportDBCS::strchrd(const char *s, int c)
{
  while (*s!=0)
    if (IsLeadByte[(byte)*s])
    {
      // Zero cannot be the trail byte. If the lead byte is followed by 0,
      // the string is corrupt and we advance by one byte only, so we stop
      // at the trailing zero instead of reading beyond the string end.
      s+=(s[1]!=0 ? 2:1);
    }
    else
      if (*s==c)
        return((char *)s);
      else
        s++;
  return(NULL);
}


// Copy a single character from 'src' to 'dest': two bytes if the first
// byte is the lead byte and one byte otherwise.
void SupportDBCS::copychrd(char *dest,const char *src)
{
  const bool DoubleByte=IsLeadByte[(byte)src[0]];
  dest[0]=src[0];
  if (DoubleByte)
    dest[1]=src[1];
}


// Return the pointer to last single byte character 'c' in zero terminated
// string 's' or NULL if it is not found. Lead and trail bytes of double byte
// characters are never compared with 'c'.
char* SupportDBCS::strrchrd(const char *s, int c)
{
  const char *found=NULL;
  while (*s!=0)
    if (IsLeadByte[(byte)*s])
    {
      // Zero cannot be the trail byte. If the lead byte is followed by 0,
      // the string is corrupt and we advance by one byte only, so we stop
      // at the trailing zero instead of reading beyond the string end.
      s+=(s[1]!=0 ? 2:1);
    }
    else
    {
      if (*s==c)
        found=s;
      s++;
    }
  return((char *)found);
}
#endif
Added UnRAR dependency (version 5.9.4) 2020-09-08 08:34:21 +00:00			`#include "rar.hpp"`
			`#define MBFUNCTIONS`

			`#if defined(_UNIX) && defined(MBFUNCTIONS)`

			`static bool WideToCharMap(const wchar Src,char Dest,size_t DestSize,bool &Success);`
			`static void CharToWideMap(const char Src,wchar Dest,size_t DestSize,bool &Success);`

			`// In Unix we map high ASCII characters which cannot be converted to Unicode`
			`// to 0xE000 - 0xE0FF private use Unicode area.`
			`static const uint MapAreaStart=0xE000;`

			`// Mapped string marker. Initially we used 0xFFFF for this purpose,`
			`// but it causes MSVC2008 swprintf to fail (it treats 0xFFFF as error marker).`
			`// While we could workaround it, it is safer to use another character.`
			`static const uint MappedStringMark=0xFFFE;`

			`#endif`

			`bool WideToChar(const wchar Src,char Dest,size_t DestSize)`
			`{`
			`bool RetCode=true;`
			`*Dest=0; // Set 'Dest' to zero just in case the conversion will fail.`

			`#ifdef _WIN_ALL`
			`if (WideCharToMultiByte(CP_ACP,0,Src,-1,Dest,(int)DestSize,NULL,NULL)==0)`
			`RetCode=false;`

			`// wcstombs is broken in Android NDK r9.`
			`#elif defined(_APPLE)`
			`WideToUtf(Src,Dest,DestSize);`

			`#elif defined(MBFUNCTIONS)`
			`if (!WideToCharMap(Src,Dest,DestSize,RetCode))`
			`{`
			`mbstate_t ps; // Use thread safe external state based functions.`
			`memset (&ps, 0, sizeof(ps));`
			`const wchar *SrcParam=Src; // wcsrtombs can change the pointer.`

			`// Some implementations of wcsrtombs can cause memory analyzing tools`
			`// like valgrind to report uninitialized data access. It happens because`
			`// internally these implementations call SSE4 based wcslen function,`
			`// which reads 16 bytes at once including those beyond of trailing 0.`
			`size_t ResultingSize=wcsrtombs(Dest,&SrcParam,DestSize,&ps);`

			`if (ResultingSize==(size_t)-1 && errno==EILSEQ)`
			`{`
			`// Aborted on inconvertible character not zero terminating the result.`
			`// EILSEQ helps to distinguish it from small output buffer abort.`
			`// We want to convert as much as we can, so we clean the output buffer`
			`// and repeat conversion.`
			`memset (&ps, 0, sizeof(ps));`
			`SrcParam=Src; // wcsrtombs can change the pointer.`
			`memset(Dest,0,DestSize);`
			`ResultingSize=wcsrtombs(Dest,&SrcParam,DestSize,&ps);`
			`}`

			`if (ResultingSize==(size_t)-1)`
			`RetCode=false;`
			`if (ResultingSize==0 && *Src!=0)`
			`RetCode=false;`
			`}`
			`#else`
			`for (int I=0;I<DestSize;I++)`
			`{`
			`Dest[I]=(char)Src[I];`
			`if (Src[I]==0)`
			`break;`
			`}`
			`#endif`
			`if (DestSize>0)`
			`Dest[DestSize-1]=0;`

			`// We tried to return the empty string if conversion is failed,`
			`// but it does not work well. WideCharToMultiByte returns 'failed' code`
			`// and partially converted string even if we wanted to convert only a part`
			`// of string and passed DestSize smaller than required for fully converted`
			`// string. Such call is the valid behavior in RAR code and we do not expect`
			`// the empty string in this case.`

			`return RetCode;`
			`}`


			`bool CharToWide(const char Src,wchar Dest,size_t DestSize)`
			`{`
			`bool RetCode=true;`
			`*Dest=0; // Set 'Dest' to zero just in case the conversion will fail.`

			`#ifdef _WIN_ALL`
			`if (MultiByteToWideChar(CP_ACP,0,Src,-1,Dest,(int)DestSize)==0)`
			`RetCode=false;`

			`// mbstowcs is broken in Android NDK r9.`
			`#elif defined(_APPLE)`
			`UtfToWide(Src,Dest,DestSize);`

			`#elif defined(MBFUNCTIONS)`
			`mbstate_t ps;`
			`memset (&ps, 0, sizeof(ps));`
			`const char *SrcParam=Src; // mbsrtowcs can change the pointer.`
			`size_t ResultingSize=mbsrtowcs(Dest,&SrcParam,DestSize,&ps);`
			`if (ResultingSize==(size_t)-1)`
			`RetCode=false;`
			`if (ResultingSize==0 && *Src!=0)`
			`RetCode=false;`

			`if (RetCode==false && DestSize>1)`
			`CharToWideMap(Src,Dest,DestSize,RetCode);`
			`#else`
			`for (int I=0;I<DestSize;I++)`
			`{`
			`Dest[I]=(wchar_t)Src[I];`
			`if (Src[I]==0)`
			`break;`
			`}`
			`#endif`
			`if (DestSize>0)`
			`Dest[DestSize-1]=0;`

			`// We tried to return the empty string if conversion is failed,`
			`// but it does not work well. MultiByteToWideChar returns 'failed' code`
			`// even if we wanted to convert only a part of string and passed DestSize`
			`// smaller than required for fully converted string. Such call is the valid`
			`// behavior in RAR code and we do not expect the empty string in this case.`

			`return RetCode;`
			`}`


			`#if defined(_UNIX) && defined(MBFUNCTIONS)`
			`// Convert and restore mapped inconvertible Unicode characters.`
			`// We use it for extended ASCII names in Unix.`
			`bool WideToCharMap(const wchar Src,char Dest,size_t DestSize,bool &Success)`
			`{`
			`// String with inconvertible characters mapped to private use Unicode area`
			`// must have the mark code somewhere.`
			`if (wcschr(Src,(wchar)MappedStringMark)==NULL)`
			`return false;`

			`// Seems to be that wcrtomb in some memory analyzing libraries`
			`// can produce uninitilized output while reporting success on garbage input.`
			`// So we clean the destination to calm analyzers.`
			`memset(Dest,0,DestSize);`

			`Success=true;`
			`uint SrcPos=0,DestPos=0;`
			`while (Src[SrcPos]!=0 && DestPos<DestSize-MB_CUR_MAX)`
			`{`
			`if (uint(Src[SrcPos])==MappedStringMark)`
			`{`
			`SrcPos++;`
			`continue;`
			`}`
			`// For security reasons do not restore low ASCII codes, so mapping cannot`
			`// be used to hide control codes like path separators.`
			`if (uint(Src[SrcPos])>=MapAreaStart+0x80 && uint(Src[SrcPos])<MapAreaStart+0x100)`
			`Dest[DestPos++]=char(uint(Src[SrcPos++])-MapAreaStart);`
			`else`
			`{`
			`mbstate_t ps;`
			`memset(&ps,0,sizeof(ps));`
			`if (wcrtomb(Dest+DestPos,Src[SrcPos],&ps)==(size_t)-1)`
			`{`
			`Dest[DestPos]='_';`
			`Success=false;`
			`}`
			`SrcPos++;`
			`memset(&ps,0,sizeof(ps));`
			`int Length=mbrlen(Dest+DestPos,MB_CUR_MAX,&ps);`
			`DestPos+=Max(Length,1);`
			`}`
			`}`
			`Dest[Min(DestPos,DestSize-1)]=0;`
			`return true;`
			`}`
			`#endif`


			`#if defined(_UNIX) && defined(MBFUNCTIONS)`
			`// Convert and map inconvertible Unicode characters.`
			`// We use it for extended ASCII names in Unix.`
			`void CharToWideMap(const char Src,wchar Dest,size_t DestSize,bool &Success)`
			`{`
			`// Map inconvertible characters to private use Unicode area 0xE000.`
			`// Mark such string by placing special non-character code before`
			`// first inconvertible character.`
			`Success=false;`
			`bool MarkAdded=false;`
			`uint SrcPos=0,DestPos=0;`
			`while (DestPos<DestSize)`
			`{`
			`if (Src[SrcPos]==0)`
			`{`
			`Success=true;`
			`break;`
			`}`
			`mbstate_t ps;`
			`memset(&ps,0,sizeof(ps));`
			`size_t res=mbrtowc(Dest+DestPos,Src+SrcPos,MB_CUR_MAX,&ps);`
			`if (res==(size_t)-1 \|\| res==(size_t)-2)`
			`{`
			`// For security reasons we do not want to map low ASCII characters,`
			`// so we do not have additional .. and path separator codes.`
			`if (byte(Src[SrcPos])>=0x80)`
			`{`
			`if (!MarkAdded)`
			`{`
			`Dest[DestPos++]=MappedStringMark;`
			`MarkAdded=true;`
			`if (DestPos>=DestSize)`
			`break;`
			`}`
			`Dest[DestPos++]=byte(Src[SrcPos++])+MapAreaStart;`
			`}`
			`else`
			`break;`
			`}`
			`else`
			`{`
			`memset(&ps,0,sizeof(ps));`
			`int Length=mbrlen(Src+SrcPos,MB_CUR_MAX,&ps);`
			`SrcPos+=Max(Length,1);`
			`DestPos++;`
			`}`
			`}`
			`Dest[Min(DestPos,DestSize-1)]=0;`
			`}`
			`#endif`


			`// SrcSize is in wide characters, not in bytes.`
			`byte* WideToRaw(const wchar Src,byte Dest,size_t SrcSize)`
			`{`
			`for (size_t I=0;I<SrcSize;I++,Src++)`
			`{`
			`Dest[I2]=(byte)Src;`
			`Dest[I2+1]=(byte)(Src>>8);`
			`if (*Src==0)`
			`break;`
			`}`
			`return Dest;`
			`}`


			`wchar* RawToWide(const byte Src,wchar Dest,size_t DestSize)`
			`{`
			`for (size_t I=0;I<DestSize;I++)`
			`if ((Dest[I]=Src[I2]+(Src[I2+1]<<8))==0)`
			`break;`
			`return Dest;`
			`}`


			`void WideToUtf(const wchar Src,char Dest,size_t DestSize)`
			`{`
			`long dsize=(long)DestSize;`
			`dsize--;`
			`while (*Src!=0 && --dsize>=0)`
			`{`
			`uint c=*(Src++);`
			`if (c<0x80)`
			`*(Dest++)=c;`
			`else`
			`if (c<0x800 && --dsize>=0)`
			`{`
			`*(Dest++)=(0xc0\|(c>>6));`
			`*(Dest++)=(0x80\|(c&0x3f));`
			`}`
			`else`
			`{`
			`if (c>=0xd800 && c<=0xdbff && Src>=0xdc00 && Src<=0xdfff) // Surrogate pair.`
			`{`
			`c=((c-0xd800)<<10)+(*Src-0xdc00)+0x10000;`
			`Src++;`
			`}`
			`if (c<0x10000 && (dsize-=2)>=0)`
			`{`
			`*(Dest++)=(0xe0\|(c>>12));`
			`*(Dest++)=(0x80\|((c>>6)&0x3f));`
			`*(Dest++)=(0x80\|(c&0x3f));`
			`}`
			`else`
			`if (c < 0x200000 && (dsize-=3)>=0)`
			`{`
			`*(Dest++)=(0xf0\|(c>>18));`
			`*(Dest++)=(0x80\|((c>>12)&0x3f));`
			`*(Dest++)=(0x80\|((c>>6)&0x3f));`
			`*(Dest++)=(0x80\|(c&0x3f));`
			`}`
			`}`
			`}`
			`*Dest=0;`
			`}`


			`size_t WideToUtfSize(const wchar *Src)`
			`{`
			`size_t Size=0;`
			`for (;*Src!=0;Src++)`
			`if (*Src<0x80)`
			`Size++;`
			`else`
			`if (*Src<0x800)`
			`Size+=2;`
			`else`
			`if ((uint)*Src<0x10000) //(uint) to avoid Clang/win "always true" warning for 16-bit wchar_t.`
			`{`
			`if (Src[0]>=0xd800 && Src[0]<=0xdbff && Src[1]>=0xdc00 && Src[1]<=0xdfff)`
			`{`
			`Size+=4; // 4 output bytes for Unicode surrogate pair.`
			`Src++;`
			`}`
			`else`
			`Size+=3;`
			`}`
			`else`
			`if ((uint)*Src<0x200000) //(uint) to avoid Clang/win "always true" warning for 16-bit wchar_t.`
			`Size+=4;`
			`return Size+1; // Include terminating zero.`
			`}`


			`bool UtfToWide(const char Src,wchar Dest,size_t DestSize)`
			`{`
			`bool Success=true;`
			`long dsize=(long)DestSize;`
			`dsize--;`
			`while (*Src!=0)`
			`{`
			`uint c=byte(*(Src++)),d;`
			`if (c<0x80)`
			`d=c;`
			`else`
			`if ((c>>5)==6)`
			`{`
			`if ((*Src&0xc0)!=0x80)`
			`{`
			`Success=false;`
			`break;`
			`}`
			`d=((c&0x1f)<<6)\|(*Src&0x3f);`
			`Src++;`
			`}`
			`else`
			`if ((c>>4)==14)`
			`{`
			`if ((Src[0]&0xc0)!=0x80 \|\| (Src[1]&0xc0)!=0x80)`
			`{`
			`Success=false;`
			`break;`
			`}`
			`d=((c&0xf)<<12)\|((Src[0]&0x3f)<<6)\|(Src[1]&0x3f);`
			`Src+=2;`
			`}`
			`else`
			`if ((c>>3)==30)`
			`{`
			`if ((Src[0]&0xc0)!=0x80 \|\| (Src[1]&0xc0)!=0x80 \|\| (Src[2]&0xc0)!=0x80)`
			`{`
			`Success=false;`
			`break;`
			`}`
			`d=((c&7)<<18)\|((Src[0]&0x3f)<<12)\|((Src[1]&0x3f)<<6)\|(Src[2]&0x3f);`
			`Src+=3;`
			`}`
			`else`
			`{`
			`Success=false;`
			`break;`
			`}`
			`if (--dsize<0)`
			`break;`
			`if (d>0xffff)`
			`{`
			`if (--dsize<0)`
			`break;`
			`if (d>0x10ffff) // UTF-8 must end at 0x10ffff according to RFC 3629.`
			`{`
			`Success=false;`
			`continue;`
			`}`
			`if (sizeof(*Dest)==2) // Use the surrogate pair.`
			`{`
			`*(Dest++)=((d-0x10000)>>10)+0xd800;`
			`*(Dest++)=(d&0x3ff)+0xdc00;`
			`}`
			`else`
			`*(Dest++)=d;`
			`}`
			`else`
			`*(Dest++)=d;`
			`}`
			`*Dest=0;`
			`return Success;`
			`}`


			`// For zero terminated strings.`
			`bool IsTextUtf8(const byte *Src)`
			`{`
			`return IsTextUtf8(Src,strlen((const char *)Src));`
			`}`


			`// Source data can be both with and without UTF-8 BOM.`
			`bool IsTextUtf8(const byte *Src,size_t SrcSize)`
			`{`
			`while (SrcSize-- > 0)`
			`{`
			`byte C=*(Src++);`
			`int HighOne=0; // Number of leftmost '1' bits.`
			`for (byte Mask=0x80;Mask!=0 && (C & Mask)!=0;Mask>>=1)`
			`HighOne++;`
			`if (HighOne==1 \|\| HighOne>6)`
			`return false;`
			`while (--HighOne > 0)`
			`if (SrcSize-- <= 0 \|\| (*(Src++) & 0xc0)!=0x80)`
			`return false;`
			`}`
			`return true;`
			`}`


			`int wcsicomp(const wchar s1,const wchar s2)`
			`{`
			`#ifdef _WIN_ALL`
			`return CompareStringW(LOCALE_USER_DEFAULT,NORM_IGNORECASE\|SORT_STRINGSORT,s1,-1,s2,-1)-2;`
			`#else`
			`while (true)`
			`{`
			`wchar u1 = towupper(*s1);`
			`wchar u2 = towupper(*s2);`
			`if (u1 != u2)`
			`return u1 < u2 ? -1 : 1;`
			`if (*s1==0)`
			`break;`
			`s1++;`
			`s2++;`
			`}`
			`return 0;`
			`#endif`
			`}`


			`int wcsnicomp(const wchar s1,const wchar s2,size_t n)`
			`{`
			`#ifdef _WIN_ALL`
			`// If we specify 'n' exceeding the actual string length, CompareString goes`
			`// beyond the trailing zero and compares garbage. So we need to limit 'n'`
			`// to real string length.`
			`size_t l1=Min(wcslen(s1)+1,n);`
			`size_t l2=Min(wcslen(s2)+1,n);`
			`return CompareStringW(LOCALE_USER_DEFAULT,NORM_IGNORECASE\|SORT_STRINGSORT,s1,(int)l1,s2,(int)l2)-2;`
			`#else`
			`if (n==0)`
			`return 0;`
			`while (true)`
			`{`
			`wchar u1 = towupper(*s1);`
			`wchar u2 = towupper(*s2);`
			`if (u1 != u2)`
			`return u1 < u2 ? -1 : 1;`
			`if (*s1==0 \|\| --n==0)`
			`break;`
			`s1++;`
			`s2++;`
			`}`
			`return 0;`
			`#endif`
			`}`


Updated unrar source from 5.9.4 to 6.0.5 2021-05-15 09:31:42 +00:00			`// Case insensitive wcsstr().`
Added UnRAR dependency (version 5.9.4) 2020-09-08 08:34:21 +00:00			`const wchar_t* wcscasestr(const wchar_t str, const wchar_t search)`
			`{`
			`for (size_t i=0;str[i]!=0;i++)`
			`for (size_t j=0;;j++)`
			`{`
			`if (search[j]==0)`
			`return str+i;`
			`if (tolowerw(str[i+j])!=tolowerw(search[j]))`
			`break;`
			`}`
			`return NULL;`
			`}`


			`#ifndef SFX_MODULE`
			`wchar* wcslower(wchar *s)`
			`{`
			`#ifdef _WIN_ALL`
			`// _wcslwr requires setlocale and we do not want to depend on setlocale`
			`// in Windows. Also CharLower involves less overhead.`
			`CharLower(s);`
			`#else`
			`for (wchar c=s;c!=0;c++)`
			`c=towlower(c);`
			`#endif`
			`return s;`
			`}`
			`#endif`


			`#ifndef SFX_MODULE`
			`wchar* wcsupper(wchar *s)`
			`{`
			`#ifdef _WIN_ALL`
			`// _wcsupr requires setlocale and we do not want to depend on setlocale`
			`// in Windows. Also CharUpper involves less overhead.`
			`CharUpper(s);`
			`#else`
			`for (wchar c=s;c!=0;c++)`
			`c=towupper(c);`
			`#endif`
			`return s;`
			`}`
			`#endif`




			`int toupperw(int ch)`
			`{`
			`#if defined(_WIN_ALL)`
			`// CharUpper is more reliable than towupper in Windows, which seems to be`
			`// C locale dependent even in Unicode version. For example, towupper failed`
			`// to convert lowercase Russian characters. Use 0xffff mask to prevent crash`
			`// if value larger than 0xffff is passed to this function.`
			`return (int)(INT_PTR)CharUpper((wchar *)(INT_PTR)(ch&0xffff));`
			`#else`
			`return towupper(ch);`
			`#endif`
			`}`


			`int tolowerw(int ch)`
			`{`
			`#if defined(_WIN_ALL)`
			`// CharLower is more reliable than towlower in Windows.`
			`// See comment for towupper above. Use 0xffff mask to prevent crash`
			`// if value larger than 0xffff is passed to this function.`
			`return (int)(INT_PTR)CharLower((wchar *)(INT_PTR)(ch&0xffff));`
			`#else`
			`return towlower(ch);`
			`#endif`
			`}`


			`int atoiw(const wchar *s)`
			`{`
			`return (int)atoilw(s);`
			`}`


			`int64 atoilw(const wchar *s)`
			`{`
			`bool sign=false;`
			`if (*s=='-') // We do use signed integers here, for example, in GUI SFX.`
			`{`
			`s++;`
			`sign=true;`
			`}`
			`// Use unsigned type here, since long string can overflow the variable`
			`// and signed integer overflow is undefined behavior in C++.`
			`uint64 n=0;`
			`while (s>='0' && s<='9')`
			`{`
			`n=n10+(s-'0');`
			`s++;`
			`}`
			`// Check int64(n)>=0 to avoid the signed overflow with undefined behavior`
			`// when negating 0x8000000000000000.`
			`return sign && int64(n)>=0 ? -int64(n) : int64(n);`
			`}`


			`#ifdef DBCS_SUPPORTED`
			`SupportDBCS gdbcs;`

			`SupportDBCS::SupportDBCS()`
			`{`
			`Init();`
			`}`


			`void SupportDBCS::Init()`
			`{`
			`CPINFO CPInfo;`
			`GetCPInfo(CP_ACP,&CPInfo);`
			`DBCSMode=CPInfo.MaxCharSize > 1;`
			`for (uint I=0;I<ASIZE(IsLeadByte);I++)`
			`IsLeadByte[I]=IsDBCSLeadByte(I)!=0;`
			`}`


			`char* SupportDBCS::charnext(const char *s)`
			`{`
			`// Zero cannot be the trail byte. So if next byte after the lead byte`
			`// is 0, the string is corrupt and we'll better return the pointer to 0,`
			`// to break string processing loops.`
			`return (char )(IsLeadByte[(byte)s] && s[1]!=0 ? s+2:s+1);`
			`}`


			`size_t SupportDBCS::strlend(const char *s)`
			`{`
			`size_t Length=0;`
			`while (*s!=0)`
			`{`
			`if (IsLeadByte[(byte)*s])`
			`s+=2;`
			`else`
			`s++;`
			`Length++;`
			`}`
			`return(Length);`
			`}`


			`char* SupportDBCS::strchrd(const char *s, int c)`
			`{`
			`while (*s!=0)`
			`if (IsLeadByte[(byte)*s])`
			`s+=2;`
			`else`
			`if (*s==c)`
			`return((char *)s);`
			`else`
			`s++;`
			`return(NULL);`
			`}`


			`void SupportDBCS::copychrd(char dest,const char src)`
			`{`
			`dest[0]=src[0];`
			`if (IsLeadByte[(byte)src[0]])`
			`dest[1]=src[1];`
			`}`


			`char* SupportDBCS::strrchrd(const char *s, int c)`
			`{`
			`const char *found=NULL;`
			`while (*s!=0)`
			`if (IsLeadByte[(byte)*s])`
			`s+=2;`
			`else`
			`{`
			`if (*s==c)`
			`found=s;`
			`s++;`
			`}`
			`return((char *)found);`
			`}`
			`#endif`