diff --git a/deps/LZMA-SDK/C/7z.h b/deps/LZMA-SDK/C/7z.h
index 82813c298..969523cd3 100644
--- a/deps/LZMA-SDK/C/7z.h
+++ b/deps/LZMA-SDK/C/7z.h
@@ -1,5 +1,5 @@
 /* 7z.h -- 7z interface
-2017-04-03 : Igor Pavlov : Public domain */
+2018-07-02 : Igor Pavlov : Public domain */
 
 #ifndef __7Z_H
 #define __7Z_H
@@ -91,6 +91,8 @@ typedef struct
   UInt64 *CoderUnpackSizes;       // for all coders in all folders
 
   Byte *CodersData;
+
+  UInt64 RangeLimit;
 } CSzAr;
 
 UInt64 SzAr_GetFolderUnpackSize(const CSzAr *p, UInt32 folderIndex);
diff --git a/deps/LZMA-SDK/C/7zArcIn.c b/deps/LZMA-SDK/C/7zArcIn.c
index 68cc12ff4..7ccc72101 100644
--- a/deps/LZMA-SDK/C/7zArcIn.c
+++ b/deps/LZMA-SDK/C/7zArcIn.c
@@ -1,5 +1,5 @@
 /* 7zArcIn.c -- 7z Input functions
-2018-12-31 : Igor Pavlov : Public domain */
+2021-02-09 : Igor Pavlov : Public domain */
 
 #include "Precomp.h"
 
@@ -75,7 +75,7 @@ static SRes SzBitUi32s_Alloc(CSzBitUi32s *p, size_t num, ISzAllocPtr alloc)
   return SZ_OK;
 }
 
-void SzBitUi32s_Free(CSzBitUi32s *p, ISzAllocPtr alloc)
+static void SzBitUi32s_Free(CSzBitUi32s *p, ISzAllocPtr alloc)
 {
   ISzAlloc_Free(alloc, p->Defs); p->Defs = NULL;
   ISzAlloc_Free(alloc, p->Vals); p->Vals = NULL;
@@ -83,7 +83,7 @@ void SzBitUi32s_Free(CSzBitUi32s *p, ISzAllocPtr alloc)
 
 #define SzBitUi64s_Init(p) { (p)->Defs = NULL; (p)->Vals = NULL; }
 
-void SzBitUi64s_Free(CSzBitUi64s *p, ISzAllocPtr alloc)
+static void SzBitUi64s_Free(CSzBitUi64s *p, ISzAllocPtr alloc)
 {
   ISzAlloc_Free(alloc, p->Defs); p->Defs = NULL;
   ISzAlloc_Free(alloc, p->Vals); p->Vals = NULL;
@@ -105,6 +105,8 @@ static void SzAr_Init(CSzAr *p)
   p->CoderUnpackSizes = NULL;
 
   p->CodersData = NULL;
+
+  p->RangeLimit = 0;
 }
 
 static void SzAr_Free(CSzAr *p, ISzAllocPtr alloc)
@@ -502,7 +504,7 @@ SRes SzGetNextFolderItem(CSzFolder *f, CSzData *sd)
         return SZ_ERROR_ARCHIVE;
       if (propsSize >= 0x80)
         return SZ_ERROR_UNSUPPORTED;
-      coder->PropsOffset = sd->Data - dataStart;
+      coder->PropsOffset = (size_t)(sd->Data - dataStart);
       coder->PropsSize = (Byte)propsSize;
       sd->Data += (size_t)propsSize;
       sd->Size -= (size_t)propsSize;
@@ -677,7 +679,7 @@ static SRes ReadUnpackInfo(CSzAr *p,
   {
     UInt32 numCoders, ci, numInStreams = 0;
     
-    p->FoCodersOffsets[fo] = sd.Data - startBufPtr;
+    p->FoCodersOffsets[fo] = (size_t)(sd.Data - startBufPtr);
     
     RINOK(SzReadNumber32(&sd, &numCoders));
     if (numCoders == 0 || numCoders > k_Scan_NumCoders_MAX)
@@ -797,7 +799,7 @@ static SRes ReadUnpackInfo(CSzAr *p,
   p->FoToCoderUnpackSizes[fo] = numCodersOutStreams;
   
   {
-    size_t dataSize = sd.Data - startBufPtr;
+    const size_t dataSize = (size_t)(sd.Data - startBufPtr);
     p->FoStartPackStreamIndex[fo] = packStreamIndex;
     p->FoCodersOffsets[fo] = dataSize;
     MY_ALLOC_ZE_AND_CPY(p->CodersData, dataSize, startBufPtr, alloc);
@@ -885,7 +887,7 @@ static SRes ReadSubStreamsInfo(CSzAr *p, CSzData *sd, CSubStreamInfo *ssi)
         if (numStreams != 1 || !SzBitWithVals_Check(&p->FolderCRCs, i))
           numSubDigests += numStreams;
       }
-      ssi->sdNumSubStreams.Size = sd->Data - ssi->sdNumSubStreams.Data;
+      ssi->sdNumSubStreams.Size = (size_t)(sd->Data - ssi->sdNumSubStreams.Data);
       continue;
     }
     if (type == k7zIdCRC || type == k7zIdSize || type == k7zIdEnd)
@@ -907,7 +909,7 @@ static SRes ReadSubStreamsInfo(CSzAr *p, CSzData *sd, CSubStreamInfo *ssi)
   {
     ssi->sdSizes.Data = sd->Data;
     RINOK(SkipNumbers(sd, numUnpackSizesInData));
-    ssi->sdSizes.Size = sd->Data - ssi->sdSizes.Data;
+    ssi->sdSizes.Size = (size_t)(sd->Data - ssi->sdSizes.Data);
     RINOK(ReadID(sd, &type));
   }
 
@@ -919,7 +921,7 @@ static SRes ReadSubStreamsInfo(CSzAr *p, CSzData *sd, CSubStreamInfo *ssi)
     {
       ssi->sdCRCs.Data = sd->Data;
       RINOK(SkipBitUi32s(sd, numSubDigests));
-      ssi->sdCRCs.Size = sd->Data - ssi->sdCRCs.Data;
+      ssi->sdCRCs.Size = (size_t)(sd->Data - ssi->sdCRCs.Data);
     }
     else
     {
@@ -947,7 +949,11 @@ static SRes SzReadStreamsInfo(CSzAr *p,
   if (type == k7zIdPackInfo)
   {
     RINOK(ReadNumber(sd, dataOffset));
+    if (*dataOffset > p->RangeLimit)
+      return SZ_ERROR_ARCHIVE;
     RINOK(ReadPackInfo(p, sd, alloc));
+    if (p->PackPositions[p->NumPackStreams] > p->RangeLimit - *dataOffset)
+      return SZ_ERROR_ARCHIVE;
     RINOK(ReadID(sd, &type));
   }
   if (type == k7zIdUnpackInfo)
@@ -1028,12 +1034,12 @@ static SRes SzReadFileNames(const Byte *data, size_t size, UInt32 numFiles, size
       return SZ_ERROR_ARCHIVE;
     for (p = data + pos;
       #ifdef _WIN32
-      *(const UInt16 *)p != 0
+      *(const UInt16 *)(const void *)p != 0
       #else
       p[0] != 0 || p[1] != 0
       #endif
       ; p += 2);
-    pos = p - data + 2;
+    pos = (size_t)(p - data) + 2;
     *offsets++ = (pos >> 1);
   }
   while (--numFiles);
@@ -1133,6 +1139,8 @@ static SRes SzReadHeader2(
     SRes res;
     
     SzAr_Init(&tempAr);
+    tempAr.RangeLimit = p->db.RangeLimit;
+
     res = SzReadAndDecodePackedStreams(inStream, sd, tempBufs, NUM_ADDITIONAL_STREAMS_MAX,
         p->startPosAfterHeader, &tempAr, allocTemp);
     *numTempBufs = tempAr.NumFolders;
@@ -1526,11 +1534,13 @@ static SRes SzArEx_Open2(
   nextHeaderSize = GetUi64(header + 20);
   nextHeaderCRC = GetUi32(header + 28);
 
-  p->startPosAfterHeader = startArcPos + k7zStartHeaderSize;
+  p->startPosAfterHeader = (UInt64)startArcPos + k7zStartHeaderSize;
   
   if (CrcCalc(header + 12, 20) != GetUi32(header + 8))
     return SZ_ERROR_CRC;
 
+  p->db.RangeLimit = nextHeaderOffset;
+
   nextHeaderSizeT = (size_t)nextHeaderSize;
   if (nextHeaderSizeT != nextHeaderSize)
     return SZ_ERROR_MEM;
@@ -1543,13 +1553,13 @@ static SRes SzArEx_Open2(
   {
     Int64 pos = 0;
     RINOK(ILookInStream_Seek(inStream, &pos, SZ_SEEK_END));
-    if ((UInt64)pos < startArcPos + nextHeaderOffset ||
-        (UInt64)pos < startArcPos + k7zStartHeaderSize + nextHeaderOffset ||
-        (UInt64)pos < startArcPos + k7zStartHeaderSize + nextHeaderOffset + nextHeaderSize)
+    if ((UInt64)pos < (UInt64)startArcPos + nextHeaderOffset ||
+        (UInt64)pos < (UInt64)startArcPos + k7zStartHeaderSize + nextHeaderOffset ||
+        (UInt64)pos < (UInt64)startArcPos + k7zStartHeaderSize + nextHeaderOffset + nextHeaderSize)
       return SZ_ERROR_INPUT_EOF;
   }
 
-  RINOK(LookInStream_SeekTo(inStream, startArcPos + k7zStartHeaderSize + nextHeaderOffset));
+  RINOK(LookInStream_SeekTo(inStream, (UInt64)startArcPos + k7zStartHeaderSize + nextHeaderOffset));
 
   if (!Buf_Create(&buf, nextHeaderSizeT, allocTemp))
     return SZ_ERROR_MEM;
@@ -1575,6 +1585,8 @@ static SRes SzArEx_Open2(
         Buf_Init(&tempBuf);
         
         SzAr_Init(&tempAr);
+        tempAr.RangeLimit = p->db.RangeLimit;
+
         res = SzReadAndDecodePackedStreams(inStream, &sd, &tempBuf, 1, p->startPosAfterHeader, &tempAr, allocTemp);
         SzAr_Free(&tempAr, allocTemp);
        
diff --git a/deps/LZMA-SDK/C/7zCrc.c b/deps/LZMA-SDK/C/7zCrc.c
index 40ab75952..c7ec353d6 100644
--- a/deps/LZMA-SDK/C/7zCrc.c
+++ b/deps/LZMA-SDK/C/7zCrc.c
@@ -1,5 +1,5 @@
 /* 7zCrc.c -- CRC32 init
-2017-06-06 : Igor Pavlov : Public domain */
+2021-04-01 : Igor Pavlov : Public domain */
 
 #include "Precomp.h"
 
@@ -26,8 +26,20 @@
 
 typedef UInt32 (MY_FAST_CALL *CRC_FUNC)(UInt32 v, const void *data, size_t size, const UInt32 *table);
 
+extern
 CRC_FUNC g_CrcUpdateT4;
+CRC_FUNC g_CrcUpdateT4;
+extern
+CRC_FUNC g_CrcUpdateT8;
 CRC_FUNC g_CrcUpdateT8;
+extern
+CRC_FUNC g_CrcUpdateT0_32;
+CRC_FUNC g_CrcUpdateT0_32;
+extern
+CRC_FUNC g_CrcUpdateT0_64;
+CRC_FUNC g_CrcUpdateT0_64;
+extern
+CRC_FUNC g_CrcUpdate;
 CRC_FUNC g_CrcUpdate;
 
 UInt32 g_CrcTable[256 * CRC_NUM_TABLES];
@@ -44,6 +56,7 @@ UInt32 MY_FAST_CALL CrcCalc(const void *data, size_t size)
 
 #define CRC_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
 
+UInt32 MY_FAST_CALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const UInt32 *table);
 UInt32 MY_FAST_CALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const UInt32 *table)
 {
   const Byte *p = (const Byte *)data;
@@ -53,6 +66,166 @@ UInt32 MY_FAST_CALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const U
   return v;
 }
 
+
+/* ---------- hardware CRC ---------- */
+
+#ifdef MY_CPU_LE
+
+#if defined(MY_CPU_ARM_OR_ARM64)
+
+// #pragma message("ARM*")
+
+  #if defined(_MSC_VER)
+    #if defined(MY_CPU_ARM64)
+    #if (_MSC_VER >= 1910)
+        #define USE_ARM64_CRC
+    #endif
+    #endif
+  #elif (defined(__clang__) && (__clang_major__ >= 3)) \
+     || (defined(__GNUC__) && (__GNUC__ > 4))
+      #if !defined(__ARM_FEATURE_CRC32)
+        #define __ARM_FEATURE_CRC32 1
+          #if (!defined(__clang__) || (__clang_major__ > 3)) // fix these numbers
+            #define ATTRIB_CRC __attribute__((__target__("arch=armv8-a+crc")))
+          #endif
+      #endif
+      #if defined(__ARM_FEATURE_CRC32)
+        #define USE_ARM64_CRC
+        #include <arm_acle.h>
+      #endif
+  #endif
+
+#else
+
+// no hardware CRC
+
+// #define USE_CRC_EMU
+
+#ifdef USE_CRC_EMU
+
+#pragma message("ARM64 CRC emulation")
+
+MY_FORCE_INLINE
+UInt32 __crc32b(UInt32 v, UInt32 data)
+{
+  const UInt32 *table = g_CrcTable;
+  v = CRC_UPDATE_BYTE_2(v, (Byte)data);
+  return v;
+}
+
+MY_FORCE_INLINE
+UInt32 __crc32w(UInt32 v, UInt32 data)
+{
+  const UInt32 *table = g_CrcTable;
+  v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
+  v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
+  v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
+  v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
+  return v;
+}
+
+MY_FORCE_INLINE
+UInt32 __crc32d(UInt32 v, UInt64 data)
+{
+  const UInt32 *table = g_CrcTable;
+  v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
+  v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
+  v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
+  v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
+  v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
+  v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
+  v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
+  v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
+  return v;
+}
+
+#endif // USE_CRC_EMU
+
+#endif // defined(MY_CPU_ARM_OR_ARM64)
+
+
+
+#if defined(USE_ARM64_CRC) || defined(USE_CRC_EMU)
+
+#define T0_32_UNROLL_BYTES (4 * 4)
+#define T0_64_UNROLL_BYTES (4 * 8)
+
+#ifndef ATTRIB_CRC
+#define ATTRIB_CRC
+#endif
+// #pragma message("USE ARM HW CRC")
+
+ATTRIB_CRC
+UInt32 MY_FAST_CALL CrcUpdateT0_32(UInt32 v, const void *data, size_t size, const UInt32 *table);
+ATTRIB_CRC // Folds `size` bytes at `data` into the running CRC `v` using __crc32b / __crc32w; returns the updated value.
+UInt32 MY_FAST_CALL CrcUpdateT0_32(UInt32 v, const void *data, size_t size, const UInt32 *table)
+{
+  const Byte *p = (const Byte *)data;
+  UNUSED_VAR(table); // table is never read here; the parameter keeps the signature compatible with CRC_FUNC
+
+  for (; size != 0 && ((unsigned)(ptrdiff_t)p & (T0_32_UNROLL_BYTES - 1)) != 0; size--)
+    v = __crc32b(v, *p++); // head: one byte at a time until p is T0_32_UNROLL_BYTES-aligned or the input is exhausted
+
+  if (size >= T0_32_UNROLL_BYTES)
+  {
+    const Byte *lim = p + size; // end of the input
+    size &= (T0_32_UNROLL_BYTES - 1); // size now holds only the tail length (< T0_32_UNROLL_BYTES)
+    lim -= size; // lim = end of the last whole T0_32_UNROLL_BYTES chunk
+    do // each iteration consumes 16 bytes as four 32-bit words
+    {
+      v = __crc32w(v, *(const UInt32 *)(const void *)(p));
+      v = __crc32w(v, *(const UInt32 *)(const void *)(p + 4)); p += 2 * 4;
+      v = __crc32w(v, *(const UInt32 *)(const void *)(p));
+      v = __crc32w(v, *(const UInt32 *)(const void *)(p + 4)); p += 2 * 4;
+    }
+    while (p != lim); // exact termination: lim - p started as a non-zero multiple of the 16-byte step
+  }
+  
+  for (; size != 0; size--)
+    v = __crc32b(v, *p++); // tail: remaining bytes that do not fill a whole chunk
+
+  return v;
+}
+
+ATTRIB_CRC
+UInt32 MY_FAST_CALL CrcUpdateT0_64(UInt32 v, const void *data, size_t size, const UInt32 *table);
+ATTRIB_CRC // Folds `size` bytes at `data` into the running CRC `v` using __crc32b / __crc32d; returns the updated value.
+UInt32 MY_FAST_CALL CrcUpdateT0_64(UInt32 v, const void *data, size_t size, const UInt32 *table)
+{
+  const Byte *p = (const Byte *)data;
+  UNUSED_VAR(table); // table is never read here; the parameter keeps the signature compatible with CRC_FUNC
+
+  for (; size != 0 && ((unsigned)(ptrdiff_t)p & (T0_64_UNROLL_BYTES - 1)) != 0; size--)
+    v = __crc32b(v, *p++); // head: one byte at a time until p is T0_64_UNROLL_BYTES-aligned or the input is exhausted
+
+  if (size >= T0_64_UNROLL_BYTES)
+  {
+    const Byte *lim = p + size; // end of the input
+    size &= (T0_64_UNROLL_BYTES - 1); // size now holds only the tail length (< T0_64_UNROLL_BYTES)
+    lim -= size; // lim = end of the last whole T0_64_UNROLL_BYTES chunk
+    do // each iteration consumes 32 bytes as four 64-bit words
+    {
+      v = __crc32d(v, *(const UInt64 *)(const void *)(p));
+      v = __crc32d(v, *(const UInt64 *)(const void *)(p + 8)); p += 2 * 8;
+      v = __crc32d(v, *(const UInt64 *)(const void *)(p));
+      v = __crc32d(v, *(const UInt64 *)(const void *)(p + 8)); p += 2 * 8;
+    }
+    while (p != lim); // exact termination: lim - p started as a non-zero multiple of the 32-byte step
+  }
+  
+  for (; size != 0; size--)
+    v = __crc32b(v, *p++); // tail: remaining bytes that do not fill a whole chunk
+
+  return v;
+}
+
+#endif // defined(USE_ARM64_CRC) || defined(USE_CRC_EMU)
+
+#endif // MY_CPU_LE
+
+
+
+
 void MY_FAST_CALL CrcGenerateTable()
 {
   UInt32 i;
@@ -123,6 +296,27 @@ void MY_FAST_CALL CrcGenerateTable()
     }
   }
   #endif
+  #endif
 
+  #ifdef MY_CPU_LE
+    #ifdef USE_ARM64_CRC
+      if (CPU_IsSupported_CRC32())
+      {
+        g_CrcUpdateT0_32 = CrcUpdateT0_32;
+        g_CrcUpdateT0_64 = CrcUpdateT0_64;
+        g_CrcUpdate =
+          #if defined(MY_CPU_ARM)
+            CrcUpdateT0_32;
+          #else
+            CrcUpdateT0_64;
+          #endif
+      }
+    #endif
+    
+    #ifdef USE_CRC_EMU
+      g_CrcUpdateT0_32 = CrcUpdateT0_32;
+      g_CrcUpdateT0_64 = CrcUpdateT0_64;
+      g_CrcUpdate = CrcUpdateT0_64;
+    #endif
   #endif
 }
diff --git a/deps/LZMA-SDK/C/7zCrcOpt.c b/deps/LZMA-SDK/C/7zCrcOpt.c
index 2ee0de845..efaa7ab9d 100644
--- a/deps/LZMA-SDK/C/7zCrcOpt.c
+++ b/deps/LZMA-SDK/C/7zCrcOpt.c
@@ -1,5 +1,5 @@
 /* 7zCrcOpt.c -- CRC32 calculation
-2017-04-03 : Igor Pavlov : Public domain */
+2021-02-09 : Igor Pavlov : Public domain */
 
 #include "Precomp.h"
 
@@ -9,6 +9,7 @@
 
 #define CRC_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
 
+UInt32 MY_FAST_CALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const UInt32 *table);
 UInt32 MY_FAST_CALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const UInt32 *table)
 {
   const Byte *p = (const Byte *)data;
@@ -16,7 +17,7 @@ UInt32 MY_FAST_CALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const U
     v = CRC_UPDATE_BYTE_2(v, *p);
   for (; size >= 4; size -= 4, p += 4)
   {
-    v ^= *(const UInt32 *)p;
+    v ^= *(const UInt32 *)(const void *)p;
     v =
           (table + 0x300)[((v      ) & 0xFF)]
         ^ (table + 0x200)[((v >>  8) & 0xFF)]
@@ -28,6 +29,7 @@ UInt32 MY_FAST_CALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const U
   return v;
 }
 
+UInt32 MY_FAST_CALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const UInt32 *table);
 UInt32 MY_FAST_CALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const UInt32 *table)
 {
   const Byte *p = (const Byte *)data;
@@ -36,13 +38,13 @@ UInt32 MY_FAST_CALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const U
   for (; size >= 8; size -= 8, p += 8)
   {
     UInt32 d;
-    v ^= *(const UInt32 *)p;
+    v ^= *(const UInt32 *)(const void *)p;
     v =
           (table + 0x700)[((v      ) & 0xFF)]
         ^ (table + 0x600)[((v >>  8) & 0xFF)]
         ^ (table + 0x500)[((v >> 16) & 0xFF)]
         ^ (table + 0x400)[((v >> 24))];
-    d = *((const UInt32 *)p + 1);
+    d = *((const UInt32 *)(const void *)p + 1);
     v ^=
           (table + 0x300)[((d      ) & 0xFF)]
         ^ (table + 0x200)[((d >>  8) & 0xFF)]
@@ -72,7 +74,7 @@ UInt32 MY_FAST_CALL CrcUpdateT1_BeT4(UInt32 v, const void *data, size_t size, co
     v = CRC_UPDATE_BYTE_2_BE(v, *p);
   for (; size >= 4; size -= 4, p += 4)
   {
-    v ^= *(const UInt32 *)p;
+    v ^= *(const UInt32 *)(const void *)p;
     v =
           (table + 0x000)[((v      ) & 0xFF)]
         ^ (table + 0x100)[((v >>  8) & 0xFF)]
@@ -94,13 +96,13 @@ UInt32 MY_FAST_CALL CrcUpdateT1_BeT8(UInt32 v, const void *data, size_t size, co
   for (; size >= 8; size -= 8, p += 8)
   {
     UInt32 d;
-    v ^= *(const UInt32 *)p;
+    v ^= *(const UInt32 *)(const void *)p;
     v =
           (table + 0x400)[((v      ) & 0xFF)]
         ^ (table + 0x500)[((v >>  8) & 0xFF)]
         ^ (table + 0x600)[((v >> 16) & 0xFF)]
         ^ (table + 0x700)[((v >> 24))];
-    d = *((const UInt32 *)p + 1);
+    d = *((const UInt32 *)(const void *)p + 1);
     v ^=
           (table + 0x000)[((d      ) & 0xFF)]
         ^ (table + 0x100)[((d >>  8) & 0xFF)]
diff --git a/deps/LZMA-SDK/C/7zDec.c b/deps/LZMA-SDK/C/7zDec.c
index 2a7b09030..83e37d166 100644
--- a/deps/LZMA-SDK/C/7zDec.c
+++ b/deps/LZMA-SDK/C/7zDec.c
@@ -1,5 +1,5 @@
 /* 7zDec.c -- Decoding from 7z folder
-2019-02-02 : Igor Pavlov : Public domain */
+2021-02-09 : Igor Pavlov : Public domain */
 
 #include "Precomp.h"
 
@@ -21,17 +21,20 @@
 #endif
 
 #define k_Copy 0
-#define k_Delta 3
+#ifndef _7Z_NO_METHOD_LZMA2
 #define k_LZMA2 0x21
+#endif
 #define k_LZMA  0x30101
-#define k_BCJ   0x3030103
 #define k_BCJ2  0x303011B
+#ifndef _7Z_NO_METHODS_FILTERS
+#define k_Delta 3
+#define k_BCJ   0x3030103
 #define k_PPC   0x3030205
 #define k_IA64  0x3030401
 #define k_ARM   0x3030501
 #define k_ARMT  0x3030701
 #define k_SPARC 0x3030805
-
+#endif
 
 #ifdef _7ZIP_PPMD_SUPPPORT
 
@@ -56,7 +59,7 @@ static Byte ReadByte(const IByteIn *pp)
     return *p->cur++;
   if (p->res == SZ_OK)
   {
-    size_t size = p->cur - p->begin;
+    size_t size = (size_t)(p->cur - p->begin);
     p->processed += size;
     p->res = ILookInStream_Skip(p->inStream, size);
     size = (1 << 25);
@@ -101,28 +104,32 @@ static SRes SzDecodePpmd(const Byte *props, unsigned propsSize, UInt64 inSize, c
     Ppmd7_Init(&ppmd, order);
   }
   {
-    CPpmd7z_RangeDec rc;
-    Ppmd7z_RangeDec_CreateVTable(&rc);
-    rc.Stream = &s.vt;
-    if (!Ppmd7z_RangeDec_Init(&rc))
+    ppmd.rc.dec.Stream = &s.vt;
+    if (!Ppmd7z_RangeDec_Init(&ppmd.rc.dec))
       res = SZ_ERROR_DATA;
-    else if (s.extra)
-      res = (s.res != SZ_OK ? s.res : SZ_ERROR_DATA);
-    else
+    else if (!s.extra)
     {
-      SizeT i;
-      for (i = 0; i < outSize; i++)
+      Byte *buf = outBuffer;
+      const Byte *lim = buf + outSize;
+      for (; buf != lim; buf++)
       {
-        int sym = Ppmd7_DecodeSymbol(&ppmd, &rc.vt);
+        int sym = Ppmd7z_DecodeSymbol(&ppmd);
         if (s.extra || sym < 0)
           break;
-        outBuffer[i] = (Byte)sym;
+        *buf = (Byte)sym;
       }
-      if (i != outSize)
-        res = (s.res != SZ_OK ? s.res : SZ_ERROR_DATA);
-      else if (s.processed + (s.cur - s.begin) != inSize || !Ppmd7z_RangeDec_IsFinishedOK(&rc))
+      if (buf != lim)
+        res = SZ_ERROR_DATA;
+      else if (!Ppmd7z_RangeDec_IsFinishedOK(&ppmd.rc.dec))
+      {
+        /* if (Ppmd7z_DecodeSymbol(&ppmd) != PPMD7_SYM_END || !Ppmd7z_RangeDec_IsFinishedOK(&ppmd.rc.dec)) */
         res = SZ_ERROR_DATA;
+      }
     }
+    if (s.extra)
+      res = (s.res != SZ_OK ? s.res : SZ_ERROR_DATA);
+    else if (s.processed + (size_t)(s.cur - s.begin) != inSize)
+      res = SZ_ERROR_DATA;
   }
   Ppmd7_Free(&ppmd, allocMain);
   return res;
@@ -365,7 +372,9 @@ static SRes CheckSupportedFolder(const CSzFolder *f)
   return SZ_ERROR_UNSUPPORTED;
 }
 
+#ifndef _7Z_NO_METHODS_FILTERS
 #define CASE_BRA_CONV(isa) case k_ ## isa: isa ## _Convert(outBuffer, outSize, 0, 0); break;
+#endif
 
 static SRes SzFolder_Decode2(const CSzFolder *folder,
     const Byte *propsData,
diff --git a/deps/LZMA-SDK/C/7zFile.c b/deps/LZMA-SDK/C/7zFile.c
index e486901e3..900125d52 100644
--- a/deps/LZMA-SDK/C/7zFile.c
+++ b/deps/LZMA-SDK/C/7zFile.c
@@ -1,5 +1,5 @@
 /* 7zFile.c -- File IO
-2017-04-03 : Igor Pavlov : Public domain */
+2021-04-29 : Igor Pavlov : Public domain */
 
 #include "Precomp.h"
 
@@ -7,9 +7,19 @@
 
 #ifndef USE_WINDOWS_FILE
 
-#ifndef UNDER_CE
-#include <errno.h>
-#endif
+  #include <errno.h>
+
+  #ifndef USE_FOPEN
+    #include <stdio.h>
+    #include <fcntl.h>
+    #ifdef _WIN32
+      #include <io.h>
+      typedef int ssize_t;
+      typedef int off_t;
+    #else
+      #include <unistd.h>
+    #endif
+  #endif
 
 #else
 
@@ -23,30 +33,36 @@
    And message can be "Network connection was lost"
 */
 
-#define kChunkSizeMax (1 << 22)
-
 #endif
 
+#define kChunkSizeMax (1 << 22)
+
 void File_Construct(CSzFile *p)
 {
   #ifdef USE_WINDOWS_FILE
   p->handle = INVALID_HANDLE_VALUE;
-  #else
+  #elif defined(USE_FOPEN)
   p->file = NULL;
+  #else
+  p->fd = -1;
   #endif
 }
 
 #if !defined(UNDER_CE) || !defined(USE_WINDOWS_FILE)
+
 static WRes File_Open(CSzFile *p, const char *name, int writeMode)
 {
   #ifdef USE_WINDOWS_FILE
+  
   p->handle = CreateFileA(name,
       writeMode ? GENERIC_WRITE : GENERIC_READ,
       FILE_SHARE_READ, NULL,
       writeMode ? CREATE_ALWAYS : OPEN_EXISTING,
       FILE_ATTRIBUTE_NORMAL, NULL);
   return (p->handle != INVALID_HANDLE_VALUE) ? 0 : GetLastError();
-  #else
+  
+  #elif defined(USE_FOPEN)
+  
   p->file = fopen(name, writeMode ? "wb+" : "rb");
   return (p->file != 0) ? 0 :
     #ifdef UNDER_CE
@@ -54,13 +70,34 @@ static WRes File_Open(CSzFile *p, const char *name, int writeMode)
     #else
     errno;
     #endif
+  
+  #else
+
+  int flags = (writeMode ? (O_CREAT | O_EXCL | O_WRONLY) : O_RDONLY);
+  #ifdef O_BINARY
+  flags |= O_BINARY;
+  #endif
+  p->fd = open(name, flags, 0666);
+  return (p->fd != -1) ? 0 : errno;
+
   #endif
 }
 
 WRes InFile_Open(CSzFile *p, const char *name) { return File_Open(p, name, 0); }
-WRes OutFile_Open(CSzFile *p, const char *name) { return File_Open(p, name, 1); }
+
+WRes OutFile_Open(CSzFile *p, const char *name)
+{
+  #if defined(USE_WINDOWS_FILE) || defined(USE_FOPEN)
+  return File_Open(p, name, 1);
+  #else
+  p->fd = creat(name, 0666);
+  return (p->fd != -1) ? 0 : errno;
+  #endif
+}
+
 #endif
 
+
 #ifdef USE_WINDOWS_FILE
 static WRes File_OpenW(CSzFile *p, const WCHAR *name, int writeMode)
 {
@@ -78,74 +115,124 @@ WRes OutFile_OpenW(CSzFile *p, const WCHAR *name) { return File_OpenW(p, name, 1
 WRes File_Close(CSzFile *p)
 {
   #ifdef USE_WINDOWS_FILE
+  
   if (p->handle != INVALID_HANDLE_VALUE)
   {
     if (!CloseHandle(p->handle))
       return GetLastError();
     p->handle = INVALID_HANDLE_VALUE;
   }
-  #else
+  
+  #elif defined(USE_FOPEN)
+
   if (p->file != NULL)
   {
     int res = fclose(p->file);
     if (res != 0)
+    {
+      if (res == EOF)
+        return errno;
       return res;
+    }
     p->file = NULL;
   }
+
+  #else
+
+  if (p->fd != -1)
+  {
+    if (close(p->fd) != 0)
+      return errno;
+    p->fd = -1;
+  }
+
   #endif
+
   return 0;
 }
 
+
 WRes File_Read(CSzFile *p, void *data, size_t *size)
 {
   size_t originalSize = *size;
+  *size = 0;
   if (originalSize == 0)
     return 0;
 
   #ifdef USE_WINDOWS_FILE
 
-  *size = 0;
   do
   {
-    DWORD curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : (DWORD)originalSize;
+    const DWORD curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : (DWORD)originalSize;
     DWORD processed = 0;
-    BOOL res = ReadFile(p->handle, data, curSize, &processed, NULL);
+    const BOOL res = ReadFile(p->handle, data, curSize, &processed, NULL);
     data = (void *)((Byte *)data + processed);
     originalSize -= processed;
     *size += processed;
     if (!res)
       return GetLastError();
+    // debug : we can break here for partial reading mode
+    if (processed == 0)
+      break;
+  }
+  while (originalSize > 0);
+
+  #elif defined(USE_FOPEN)
+
+  do
+  {
+    const size_t curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : originalSize;
+    const size_t processed = fread(data, 1, curSize, p->file);
+    data = (void *)((Byte *)data + (size_t)processed);
+    originalSize -= processed;
+    *size += processed;
+    if (processed != curSize)
+      return ferror(p->file);
+    // debug : we can break here for partial reading mode
     if (processed == 0)
       break;
   }
   while (originalSize > 0);
-  return 0;
 
   #else
-  
-  *size = fread(data, 1, originalSize, p->file);
-  if (*size == originalSize)
-    return 0;
-  return ferror(p->file);
-  
+
+  do
+  {
+    const size_t curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : originalSize;
+    const ssize_t processed = read(p->fd, data, curSize);
+    if (processed == -1)
+      return errno;
+    if (processed == 0)
+      break;
+    data = (void *)((Byte *)data + (size_t)processed);
+    originalSize -= (size_t)processed;
+    *size += (size_t)processed;
+    // debug : we can break here for partial reading mode
+    // break;
+  }
+  while (originalSize > 0);
+
   #endif
+
+  return 0;
 }
 
+
 WRes File_Write(CSzFile *p, const void *data, size_t *size)
 {
   size_t originalSize = *size;
+  *size = 0;
   if (originalSize == 0)
     return 0;
   
   #ifdef USE_WINDOWS_FILE
 
-  *size = 0;
   do
   {
-    DWORD curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : (DWORD)originalSize;
+    const DWORD curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : (DWORD)originalSize;
     DWORD processed = 0;
-    BOOL res = WriteFile(p->handle, data, curSize, &processed, NULL);
-    data = (void *)((Byte *)data + processed);
+    const BOOL res = WriteFile(p->handle, data, curSize, &processed, NULL);
+    data = (const void *)((const Byte *)data + processed);
     originalSize -= processed;
     *size += processed;
     if (!res)
@@ -154,26 +241,52 @@ WRes File_Write(CSzFile *p, const void *data, size_t *size)
       break;
   }
   while (originalSize > 0);
-  return 0;
+
+  #elif defined(USE_FOPEN)
+
+  do // write in chunks of at most kChunkSizeMax bytes, accumulating the byte count in *size
+  {
+    const size_t curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : originalSize;
+    const size_t processed = fwrite(data, 1, curSize, p->file);
+    data = (const void *)((const Byte *)data + (size_t)processed); // keep const: data is the caller's read-only buffer
+    originalSize -= processed;
+    *size += processed;
+    if (processed != curSize) // short write: report the stream's error indicator
+      return ferror(p->file);
+    if (processed == 0) // not reached: curSize > 0 here, so a zero-byte write already returned above
+      break;
+  }
+  while (originalSize > 0);
 
   #else
 
-  *size = fwrite(data, 1, originalSize, p->file);
-  if (*size == originalSize)
-    return 0;
-  return ferror(p->file);
-  
+  do // write in chunks of at most kChunkSizeMax bytes, accumulating the byte count in *size
+  {
+    const size_t curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : originalSize;
+    const ssize_t processed = write(p->fd, data, curSize);
+    if (processed == -1) // write() failed: hand errno back as the WRes code
+      return errno;
+    if (processed == 0) // no progress: stop; the caller sees the short count in *size
+      break;
+    data = (const void *)((const Byte *)data + (size_t)processed); // keep const: data is the caller's read-only buffer
+    originalSize -= (size_t)processed;
+    *size += (size_t)processed;
+  }
+  while (originalSize > 0);
+
   #endif
+
+  return 0;
 }
 
+
 WRes File_Seek(CSzFile *p, Int64 *pos, ESzSeek origin)
 {
   #ifdef USE_WINDOWS_FILE
 
-  LARGE_INTEGER value;
   DWORD moveMethod;
-  value.LowPart = (DWORD)*pos;
-  value.HighPart = (LONG)((UInt64)*pos >> 16 >> 16); /* for case when UInt64 is 32-bit only */
+  UInt32 low = (UInt32)*pos;
+  LONG high = (LONG)((UInt64)*pos >> 16 >> 16); /* for case when UInt64 is 32-bit only */
   switch (origin)
   {
     case SZ_SEEK_SET: moveMethod = FILE_BEGIN; break;
@@ -181,34 +294,52 @@ WRes File_Seek(CSzFile *p, Int64 *pos, ESzSeek origin)
     case SZ_SEEK_END: moveMethod = FILE_END; break;
     default: return ERROR_INVALID_PARAMETER;
   }
-  value.LowPart = SetFilePointer(p->handle, value.LowPart, &value.HighPart, moveMethod);
-  if (value.LowPart == 0xFFFFFFFF)
+  low = SetFilePointer(p->handle, (LONG)low, &high, moveMethod);
+  if (low == (UInt32)0xFFFFFFFF)
   {
     WRes res = GetLastError();
     if (res != NO_ERROR)
       return res;
   }
-  *pos = ((Int64)value.HighPart << 32) | value.LowPart;
+  *pos = ((Int64)high << 32) | low;
   return 0;
 
   #else
   
-  int moveMethod;
-  int res;
+  int moveMethod; // = origin;
+
   switch (origin)
   {
     case SZ_SEEK_SET: moveMethod = SEEK_SET; break;
     case SZ_SEEK_CUR: moveMethod = SEEK_CUR; break;
     case SZ_SEEK_END: moveMethod = SEEK_END; break;
-    default: return 1;
+    default: return EINVAL;
   }
-  res = fseek(p->file, (long)*pos, moveMethod);
-  *pos = ftell(p->file);
-  return res;
   
-  #endif
+  #if defined(USE_FOPEN)
+  {
+    int res = fseek(p->file, (long)*pos, moveMethod);
+    if (res == -1)
+      return errno;
+    *pos = ftell(p->file);
+    if (*pos == -1)
+      return errno;
+    return 0;
+  }
+  #else
+  {
+    off_t res = lseek(p->fd, (off_t)*pos, moveMethod);
+    if (res == -1)
+      return errno;
+    *pos = res;
+    return 0;
+  }
+  
+  #endif // USE_FOPEN
+  #endif // USE_WINDOWS_FILE
 }
 
+
 WRes File_GetLength(CSzFile *p, UInt64 *length)
 {
   #ifdef USE_WINDOWS_FILE
@@ -224,13 +355,31 @@ WRes File_GetLength(CSzFile *p, UInt64 *length)
   *length = (((UInt64)sizeHigh) << 32) + sizeLow;
   return 0;
   
-  #else
+  #elif defined(USE_FOPEN)
   
   long pos = ftell(p->file);
   int res = fseek(p->file, 0, SEEK_END);
   *length = ftell(p->file);
   fseek(p->file, pos, SEEK_SET);
   return res;
+
+  #else
+
+  off_t pos;
+  *length = 0;
+  pos = lseek(p->fd, 0, SEEK_CUR);
+  if (pos != -1)
+  {
+    const off_t len2 = lseek(p->fd, 0, SEEK_END);
+    const off_t res2 = lseek(p->fd, pos, SEEK_SET);
+    if (len2 != -1)
+    {
+      *length = (UInt64)len2;
+      if (res2 != -1)
+        return 0;
+    }
+  }
+  return errno;
   
   #endif
 }
@@ -241,7 +390,9 @@ WRes File_GetLength(CSzFile *p, UInt64 *length)
 static SRes FileSeqInStream_Read(const ISeqInStream *pp, void *buf, size_t *size)
 {
   CFileSeqInStream *p = CONTAINER_FROM_VTBL(pp, CFileSeqInStream, vt);
-  return File_Read(&p->file, buf, size) == 0 ? SZ_OK : SZ_ERROR_READ;
+  WRes wres = File_Read(&p->file, buf, size);
+  p->wres = wres;
+  return (wres == 0) ? SZ_OK : SZ_ERROR_READ;
 }
 
 void FileSeqInStream_CreateVTable(CFileSeqInStream *p)
@@ -255,13 +406,17 @@ void FileSeqInStream_CreateVTable(CFileSeqInStream *p)
 static SRes FileInStream_Read(const ISeekInStream *pp, void *buf, size_t *size)
 {
   CFileInStream *p = CONTAINER_FROM_VTBL(pp, CFileInStream, vt);
-  return (File_Read(&p->file, buf, size) == 0) ? SZ_OK : SZ_ERROR_READ;
+  WRes wres = File_Read(&p->file, buf, size);
+  p->wres = wres;
+  return (wres == 0) ? SZ_OK : SZ_ERROR_READ;
 }
 
 static SRes FileInStream_Seek(const ISeekInStream *pp, Int64 *pos, ESzSeek origin)
 {
   CFileInStream *p = CONTAINER_FROM_VTBL(pp, CFileInStream, vt);
-  return File_Seek(&p->file, pos, origin);
+  WRes wres = File_Seek(&p->file, pos, origin);
+  p->wres = wres;
+  return (wres == 0) ? SZ_OK : SZ_ERROR_READ;
 }
 
 void FileInStream_CreateVTable(CFileInStream *p)
@@ -276,7 +431,8 @@ void FileInStream_CreateVTable(CFileInStream *p)
 static size_t FileOutStream_Write(const ISeqOutStream *pp, const void *data, size_t size)
 {
   CFileOutStream *p = CONTAINER_FROM_VTBL(pp, CFileOutStream, vt);
-  File_Write(&p->file, data, &size);
+  WRes wres = File_Write(&p->file, data, &size);
+  p->wres = wres;
   return size;
 }
 
diff --git a/deps/LZMA-SDK/C/7zFile.h b/deps/LZMA-SDK/C/7zFile.h
index 7e263bea1..c7a30fc2b 100644
--- a/deps/LZMA-SDK/C/7zFile.h
+++ b/deps/LZMA-SDK/C/7zFile.h
@@ -1,17 +1,20 @@
 /* 7zFile.h -- File IO
-2017-04-03 : Igor Pavlov : Public domain */
+2021-02-15 : Igor Pavlov : Public domain */
 
 #ifndef __7Z_FILE_H
 #define __7Z_FILE_H
 
 #ifdef _WIN32
 #define USE_WINDOWS_FILE
+// #include <windows.h>
 #endif
 
 #ifdef USE_WINDOWS_FILE
 #include <windows.h>
 #else
-#include <stdio.h>
+// note: USE_FOPEN mode is limited to 32-bit file size
+// #define USE_FOPEN
+// #include <stdio.h>
 #endif
 
 #include "7zTypes.h"
@@ -24,8 +27,10 @@ typedef struct
 {
   #ifdef USE_WINDOWS_FILE
   HANDLE handle;
-  #else
+  #elif defined(USE_FOPEN)
   FILE *file;
+  #else
+  int fd;
   #endif
 } CSzFile;
 
@@ -56,6 +61,7 @@ typedef struct
 {
   ISeqInStream vt;
   CSzFile file;
+  WRes wres;
 } CFileSeqInStream;
 
 void FileSeqInStream_CreateVTable(CFileSeqInStream *p);
@@ -65,6 +71,7 @@ typedef struct
 {
   ISeekInStream vt;
   CSzFile file;
+  WRes wres;
 } CFileInStream;
 
 void FileInStream_CreateVTable(CFileInStream *p);
@@ -74,6 +81,7 @@ typedef struct
 {
   ISeqOutStream vt;
   CSzFile file;
+  WRes wres;
 } CFileOutStream;
 
 void FileOutStream_CreateVTable(CFileOutStream *p);
diff --git a/deps/LZMA-SDK/C/7zStream.c b/deps/LZMA-SDK/C/7zStream.c
index 579741fad..4b472a41d 100644
--- a/deps/LZMA-SDK/C/7zStream.c
+++ b/deps/LZMA-SDK/C/7zStream.c
@@ -1,5 +1,5 @@
 /* 7zStream.c -- 7z Stream functions
-2017-04-03 : Igor Pavlov : Public domain */
+2021-02-09 : Igor Pavlov : Public domain */
 
 #include "Precomp.h"
 
@@ -37,7 +37,7 @@ SRes SeqInStream_ReadByte(const ISeqInStream *stream, Byte *buf)
 
 SRes LookInStream_SeekTo(const ILookInStream *stream, UInt64 offset)
 {
-  Int64 t = offset;
+  Int64 t = (Int64)offset; /* explicit UInt64 -> Int64 conversion of the absolute seek position */
   return ILookInStream_Seek(stream, &t, SZ_SEEK_SET);
 }
 
diff --git a/deps/LZMA-SDK/C/7zTypes.h b/deps/LZMA-SDK/C/7zTypes.h
index 593f5aa25..497b14506 100644
--- a/deps/LZMA-SDK/C/7zTypes.h
+++ b/deps/LZMA-SDK/C/7zTypes.h
@@ -1,11 +1,13 @@
 /* 7zTypes.h -- Basic types
-2018-08-04 : Igor Pavlov : Public domain */
+2021-04-25 : Igor Pavlov : Public domain */
 
 #ifndef __7Z_TYPES_H
 #define __7Z_TYPES_H
 
 #ifdef _WIN32
 /* #include <windows.h> */
+#else
+#include <errno.h>
 #endif
 
 #include <stddef.h>
@@ -43,18 +45,112 @@ EXTERN_C_BEGIN
 typedef int SRes;
 
 
+#ifdef _MSC_VER
+  #if _MSC_VER > 1200
+    #define MY_ALIGN(n) __declspec(align(n))
+  #else
+    #define MY_ALIGN(n)
+  #endif
+#else
+  #define MY_ALIGN(n) __attribute__ ((aligned(n)))
+#endif
+
+
 #ifdef _WIN32
 
 /* typedef DWORD WRes; */
 typedef unsigned WRes;
 #define MY_SRes_HRESULT_FROM_WRes(x) HRESULT_FROM_WIN32(x)
 
-#else
+#else // _WIN32
 
+// #define ENV_HAVE_LSTAT
 typedef int WRes;
-#define MY__FACILITY_WIN32 7
-#define MY__FACILITY__WRes MY__FACILITY_WIN32
-#define MY_SRes_HRESULT_FROM_WRes(x) ((HRESULT)(x) <= 0 ? ((HRESULT)(x)) : ((HRESULT) (((x) & 0x0000FFFF) | (MY__FACILITY__WRes << 16) | 0x80000000)))
+
+// (FACILITY_ERRNO = 0x800) is 7zip's FACILITY constant to represent (errno) errors in HRESULT
+#define MY__FACILITY_ERRNO  0x800
+#define MY__FACILITY_WIN32  7
+#define MY__FACILITY__WRes  MY__FACILITY_ERRNO
+
+#define MY_HRESULT_FROM_errno_CONST_ERROR(x) ((HRESULT)( \
+          ( (HRESULT)(x) & 0x0000FFFF) \
+          | (MY__FACILITY__WRes << 16)  \
+          | (HRESULT)0x80000000 ))
+
+#define MY_SRes_HRESULT_FROM_WRes(x) \
+  ((HRESULT)(x) <= 0 ? ((HRESULT)(x)) : MY_HRESULT_FROM_errno_CONST_ERROR(x))
+
+// HRESULT_FROM_WIN32 is also used for system errors (WRes), which are errno values here
+#define HRESULT_FROM_WIN32(x) MY_SRes_HRESULT_FROM_WRes(x)
+
+/*
+#define ERROR_FILE_NOT_FOUND             2L
+#define ERROR_ACCESS_DENIED              5L
+#define ERROR_NO_MORE_FILES              18L
+#define ERROR_LOCK_VIOLATION             33L
+#define ERROR_FILE_EXISTS                80L
+#define ERROR_DISK_FULL                  112L
+#define ERROR_NEGATIVE_SEEK              131L
+#define ERROR_ALREADY_EXISTS             183L
+#define ERROR_DIRECTORY                  267L
+#define ERROR_TOO_MANY_POSTS             298L
+
+#define ERROR_INVALID_REPARSE_DATA       4392L
+#define ERROR_REPARSE_TAG_INVALID        4393L
+#define ERROR_REPARSE_TAG_MISMATCH       4394L
+*/
+
+// we use errno equivalents for some WIN32 errors:
+
+#define ERROR_INVALID_FUNCTION      EINVAL
+#define ERROR_ALREADY_EXISTS        EEXIST
+#define ERROR_FILE_EXISTS           EEXIST
+#define ERROR_PATH_NOT_FOUND        ENOENT
+#define ERROR_FILE_NOT_FOUND        ENOENT
+#define ERROR_DISK_FULL             ENOSPC
+// #define ERROR_INVALID_HANDLE        EBADF
+
+// we use FACILITY_WIN32 for errors that have no errno equivalent
+// Too many posts were made to a semaphore.
+#define ERROR_TOO_MANY_POSTS        ((HRESULT)0x8007012AL)
+#define ERROR_INVALID_REPARSE_DATA  ((HRESULT)0x80071128L)
+#define ERROR_REPARSE_TAG_INVALID   ((HRESULT)0x80071129L)
+
+// if (MY__FACILITY__WRes != FACILITY_WIN32),
+// we use FACILITY_WIN32 for COM errors:
+#define E_OUTOFMEMORY               ((HRESULT)0x8007000EL)
+#define E_INVALIDARG                ((HRESULT)0x80070057L)
+#define MY__E_ERROR_NEGATIVE_SEEK   ((HRESULT)0x80070083L)
+
+/*
+// we can use FACILITY_ERRNO for some COM errors, that have errno equivalents:
+#define E_OUTOFMEMORY             MY_HRESULT_FROM_errno_CONST_ERROR(ENOMEM)
+#define E_INVALIDARG              MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL)
+#define MY__E_ERROR_NEGATIVE_SEEK MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL)
+*/
+
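+// gcc / clang : (sizeof(long) == sizeof(void*)) in 32/64 bits. NOTE(review): INT_PTR / UINT_PTR are typedef'd again further down in this header (duplicate typedef is valid only since C11)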
+typedef          long INT_PTR;
+typedef unsigned long UINT_PTR;
+
+#define TEXT(quote) quote
+
+#define FILE_ATTRIBUTE_READONLY       0x0001
+#define FILE_ATTRIBUTE_HIDDEN         0x0002
+#define FILE_ATTRIBUTE_SYSTEM         0x0004
+#define FILE_ATTRIBUTE_DIRECTORY      0x0010
+#define FILE_ATTRIBUTE_ARCHIVE        0x0020
+#define FILE_ATTRIBUTE_DEVICE         0x0040
+#define FILE_ATTRIBUTE_NORMAL         0x0080
+#define FILE_ATTRIBUTE_TEMPORARY      0x0100
+#define FILE_ATTRIBUTE_SPARSE_FILE    0x0200
+#define FILE_ATTRIBUTE_REPARSE_POINT  0x0400
+#define FILE_ATTRIBUTE_COMPRESSED     0x0800
+#define FILE_ATTRIBUTE_OFFLINE        0x1000
+#define FILE_ATTRIBUTE_NOT_CONTENT_INDEXED 0x2000
+#define FILE_ATTRIBUTE_ENCRYPTED      0x4000
+
+#define FILE_ATTRIBUTE_UNIX_EXTENSION 0x8000   /* trick for Unix */
 
 #endif
 
@@ -63,6 +159,10 @@ typedef int WRes;
 #define RINOK(x) { int __result__ = (x); if (__result__ != 0) return __result__; }
 #endif
 
+#ifndef RINOK_WRes
+#define RINOK_WRes(x) { WRes __result__ = (x); if (__result__ != 0) return __result__; }
+#endif
+
 typedef unsigned char Byte;
 typedef short Int16;
 typedef unsigned short UInt16;
@@ -75,6 +175,38 @@ typedef int Int32;
 typedef unsigned int UInt32;
 #endif
 
+
+#ifndef _WIN32
+
+typedef int INT;
+typedef Int32 INT32;
+typedef unsigned int UINT;
+typedef UInt32 UINT32;
+typedef INT32 LONG;   // LONG, ULONG and DWORD must be 32-bit for _WIN32 compatibility
+typedef UINT32 ULONG;
+
+#undef DWORD
+typedef UINT32 DWORD;
+
+#define VOID void
+
+#define HRESULT LONG
+
+typedef void *LPVOID;
+// typedef void VOID;
+// typedef ULONG_PTR DWORD_PTR, *PDWORD_PTR;
+// gcc / clang on Unix  : (sizeof(long) == sizeof(void*)) in 32 or 64 bits
+typedef          long  INT_PTR;
+typedef unsigned long  UINT_PTR;
+typedef          long  LONG_PTR;
+typedef unsigned long  DWORD_PTR;
+
+typedef size_t SIZE_T;
+
+#endif //  _WIN32
+
+
+
 #ifdef _SZ_NO_INT_64
 
 /* define _SZ_NO_INT_64, if your compiler doesn't support 64-bit integers.
@@ -128,25 +260,37 @@ typedef int BoolInt;
 #define MY_CDECL __cdecl
 #define MY_FAST_CALL __fastcall
 
-#else
+#else //  _MSC_VER
 
+#if (defined(__GNUC__) && (__GNUC__ >= 4)) \
+    || (defined(__clang__) && (__clang_major__ >= 4)) \
+    || defined(__INTEL_COMPILER) \
+    || defined(__xlC__)
+#define MY_NO_INLINE __attribute__((noinline))
+// #define MY_FORCE_INLINE __attribute__((always_inline)) inline
+#else
 #define MY_NO_INLINE
+#endif
+
 #define MY_FORCE_INLINE
-#define MY_CDECL
-#define MY_FAST_CALL
 
-/* inline keyword : for C++ / C99 */
 
-/* GCC, clang: */
-/*
-#if defined (__GNUC__) && (__GNUC__ >= 4)
-#define MY_FORCE_INLINE __attribute__((always_inline))
-#define MY_NO_INLINE __attribute__((noinline))
-#endif
-*/
+#define MY_CDECL
 
+#if  defined(_M_IX86) \
+  || defined(__i386__)
+// #define MY_FAST_CALL __attribute__((fastcall))
+// #define MY_FAST_CALL __attribute__((cdecl))
+#define MY_FAST_CALL
+#elif defined(MY_CPU_AMD64)
+// #define MY_FAST_CALL __attribute__((ms_abi))
+#define MY_FAST_CALL
+#else
+#define MY_FAST_CALL
 #endif
 
+#endif //  _MSC_VER
+
 
 /* The following interfaces use first parameter as pointer to structure */
 
@@ -335,12 +479,11 @@ struct ISzAlloc
     GCC 4.8.1 : classes with non-public variable members"
 */
 
-#define MY_container_of(ptr, type, m) ((type *)((char *)(1 ? (ptr) : &((type *)0)->m) - MY_offsetof(type, m)))
-
+#define MY_container_of(ptr, type, m) ((type *)(void *)((char *)(void *)(1 ? (ptr) : &((type *)0)->m) - MY_offsetof(type, m)))
 
 #endif
 
-#define CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(ptr))
+#define CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(void *)(ptr))
 
 /*
 #define CONTAINER_FROM_VTBL(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m)
@@ -353,6 +496,7 @@ struct ISzAlloc
 */
 
 
+#define MY_memset_0_ARRAY(a) memset((a), 0, sizeof(a))
 
 #ifdef _WIN32
 
diff --git a/deps/LZMA-SDK/C/7zVersion.h b/deps/LZMA-SDK/C/7zVersion.h
index 0074c64be..0fe636abc 100644
--- a/deps/LZMA-SDK/C/7zVersion.h
+++ b/deps/LZMA-SDK/C/7zVersion.h
@@ -1,7 +1,7 @@
-#define MY_VER_MAJOR 19
-#define MY_VER_MINOR 00
+#define MY_VER_MAJOR 21
+#define MY_VER_MINOR 02
 #define MY_VER_BUILD 0
-#define MY_VERSION_NUMBERS "19.00"
+#define MY_VERSION_NUMBERS "21.02 alpha"
 #define MY_VERSION MY_VERSION_NUMBERS
 
 #ifdef MY_CPU_NAME
@@ -10,12 +10,12 @@
   #define MY_VERSION_CPU MY_VERSION
 #endif
 
-#define MY_DATE "2019-02-21"
+#define MY_DATE "2021-05-06"
 #undef MY_COPYRIGHT
 #undef MY_VERSION_COPYRIGHT_DATE
 #define MY_AUTHOR_NAME "Igor Pavlov"
 #define MY_COPYRIGHT_PD "Igor Pavlov : Public domain"
-#define MY_COPYRIGHT_CR "Copyright (c) 1999-2018 Igor Pavlov"
+#define MY_COPYRIGHT_CR "Copyright (c) 1999-2021 Igor Pavlov"
 
 #ifdef USE_COPYRIGHT_CR
   #define MY_COPYRIGHT MY_COPYRIGHT_CR
diff --git a/deps/LZMA-SDK/C/7zip_gcc_c.mak b/deps/LZMA-SDK/C/7zip_gcc_c.mak
new file mode 100644
index 000000000..00ecfb043
--- /dev/null
+++ b/deps/LZMA-SDK/C/7zip_gcc_c.mak
@@ -0,0 +1,301 @@
+
+MY_ARCH_2 = $(MY_ARCH)
+
+MY_ASM = jwasm
+MY_ASM = asmc
+
+PROGPATH = $(O)/$(PROG)
+
+
+# for object file
+CFLAGS_BASE_LIST = -c
+# for ASM file
+# CFLAGS_BASE_LIST = -S
+CFLAGS_BASE = $(MY_ARCH_2) -O2 $(CFLAGS_BASE_LIST) -Wall -Werror -Wextra $(CFLAGS_WARN) \
+ -DNDEBUG -D_REENTRANT -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE
+
+
+LDFLAGS_STATIC = -DNDEBUG
+# -static
+
+ifdef SystemDrive
+IS_MINGW = 1
+endif
+
+ifdef DEF_FILE
+
+
+ifdef IS_MINGW
+SHARED_EXT=.dll
+LDFLAGS = -shared -DEF $(DEF_FILE) $(LDFLAGS_STATIC)
+else
+SHARED_EXT=.so
+LDFLAGS = -shared -fPIC  $(LDFLAGS_STATIC)
+CC_SHARED=-fPIC
+endif
+
+
+else
+
+LDFLAGS = $(LDFLAGS_STATIC)
+# -s is not required for clang, do we need it for GCC ???
+# -s
+
+#-static -static-libgcc -static-libstdc++
+
+ifdef IS_MINGW
+SHARED_EXT=.exe
+else
+SHARED_EXT=
+endif
+
+endif
+
+
+PROGPATH = $(O)/$(PROG)$(SHARED_EXT)
+
+	
+ifndef O
+O=_o
+endif
+
+ifdef IS_MINGW
+
+RM = del
+MY_MKDIR=mkdir
+LIB2 = -loleaut32 -luuid -ladvapi32 -lUser32
+
+
+CXXFLAGS_EXTRA = -DUNICODE -D_UNICODE
+# -Wno-delete-non-virtual-dtor
+
+DEL_OBJ_EXE = -$(RM) $(O)\*.o $(O)\$(PROG).exe $(O)\$(PROG).dll
+ 
+else
+
+RM = rm -f
+MY_MKDIR=mkdir -p
+# CFLAGS_BASE := $(CFLAGS_BASE) -D_7ZIP_ST
+# CXXFLAGS_EXTRA = -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE
+
+# LOCAL_LIBS=-lpthread
+# LOCAL_LIBS_DLL=$(LOCAL_LIBS) -ldl
+LIB2 = -lpthread -ldl
+
+DEL_OBJ_EXE = -$(RM) $(PROGPATH) $(OBJS)
+
+endif
+
+
+
+CFLAGS = $(LOCAL_FLAGS) $(CFLAGS_BASE2) $(CFLAGS_BASE) $(CC_SHARED) -o $@
+
+
+ifdef IS_X64
+AFLAGS_ABI = -elf64 -DABI_LINUX
+else
+AFLAGS_ABI = -elf -DABI_LINUX -DABI_CDECL
+# -DABI_CDECL
+# -DABI_LINUX
+# -DABI_CDECL
+endif
+AFLAGS = $(AFLAGS_ABI) -Fo$(O)/
+
+
+CXX_WARN_FLAGS =
+#-Wno-invalid-offsetof
+#-Wno-reorder
+
+CXXFLAGS = $(LOCAL_FLAGS) $(CXXFLAGS_BASE2) $(CFLAGS_BASE) $(CXXFLAGS_EXTRA) $(CC_SHARED) -o $@ $(CXX_WARN_FLAGS)
+
+all: $(O) $(PROGPATH)
+
+$(O):
+	$(MY_MKDIR) $(O)
+
+$(PROGPATH): $(OBJS)
+	$(CXX) -s -o $(PROGPATH) $(MY_ARCH_2) $(LDFLAGS) $(OBJS) $(MY_LIBS) $(LIB2)
+
+
+
+ifndef NO_DEFAULT_RES
+$O/resource.o: resource.rc
+	windres.exe $(RFLAGS) resource.rc $O/resource.o
+endif
+
+
+
+$O/7zAlloc.o: ../../../C/7zAlloc.c
+	$(CC) $(CFLAGS) $<
+$O/7zArcIn.o: ../../../C/7zArcIn.c
+	$(CC) $(CFLAGS) $<
+$O/7zBuf.o: ../../../C/7zBuf.c
+	$(CC) $(CFLAGS) $<
+$O/7zBuf2.o: ../../../C/7zBuf2.c
+	$(CC) $(CFLAGS) $<
+$O/7zCrc.o: ../../../C/7zCrc.c
+	$(CC) $(CFLAGS) $<
+$O/7zDec.o: ../../../C/7zDec.c
+	$(CC) $(CFLAGS) $<
+$O/7zFile.o: ../../../C/7zFile.c
+	$(CC) $(CFLAGS) $<
+$O/7zStream.o: ../../../C/7zStream.c
+	$(CC) $(CFLAGS) $<
+$O/Aes.o: ../../../C/Aes.c
+	$(CC) $(CFLAGS) $<
+$O/Alloc.o: ../../../C/Alloc.c
+	$(CC) $(CFLAGS) $<
+$O/Bcj2.o: ../../../C/Bcj2.c
+	$(CC) $(CFLAGS) $<
+$O/Bcj2Enc.o: ../../../C/Bcj2Enc.c
+	$(CC) $(CFLAGS) $<
+$O/Blake2s.o: ../../../C/Blake2s.c
+	$(CC) $(CFLAGS) $<
+$O/Bra.o: ../../../C/Bra.c
+	$(CC) $(CFLAGS) $<
+$O/Bra86.o: ../../../C/Bra86.c
+	$(CC) $(CFLAGS) $<
+$O/BraIA64.o: ../../../C/BraIA64.c
+	$(CC) $(CFLAGS) $<
+$O/BwtSort.o: ../../../C/BwtSort.c
+	$(CC) $(CFLAGS) $<
+
+$O/CpuArch.o: ../../../C/CpuArch.c
+	$(CC) $(CFLAGS) $<
+$O/Delta.o: ../../../C/Delta.c
+	$(CC) $(CFLAGS) $<
+$O/DllSecur.o: ../../../C/DllSecur.c
+	$(CC) $(CFLAGS) $<
+$O/HuffEnc.o: ../../../C/HuffEnc.c
+	$(CC) $(CFLAGS) $<
+$O/LzFind.o: ../../../C/LzFind.c
+	$(CC) $(CFLAGS) $<
+
+# ifdef MT_FILES
+$O/LzFindMt.o: ../../../C/LzFindMt.c
+	$(CC) $(CFLAGS) $<
+
+$O/Threads.o: ../../../C/Threads.c
+	$(CC) $(CFLAGS) $<
+# endif
+
+$O/LzmaEnc.o: ../../../C/LzmaEnc.c
+	$(CC) $(CFLAGS) $<
+$O/Lzma86Dec.o: ../../../C/Lzma86Dec.c
+	$(CC) $(CFLAGS) $<
+$O/Lzma86Enc.o: ../../../C/Lzma86Enc.c
+	$(CC) $(CFLAGS) $<
+$O/Lzma2Dec.o: ../../../C/Lzma2Dec.c
+	$(CC) $(CFLAGS) $<
+$O/Lzma2DecMt.o: ../../../C/Lzma2DecMt.c
+	$(CC) $(CFLAGS) $<
+$O/Lzma2Enc.o: ../../../C/Lzma2Enc.c
+	$(CC) $(CFLAGS) $<
+$O/LzmaLib.o: ../../../C/LzmaLib.c
+	$(CC) $(CFLAGS) $<
+$O/MtCoder.o: ../../../C/MtCoder.c
+	$(CC) $(CFLAGS) $<
+$O/MtDec.o: ../../../C/MtDec.c
+	$(CC) $(CFLAGS) $<
+$O/Ppmd7.o: ../../../C/Ppmd7.c
+	$(CC) $(CFLAGS) $<
+$O/Ppmd7aDec.o: ../../../C/Ppmd7aDec.c
+	$(CC) $(CFLAGS) $<
+$O/Ppmd7Dec.o: ../../../C/Ppmd7Dec.c
+	$(CC) $(CFLAGS) $<
+$O/Ppmd7Enc.o: ../../../C/Ppmd7Enc.c
+	$(CC) $(CFLAGS) $<
+$O/Ppmd8.o: ../../../C/Ppmd8.c
+	$(CC) $(CFLAGS) $<
+$O/Ppmd8Dec.o: ../../../C/Ppmd8Dec.c
+	$(CC) $(CFLAGS) $<
+$O/Ppmd8Enc.o: ../../../C/Ppmd8Enc.c
+	$(CC) $(CFLAGS) $<
+$O/Sha1.o: ../../../C/Sha1.c
+	$(CC) $(CFLAGS) $<
+$O/Sha256.o: ../../../C/Sha256.c
+	$(CC) $(CFLAGS) $<
+$O/Sort.o: ../../../C/Sort.c
+	$(CC) $(CFLAGS) $<
+$O/Xz.o: ../../../C/Xz.c
+	$(CC) $(CFLAGS) $<
+$O/XzCrc64.o: ../../../C/XzCrc64.c
+	$(CC) $(CFLAGS) $<
+
+
+ifdef USE_ASM
+ifdef IS_X64
+USE_X86_ASM=1
+else
+ifdef IS_X86
+USE_X86_ASM=1
+endif
+endif
+endif
+
+ifdef USE_X86_ASM
+$O/7zCrcOpt.o: ../../../Asm/x86/7zCrcOpt.asm
+	$(MY_ASM) $(AFLAGS) $<
+$O/XzCrc64Opt.o: ../../../Asm/x86/XzCrc64Opt.asm
+	$(MY_ASM) $(AFLAGS) $<
+$O/AesOpt.o: ../../../Asm/x86/AesOpt.asm
+	$(MY_ASM) $(AFLAGS) $<
+$O/Sha1Opt.o: ../../../Asm/x86/Sha1Opt.asm
+	$(MY_ASM) $(AFLAGS) $<
+$O/Sha256Opt.o: ../../../Asm/x86/Sha256Opt.asm
+	$(MY_ASM) $(AFLAGS) $<
+else
+$O/7zCrcOpt.o: ../../7zCrcOpt.c
+	$(CC) $(CFLAGS) $<
+$O/XzCrc64Opt.o: ../../XzCrc64Opt.c
+	$(CC) $(CFLAGS) $<
+$O/Sha1Opt.o: ../../Sha1Opt.c
+	$(CC) $(CFLAGS) $<
+$O/Sha256Opt.o: ../../Sha256Opt.c
+	$(CC) $(CFLAGS) $<
+$O/AesOpt.o: ../../AesOpt.c
+	$(CC) $(CFLAGS) $<
+endif
+
+
+ifdef USE_LZMA_DEC_ASM
+
+ifdef IS_X64
+$O/LzmaDecOpt.o: ../../../Asm/x86/LzmaDecOpt.asm
+	$(MY_ASM) $(AFLAGS) $<
+endif
+
+ifdef IS_ARM64
+$O/LzmaDecOpt.o: ../../../Asm/arm64/LzmaDecOpt.S ../../../Asm/arm64/7zAsm.S
+	$(CC) $(CFLAGS) $<
+endif
+
+$O/LzmaDec.o: ../../LzmaDec.c
+	$(CC) $(CFLAGS) -D_LZMA_DEC_OPT $<
+
+else
+
+$O/LzmaDec.o: ../../LzmaDec.c
+	$(CC) $(CFLAGS) $<
+
+endif
+
+
+
+$O/XzDec.o: ../../../C/XzDec.c
+	$(CC) $(CFLAGS) $<
+$O/XzEnc.o: ../../../C/XzEnc.c
+	$(CC) $(CFLAGS) $<
+$O/XzIn.o: ../../../C/XzIn.c
+	$(CC) $(CFLAGS) $<
+
+
+$O/7zMain.o: ../../../C/Util/7z/7zMain.c
+	$(CC) $(CFLAGS) $<
+$O/LzmaUtil.o: ../../../C/Util/Lzma/LzmaUtil.c
+	$(CC) $(CFLAGS) $<
+
+
+
+clean:
+	-$(DEL_OBJ_EXE)
diff --git a/deps/LZMA-SDK/C/Aes.c b/deps/LZMA-SDK/C/Aes.c
index 8f7d50ea2..0f0ddc87a 100644
--- a/deps/LZMA-SDK/C/Aes.c
+++ b/deps/LZMA-SDK/C/Aes.c
@@ -1,10 +1,17 @@
 /* Aes.c -- AES encryption / decryption
-2017-01-24 : Igor Pavlov : Public domain */
+2021-04-01 : Igor Pavlov : Public domain */
 
 #include "Precomp.h"
 
-#include "Aes.h"
 #include "CpuArch.h"
+#include "Aes.h"
+
+AES_CODE_FUNC g_AesCbc_Decode;
+#ifndef _SFX
+AES_CODE_FUNC g_AesCbc_Encode;
+AES_CODE_FUNC g_AesCtr_Code;
+UInt32 g_Aes_SupportedFunctions_Flags;
+#endif
 
 static UInt32 T[256 * 4];
 static const Byte Sbox[256] = {
@@ -25,23 +32,10 @@ static const Byte Sbox[256] = {
   0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
   0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16};
 
-void MY_FAST_CALL AesCbc_Encode(UInt32 *ivAes, Byte *data, size_t numBlocks);
-void MY_FAST_CALL AesCbc_Decode(UInt32 *ivAes, Byte *data, size_t numBlocks);
-void MY_FAST_CALL AesCtr_Code(UInt32 *ivAes, Byte *data, size_t numBlocks);
-
-void MY_FAST_CALL AesCbc_Encode_Intel(UInt32 *ivAes, Byte *data, size_t numBlocks);
-void MY_FAST_CALL AesCbc_Decode_Intel(UInt32 *ivAes, Byte *data, size_t numBlocks);
-void MY_FAST_CALL AesCtr_Code_Intel(UInt32 *ivAes, Byte *data, size_t numBlocks);
-
-AES_CODE_FUNC g_AesCbc_Encode;
-AES_CODE_FUNC g_AesCbc_Decode;
-AES_CODE_FUNC g_AesCtr_Code;
 
 static UInt32 D[256 * 4];
 static Byte InvS[256];
 
-static const Byte Rcon[11] = { 0x00, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36 };
-
 #define xtime(x) ((((x) << 1) ^ (((x) & 0x80) != 0 ? 0x1B : 0)) & 0xFF)
 
 #define Ui32(a0, a1, a2, a3) ((UInt32)(a0) | ((UInt32)(a1) << 8) | ((UInt32)(a2) << 16) | ((UInt32)(a3) << 24))
@@ -57,6 +51,36 @@ static const Byte Rcon[11] = { 0x00, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0
 #define DD(x) (D + (x << 8))
 
 
+// #define _SHOW_AES_STATUS
+
+#ifdef MY_CPU_X86_OR_AMD64
+  #define USE_HW_AES
+#elif defined(MY_CPU_ARM_OR_ARM64) && defined(MY_CPU_LE)
+  #if defined(__clang__)
+    #if (__clang_major__ >= 8) // fix that check
+      #define USE_HW_AES
+    #endif
+  #elif defined(__GNUC__)
+    #if (__GNUC__ >= 6) // fix that check
+      #define USE_HW_AES
+    #endif
+  #elif defined(_MSC_VER)
+    #if _MSC_VER >= 1910
+      #define USE_HW_AES
+    #endif
+  #endif
+#endif
+
+#ifdef USE_HW_AES
+#ifdef _SHOW_AES_STATUS
+#include <stdio.h>
+#define _PRF(x) x
+#else
+#define _PRF(x)
+#endif
+#endif
+
+
 void AesGenTables(void)
 {
   unsigned i;
@@ -90,18 +114,48 @@ void AesGenTables(void)
     }
   }
   
-  g_AesCbc_Encode = AesCbc_Encode;
-  g_AesCbc_Decode = AesCbc_Decode;
-  g_AesCtr_Code = AesCtr_Code;
+  {
+  AES_CODE_FUNC d = AesCbc_Decode;
+  #ifndef _SFX
+  AES_CODE_FUNC e = AesCbc_Encode;
+  AES_CODE_FUNC c = AesCtr_Code;
+  UInt32 flags = 0;
+  #endif
   
-  #ifdef MY_CPU_X86_OR_AMD64
-  if (CPU_Is_Aes_Supported())
+  #ifdef USE_HW_AES
+  if (CPU_IsSupported_AES())
   {
-    g_AesCbc_Encode = AesCbc_Encode_Intel;
-    g_AesCbc_Decode = AesCbc_Decode_Intel;
-    g_AesCtr_Code = AesCtr_Code_Intel;
+    // #pragma message ("AES HW")
+    _PRF(printf("\n===AES HW\n"));
+    d = AesCbc_Decode_HW;
+
+    #ifndef _SFX
+    e = AesCbc_Encode_HW;
+    c = AesCtr_Code_HW;
+    flags = k_Aes_SupportedFunctions_HW;
+    #endif
+
+    #ifdef MY_CPU_X86_OR_AMD64
+    if (CPU_IsSupported_VAES_AVX2())
+    {
+      _PRF(printf("\n===vaes avx2\n"));
+      d = AesCbc_Decode_HW_256;
+      #ifndef _SFX
+      c = AesCtr_Code_HW_256;
+      flags |= k_Aes_SupportedFunctions_HW_256;
+      #endif
+    }
+    #endif
   }
   #endif
+
+  g_AesCbc_Decode = d;
+  #ifndef _SFX
+  g_AesCbc_Encode = e;
+  g_AesCtr_Code = c;
+  g_Aes_SupportedFunctions_Flags = flags;
+  #endif
+  }
 }
 
 
@@ -142,8 +196,11 @@ void AesGenTables(void)
 
 void MY_FAST_CALL Aes_SetKey_Enc(UInt32 *w, const Byte *key, unsigned keySize)
 {
-  unsigned i, wSize;
-  wSize = keySize + 28;
+  unsigned i, m;
+  const UInt32 *wLim;
+  UInt32 t;
+  UInt32 rcon = 1;
+  
   keySize /= 4;
   w[0] = ((UInt32)keySize / 2) + 3;
   w += 4;
@@ -151,16 +208,26 @@ void MY_FAST_CALL Aes_SetKey_Enc(UInt32 *w, const Byte *key, unsigned keySize)
   for (i = 0; i < keySize; i++, key += 4)
     w[i] = GetUi32(key);
 
-  for (; i < wSize; i++)
+  t = w[(size_t)keySize - 1];
+  wLim = w + (size_t)keySize * 3 + 28;
+  m = 0;
+  do
   {
-    UInt32 t = w[(size_t)i - 1];
-    unsigned rem = i % keySize;
-    if (rem == 0)
-      t = Ui32(Sbox[gb1(t)] ^ Rcon[i / keySize], Sbox[gb2(t)], Sbox[gb3(t)], Sbox[gb0(t)]);
-    else if (keySize > 6 && rem == 4)
+    if (m == 0)
+    {
+      t = Ui32(Sbox[gb1(t)] ^ rcon, Sbox[gb2(t)], Sbox[gb3(t)], Sbox[gb0(t)]);
+      rcon <<= 1;
+      if (rcon & 0x100)
+        rcon = 0x1b;
+      m = keySize;
+    }
+    else if (m == 4 && keySize > 6)
       t = Ui32(Sbox[gb0(t)], Sbox[gb1(t)], Sbox[gb2(t)], Sbox[gb3(t)]);
-    w[i] = w[i - keySize] ^ t;
+    m--;
+    t ^= w[0];
+    w[keySize] = t;
   }
+  while (++w != wLim);
 }
 
 void MY_FAST_CALL Aes_SetKey_Dec(UInt32 *w, const Byte *key, unsigned keySize)
@@ -184,6 +251,7 @@ void MY_FAST_CALL Aes_SetKey_Dec(UInt32 *w, const Byte *key, unsigned keySize)
   src and dest are pointers to 4 UInt32 words.
   src and dest can point to same block */
 
+// MY_FORCE_INLINE
 static void Aes_Encode(const UInt32 *w, UInt32 *dest, const UInt32 *src)
 {
   UInt32 s[4];
@@ -207,6 +275,7 @@ static void Aes_Encode(const UInt32 *w, UInt32 *dest, const UInt32 *src)
   FT4(0); FT4(1); FT4(2); FT4(3);
 }
 
+MY_FORCE_INLINE
 static void Aes_Decode(const UInt32 *w, UInt32 *dest, const UInt32 *src)
 {
   UInt32 s[4];
@@ -294,7 +363,7 @@ void MY_FAST_CALL AesCtr_Code(UInt32 *p, Byte *data, size_t numBlocks)
       UInt32 t = temp[i];
 
       #ifdef MY_CPU_LE_UNALIGN
-        *((UInt32 *)data) ^= t;
+        *((UInt32 *)(void *)data) ^= t;
       #else
         data[0] ^= (t & 0xFF);
         data[1] ^= ((t >> 8) & 0xFF);
diff --git a/deps/LZMA-SDK/C/Aes.h b/deps/LZMA-SDK/C/Aes.h
index 381e979d1..602e25ea2 100644
--- a/deps/LZMA-SDK/C/Aes.h
+++ b/deps/LZMA-SDK/C/Aes.h
@@ -1,5 +1,5 @@
 /* Aes.h -- AES encryption / decryption
-2013-01-18 : Igor Pavlov : Public domain */
+2018-04-28 : Igor Pavlov : Public domain */
 
 #ifndef __AES_H
 #define __AES_H
@@ -26,12 +26,34 @@ void MY_FAST_CALL Aes_SetKey_Dec(UInt32 *aes, const Byte *key, unsigned keySize)
 
 /* ivAes - 16-byte aligned pointer to iv+keyMode+roundKeys sequence: UInt32[AES_NUM_IVMRK_WORDS] */
 void AesCbc_Init(UInt32 *ivAes, const Byte *iv); /* iv size is AES_BLOCK_SIZE */
+
 /* data - 16-byte aligned pointer to data */
 /* numBlocks - the number of 16-byte blocks in data array */
 typedef void (MY_FAST_CALL *AES_CODE_FUNC)(UInt32 *ivAes, Byte *data, size_t numBlocks);
-extern AES_CODE_FUNC g_AesCbc_Encode;
+
 extern AES_CODE_FUNC g_AesCbc_Decode;
+#ifndef _SFX
+extern AES_CODE_FUNC g_AesCbc_Encode;
 extern AES_CODE_FUNC g_AesCtr_Code;
+#define k_Aes_SupportedFunctions_HW     (1 << 2)
+#define k_Aes_SupportedFunctions_HW_256 (1 << 3)
+extern UInt32 g_Aes_SupportedFunctions_Flags;
+#endif
+
+
+#define DECLARE__AES_CODE_FUNC(funcName) \
+    void MY_FAST_CALL funcName(UInt32 *ivAes, Byte *data, size_t numBlocks);
+
+DECLARE__AES_CODE_FUNC (AesCbc_Encode)
+DECLARE__AES_CODE_FUNC (AesCbc_Decode)
+DECLARE__AES_CODE_FUNC (AesCtr_Code)
+
+DECLARE__AES_CODE_FUNC (AesCbc_Encode_HW)
+DECLARE__AES_CODE_FUNC (AesCbc_Decode_HW)
+DECLARE__AES_CODE_FUNC (AesCtr_Code_HW)
+
+DECLARE__AES_CODE_FUNC (AesCbc_Decode_HW_256)
+DECLARE__AES_CODE_FUNC (AesCtr_Code_HW_256)
 
 EXTERN_C_END
 
diff --git a/deps/LZMA-SDK/C/AesOpt.c b/deps/LZMA-SDK/C/AesOpt.c
index 0e7f49a1b..1bdc9a882 100644
--- a/deps/LZMA-SDK/C/AesOpt.c
+++ b/deps/LZMA-SDK/C/AesOpt.c
@@ -1,184 +1,776 @@
-/* AesOpt.c -- Intel's AES
-2017-06-08 : Igor Pavlov : Public domain */
+/* AesOpt.c -- AES optimized code for x86 AES hardware instructions
+2021-04-01 : Igor Pavlov : Public domain */
 
 #include "Precomp.h"
 
 #include "CpuArch.h"
 
 #ifdef MY_CPU_X86_OR_AMD64
-#if (_MSC_VER > 1500) || (_MSC_FULL_VER >= 150030729)
-#define USE_INTEL_AES
+
+  #if defined(__clang__)
+    #if __clang_major__ > 3 || (__clang_major__ == 3 && __clang_minor__ >= 8)
+      #define USE_INTEL_AES
+        #define ATTRIB_AES __attribute__((__target__("aes")))
+      #if (__clang_major__ >= 8)
+        #define USE_INTEL_VAES
+        #define ATTRIB_VAES __attribute__((__target__("aes,vaes,avx2")))
+      #endif
+    #endif
+  #elif defined(__GNUC__)
+    #if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)
+      #define USE_INTEL_AES
+      #ifndef __AES__
+        #define ATTRIB_AES __attribute__((__target__("aes")))
+      #endif
+      #if (__GNUC__ >= 8)
+        #define USE_INTEL_VAES
+        #define ATTRIB_VAES __attribute__((__target__("aes,vaes,avx2")))
+      #endif
+    #endif
+  #elif defined(__INTEL_COMPILER)
+    #if (__INTEL_COMPILER >= 1110)
+      #define USE_INTEL_AES
+      #if (__INTEL_COMPILER >= 1900)
+        #define USE_INTEL_VAES
+      #endif
+    #endif
+  #elif defined(_MSC_VER)
+    #if (_MSC_VER > 1500) || (_MSC_FULL_VER >= 150030729)
+      #define USE_INTEL_AES
+      #if (_MSC_VER >= 1910)
+        #define USE_INTEL_VAES
+      #endif
+    #endif
+  #endif
+
+#ifndef ATTRIB_AES
+  #define ATTRIB_AES
 #endif
+#ifndef ATTRIB_VAES
+  #define ATTRIB_VAES
 #endif
 
+
 #ifdef USE_INTEL_AES
 
 #include <wmmintrin.h>
 
-void MY_FAST_CALL AesCbc_Encode_Intel(__m128i *p, __m128i *data, size_t numBlocks)
+#ifndef USE_INTEL_VAES
+#define AES_TYPE_keys __m128i
+#define AES_TYPE_data __m128i
+#endif
+
+#define AES_FUNC_START(name) \
+    void MY_FAST_CALL name(__m128i *p, __m128i *data, size_t numBlocks)
+
+#define AES_FUNC_START2(name) \
+AES_FUNC_START (name); \
+ATTRIB_AES \
+AES_FUNC_START (name)
+
+#define MM_OP(op, dest, src)  dest = op(dest, src);
+#define MM_OP_m(op, src)      MM_OP(op, m, src);
+
+#define MM_XOR( dest, src)    MM_OP(_mm_xor_si128,    dest, src);
+#define AVX_XOR(dest, src)    MM_OP(_mm256_xor_si256, dest, src);
+
+
+AES_FUNC_START2 (AesCbc_Encode_HW)
 {
   __m128i m = *p;
+  const __m128i k0 = p[2]; /* p[2] and p[3] are loaded once, before the block loop */
+  const __m128i k1 = p[3];
+  const UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1; /* first UInt32 of p[1], minus one; read once instead of per block */
   for (; numBlocks != 0; numBlocks--, data++)
   {
-    UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1;
-    const __m128i *w = p + 3;
-    m = _mm_xor_si128(m, *data);
-    m = _mm_xor_si128(m, p[2]);
+    UInt32 r = numRounds2; /* per-block countdown copy of the loop-invariant count */
+    const __m128i *w = p + 4; /* k0 (p[2]) and k1 (p[3]) are applied separately, so w starts at p[4] */
+    __m128i temp = *data;
+    MM_XOR (temp, k0); /* temp = *data ^ k0 */
+    MM_XOR (m, temp); /* m (previous output, initially *p) ^= temp */
+    MM_OP_m (_mm_aesenc_si128, k1);
     do
     {
-      m = _mm_aesenc_si128(m, w[0]);
-      m = _mm_aesenc_si128(m, w[1]);
+      MM_OP_m (_mm_aesenc_si128, w[0]);
+      MM_OP_m (_mm_aesenc_si128, w[1]);
       w += 2;
     }
-    while (--numRounds2 != 0);
-    m = _mm_aesenc_si128(m, w[0]);
-    m = _mm_aesenclast_si128(m, w[1]);
+    while (--r);
+    MM_OP_m (_mm_aesenclast_si128, w[0]); /* last step for this block; m is then stored to *data and carried to the next block */
     *data = m;
   }
   *p = m;
 }
 
-#define NUM_WAYS 3
 
-#define AES_OP_W(op, n) { \
-    const __m128i t = w[n]; \
-    m0 = op(m0, t); \
-    m1 = op(m1, t); \
-    m2 = op(m2, t); \
-    }
+#define WOP_1(op)
+#define WOP_2(op)   WOP_1 (op)  op (m1, 1);
+#define WOP_3(op)   WOP_2 (op)  op (m2, 2);
+#define WOP_4(op)   WOP_3 (op)  op (m3, 3);
+#ifdef MY_CPU_AMD64
+#define WOP_5(op)   WOP_4 (op)  op (m4, 4);
+#define WOP_6(op)   WOP_5 (op)  op (m5, 5);
+#define WOP_7(op)   WOP_6 (op)  op (m6, 6);
+#define WOP_8(op)   WOP_7 (op)  op (m7, 7);
+#endif
+/*
+#define WOP_9(op)   WOP_8 (op)  op (m8, 8);
+#define WOP_10(op)  WOP_9 (op)  op (m9, 9);
+#define WOP_11(op)  WOP_10(op)  op (m10, 10);
+#define WOP_12(op)  WOP_11(op)  op (m11, 11);
+#define WOP_13(op)  WOP_12(op)  op (m12, 12);
+#define WOP_14(op)  WOP_13(op)  op (m13, 13);
+*/
+
+#ifdef MY_CPU_AMD64
+  #define NUM_WAYS      8
+  #define WOP_M1    WOP_8
+#else
+  #define NUM_WAYS      4
+  #define WOP_M1    WOP_4
+#endif
+
+#define WOP(op)  op (m0, 0);  WOP_M1(op)
+
+
+#define DECLARE_VAR(reg, ii)  __m128i reg
+#define LOAD_data(  reg, ii)  reg = data[ii];
+#define STORE_data( reg, ii)  data[ii] = reg;
+#if (NUM_WAYS > 1)
+#define XOR_data_M1(reg, ii)  MM_XOR (reg, data[ii- 1]);
+#endif
+
+#define AVX__DECLARE_VAR(reg, ii)  __m256i reg
+#define AVX__LOAD_data(  reg, ii)  reg = ((const __m256i *)(const void *)data)[ii];
+#define AVX__STORE_data( reg, ii)  ((__m256i *)(void *)data)[ii] = reg;
+#define AVX__XOR_data_M1(reg, ii)  AVX_XOR (reg, (((const __m256i *)(const void *)(data - 1))[ii]));
+
+#define MM_OP_key(op, reg)  MM_OP(op, reg, key);
+
+#define AES_DEC(      reg, ii)   MM_OP_key (_mm_aesdec_si128,     reg)
+#define AES_DEC_LAST( reg, ii)   MM_OP_key (_mm_aesdeclast_si128, reg)
+#define AES_ENC(      reg, ii)   MM_OP_key (_mm_aesenc_si128,     reg)
+#define AES_ENC_LAST( reg, ii)   MM_OP_key (_mm_aesenclast_si128, reg)
+#define AES_XOR(      reg, ii)   MM_OP_key (_mm_xor_si128,        reg)
+
 
-#define AES_DEC(n) AES_OP_W(_mm_aesdec_si128, n)
-#define AES_DEC_LAST(n) AES_OP_W(_mm_aesdeclast_si128, n)
-#define AES_ENC(n) AES_OP_W(_mm_aesenc_si128, n)
-#define AES_ENC_LAST(n) AES_OP_W(_mm_aesenclast_si128, n)
+#define AVX__AES_DEC(      reg, ii)   MM_OP_key (_mm256_aesdec_epi128,     reg)
+#define AVX__AES_DEC_LAST( reg, ii)   MM_OP_key (_mm256_aesdeclast_epi128, reg)
+#define AVX__AES_ENC(      reg, ii)   MM_OP_key (_mm256_aesenc_epi128,     reg)
+#define AVX__AES_ENC_LAST( reg, ii)   MM_OP_key (_mm256_aesenclast_epi128, reg)
+#define AVX__AES_XOR(      reg, ii)   MM_OP_key (_mm256_xor_si256,         reg)
 
-void MY_FAST_CALL AesCbc_Decode_Intel(__m128i *p, __m128i *data, size_t numBlocks)
+#define CTR_START(reg, ii)  MM_OP (_mm_add_epi64, ctr, one); reg = ctr;
+#define CTR_END(  reg, ii)  MM_XOR (data[ii], reg);
+
+#define AVX__CTR_START(reg, ii)  MM_OP (_mm256_add_epi64, ctr2, two); reg = _mm256_xor_si256(ctr2, key);
+#define AVX__CTR_END(  reg, ii)  AVX_XOR (((__m256i *)(void *)data)[ii], reg);
+
+#define WOP_KEY(op, n) { \
+    const __m128i key = w[n]; \
+    WOP(op); }
+
+#define AVX__WOP_KEY(op, n) { \
+    const __m256i key = w[n]; \
+    WOP(op); }
+
+
+#define WIDE_LOOP_START  \
+    dataEnd = data + numBlocks;  \
+    if (numBlocks >= NUM_WAYS)  \
+    { dataEnd -= NUM_WAYS; do {  \
+
+
+#define WIDE_LOOP_END  \
+    data += NUM_WAYS;  \
+    } while (data <= dataEnd);  \
+    dataEnd += NUM_WAYS; }  \
+
+
+#define SINGLE_LOOP  \
+    for (; data < dataEnd; data++)
+
+
+#define NUM_AES_KEYS_MAX 15
+
+#define WIDE_LOOP_START_AVX(OP)  \
+    dataEnd = data + numBlocks;  \
+    if (numBlocks >= NUM_WAYS * 2)  \
+    { __m256i keys[NUM_AES_KEYS_MAX]; \
+    UInt32 ii; \
+    OP \
+    for (ii = 0; ii < numRounds; ii++) \
+      keys[ii] = _mm256_broadcastsi128_si256(p[ii]); \
+    dataEnd -= NUM_WAYS * 2; do {  \
+
+
+#define WIDE_LOOP_END_AVX(OP)  \
+    data += NUM_WAYS * 2;  \
+    } while (data <= dataEnd);  \
+    dataEnd += NUM_WAYS * 2;  \
+    OP  \
+    _mm256_zeroupper();  \
+    }  \
+
+/* MSVC for x86: if we don't call _mm256_zeroupper() and -arch:IA32 is not specified,
+   MSVC can still insert a vzeroupper instruction. */
+
+
+AES_FUNC_START2 (AesCbc_Decode_HW)  // CBC decryption in place: p[0] = chaining IV, p[1] = UInt32 count, round keys start at p[2]
 {
   __m128i iv = *p;
-  for (; numBlocks >= NUM_WAYS; numBlocks -= NUM_WAYS, data += NUM_WAYS)
+  const __m128i *wStart = p + *(const UInt32 *)(p + 1) * 2 + 2 - 1;  // == (p + 2) + count * 2 - 1, i.e. relative to p after the adjustment below
+  const __m128i *dataEnd;
+  p += 2;  // p now addresses the first round key; the IV slot becomes p[-2]
+  
+  WIDE_LOOP_START  // body runs only while at least NUM_WAYS blocks remain
   {
-    UInt32 numRounds2 = *(const UInt32 *)(p + 1);
-    const __m128i *w = p + numRounds2 * 2;
-    __m128i m0, m1, m2;
+    const __m128i *w = wStart;
+    
+    WOP (DECLARE_VAR)
+    WOP (LOAD_data);
+    WOP_KEY (AES_XOR, 1)  // initial xor of every way with key w[1]
+
+    do
     {
-      const __m128i t = w[2];
-      m0 = _mm_xor_si128(t, data[0]);
-      m1 = _mm_xor_si128(t, data[1]);
-      m2 = _mm_xor_si128(t, data[2]);
+      WOP_KEY (AES_DEC, 0)  // one aesdec step per key, applied to all ways
+      w--;
     }
-    numRounds2--;
+    while (w != p);  // walk the key pointer down until it reaches the first key
+    WOP_KEY (AES_DEC_LAST, 0)  // final step uses p[0]
+
+    MM_XOR (m0, iv);  // first way is chained to the iv carried in from before
+    WOP_M1 (XOR_data_M1)
+    iv = data[NUM_WAYS - 1];  // read before STORE_data overwrites data[]: last input block is the next iv
+    WOP (STORE_data);
+  }
+  WIDE_LOOP_END
+
+  SINGLE_LOOP  // remaining blocks, one per iteration
+  {
+    const __m128i *w = wStart - 1;
+    __m128i m = _mm_xor_si128 (w[2], *data);  // w[2] here is the same key the wide loop reads as wStart[1]
     do
     {
-      AES_DEC(1)
-      AES_DEC(0)
+      MM_OP_m (_mm_aesdec_si128, w[1]);
+      MM_OP_m (_mm_aesdec_si128, w[0]);
       w -= 2;
     }
-    while (--numRounds2 != 0);
-    AES_DEC(1)
-    AES_DEC_LAST(0)
+    while (w != p);  // two keys per iteration until w reaches the first key
+    MM_OP_m (_mm_aesdec_si128,     w[1]);
+    MM_OP_m (_mm_aesdeclast_si128, w[0]);
 
+    MM_XOR (m, iv);
+    iv = *data;  // keep the input block as the next chaining value before overwriting it
+    *data = m;
+  }
+  
+  p[-2] = iv;  // write the chaining value back to the slot it was loaded from
+}
+
+
+AES_FUNC_START2 (AesCtr_Code_HW)  // CTR mode in place: p[0] = counter, p[1] = UInt32 count, round keys start at p[2]
+{
+  __m128i ctr = *p;
+  UInt32 numRoundsMinus2 = *(const UInt32 *)(p + 1) * 2 - 1;  // number of aesenc steps executed by the wide loop below
+  const __m128i *dataEnd;
+  __m128i one = _mm_cvtsi32_si128(1);  // increment added to ctr with _mm_add_epi64
+
+  p += 2;  // p now addresses the first round key; the counter slot becomes p[-2]
+  
+  WIDE_LOOP_START  // body runs only while at least NUM_WAYS blocks remain
+  {
+    const __m128i *w = p;
+    UInt32 r = numRoundsMinus2;
+    WOP (DECLARE_VAR)
+    WOP (CTR_START);  // per way: ctr += one, then reg = ctr
+    WOP_KEY (AES_XOR, 0)  // xor every way with the first key
+    w += 1;
+    do
     {
-      __m128i t;
-      t = _mm_xor_si128(m0, iv); iv = data[0]; data[0] = t;
-      t = _mm_xor_si128(m1, iv); iv = data[1]; data[1] = t;
-      t = _mm_xor_si128(m2, iv); iv = data[2]; data[2] = t;
+      WOP_KEY (AES_ENC, 0)
+      w += 1;
     }
+    while (--r);
+    WOP_KEY (AES_ENC_LAST, 0)
+   
+    WOP (CTR_END);  // per way: data[ii] ^= reg
   }
-  for (; numBlocks != 0; numBlocks--, data++)
+  WIDE_LOOP_END
+
+  SINGLE_LOOP  // remaining blocks, one per iteration
+  {
+    UInt32 numRounds2 = *(const UInt32 *)(p - 2 + 1) - 1;  // p was advanced by 2, so this re-reads the count stored next to the counter
+    const __m128i *w = p;
+    __m128i m;
+    MM_OP (_mm_add_epi64, ctr, one);  // counter is incremented before it is used
+    m = _mm_xor_si128 (ctr, p[0]);
+    w += 1;
+    do
+    {
+      MM_OP_m (_mm_aesenc_si128, w[0]);
+      MM_OP_m (_mm_aesenc_si128, w[1]);
+      w += 2;
+    }
+    while (--numRounds2);
+    MM_OP_m (_mm_aesenc_si128,     w[0]);
+    MM_OP_m (_mm_aesenclast_si128, w[1]);
+    MM_XOR (*data, m);  // xor the encrypted counter into the data block
+  }
+  
+  p[-2] = ctr;  // store the advanced counter back to the slot it was loaded from
+}
+
+
+
+#ifdef USE_INTEL_VAES
+
+#if defined(__clang__) && defined(_MSC_VER)
+#define __SSE4_2__
+#define __AES__
+#define __AVX__
+#define __AVX2__
+#define __VAES__
+#define __AVX512F__
+#define __AVX512VL__
+#endif
+
+#include <immintrin.h>
+
+#define VAES_FUNC_START2(name) \
+AES_FUNC_START (name); \
+ATTRIB_VAES \
+AES_FUNC_START (name)
+
+VAES_FUNC_START2 (AesCbc_Decode_HW_256)  // CBC decryption in place using 256-bit operations for the wide part; same p layout as AesCbc_Decode_HW
+{
+  __m128i iv = *p;
+  const __m128i *dataEnd;
+  UInt32 numRounds = *(const UInt32 *)(p + 1) * 2 + 1;  // number of round keys that WIDE_LOOP_START_AVX broadcasts into keys[]
+  p += 2;  // p now addresses the first round key; the IV slot becomes p[-2]
+  
+  WIDE_LOOP_START_AVX(;)  // body runs only while at least NUM_WAYS * 2 blocks remain
   {
-    UInt32 numRounds2 = *(const UInt32 *)(p + 1);
-    const __m128i *w = p + numRounds2 * 2;
-    __m128i m = _mm_xor_si128(w[2], *data);
-    numRounds2--;
+    const __m256i *w = keys + numRounds - 2;  // w[1] is the last broadcast key
+    
+    WOP (AVX__DECLARE_VAR)
+    WOP (AVX__LOAD_data);
+    AVX__WOP_KEY (AVX__AES_XOR, 1)  // initial xor of every way with key w[1]
+
     do
     {
-      m = _mm_aesdec_si128(m, w[1]);
-      m = _mm_aesdec_si128(m, w[0]);
+      AVX__WOP_KEY (AVX__AES_DEC, 0)
+      w--;
+    }
+    while (w != keys);  // walk down until w reaches the first broadcast key
+    AVX__WOP_KEY (AVX__AES_DEC_LAST, 0)  // final step uses keys[0]
+
+    AVX_XOR (m0, _mm256_setr_m128i(iv, data[0]));  // first register holds two blocks: chain them to iv and data[0]
+    WOP_M1 (AVX__XOR_data_M1)
+    iv = data[NUM_WAYS * 2 - 1];  // read before the store below: last input block is the next iv
+    WOP (AVX__STORE_data);
+  }
+  WIDE_LOOP_END_AVX(;)
+
+  SINGLE_LOOP  // remaining blocks, one per iteration, with 128-bit operations
+  {
+    const __m128i *w = p + *(const UInt32 *)(p + 1 - 2) * 2 + 1 - 3;  // == p + count * 2 - 2, with count re-read from the slot at p[-1]
+    __m128i m = _mm_xor_si128 (w[2], *data);
+    do
+    {
+      MM_OP_m (_mm_aesdec_si128, w[1]);
+      MM_OP_m (_mm_aesdec_si128, w[0]);
       w -= 2;
     }
-    while (--numRounds2 != 0);
-    m = _mm_aesdec_si128(m, w[1]);
-    m = _mm_aesdeclast_si128(m, w[0]);
+    while (w != p);  // two keys per iteration until w reaches the first key
+    MM_OP_m (_mm_aesdec_si128,     w[1]);
+    MM_OP_m (_mm_aesdeclast_si128, w[0]);
 
-    m = _mm_xor_si128(m, iv);
+    MM_XOR (m, iv);
     iv = *data;
     *data = m;
   }
-  *p = iv;
+  
+  p[-2] = iv;  // p was advanced by 2 above, so p[-2] is the slot iv was loaded from
 }
 
-void MY_FAST_CALL AesCtr_Code_Intel(__m128i *p, __m128i *data, size_t numBlocks)
+
+/*
+SSE2: _mm_cvtsi32_si128 : movd
+AVX:  _mm256_setr_m128i            : vinsertf128
+AVX2: _mm256_add_epi64             : vpaddq ymm, ymm, ymm
+      _mm256_extracti128_si256     : vextracti128
+      _mm256_broadcastsi128_si256  : vbroadcasti128
+*/
+
+#define AVX__CTR_LOOP_START  \
+    ctr2 = _mm256_setr_m128i(_mm_sub_epi64(ctr, one), ctr); \
+    two = _mm256_setr_m128i(one, one); \
+    two = _mm256_add_epi64(two, two); \
+
+// two = _mm256_setr_epi64x(2, 0, 2, 0);
+  
+#define AVX__CTR_LOOP_ENC  \
+    ctr = _mm256_extracti128_si256 (ctr2, 1); \
+ 
+VAES_FUNC_START2 (AesCtr_Code_HW_256)  // CTR mode in place using 256-bit operations for the wide part; same p layout as AesCtr_Code_HW
 {
   __m128i ctr = *p;
-  __m128i one;
-  one.m128i_u64[0] = 1;
-  one.m128i_u64[1] = 0;
-  for (; numBlocks >= NUM_WAYS; numBlocks -= NUM_WAYS, data += NUM_WAYS)
+  UInt32 numRounds = *(const UInt32 *)(p + 1) * 2 + 1;  // number of round keys that WIDE_LOOP_START_AVX broadcasts into keys[]
+  const __m128i *dataEnd;
+  __m128i one = _mm_cvtsi32_si128(1);  // increment added to ctr with _mm_add_epi64
+  __m256i ctr2, two;  // ctr2: a pair of counters per register; two: per-pair increment (both set by AVX__CTR_LOOP_START)
+  p += 2;  // p now addresses the first round key; the counter slot becomes p[-2]
+  
+  WIDE_LOOP_START_AVX (AVX__CTR_LOOP_START)  // before looping: ctr2 = (ctr - one, ctr), two = one + one in each half
   {
-    UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1;
-    const __m128i *w = p;
-    __m128i m0, m1, m2;
-    {
-      const __m128i t = w[2];
-      ctr = _mm_add_epi64(ctr, one); m0 = _mm_xor_si128(ctr, t);
-      ctr = _mm_add_epi64(ctr, one); m1 = _mm_xor_si128(ctr, t);
-      ctr = _mm_add_epi64(ctr, one); m2 = _mm_xor_si128(ctr, t);
-    }
-    w += 3;
+    const __m256i *w = keys;
+    UInt32 r = numRounds - 2;  // number of aesenc steps executed by the loop below
+    WOP (AVX__DECLARE_VAR)
+    AVX__WOP_KEY (AVX__CTR_START, 0);  // per way: ctr2 += two, then reg = ctr2 ^ keys[0]
+
+    w += 1;
     do
     {
-      AES_ENC(0)
-      AES_ENC(1)
-      w += 2;
+      AVX__WOP_KEY (AVX__AES_ENC, 0)
+      w += 1;
     }
-    while (--numRounds2 != 0);
-    AES_ENC(0)
-    AES_ENC_LAST(1)
-    data[0] = _mm_xor_si128(data[0], m0);
-    data[1] = _mm_xor_si128(data[1], m1);
-    data[2] = _mm_xor_si128(data[2], m2);
+    while (--r);
+    AVX__WOP_KEY (AVX__AES_ENC_LAST, 0)
+   
+    WOP (AVX__CTR_END);  // per way: xor reg into the matching 256-bit slice of data
   }
-  for (; numBlocks != 0; numBlocks--, data++)
+  WIDE_LOOP_END_AVX (AVX__CTR_LOOP_ENC)  // after looping: ctr is reloaded from the upper half of ctr2
+  
+  SINGLE_LOOP  // remaining blocks, one per iteration, with 128-bit operations
   {
-    UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1;
+    UInt32 numRounds2 = *(const UInt32 *)(p - 2 + 1) - 1;  // p was advanced by 2, so this re-reads the count stored next to the counter
     const __m128i *w = p;
     __m128i m;
-    ctr = _mm_add_epi64(ctr, one);
-    m = _mm_xor_si128(ctr, p[2]);
-    w += 3;
+    MM_OP (_mm_add_epi64, ctr, one);  // counter is incremented before it is used
+    m = _mm_xor_si128 (ctr, p[0]);
+    w += 1;
     do
     {
-      m = _mm_aesenc_si128(m, w[0]);
-      m = _mm_aesenc_si128(m, w[1]);
+      MM_OP_m (_mm_aesenc_si128, w[0]);
+      MM_OP_m (_mm_aesenc_si128, w[1]);
       w += 2;
     }
-    while (--numRounds2 != 0);
-    m = _mm_aesenc_si128(m, w[0]);
-    m = _mm_aesenclast_si128(m, w[1]);
-    *data = _mm_xor_si128(*data, m);
+    while (--numRounds2);
+    MM_OP_m (_mm_aesenc_si128,     w[0]);
+    MM_OP_m (_mm_aesenclast_si128, w[1]);
+    MM_XOR (*data, m);  // xor the encrypted counter into the data block
   }
-  *p = ctr;
+
+  p[-2] = ctr;  // p was advanced by 2 above, so p[-2] is the slot ctr was loaded from
 }
 
+#endif // USE_INTEL_VAES
+
+#else // USE_INTEL_AES
+
+/* no USE_INTEL_AES */
+
+#pragma message("AES  HW_SW stub was used")
+
+#define AES_TYPE_keys UInt32
+#define AES_TYPE_data Byte
+
+#define AES_FUNC_START(name) \
+    void MY_FAST_CALL name(UInt32 *p, Byte *data, size_t numBlocks) \
+
+#define AES_COMPAT_STUB(name) \
+    AES_FUNC_START(name); \
+    AES_FUNC_START(name ## _HW) \
+    { name(p, data, numBlocks); }
+
+AES_COMPAT_STUB (AesCbc_Encode)
+AES_COMPAT_STUB (AesCbc_Decode)
+AES_COMPAT_STUB (AesCtr_Code)
+
+#endif // USE_INTEL_AES
+
+
+#ifndef USE_INTEL_VAES
+
+#pragma message("VAES HW_SW stub was used")
+
+#define VAES_COMPAT_STUB(name) \
+    void MY_FAST_CALL name ## _256(UInt32 *p, Byte *data, size_t numBlocks); \
+    void MY_FAST_CALL name ## _256(UInt32 *p, Byte *data, size_t numBlocks) \
+    { name((AES_TYPE_keys *)(void *)p, (AES_TYPE_data *)(void *)data, numBlocks); }
+
+VAES_COMPAT_STUB (AesCbc_Decode_HW)
+VAES_COMPAT_STUB (AesCtr_Code_HW)
+
+#endif // ! USE_INTEL_VAES
+
+
+#elif defined(MY_CPU_ARM_OR_ARM64) && defined(MY_CPU_LE)
+
+  #if defined(__clang__)
+    #if (__clang_major__ >= 8) // fix that check
+      #define USE_HW_AES
+    #endif
+  #elif defined(__GNUC__)
+    #if (__GNUC__ >= 6) // fix that check
+      #define USE_HW_AES
+    #endif
+  #elif defined(_MSC_VER)
+    #if _MSC_VER >= 1910
+      #define USE_HW_AES
+    #endif
+  #endif
+
+#ifdef USE_HW_AES
+
+// #pragma message("=== AES HW === ")
+
+#if defined(__clang__) || defined(__GNUC__)
+  #ifdef MY_CPU_ARM64
+    #define ATTRIB_AES __attribute__((__target__("+crypto")))
+  #else
+    #define ATTRIB_AES __attribute__((__target__("fpu=crypto-neon-fp-armv8")))
+  #endif
+#else
+  // _MSC_VER
+  // for arm32
+  #define _ARM_USE_NEW_NEON_INTRINSICS
+#endif
+
+#ifndef ATTRIB_AES
+  #define ATTRIB_AES
+#endif
+
+#if defined(_MSC_VER) && defined(MY_CPU_ARM64)
+#include <arm64_neon.h>
 #else
+#include <arm_neon.h>
+#endif
+
+typedef uint8x16_t v128;
+
+#define AES_FUNC_START(name) \
+    void MY_FAST_CALL name(v128 *p, v128 *data, size_t numBlocks)
+
+#define AES_FUNC_START2(name) \
+AES_FUNC_START (name); \
+ATTRIB_AES \
+AES_FUNC_START (name)
+
+#define MM_OP(op, dest, src)  dest = op(dest, src);
+#define MM_OP_m(op, src)      MM_OP(op, m, src);
+#define MM_OP1_m(op)          m = op(m);
 
-void MY_FAST_CALL AesCbc_Encode(UInt32 *ivAes, Byte *data, size_t numBlocks);
-void MY_FAST_CALL AesCbc_Decode(UInt32 *ivAes, Byte *data, size_t numBlocks);
-void MY_FAST_CALL AesCtr_Code(UInt32 *ivAes, Byte *data, size_t numBlocks);
+#define MM_XOR( dest, src)    MM_OP(veorq_u8, dest, src);
+#define MM_XOR_m( src)        MM_XOR(m, src);
 
-void MY_FAST_CALL AesCbc_Encode_Intel(UInt32 *p, Byte *data, size_t numBlocks)
+#define AES_E_m(k)     MM_OP_m (vaeseq_u8, k);
+#define AES_E_MC_m(k)  AES_E_m (k);  MM_OP1_m(vaesmcq_u8);
+
+
+AES_FUNC_START2 (AesCbc_Encode_HW)  // CBC encryption in place: p[0] = chaining block, p[1] = UInt32 numRounds2, round keys start at p[2]
 {
-  AesCbc_Encode(p, data, numBlocks);
+  v128 m = *p;  // running chaining value, loaded from p[0]
+  const v128 k0 = p[2];  // k0..k9: the first ten round keys, read once before the loop
+  const v128 k1 = p[3];
+  const v128 k2 = p[4];
+  const v128 k3 = p[5];
+  const v128 k4 = p[6];
+  const v128 k5 = p[7];
+  const v128 k6 = p[8];
+  const v128 k7 = p[9];
+  const v128 k8 = p[10];
+  const v128 k9 = p[11];
+  const UInt32 numRounds2 = *(const UInt32 *)(p + 1);
+  const v128 *w = p + ((size_t)numRounds2 * 2);
+  const v128 k_z1 = w[1];  // key for the final vaeseq_u8 step, which is not followed by vaesmcq_u8
+  const v128 k_z0 = w[2];  // last key, applied with a plain xor
+  for (; numBlocks != 0; numBlocks--, data++)
+  {
+    MM_XOR_m (*data);  // chain: m (previous output block or initial p[0]) ^= current input block
+    AES_E_MC_m (k0)
+    AES_E_MC_m (k1)
+    AES_E_MC_m (k2)
+    AES_E_MC_m (k3)
+    AES_E_MC_m (k4)
+    AES_E_MC_m (k5)
+    AES_E_MC_m (k6)
+    AES_E_MC_m (k7)
+    AES_E_MC_m (k8)
+    if (numRounds2 >= 6)  // larger numRounds2 values run additional rounds
+    {
+      AES_E_MC_m (k9)
+      AES_E_MC_m (p[12])
+      if (numRounds2 != 6)  // i.e. numRounds2 > 6: two more rounds
+      {
+        AES_E_MC_m (p[13])
+        AES_E_MC_m (p[14])
+      }
+    }
+    AES_E_m  (k_z1);
+    MM_XOR_m (k_z0);
+    *data = m;
+  }
+  *p = m;  // store the last output block as the chaining value for the next call
 }
 
-void MY_FAST_CALL AesCbc_Decode_Intel(UInt32 *p, Byte *data, size_t numBlocks)
+
+#define WOP_1(op)
+#define WOP_2(op)   WOP_1 (op)  op (m1, 1);
+#define WOP_3(op)   WOP_2 (op)  op (m2, 2);
+#define WOP_4(op)   WOP_3 (op)  op (m3, 3);
+#define WOP_5(op)   WOP_4 (op)  op (m4, 4);
+#define WOP_6(op)   WOP_5 (op)  op (m5, 5);
+#define WOP_7(op)   WOP_6 (op)  op (m6, 6);
+#define WOP_8(op)   WOP_7 (op)  op (m7, 7);
+
+  #define NUM_WAYS      8
+  #define WOP_M1    WOP_8
+
+#define WOP(op)  op (m0, 0);  WOP_M1(op)
+
+#define DECLARE_VAR(reg, ii)  v128 reg
+#define LOAD_data(  reg, ii)  reg = data[ii];
+#define STORE_data( reg, ii)  data[ii] = reg;
+#if (NUM_WAYS > 1)
+#define XOR_data_M1(reg, ii)  MM_XOR (reg, data[ii- 1]);
+#endif
+
+#define MM_OP_key(op, reg)  MM_OP (op, reg, key);
+
+#define AES_D_m(k)      MM_OP_m (vaesdq_u8, k);
+#define AES_D_IMC_m(k)  AES_D_m (k);  MM_OP1_m (vaesimcq_u8);
+
+#define AES_XOR(   reg, ii)  MM_OP_key (veorq_u8,  reg)
+#define AES_D(     reg, ii)  MM_OP_key (vaesdq_u8, reg)
+#define AES_E(     reg, ii)  MM_OP_key (vaeseq_u8, reg)
+
+#define AES_D_IMC( reg, ii)  AES_D (reg, ii);  reg = vaesimcq_u8(reg)
+#define AES_E_MC(  reg, ii)  AES_E (reg, ii);  reg = vaesmcq_u8(reg)
+
+#define CTR_START(reg, ii)  MM_OP (vaddq_u64, ctr, one);  reg = vreinterpretq_u8_u64(ctr);
+#define CTR_END(  reg, ii)  MM_XOR (data[ii], reg);
+
+#define WOP_KEY(op, n) { \
+    const v128 key = w[n]; \
+    WOP(op); }
+
+#define WIDE_LOOP_START  \
+    dataEnd = data + numBlocks;  \
+    if (numBlocks >= NUM_WAYS)  \
+    { dataEnd -= NUM_WAYS; do {  \
+
+#define WIDE_LOOP_END  \
+    data += NUM_WAYS;  \
+    } while (data <= dataEnd);  \
+    dataEnd += NUM_WAYS; }  \
+
+#define SINGLE_LOOP  \
+    for (; data < dataEnd; data++)
+
+
+AES_FUNC_START2 (AesCbc_Decode_HW)  // CBC decryption in place: p[0] = chaining IV, p[1] = UInt32 count, round keys start at p[2]
 {
-  AesCbc_Decode(p, data, numBlocks);
+  v128 iv = *p;
+  const v128 *wStart = p + ((size_t)*(const UInt32 *)(p + 1)) * 2;  // computed from p before the adjustment below
+  const v128 *dataEnd;
+  p += 2;  // p now addresses the first round key; the IV slot becomes p[-2]
+  
+  WIDE_LOOP_START  // body runs only while at least NUM_WAYS blocks remain
+  {
+    const v128 *w = wStart;
+    WOP (DECLARE_VAR)
+    WOP (LOAD_data);
+    WOP_KEY (AES_D_IMC, 2)  // AES_D_IMC = vaesdq_u8 with the key, then vaesimcq_u8
+    do
+    {
+      WOP_KEY (AES_D_IMC, 1)
+      WOP_KEY (AES_D_IMC, 0)
+      w -= 2;
+    }
+    while (w != p);  // two keys per iteration until w reaches the first key
+    WOP_KEY (AES_D,   1)  // last vaesdq_u8 step, without vaesimcq_u8
+    WOP_KEY (AES_XOR, 0)  // first key is applied with a plain xor
+    MM_XOR (m0, iv);  // first way is chained to the iv carried in from before
+    WOP_M1 (XOR_data_M1)  // every later way is xored with data[ii - 1]
+    iv = data[NUM_WAYS - 1];  // read before STORE_data overwrites data[]: last input block is the next iv
+    WOP (STORE_data);
+  }
+  WIDE_LOOP_END
+
+  SINGLE_LOOP  // remaining blocks, one per iteration
+  {
+    const v128 *w = wStart;
+    v128 m = *data;
+    AES_D_IMC_m (w[2])
+    do
+    {
+      AES_D_IMC_m (w[1]);
+      AES_D_IMC_m (w[0]);
+      w -= 2;
+    }
+    while (w != p);
+    AES_D_m  (w[1]);
+    MM_XOR_m (w[0]);
+    MM_XOR_m (iv);
+    iv = *data;  // keep the input block as the next chaining value before overwriting it
+    *data = m;
+  }
+  
+  p[-2] = iv;  // write the chaining value back to the slot it was loaded from
 }
 
-void MY_FAST_CALL AesCtr_Code_Intel(UInt32 *p, Byte *data, size_t numBlocks)
+
+AES_FUNC_START2 (AesCtr_Code_HW)  // CTR mode in place: p[0] = counter, p[1] = UInt32 count, round keys start at p[2]
 {
-  AesCtr_Code(p, data, numBlocks);
+  uint64x2_t ctr = vreinterpretq_u64_u8(*p);  // counter is handled as two 64-bit lanes
+  const v128 *wEnd = p + ((size_t)*(const UInt32 *)(p + 1)) * 2;  // computed from p before the adjustment below
+  const v128 *dataEnd;
+  uint64x2_t one = vdupq_n_u64(0);
+  one = vsetq_lane_u64(1, one, 0);  // one = { 1, 0 }: only lane 0 is incremented
+  p += 2;  // p now addresses the first round key; the counter slot becomes p[-2]
+  
+  WIDE_LOOP_START  // body runs only while at least NUM_WAYS blocks remain
+  {
+    const v128 *w = p;
+    WOP (DECLARE_VAR)
+    WOP (CTR_START);  // per way: ctr += one, then reg = ctr reinterpreted as bytes
+    do
+    {
+      WOP_KEY (AES_E_MC, 0)  // AES_E_MC = vaeseq_u8 with the key, then vaesmcq_u8
+      WOP_KEY (AES_E_MC, 1)
+      w += 2;
+    }
+    while (w != wEnd);  // two keys per iteration until w reaches wEnd
+    WOP_KEY (AES_E_MC, 0)
+    WOP_KEY (AES_E,    1)  // last vaeseq_u8 step, without vaesmcq_u8
+    WOP_KEY (AES_XOR,  2)  // last key is applied with a plain xor
+    WOP (CTR_END);  // per way: data[ii] ^= reg
+  }
+  WIDE_LOOP_END
+
+  SINGLE_LOOP  // remaining blocks, one per iteration
+  {
+    const v128 *w = p;
+    v128 m;
+    CTR_START (m, 0);  // ctr += one; m = ctr
+    do
+    {
+      AES_E_MC_m (w[0]);
+      AES_E_MC_m (w[1]);
+      w += 2;
+    }
+    while (w != wEnd);
+    AES_E_MC_m (w[0]);
+    AES_E_m    (w[1]);
+    MM_XOR_m   (w[2]);
+    CTR_END (m, 0);  // data[0] ^= m; data itself advances in the loop header
+  }
+  
+  p[-2] = vreinterpretq_u8_u64(ctr);  // store the advanced counter back to the slot it was loaded from
 }
 
-#endif
+#endif // USE_HW_AES
+
+#endif // MY_CPU_ARM_OR_ARM64
diff --git a/deps/LZMA-SDK/C/Alloc.c b/deps/LZMA-SDK/C/Alloc.c
index 30b499e5f..9f249c250 100644
--- a/deps/LZMA-SDK/C/Alloc.c
+++ b/deps/LZMA-SDK/C/Alloc.c
@@ -1,12 +1,12 @@
 /* Alloc.c -- Memory allocation functions
-2018-04-27 : Igor Pavlov : Public domain */
+2020-10-29 : Igor Pavlov : Public domain */
 
 #include "Precomp.h"
 
 #include <stdio.h>
 
 #ifdef _WIN32
-#include <windows.h>
+#include <windows.h>  /* lowercase on purpose: MinGW-w64 ships windows.h, and case-sensitive hosts cannot find Windows.h */
 #endif
 #include <stdlib.h>
 
@@ -122,7 +122,6 @@ static void PrintAddr(void *p)
 #define Print(s)
 #define PrintLn()
 #define PrintHex(v, align)
-#define PrintDec(v, align)
 #define PrintAddr(p)
 
 #endif
@@ -133,10 +132,11 @@ void *MyAlloc(size_t size)
 {
   if (size == 0)
     return NULL;
+  PRINT_ALLOC("Alloc    ", g_allocCount, size, NULL);
   #ifdef _SZ_ALLOC_DEBUG
   {
     void *p = malloc(size);
-    PRINT_ALLOC("Alloc    ", g_allocCount, size, p);
+    // PRINT_ALLOC("Alloc    ", g_allocCount, size, p);
     return p;
   }
   #else
@@ -172,14 +172,20 @@ void MidFree(void *address)
   VirtualFree(address, 0, MEM_RELEASE);
 }
 
-#ifndef MEM_LARGE_PAGES
-#undef _7ZIP_LARGE_PAGES
+#ifdef _7ZIP_LARGE_PAGES
+
+#ifdef MEM_LARGE_PAGES
+  #define MY__MEM_LARGE_PAGES  MEM_LARGE_PAGES
+#else
+  #define MY__MEM_LARGE_PAGES  0x20000000
 #endif
 
-#ifdef _7ZIP_LARGE_PAGES
+extern
+SIZE_T g_LargePageSize;
 SIZE_T g_LargePageSize = 0;
-typedef SIZE_T (WINAPI *GetLargePageMinimumP)();
-#endif
+typedef SIZE_T (WINAPI *GetLargePageMinimumP)(VOID);
+
+#endif // _7ZIP_LARGE_PAGES
 
 void SetLargePageSize()
 {
@@ -214,7 +220,7 @@ void *BigAlloc(size_t size)
       size2 = (size + ps) & ~ps;
       if (size2 >= size)
       {
-        void *res = VirtualAlloc(NULL, size2, MEM_COMMIT | MEM_LARGE_PAGES, PAGE_READWRITE);
+        void *res = VirtualAlloc(NULL, size2, MEM_COMMIT | MY__MEM_LARGE_PAGES, PAGE_READWRITE);
         if (res)
           return res;
       }
@@ -280,13 +286,15 @@ const ISzAlloc g_BigAlloc = { SzBigAlloc, SzBigFree };
 */
 #define MY_ALIGN_PTR_DOWN(p, align) ((void *)((((UIntPtr)(p)) & ~((UIntPtr)(align) - 1))))
 
-#define MY_ALIGN_PTR_UP_PLUS(p, align) MY_ALIGN_PTR_DOWN(((char *)(p) + (align) + ADJUST_ALLOC_SIZE), align)
-
 
-#if (_POSIX_C_SOURCE >= 200112L) && !defined(_WIN32)
+#if !defined(_WIN32) && defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE >= 200112L)
   #define USE_posix_memalign
 #endif
 
+#ifndef USE_posix_memalign
+#define MY_ALIGN_PTR_UP_PLUS(p, align) MY_ALIGN_PTR_DOWN(((char *)(p) + (align) + ADJUST_ALLOC_SIZE), align)
+#endif
+
 /*
   This posix_memalign() is for test purposes only.
   We also need special Free() function instead of free(),
diff --git a/deps/LZMA-SDK/C/Alloc.h b/deps/LZMA-SDK/C/Alloc.h
index 3d796e5ee..a1bbe942c 100644
--- a/deps/LZMA-SDK/C/Alloc.h
+++ b/deps/LZMA-SDK/C/Alloc.h
@@ -1,5 +1,5 @@
 /* Alloc.h -- Memory allocation functions
-2018-02-19 : Igor Pavlov : Public domain */
+2021-02-08 : Igor Pavlov : Public domain */
 
 #ifndef __COMMON_ALLOC_H
 #define __COMMON_ALLOC_H
@@ -13,7 +13,7 @@ void MyFree(void *address);
 
 #ifdef _WIN32
 
-void SetLargePageSize();
+void SetLargePageSize(void);
 
 void *MidAlloc(size_t size);
 void MidFree(void *address);
diff --git a/deps/LZMA-SDK/C/Bcj2.c b/deps/LZMA-SDK/C/Bcj2.c
index da93985cf..c1772f234 100644
--- a/deps/LZMA-SDK/C/Bcj2.c
+++ b/deps/LZMA-SDK/C/Bcj2.c
@@ -1,5 +1,5 @@
 /* Bcj2.c -- BCJ2 Decoder (Converter for x86 code)
-2018-04-28 : Igor Pavlov : Public domain */
+2021-02-09 : Igor Pavlov : Public domain */
 
 #include "Precomp.h"
 
@@ -123,7 +123,7 @@ SRes Bcj2Dec_Decode(CBcj2Dec *p)
         const Byte *src = p->bufs[BCJ2_STREAM_MAIN];
         const Byte *srcLim;
         Byte *dest;
-        SizeT num = p->lims[BCJ2_STREAM_MAIN] - src;
+        SizeT num = (SizeT)(p->lims[BCJ2_STREAM_MAIN] - src);
         
         if (num == 0)
         {
@@ -134,7 +134,7 @@ SRes Bcj2Dec_Decode(CBcj2Dec *p)
         dest = p->dest;
         if (num > (SizeT)(p->destLim - dest))
         {
-          num = p->destLim - dest;
+          num = (SizeT)(p->destLim - dest);
           if (num == 0)
           {
             p->state = BCJ2_DEC_STATE_ORIG;
@@ -168,7 +168,7 @@ SRes Bcj2Dec_Decode(CBcj2Dec *p)
           break;
         }
         
-        num = src - p->bufs[BCJ2_STREAM_MAIN];
+        num = (SizeT)(src - p->bufs[BCJ2_STREAM_MAIN]);
         
         if (src == srcLim)
         {
@@ -228,7 +228,7 @@ SRes Bcj2Dec_Decode(CBcj2Dec *p)
       p->ip += 4;
       val -= p->ip;
       dest = p->dest;
-      rem = p->destLim - dest;
+      rem = (SizeT)(p->destLim - dest);
       
       if (rem < 4)
       {
diff --git a/deps/LZMA-SDK/C/Bcj2Enc.c b/deps/LZMA-SDK/C/Bcj2Enc.c
index 7a02ecde2..71ac5091d 100644
--- a/deps/LZMA-SDK/C/Bcj2Enc.c
+++ b/deps/LZMA-SDK/C/Bcj2Enc.c
@@ -1,5 +1,5 @@
 /* Bcj2Enc.c -- BCJ2 Encoder (Converter for x86 code)
-2019-02-02 : Igor Pavlov : Public domain */
+2021-02-09 : Igor Pavlov : Public domain */
 
 #include "Precomp.h"
 
@@ -104,7 +104,7 @@ static void Bcj2Enc_Encode_2(CBcj2Enc *p)
         const Byte *src = p->src;
         const Byte *srcLim;
         Byte *dest;
-        SizeT num = p->srcLim - src;
+        SizeT num = (SizeT)(p->srcLim - src);
 
         if (p->finishMode == BCJ2_ENC_FINISH_MODE_CONTINUE)
         {
@@ -118,7 +118,7 @@ static void Bcj2Enc_Encode_2(CBcj2Enc *p)
         dest = p->bufs[BCJ2_STREAM_MAIN];
         if (num > (SizeT)(p->lims[BCJ2_STREAM_MAIN] - dest))
         {
-          num = p->lims[BCJ2_STREAM_MAIN] - dest;
+          num = (SizeT)(p->lims[BCJ2_STREAM_MAIN] - dest);
           if (num == 0)
           {
             p->state = BCJ2_STREAM_MAIN;
@@ -152,7 +152,7 @@ static void Bcj2Enc_Encode_2(CBcj2Enc *p)
           break;
         }
         
-        num = src - p->src;
+        num = (SizeT)(src - p->src);
         
         if (src == srcLim)
         {
diff --git a/deps/LZMA-SDK/C/Bra.c b/deps/LZMA-SDK/C/Bra.c
index cbdcb290d..cdefa4d2e 100644
--- a/deps/LZMA-SDK/C/Bra.c
+++ b/deps/LZMA-SDK/C/Bra.c
@@ -1,5 +1,5 @@
 /* Bra.c -- Converters for RISC code
-2017-04-04 : Igor Pavlov : Public domain */
+2021-02-09 : Igor Pavlov : Public domain */
 
 #include "Precomp.h"
 
@@ -22,7 +22,7 @@ SizeT ARM_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
     for (;;)
     {
       if (p >= lim)
-        return p - data;
+        return (SizeT)(p - data);
       p += 4;
       if (p[-1] == 0xEB)
         break;
@@ -43,7 +43,7 @@ SizeT ARM_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
     for (;;)
     {
       if (p >= lim)
-        return p - data;
+        return (SizeT)(p - data);
       p += 4;
       if (p[-1] == 0xEB)
         break;
@@ -78,7 +78,7 @@ SizeT ARMT_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
     {
       UInt32 b3;
       if (p > lim)
-        return p - data;
+        return (SizeT)(p - data);
       b1 = p[1];
       b3 = p[3];
       p += 2;
@@ -113,7 +113,7 @@ SizeT ARMT_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
     {
       UInt32 b3;
       if (p > lim)
-        return p - data;
+        return (SizeT)(p - data);
       b1 = p[1];
       b3 = p[3];
       p += 2;
@@ -162,7 +162,7 @@ SizeT PPC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
     for (;;)
     {
       if (p >= lim)
-        return p - data;
+        return (SizeT)(p - data);
       p += 4;
       /* if ((v & 0xFC000003) == 0x48000001) */
       if ((p[-4] & 0xFC) == 0x48 && (p[-1] & 3) == 1)
@@ -196,7 +196,7 @@ SizeT SPARC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
     for (;;)
     {
       if (p >= lim)
-        return p - data;
+        return (SizeT)(p - data);
       /*
       v = GetBe32(p);
       p += 4;
diff --git a/deps/LZMA-SDK/C/Bra86.c b/deps/LZMA-SDK/C/Bra86.c
index a6463c63b..d857dac67 100644
--- a/deps/LZMA-SDK/C/Bra86.c
+++ b/deps/LZMA-SDK/C/Bra86.c
@@ -1,5 +1,5 @@
 /* Bra86.c -- Converter for x86 code (BCJ)
-2017-04-03 : Igor Pavlov : Public domain */
+2021-02-09 : Igor Pavlov : Public domain */
 
 #include "Precomp.h"
 
@@ -25,7 +25,7 @@ SizeT x86_Convert(Byte *data, SizeT size, UInt32 ip, UInt32 *state, int encoding
         break;
 
     {
-      SizeT d = (SizeT)(p - data - pos);
+      SizeT d = (SizeT)(p - data) - pos;
       pos = (SizeT)(p - data);
       if (p >= limit)
       {
diff --git a/deps/LZMA-SDK/C/Compiler.h b/deps/LZMA-SDK/C/Compiler.h
index c788648cd..eba374298 100644
--- a/deps/LZMA-SDK/C/Compiler.h
+++ b/deps/LZMA-SDK/C/Compiler.h
@@ -1,9 +1,13 @@
 /* Compiler.h
-2017-04-03 : Igor Pavlov : Public domain */
+2021-01-05 : Igor Pavlov : Public domain */
 
 #ifndef __7Z_COMPILER_H
 #define __7Z_COMPILER_H
 
+  #ifdef __clang__
+    #pragma clang diagnostic ignored "-Wunused-private-field"
+  #endif
+
 #ifdef _MSC_VER
 
   #ifdef UNDER_CE
@@ -25,6 +29,12 @@
     #pragma warning(disable : 4786) // identifier was truncated to '255' characters in the debug information
   #endif
 
+  #ifdef __clang__
+    #pragma clang diagnostic ignored "-Wdeprecated-declarations"
+    #pragma clang diagnostic ignored "-Wmicrosoft-exception-spec"
+    // #pragma clang diagnostic ignored "-Wreserved-id-macro"
+  #endif
+
 #endif
 
 #define UNUSED_VAR(x) (void)x;
diff --git a/deps/LZMA-SDK/C/CpuArch.c b/deps/LZMA-SDK/C/CpuArch.c
index ff1890e7f..bcdbc4cc1 100644
--- a/deps/LZMA-SDK/C/CpuArch.c
+++ b/deps/LZMA-SDK/C/CpuArch.c
@@ -1,5 +1,5 @@
 /* CpuArch.c -- CPU specific code
-2018-02-18: Igor Pavlov : Public domain */
+2021-04-28 : Igor Pavlov : Public domain */
 
 #include "Precomp.h"
 
@@ -55,6 +55,47 @@ static UInt32 CheckFlag(UInt32 flag)
 #define CHECK_CPUID_IS_SUPPORTED
 #endif
 
+#ifndef USE_ASM
+  #ifdef _MSC_VER
+    #if _MSC_VER >= 1600
+      #define MY__cpuidex  __cpuidex
+    #else
+
+/*
+ __cpuid (function == 4) requires the subfunction number in ECX.
+  MSDN: The __cpuid intrinsic clears the ECX register before calling the cpuid instruction.
+   __cpuid() in new MSVC clears ECX.
+   __cpuid() in old MSVC (14.00) doesn't clear ECX.
+ We can still use __cpuid for low (function) values that don't require ECX,
+ but __cpuid() in old MSVC will be incorrect for some function values: (function == 4).
+ So here we use a hack for old MSVC to pass (subFunction) in the ECX register to the cpuid instruction:
+ ECX holds the first parameter of a FAST_CALL / NO_INLINE function,
+ so the caller of MY__cpuidex_HACK() sets ECX to subFunction, and since
+ old MSVC doesn't change ECX for __cpuid(), the cpuid instruction gets the (subFunction) value.
+ 
+ DON'T remove MY_NO_INLINE and MY_FAST_CALL for MY__cpuidex_HACK() !!!
+*/
+
+static
+MY_NO_INLINE  // kept out-of-line so that a real call passes subFunction as the first fast-call argument
+void MY_FAST_CALL MY__cpuidex_HACK(UInt32 subFunction, int *CPUInfo, UInt32 function)
+{
+  UNUSED_VAR(subFunction);  // never read in C; it exists only to occupy ECX (see the note above this function)
+  __cpuid(CPUInfo, function);  // fills CPUInfo for the given function number
+}
+
+      #define MY__cpuidex(info, func, func2)  MY__cpuidex_HACK(func2, info, func)
+      #pragma message("======== MY__cpuidex_HACK WAS USED ========")
+    #endif
+  #else
+     #define MY__cpuidex(info, func, func2)  __cpuid(info, func)
+     #pragma message("======== (INCORRECT ?) cpuid WAS USED ========")
+  #endif
+#endif
+
+
+
+
 void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d)
 {
   #ifdef USE_ASM
@@ -99,18 +140,20 @@ void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d)
   #endif
       "=c" (*c) ,
       "=d" (*d)
-    : "0" (function)) ;
+    : "0" (function), "c"(0) ) ;
 
   #endif
   
   #else
 
   int CPUInfo[4];
-  __cpuid(CPUInfo, function);
-  *a = CPUInfo[0];
-  *b = CPUInfo[1];
-  *c = CPUInfo[2];
-  *d = CPUInfo[3];
+
+  MY__cpuidex(CPUInfo, (int)function, 0);
+
+  *a = (UInt32)CPUInfo[0];
+  *b = (UInt32)CPUInfo[1];
+  *c = (UInt32)CPUInfo[2];
+  *d = (UInt32)CPUInfo[3];
 
   #endif
 }
@@ -174,7 +217,7 @@ BoolInt CPU_Is_InOrder()
 }
 
 #if !defined(MY_CPU_AMD64) && defined(_WIN32)
-#include <windows.h>
+#include <windows.h>  /* lowercase on purpose: MinGW-w64 ships windows.h, and case-sensitive hosts cannot find Windows.h */
 static BoolInt CPU_Sys_Is_SSE_Supported()
 {
   OSVERSIONINFO vi;
@@ -188,13 +231,77 @@ static BoolInt CPU_Sys_Is_SSE_Supported()
 #define CHECK_SYS_SSE_SUPPORT
 #endif
 
-BoolInt CPU_Is_Aes_Supported()
+
+static UInt32 X86_CPUID_ECX_Get_Flags()  // returns the c field read by x86cpuid_CheckAndRead(), or 0 when that read fails
 {
   Cx86cpuid p;
   CHECK_SYS_SSE_SUPPORT
   if (!x86cpuid_CheckAndRead(&p))
+    return 0;  // no flags: every bit test done by the callers then reports unsupported
+  return p.c;
+}
+
+BoolInt CPU_IsSupported_AES()  // 1 when the AES flag is set in the ECX feature word, otherwise 0
+{
+  return (X86_CPUID_ECX_Get_Flags() >> 25) & 1;  // bit 25 of the flags word
+}
+
+BoolInt CPU_IsSupported_SSSE3()  // 1 when the SSSE3 flag is set in the ECX feature word, otherwise 0
+{
+  return (X86_CPUID_ECX_Get_Flags() >> 9) & 1;  // bit 9 of the flags word
+}
+
+BoolInt CPU_IsSupported_SSE41()  // 1 when the SSE4.1 flag is set in the ECX feature word, otherwise 0
+{
+  return (X86_CPUID_ECX_Get_Flags() >> 19) & 1;  // bit 19 of the flags word
+}
+
+BoolInt CPU_IsSupported_SHA()  // reports bit 29 of the second (b) output of CPUID function 7
+{
+  Cx86cpuid p;
+  CHECK_SYS_SSE_SUPPORT
+  if (!x86cpuid_CheckAndRead(&p))
+    return False;
+
+  if (p.maxFunc < 7)  // function 7 is queried below, so it must not exceed the reported maximum
+    return False;
+  {
+    UInt32 d[4] = { 0 };
+    MyCPUID(7, &d[0], &d[1], &d[2], &d[3]);  // d[] receives the a, b, c, d outputs in that order
+    return (d[1] >> 29) & 1;
+  }
+}
+
+// #include <stdio.h>
+
+#ifdef _WIN32
+#include <windows.h>  /* lowercase on purpose: MinGW-w64 ships windows.h, and case-sensitive hosts cannot find Windows.h */
+#endif
+
+BoolInt CPU_IsSupported_VAES_AVX2()  // 1 only when both the avx2 and vaes bits of CPUID function 7 are set
+{
+  Cx86cpuid p;
+  CHECK_SYS_SSE_SUPPORT
+
+  #ifdef _WIN32
+  #define MY__PF_XSAVE_ENABLED  17
+  if (!IsProcessorFeaturePresent(MY__PF_XSAVE_ENABLED))  // OS-level check; NOTE(review): only compiled on Windows, other platforms rely on CPUID alone here
+    return False;
+  #endif
+
+  if (!x86cpuid_CheckAndRead(&p))
+    return False;
+  if (p.maxFunc < 7)  // function 7 is queried below, so it must not exceed the reported maximum
     return False;
-  return (p.c >> 25) & 1;
+  {
+    UInt32 d[4] = { 0 };
+    MyCPUID(7, &d[0], &d[1], &d[2], &d[3]);  // d[1] = b (ebx) output, d[2] = c (ecx) output
+    // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);
+    return 1
+      & (d[1] >> 5) // avx2
+      // & (d[1] >> 31) // avx512vl
+      & (d[2] >> 9); // vaes // VEX-256/EVEX
+  }
 }
 
 BoolInt CPU_IsSupported_PageGB()
@@ -215,4 +322,117 @@ BoolInt CPU_IsSupported_PageGB()
   }
 }
 
+
+#elif defined(MY_CPU_ARM_OR_ARM64)
+
+#ifdef _WIN32
+
+#include <windows.h>  /* lowercase on purpose: MinGW-w64 ships windows.h, and case-sensitive hosts cannot find Windows.h */
+
+BoolInt CPU_IsSupported_CRC32()  // Windows on ARM: asks the OS whether the ARMv8 CRC32 feature is present
+  { return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
+BoolInt CPU_IsSupported_CRYPTO()  // Windows on ARM: asks the OS whether the ARMv8 crypto feature is present
+  { return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
+
+#else
+
+#if defined(__APPLE__)
+
+/*
+#include <stdio.h>
+#include <string.h>
+static void Print_sysctlbyname(const char *name)
+{
+  size_t bufSize = 256;
+  char buf[256];
+  int res = sysctlbyname(name, &buf, &bufSize, NULL, 0);
+  {
+    int i;
+    printf("\nres = %d : %s : '%s' : bufSize = %d, numeric", res, name, buf, (unsigned)bufSize);
+    for (i = 0; i < 20; i++)
+      printf(" %2x", (unsigned)(Byte)buf[i]);
+
+  }
+}
+*/
+
+BoolInt CPU_IsSupported_CRC32(void)  // Apple: 1 only when sysctl hw.optional.armv8_crc32 reads back as 1
+{
+  /*
+  Print_sysctlbyname("hw.pagesize");
+  Print_sysctlbyname("machdep.cpu.brand_string");
+  */
+
+  UInt32 val = 0;
+  if (My_sysctlbyname_Get_UInt32("hw.optional.armv8_crc32", &val) == 0 && val == 1)  // a failed query or any other value means unsupported
+    return 1;
+  return 0;
+}
+
+#ifdef MY_CPU_ARM64
+#define APPLE_CRYPTO_SUPPORT_VAL 1
+#else
+#define APPLE_CRYPTO_SUPPORT_VAL 0
+#endif
+
+BoolInt CPU_IsSupported_SHA1(void) { return APPLE_CRYPTO_SUPPORT_VAL; }  // compile-time answer: 1 under MY_CPU_ARM64, otherwise 0
+BoolInt CPU_IsSupported_SHA2(void) { return APPLE_CRYPTO_SUPPORT_VAL; }  // same compile-time constant, no runtime query
+BoolInt CPU_IsSupported_AES (void) { return APPLE_CRYPTO_SUPPORT_VAL; }  // same compile-time constant, no runtime query
+
+
+#else // __APPLE__
+
+#include <sys/auxv.h>
+
+#define USE_HWCAP
+
+#ifdef USE_HWCAP
+
+#include <asm/hwcap.h>
+
+#ifdef MY_CPU_ARM64
+  #define MY_HWCAP_CHECK_FUNC(name) \
+  BoolInt CPU_IsSupported_ ## name() { return (getauxval(AT_HWCAP)  & (HWCAP_  ## name)) ? 1 : 0; }
+#elif defined(MY_CPU_ARM)
+  #define MY_HWCAP_CHECK_FUNC(name) \
+  BoolInt CPU_IsSupported_ ## name() { return (getauxval(AT_HWCAP2) & (HWCAP2_ ## name)) ? 1 : 0; }
+#endif
+
+#else // USE_HWCAP
+
+  #define MY_HWCAP_CHECK_FUNC(name) \
+  BoolInt CPU_IsSupported_ ## name() { return 0; }
+
+#endif // USE_HWCAP
+
+MY_HWCAP_CHECK_FUNC (CRC32)
+MY_HWCAP_CHECK_FUNC (SHA1)
+MY_HWCAP_CHECK_FUNC (SHA2)
+MY_HWCAP_CHECK_FUNC (AES)
+
+#endif // __APPLE__
+#endif // _WIN32
+
+#endif // MY_CPU_ARM_OR_ARM64
+
+
+
+#ifdef __APPLE__
+
+#include <sys/sysctl.h>
+
+int My_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize)  // read-only sysctlbyname() wrapper; returns its result unchanged
+{
+  return sysctlbyname(name, buf, bufSize, NULL, 0);  // NULL / 0: no new value is supplied
+}
+
+int My_sysctlbyname_Get_UInt32(const char *name, UInt32 *val)  // reads a sysctl value that must be exactly sizeof(UInt32) bytes; 0 on success
+{
+  size_t bufSize = sizeof(*val);
+  int res = My_sysctlbyname_Get(name, val, &bufSize);  // bufSize is passed by address and compared against sizeof(*val) below
+  if (res == 0 && bufSize != sizeof(*val))  // call succeeded but the reported size is not that of a UInt32
+    return EFAULT;  // NOTE(review): EFAULT needs <errno.h>; only <sys/sysctl.h> is included in this hunk, confirm it arrives transitively
+  return res;
+}
+
 #endif
diff --git a/deps/LZMA-SDK/C/CpuArch.h b/deps/LZMA-SDK/C/CpuArch.h
index 5f74c1c0c..e1cde536d 100644
--- a/deps/LZMA-SDK/C/CpuArch.h
+++ b/deps/LZMA-SDK/C/CpuArch.h
@@ -1,5 +1,5 @@
 /* CpuArch.h -- CPU specific code
-2018-02-18 : Igor Pavlov : Public domain */
+2021-04-25 : Igor Pavlov : Public domain */
 
 #ifndef __CPU_ARCH_H
 #define __CPU_ARCH_H
@@ -14,6 +14,10 @@ MY_CPU_BE means that CPU is BIG ENDIAN.
 If MY_CPU_LE and MY_CPU_BE are not defined, we don't know about ENDIANNESS of platform.
 
 MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned memory accesses.
+
+MY_CPU_64BIT means that processor can work with 64-bit registers.
+  MY_CPU_64BIT can be used to select fast code branch
+  MY_CPU_64BIT doesn't mean that (sizeof(void *) == 8)
 */
 
 #if  defined(_M_X64) \
@@ -24,8 +28,10 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem
   #define MY_CPU_AMD64
   #ifdef __ILP32__
     #define MY_CPU_NAME "x32"
+    #define MY_CPU_SIZEOF_POINTER 4
   #else
     #define MY_CPU_NAME "x64"
+    #define MY_CPU_SIZEOF_POINTER 8
   #endif
   #define MY_CPU_64BIT
 #endif
@@ -35,7 +41,8 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem
   || defined(__i386__)
   #define MY_CPU_X86
   #define MY_CPU_NAME "x86"
-  #define MY_CPU_32BIT
+  /* #define MY_CPU_32BIT */
+  #define MY_CPU_SIZEOF_POINTER 4
 #endif
 
 
@@ -59,8 +66,14 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem
   || defined(__THUMBEL__) \
   || defined(__THUMBEB__)
   #define MY_CPU_ARM
-  #define MY_CPU_NAME "arm"
-  #define MY_CPU_32BIT
+
+  #if defined(__thumb__) || defined(__THUMBEL__) || defined(_M_ARMT)
+    #define MY_CPU_NAME "armt"
+  #else
+    #define MY_CPU_NAME "arm"
+  #endif
+  /* #define MY_CPU_32BIT */
+  #define MY_CPU_SIZEOF_POINTER 4
 #endif
 
 
@@ -84,17 +97,29 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem
 
 
 #if  defined(__ppc64__) \
-  || defined(__powerpc64__)
+  || defined(__powerpc64__) \
+  || defined(__ppc__) \
+  || defined(__powerpc__) \
+  || defined(__PPC__) \
+  || defined(_POWER)
+
+#if  defined(__ppc64__) \
+  || defined(__powerpc64__) \
+  || defined(_LP64) \
+  || defined(__64BIT__)
   #ifdef __ILP32__
     #define MY_CPU_NAME "ppc64-32"
+    #define MY_CPU_SIZEOF_POINTER 4
   #else
     #define MY_CPU_NAME "ppc64"
+    #define MY_CPU_SIZEOF_POINTER 8
   #endif
   #define MY_CPU_64BIT
-#elif defined(__ppc__) \
-  || defined(__powerpc__)
+#else
   #define MY_CPU_NAME "ppc"
-  #define MY_CPU_32BIT
+  #define MY_CPU_SIZEOF_POINTER 4
+  /* #define MY_CPU_32BIT */
+#endif
 #endif
 
 
@@ -111,6 +136,10 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem
 #define MY_CPU_X86_OR_AMD64
 #endif
 
+#if defined(MY_CPU_ARM) || defined(MY_CPU_ARM64)
+#define MY_CPU_ARM_OR_ARM64
+#endif
+
 
 #ifdef _WIN32
 
@@ -170,6 +199,41 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem
   #error Stop_Compiling_Bad_32_64_BIT
 #endif
 
+#ifdef __SIZEOF_POINTER__
+  #ifdef MY_CPU_SIZEOF_POINTER
+    #if MY_CPU_SIZEOF_POINTER != __SIZEOF_POINTER__
+      #error Stop_Compiling_Bad_MY_CPU_PTR_SIZE
+    #endif
+  #else
+    #define MY_CPU_SIZEOF_POINTER  __SIZEOF_POINTER__
+  #endif
+#endif
+
+#if defined(MY_CPU_SIZEOF_POINTER) && (MY_CPU_SIZEOF_POINTER == 4)
+#if defined (_LP64)
+      #error Stop_Compiling_Bad_MY_CPU_PTR_SIZE
+#endif
+#endif
+
+#ifdef _MSC_VER
+  #if _MSC_VER >= 1300
+    #define MY_CPU_pragma_pack_push_1   __pragma(pack(push, 1))
+    #define MY_CPU_pragma_pop           __pragma(pack(pop))
+  #else
+    #define MY_CPU_pragma_pack_push_1
+    #define MY_CPU_pragma_pop
+  #endif
+#else
+  #ifdef __xlC__
+    // for XLC compiler:
+    #define MY_CPU_pragma_pack_push_1   _Pragma("pack(1)")
+    #define MY_CPU_pragma_pop           _Pragma("pack()")
+  #else
+    #define MY_CPU_pragma_pack_push_1   _Pragma("pack(push, 1)")
+    #define MY_CPU_pragma_pop           _Pragma("pack(pop)")
+  #endif
+#endif
+
 
 #ifndef MY_CPU_NAME
   #ifdef MY_CPU_LE
@@ -202,9 +266,9 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem
 #define GetUi32(p) (*(const UInt32 *)(const void *)(p))
 #define GetUi64(p) (*(const UInt64 *)(const void *)(p))
 
-#define SetUi16(p, v) { *(UInt16 *)(p) = (v); }
-#define SetUi32(p, v) { *(UInt32 *)(p) = (v); }
-#define SetUi64(p, v) { *(UInt64 *)(p) = (v); }
+#define SetUi16(p, v) { *(UInt16 *)(void *)(p) = (v); }
+#define SetUi32(p, v) { *(UInt32 *)(void *)(p) = (v); }
+#define SetUi64(p, v) { *(UInt64 *)(void *)(p) = (v); }
 
 #else
 
@@ -242,7 +306,7 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem
   #define MY__has_builtin(x) 0
 #endif
 
-#if defined(MY_CPU_LE_UNALIGN) && /* defined(_WIN64) && */ (_MSC_VER >= 1300)
+#if defined(MY_CPU_LE_UNALIGN) && /* defined(_WIN64) && */ defined(_MSC_VER) && (_MSC_VER >= 1300)
 
 /* Note: we use bswap instruction, that is unsupported in 386 cpu */
 
@@ -253,8 +317,8 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem
 #pragma intrinsic(_byteswap_uint64)
 
 /* #define GetBe16(p) _byteswap_ushort(*(const UInt16 *)(const Byte *)(p)) */
-#define GetBe32(p) _byteswap_ulong(*(const UInt32 *)(const Byte *)(p))
-#define GetBe64(p) _byteswap_uint64(*(const UInt64 *)(const Byte *)(p))
+#define GetBe32(p) _byteswap_ulong (*(const UInt32 *)(const void *)(p))
+#define GetBe64(p) _byteswap_uint64(*(const UInt64 *)(const void *)(p))
 
 #define SetBe32(p, v) (*(UInt32 *)(void *)(p)) = _byteswap_ulong(v)
 
@@ -262,9 +326,9 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem
        (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \
     || (defined(__clang__) && MY__has_builtin(__builtin_bswap16)) )
 
-/* #define GetBe16(p) __builtin_bswap16(*(const UInt16 *)(const Byte *)(p)) */
-#define GetBe32(p) __builtin_bswap32(*(const UInt32 *)(const Byte *)(p))
-#define GetBe64(p) __builtin_bswap64(*(const UInt64 *)(const Byte *)(p))
+/* #define GetBe16(p) __builtin_bswap16(*(const UInt16 *)(const void *)(p)) */
+#define GetBe32(p) __builtin_bswap32(*(const UInt32 *)(const void *)(p))
+#define GetBe64(p) __builtin_bswap64(*(const UInt64 *)(const void *)(p))
 
 #define SetBe32(p, v) (*(UInt32 *)(void *)(p)) = __builtin_bswap32(v)
 
@@ -325,10 +389,35 @@ int x86cpuid_GetFirm(const Cx86cpuid *p);
 #define x86cpuid_GetModel(ver)  (((ver >> 12) &  0xF0) | ((ver >> 4) & 0xF))
 #define x86cpuid_GetStepping(ver) (ver & 0xF)
 
-BoolInt CPU_Is_InOrder();
-BoolInt CPU_Is_Aes_Supported();
-BoolInt CPU_IsSupported_PageGB();
+BoolInt CPU_Is_InOrder(void);
+
+BoolInt CPU_IsSupported_AES(void);
+BoolInt CPU_IsSupported_VAES_AVX2(void);
+BoolInt CPU_IsSupported_SSSE3(void);
+BoolInt CPU_IsSupported_SSE41(void);
+BoolInt CPU_IsSupported_SHA(void);
+BoolInt CPU_IsSupported_PageGB(void);
+
+#elif defined(MY_CPU_ARM_OR_ARM64)
+
+BoolInt CPU_IsSupported_CRC32(void);
+
+#if defined(_WIN32)
+BoolInt CPU_IsSupported_CRYPTO(void);
+#define CPU_IsSupported_SHA1  CPU_IsSupported_CRYPTO
+#define CPU_IsSupported_SHA2  CPU_IsSupported_CRYPTO
+#define CPU_IsSupported_AES   CPU_IsSupported_CRYPTO
+#else
+BoolInt CPU_IsSupported_SHA1(void);
+BoolInt CPU_IsSupported_SHA2(void);
+BoolInt CPU_IsSupported_AES(void);
+#endif
+
+#endif
 
+#if defined(__APPLE__)
+int My_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize);
+int My_sysctlbyname_Get_UInt32(const char *name, UInt32 *val);
 #endif
 
 EXTERN_C_END
diff --git a/deps/LZMA-SDK/C/Delta.c b/deps/LZMA-SDK/C/Delta.c
index 6cbbe4601..fc7e9fe96 100644
--- a/deps/LZMA-SDK/C/Delta.c
+++ b/deps/LZMA-SDK/C/Delta.c
@@ -1,5 +1,5 @@
 /* Delta.c -- Delta converter
-2009-05-26 : Igor Pavlov : Public domain */
+2021-02-09 : Igor Pavlov : Public domain */
 
 #include "Precomp.h"
 
@@ -12,53 +12,158 @@ void Delta_Init(Byte *state)
     state[i] = 0;
 }
 
-static void MyMemCpy(Byte *dest, const Byte *src, unsigned size)
-{
-  unsigned i;
-  for (i = 0; i < size; i++)
-    dest[i] = src[i];
-}
 
 void Delta_Encode(Byte *state, unsigned delta, Byte *data, SizeT size)
 {
-  Byte buf[DELTA_STATE_SIZE];
-  unsigned j = 0;
-  MyMemCpy(buf, state, delta);
+  Byte temp[DELTA_STATE_SIZE];  // working copy of the first `delta` bytes of state
+
+  if (size == 0)  // nothing to encode: state is left untouched
+    return;
+
+  {
+    unsigned i = 0;
+    do
+      temp[i] = state[i];
+    while (++i != delta);  // do-while form assumes 1 <= delta <= DELTA_STATE_SIZE - TODO confirm callers guarantee this
+  }
+
+  if (size <= delta)  // short input: every byte is differenced against temp only
+  {
+    unsigned i = 0, k;
+    do
+    {
+      Byte b = *data;
+      *data++ = (Byte)(b - temp[i]);  // store the difference, truncated to one byte
+      temp[i] = b;  // keep the original byte for the next state
+    }
+    while (++i != size);
+    
+    k = 0;
+    
+    do
+    {
+      if (i == delta)  // wrap around the end of temp
+        i = 0;
+      state[k] = temp[i++];  // new state = temp rotated left by `size` positions
+    }
+    while (++k != delta);
+    
+    return;
+  }
+    
   {
-    SizeT i;
-    for (i = 0; i < size;)
+    Byte *p = data + size - delta;  // start of the last `delta` input bytes
+    {
+      unsigned i = 0;
+      do
+        state[i] = *p++;  // save them, still unencoded, as the new state
+      while (++i != delta);
+    }
     {
-      for (j = 0; j < delta && i < size; i++, j++)
+      const Byte *lim = data + delta;  // bytes at or above lim have their predecessor inside data
+      ptrdiff_t dif = -(ptrdiff_t)delta;  // p[dif] is the byte `delta` positions before *p
+      
+      if (((ptrdiff_t)size + dif) & 1)  // odd number of such bytes: do one now so the loop below can work in pairs
       {
-        Byte b = data[i];
-        data[i] = (Byte)(b - buf[j]);
-        buf[j] = b;
+        --p;  *p = (Byte)(*p - p[dif]);  // walking backwards, so p[dif] has not been overwritten yet
       }
+
+      while (p != lim)  // two bytes per iteration, from the end of data down to lim
+      {
+        --p;  *p = (Byte)(*p - p[dif]);
+        --p;  *p = (Byte)(*p - p[dif]);
+      }
+      
+      dif = -dif;  // from here dif counts down from delta and indexes temp
+      
+      do
+      {
+        --p;  *p = (Byte)(*p - temp[--dif]);  // first `delta` bytes are differenced against the saved old state
+      }
+      while (dif != 0);
     }
   }
-  if (j == delta)
-    j = 0;
-  MyMemCpy(state, buf + j, delta - j);
-  MyMemCpy(state + delta - j, buf, j);
 }
 
+
 void Delta_Decode(Byte *state, unsigned delta, Byte *data, SizeT size)
 {
-  Byte buf[DELTA_STATE_SIZE];
-  unsigned j = 0;
-  MyMemCpy(buf, state, delta);
+  unsigned i;
+  const Byte *lim;
+
+  if (size == 0)  // nothing to decode: state is left untouched
+    return;
+  
+  i = 0;
+  lim = data + size;  // one past the last byte to decode
+  
+  if (size <= delta)  // short input: every byte's predecessor comes from state
+  {
+    do
+      *data = (Byte)(*data + state[i++]);  // add the stored predecessor back, truncated to one byte
+    while (++data != lim);
+
+    for (; delta != i; state++, delta--)  // i == size here: slide the unused (delta - size) state bytes to the front
+      *state = state[i];
+    data -= i;  // rewind to the first decoded byte; the final loop appends these bytes to state
+  }
+  else
   {
-    SizeT i;
-    for (i = 0; i < size;)
+    /* commented-out specialised paths for delta == 1..4:
+    #define B(n) b ## n
+    #define I(n) Byte B(n) = state[n];
+    #define U(n) { B(n) = (Byte)((B(n)) + *data++); data[-1] = (B(n)); }
+    #define F(n) if (data != lim) { U(n) }
+
+    if (delta == 1)
+    {
+      I(0)
+      if ((lim - data) & 1) { U(0) }
+      while (data != lim) { U(0) U(0) }
+      data -= 1;
+    }
+    else if (delta == 2)
     {
-      for (j = 0; j < delta && i < size; i++, j++)
+      I(0) I(1)
+      lim -= 1; while (data < lim) { U(0) U(1) }
+      lim += 1; F(0)
+      data -= 2;
+    }
+    else if (delta == 3)
+    {
+      I(0) I(1) I(2)
+      lim -= 2; while (data < lim) { U(0) U(1) U(2) }
+      lim += 2; F(0) F(1)
+      data -= 3;
+    }
+    else if (delta == 4)
+    {
+      I(0) I(1) I(2) I(3)
+      lim -= 3; while (data < lim) { U(0) U(1) U(2) U(3) }
+      lim += 3; F(0) F(1) F(2)
+      data -= 4;
+    }
+    else
+    */
+    {
+      do
+      {
+        *data = (Byte)(*data + state[i++]);  // first `delta` bytes: predecessor comes from state
+        data++;
+      }
+      while (i != delta);  // do-while form assumes delta >= 1 - TODO confirm callers guarantee this
+  
       {
-        buf[j] = data[i] = (Byte)(buf[j] + data[i]);
+        ptrdiff_t dif = -(ptrdiff_t)delta;  // data[dif] is the already-decoded byte `delta` positions back
+        do
+          *data = (Byte)(*data + data[dif]);
+        while (++data != lim);  // size > delta in this branch, so at least one byte remains on entry
+        data += dif;  // step back to the last `delta` decoded bytes
       }
     }
   }
-  if (j == delta)
-    j = 0;
-  MyMemCpy(state, buf + j, delta - j);
-  MyMemCpy(state + delta - j, buf, j);
+
+  do
+    *state++ = *data;  // copy the trailing decoded bytes [data, lim) into state for the next call
+  while (++data != lim);
 }
diff --git a/deps/LZMA-SDK/C/DllSecur.c b/deps/LZMA-SDK/C/DllSecur.c
index 19a22a9f0..b0a555c49 100644
--- a/deps/LZMA-SDK/C/DllSecur.c
+++ b/deps/LZMA-SDK/C/DllSecur.c
@@ -5,7 +5,7 @@
 
 #ifdef _WIN32
 
-#include <windows.h>
+#include <windows.h>  /* lowercase spelling: also resolves on case-sensitive hosts (e.g. MinGW cross-builds) */
 
 #include "DllSecur.h"
 
diff --git a/deps/LZMA-SDK/C/DllSecur.h b/deps/LZMA-SDK/C/DllSecur.h
index 4c113568e..0fd8070e5 100644
--- a/deps/LZMA-SDK/C/DllSecur.h
+++ b/deps/LZMA-SDK/C/DllSecur.h
@@ -10,8 +10,8 @@ EXTERN_C_BEGIN
 
 #ifdef _WIN32
 
-void My_SetDefaultDllDirectories();
-void LoadSecurityDlls();
+void My_SetDefaultDllDirectories(void);
+void LoadSecurityDlls(void);
 
 #endif
 
diff --git a/deps/LZMA-SDK/C/LzFind.c b/deps/LZMA-SDK/C/LzFind.c
index 4eefc17dd..18ec00ef5 100644
--- a/deps/LZMA-SDK/C/LzFind.c
+++ b/deps/LZMA-SDK/C/LzFind.c
@@ -1,10 +1,11 @@
 /* LzFind.c -- Match finder for LZ algorithms
-2018-07-08 : Igor Pavlov : Public domain */
+2021-04-01 : Igor Pavlov : Public domain */
 
 #include "Precomp.h"
 
 #include <string.h>
 
+#include "CpuArch.h"
 #include "LzFind.h"
 #include "LzHash.h"
 
@@ -14,7 +15,45 @@
 #define kNormalizeMask (~(UInt32)(kNormalizeStepMin - 1))
 #define kMaxHistorySize ((UInt32)7 << 29)
 
-#define kStartMaxLen 3
+// #define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size)
+#define kFix5HashSize kFix4HashSize
+
+/*
+ HASH2_CALC:
+   if (hv) match, then cur[0] and cur[1] also match
+*/
+#define HASH2_CALC hv = GetUi16(cur);
+
+// (crc[0 ... 255] & 0xFF) provides one-to-one correspondence to [0 ... 255]
+
+/*
+ HASH3_CALC:
+   if (cur[0]) and (h2) match, then cur[1]            also match
+   if (cur[0]) and (hv) match, then cur[1] and cur[2] also match
+*/
+#define HASH3_CALC { \
+  UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+  h2 = temp & (kHash2Size - 1); \
+  hv = (temp ^ ((UInt32)cur[2] << 8)) & p->hashMask; }
+
+#define HASH4_CALC { \
+  UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+  h2 = temp & (kHash2Size - 1); \
+  temp ^= ((UInt32)cur[2] << 8); \
+  h3 = temp & (kHash3Size - 1); \
+  hv = (temp ^ (p->crc[cur[3]] << kLzHash_CrcShift_1)) & p->hashMask; }
+
+#define HASH5_CALC { \
+  UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+  h2 = temp & (kHash2Size - 1); \
+  temp ^= ((UInt32)cur[2] << 8); \
+  h3 = temp & (kHash3Size - 1); \
+  temp ^= (p->crc[cur[3]] << kLzHash_CrcShift_1); \
+  /* h4 = temp & p->hash4Mask; */ /* (kHash4Size - 1); */ \
+  hv = (temp ^ (p->crc[cur[4]] << kLzHash_CrcShift_2)) & p->hashMask; }
+
+#define HASH_ZIP_CALC hv = ((cur[2] | ((UInt32)cur[0] << 8)) ^ p->crc[cur[1]]) & 0xFFFF;
+
 
 static void LzInWindow_Free(CMatchFinder *p, ISzAllocPtr alloc)
 {
@@ -44,9 +83,9 @@ static int LzInWindow_Create(CMatchFinder *p, UInt32 keepSizeReserv, ISzAllocPtr
   return (p->bufferBase != NULL);
 }
 
-Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; }
+static Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; }
 
-UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return p->streamPos - p->pos; }
+static UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return p->streamPos - p->pos; }
 
 void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue)
 {
@@ -77,7 +116,7 @@ static void MatchFinder_ReadBlock(CMatchFinder *p)
   for (;;)
   {
     Byte *dest = p->buffer + (p->streamPos - p->pos);
-    size_t size = (p->bufferBase + p->blockSize - dest);
+    size_t size = (size_t)(p->bufferBase + p->blockSize - dest);
     if (size == 0)
       return;
 
@@ -204,10 +243,10 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
     UInt32 hs;
     p->matchMaxLen = matchMaxLen;
     {
+      // UInt32 hs4;
       p->fixedHashSize = 0;
-      if (p->numHashBytes == 2)
-        hs = (1 << 16) - 1;
-      else
+      hs = (1 << 16) - 1;
+      if (p->numHashBytes != 2)
       {
         hs = historySize;
         if (hs > p->expectedDataSize)
@@ -218,9 +257,9 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
         hs |= (hs >> 2);
         hs |= (hs >> 4);
         hs |= (hs >> 8);
+        // we propagated 16 bits in (hs). Low 16 bits must be set later
         hs >>= 1;
-        hs |= 0xFFFF; /* don't change it! It's required for Deflate */
-        if (hs > (1 << 24))
+        if (hs >= (1 << 24))
         {
           if (p->numHashBytes == 3)
             hs = (1 << 24) - 1;
@@ -228,12 +267,30 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
             hs >>= 1;
           /* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */
         }
+        
+        // hs = ((UInt32)1 << 25) - 1; // for test
+        
+        // (hash_size >= (1 << 16)) : Required for (numHashBytes > 2)
+        hs |= (1 << 16) - 1; /* don't change it! */
+        
+        // bt5: we adjust the size with recommended minimum size
+        if (p->numHashBytes >= 5)
+          hs |= (256 << kLzHash_CrcShift_2) - 1;
       }
       p->hashMask = hs;
       hs++;
+
+      /*
+      hs4 = (1 << 20);
+      if (hs4 > hs)
+        hs4 = hs;
+      // hs4 = (1 << 16); // for test
+      p->hash4Mask = hs4 - 1;
+      */
+
       if (p->numHashBytes > 2) p->fixedHashSize += kHash2Size;
       if (p->numHashBytes > 3) p->fixedHashSize += kHash3Size;
-      if (p->numHashBytes > 4) p->fixedHashSize += kHash4Size;
+      // if (p->numHashBytes > 4) p->fixedHashSize += hs4; // kHash4Size;
       hs += p->fixedHashSize;
     }
 
@@ -249,6 +306,10 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
         numSons <<= 1;
       newSize = hs + numSons;
 
+      // aligned size is not required here, but it can be better for some loops
+      #define NUM_REFS_ALIGN_MASK 0xF
+      newSize = (newSize + NUM_REFS_ALIGN_MASK) & ~(size_t)NUM_REFS_ALIGN_MASK;
+
       if (p->hash && p->numRefs == newSize)
         return 1;
       
@@ -349,15 +410,23 @@ static UInt32 MatchFinder_GetSubValue(CMatchFinder *p)
 
 void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems)
 {
-  size_t i;
-  for (i = 0; i < numItems; i++)
+  if (numItems == 0)  // guard: with no items, lim below would point before the array
+    return;
   {
-    UInt32 value = items[i];
-    if (value <= subValue)
-      value = kEmptyHashValue;
-    else
-      value -= subValue;
-    items[i] = value;
+  const CLzRef *lim = items + numItems - 1;  // address of the last element
+  for (; items < lim; items += 2)  // two elements per iteration
+  {
+    UInt32 v, m;  // v: old entry; m: v - subValue, or kEmptyHashValue when v < subValue
+    v = items[0];                 m = v - subValue;  if (v < subValue) m = kEmptyHashValue;
+    v = items[1];  items[0] = m;  m = v - subValue;  if (v < subValue) m = kEmptyHashValue;
+                   items[1] = m;
+  }
+  if (items == lim)  // odd numItems: exactly one element is left over
+  {
+    UInt32 v, m;  // same rule as in the paired loop above
+    v = items[0];                 m = v - subValue;  if (v < subValue) m = kEmptyHashValue;
+                   items[0] = m;
+  }
   }
 }
 
@@ -429,8 +498,8 @@ static UInt32 * Hc_GetMatchesSpec(unsigned lenLimit, UInt32 curMatch, UInt32 pos
     {
       ptrdiff_t diff;
       curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)];
-      diff = (ptrdiff_t)0 - delta;
-      if (cur[maxLen] == cur[maxLen + diff])
+      diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
+      if (cur[maxLen] == cur[(ptrdiff_t)maxLen + diff])
       {
         const Byte *c = cur;
         while (*c == c[diff])
@@ -588,15 +657,21 @@ static void MatchFinder_MovePos(CMatchFinder *p) { MOVE_POS; }
 
 #define MF_PARAMS(p) p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue
 
-#define GET_MATCHES_FOOTER(offset, maxLen) \
-  offset = (unsigned)(GetMatchesSpec1((UInt32)lenLimit, curMatch, MF_PARAMS(p), \
-  distances + offset, (UInt32)maxLen) - distances); MOVE_POS_RET;
+#define GET_MATCHES_FOOTER_BASE(_maxLen_, func) \
+  offset = (unsigned)(func((UInt32)lenLimit, curMatch, MF_PARAMS(p), \
+  distances + offset, (UInt32)(_maxLen_)) - distances); MOVE_POS_RET;
+
+#define GET_MATCHES_FOOTER_BT(_maxLen_) \
+  GET_MATCHES_FOOTER_BASE(_maxLen_, GetMatchesSpec1)
+
+#define GET_MATCHES_FOOTER_HC(_maxLen_) \
+  GET_MATCHES_FOOTER_BASE(_maxLen_, Hc_GetMatchesSpec)
 
 #define SKIP_FOOTER \
   SkipMatchesSpec((UInt32)lenLimit, curMatch, MF_PARAMS(p)); MOVE_POS;
 
 #define UPDATE_maxLen { \
-    ptrdiff_t diff = (ptrdiff_t)0 - d2; \
+    ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)d2; \
     const Byte *c = cur + maxLen; \
     const Byte *lim = cur + lenLimit; \
     for (; c != lim; c++) if (*(c + diff) != *c) break; \
@@ -610,7 +685,7 @@ static UInt32 Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
   curMatch = p->hash[hv];
   p->hash[hv] = p->pos;
   offset = 0;
-  GET_MATCHES_FOOTER(offset, 1)
+  GET_MATCHES_FOOTER_BT(1)
 }
 
 UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
@@ -621,7 +696,7 @@ UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
   curMatch = p->hash[hv];
   p->hash[hv] = p->pos;
   offset = 0;
-  GET_MATCHES_FOOTER(offset, 2)
+  GET_MATCHES_FOOTER_BT(2)
 }
 
 static UInt32 Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
@@ -659,9 +734,10 @@ static UInt32 Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
     }
   }
   
-  GET_MATCHES_FOOTER(offset, maxLen)
+  GET_MATCHES_FOOTER_BT(maxLen)
 }
 
+
 static UInt32 Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
 {
   UInt32 h2, h3, d2, d3, pos;
@@ -676,53 +752,61 @@ static UInt32 Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
 
   d2 = pos - hash                  [h2];
   d3 = pos - (hash + kFix3HashSize)[h3];
-
   curMatch = (hash + kFix4HashSize)[hv];
 
   hash                  [h2] = pos;
   (hash + kFix3HashSize)[h3] = pos;
   (hash + kFix4HashSize)[hv] = pos;
 
-  maxLen = 0;
+  maxLen = 3;
   offset = 0;
   
-  if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
-  {
-    maxLen = 2;
-    distances[0] = 2;
-    distances[1] = d2 - 1;
-    offset = 2;
-  }
-  
-  if (d2 != d3 && d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
+  for (;;)
   {
-    maxLen = 3;
-    distances[(size_t)offset + 1] = d3 - 1;
-    offset += 2;
-    d2 = d3;
-  }
+    if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
+    {
+      distances[0] = 2;
+      distances[1] = d2 - 1;
+      offset = 2;
+      if (*(cur - d2 + 2) == cur[2])
+      {
+        // distances[0] = 3;
+      }
+      else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
+      {
+        d2 = d3;
+        distances[2 + 1] = d3 - 1;
+        offset = 4;
+      }
+      else
+        break;
+    }
+    else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
+    {
+      d2 = d3;
+      distances[1] = d3 - 1;
+      offset = 2;
+    }
+    else
+      break;
   
-  if (offset != 0)
-  {
     UPDATE_maxLen
     distances[(size_t)offset - 2] = (UInt32)maxLen;
     if (maxLen == lenLimit)
     {
       SkipMatchesSpec((UInt32)lenLimit, curMatch, MF_PARAMS(p));
-      MOVE_POS_RET;
+      MOVE_POS_RET
     }
+    break;
   }
   
-  if (maxLen < 3)
-    maxLen = 3;
-  
-  GET_MATCHES_FOOTER(offset, maxLen)
+  GET_MATCHES_FOOTER_BT(maxLen)
 }
 
-/*
+
 static UInt32 Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
 {
-  UInt32 h2, h3, h4, d2, d3, d4, maxLen, offset, pos;
+  UInt32 h2, h3, d2, d3, maxLen, offset, pos;
   UInt32 *hash;
   GET_MATCHES_HEADER(5)
 
@@ -733,53 +817,49 @@ static UInt32 Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
 
   d2 = pos - hash                  [h2];
   d3 = pos - (hash + kFix3HashSize)[h3];
-  d4 = pos - (hash + kFix4HashSize)[h4];
+  // d4 = pos - (hash + kFix4HashSize)[h4];
 
   curMatch = (hash + kFix5HashSize)[hv];
 
   hash                  [h2] = pos;
   (hash + kFix3HashSize)[h3] = pos;
-  (hash + kFix4HashSize)[h4] = pos;
+  // (hash + kFix4HashSize)[h4] = pos;
   (hash + kFix5HashSize)[hv] = pos;
 
-  maxLen = 0;
+  maxLen = 4;
   offset = 0;
 
-  if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
+  for (;;)
   {
-    distances[0] = maxLen = 2;
-    distances[1] = d2 - 1;
-    offset = 2;
-    if (*(cur - d2 + 2) == cur[2])
-      distances[0] = maxLen = 3;
+    if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
+    {
+      distances[0] = 2;
+      distances[1] = d2 - 1;
+      offset = 2;
+      if (*(cur - d2 + 2) == cur[2])
+      {
+      }
+      else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
+      {
+        distances[3] = d3 - 1;
+        offset = 4;
+        d2 = d3;
+      }
+      else
+        break;
+    }
     else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
     {
-      distances[2] = maxLen = 3;
-      distances[3] = d3 - 1;
-      offset = 4;
+      distances[1] = d3 - 1;
+      offset = 2;
       d2 = d3;
     }
-  }
-  else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
-  {
-    distances[0] = maxLen = 3;
-    distances[1] = d3 - 1;
-    offset = 2;
-    d2 = d3;
-  }
-  
-  if (d2 != d4 && d4 < p->cyclicBufferSize
-      && *(cur - d4) == *cur
-      && *(cur - d4 + 3) == *(cur + 3))
-  {
-    maxLen = 4;
-    distances[(size_t)offset + 1] = d4 - 1;
-    offset += 2;
-    d2 = d4;
-  }
-  
-  if (offset != 0)
-  {
+    else
+      break;
+
+    distances[(size_t)offset - 2] = 3;
+    if (*(cur - d2 + 3) != cur[3])
+      break;
     UPDATE_maxLen
     distances[(size_t)offset - 2] = maxLen;
     if (maxLen == lenLimit)
@@ -787,14 +867,12 @@ static UInt32 Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
       SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p));
       MOVE_POS_RET;
     }
+    break;
   }
-
-  if (maxLen < 4)
-    maxLen = 4;
   
-  GET_MATCHES_FOOTER(offset, maxLen)
+  GET_MATCHES_FOOTER_BT(maxLen)
 }
-*/
+
 
 static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
 {
@@ -816,27 +894,38 @@ static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
   (hash + kFix3HashSize)[h3] = pos;
   (hash + kFix4HashSize)[hv] = pos;
 
-  maxLen = 0;
+  maxLen = 3;
   offset = 0;
 
-  if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
-  {
-    maxLen = 2;
-    distances[0] = 2;
-    distances[1] = d2 - 1;
-    offset = 2;
-  }
-  
-  if (d2 != d3 && d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
-  {
-    maxLen = 3;
-    distances[(size_t)offset + 1] = d3 - 1;
-    offset += 2;
-    d2 = d3;
-  }
-  
-  if (offset != 0)
+  for (;;)
   {
+    if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
+    {
+      distances[0] = 2;
+      distances[1] = d2 - 1;
+      offset = 2;
+      if (*(cur - d2 + 2) == cur[2])
+      {
+        // distances[0] = 3;
+      }
+      else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
+      {
+        d2 = d3;
+        distances[2 + 1] = d3 - 1;
+        offset = 4;
+      }
+      else
+        break;
+    }
+    else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
+    {
+      d2 = d3;
+      distances[1] = d3 - 1;
+      offset = 2;
+    }
+    else
+      break;
+
     UPDATE_maxLen
     distances[(size_t)offset - 2] = (UInt32)maxLen;
     if (maxLen == lenLimit)
@@ -844,20 +933,16 @@ static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
       p->son[p->cyclicBufferPos] = curMatch;
       MOVE_POS_RET;
     }
+    break;
   }
   
-  if (maxLen < 3)
-    maxLen = 3;
-
-  offset = (unsigned)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p),
-      distances + offset, maxLen) - (distances));
-  MOVE_POS_RET
+  GET_MATCHES_FOOTER_HC(maxLen);
 }
 
-/*
+
 static UInt32 Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
 {
-  UInt32 h2, h3, h4, d2, d3, d4, maxLen, offset, pos
+  UInt32 h2, h3, d2, d3, maxLen, offset, pos;
   UInt32 *hash;
   GET_MATCHES_HEADER(5)
 
@@ -865,56 +950,52 @@ static UInt32 Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
 
   hash = p->hash;
   pos = p->pos;
-  
+
   d2 = pos - hash                  [h2];
   d3 = pos - (hash + kFix3HashSize)[h3];
-  d4 = pos - (hash + kFix4HashSize)[h4];
+  // d4 = pos - (hash + kFix4HashSize)[h4];
 
   curMatch = (hash + kFix5HashSize)[hv];
 
   hash                  [h2] = pos;
   (hash + kFix3HashSize)[h3] = pos;
-  (hash + kFix4HashSize)[h4] = pos;
+  // (hash + kFix4HashSize)[h4] = pos;
   (hash + kFix5HashSize)[hv] = pos;
 
-  maxLen = 0;
+  maxLen = 4;
   offset = 0;
 
-  if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
+  for (;;)
   {
-    distances[0] = maxLen = 2;
-    distances[1] = d2 - 1;
-    offset = 2;
-    if (*(cur - d2 + 2) == cur[2])
-      distances[0] = maxLen = 3;
+    if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
+    {
+      distances[0] = 2;
+      distances[1] = d2 - 1;
+      offset = 2;
+      if (*(cur - d2 + 2) == cur[2])
+      {
+      }
+      else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
+      {
+        distances[3] = d3 - 1;
+        offset = 4;
+        d2 = d3;
+      }
+      else
+        break;
+    }
     else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
     {
-      distances[2] = maxLen = 3;
-      distances[3] = d3 - 1;
-      offset = 4;
+      distances[1] = d3 - 1;
+      offset = 2;
       d2 = d3;
     }
-  }
-  else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
-  {
-    distances[0] = maxLen = 3;
-    distances[1] = d3 - 1;
-    offset = 2;
-    d2 = d3;
-  }
-  
-  if (d2 != d4 && d4 < p->cyclicBufferSize
-      && *(cur - d4) == *cur
-      && *(cur - d4 + 3) == *(cur + 3))
-  {
-    maxLen = 4;
-    distances[(size_t)offset + 1] = d4 - 1;
-    offset += 2;
-    d2 = d4;
-  }
-  
-  if (offset != 0)
-  {
+    else
+      break;
+
+    distances[(size_t)offset - 2] = 3;
+    if (*(cur - d2 + 3) != cur[3])
+      break;
     UPDATE_maxLen
     distances[(size_t)offset - 2] = maxLen;
     if (maxLen == lenLimit)
@@ -922,16 +1003,12 @@ static UInt32 Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
       p->son[p->cyclicBufferPos] = curMatch;
       MOVE_POS_RET;
     }
+    break;
   }
   
-  if (maxLen < 4)
-    maxLen = 4;
-
-  offset = (UInt32)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p),
-      distances + offset, maxLen) - (distances));
-  MOVE_POS_RET
+  GET_MATCHES_FOOTER_HC(maxLen);
 }
-*/
+
 
 UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
 {
@@ -940,11 +1017,11 @@ UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
   HASH_ZIP_CALC;
   curMatch = p->hash[hv];
   p->hash[hv] = p->pos;
-  offset = (unsigned)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p),
-      distances, 2) - (distances));
-  MOVE_POS_RET
+  offset = 0;
+  GET_MATCHES_FOOTER_HC(2)
 }
 
+
 static void Bt2_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
 {
   do
@@ -1006,12 +1083,11 @@ static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
   while (--num != 0);
 }
 
-/*
 static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
 {
   do
   {
-    UInt32 h2, h3, h4;
+    UInt32 h2, h3;
     UInt32 *hash;
     SKIP_HEADER(5)
     HASH5_CALC;
@@ -1019,13 +1095,12 @@ static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
     curMatch = (hash + kFix5HashSize)[hv];
     hash                  [h2] =
     (hash + kFix3HashSize)[h3] =
-    (hash + kFix4HashSize)[h4] =
+    // (hash + kFix4HashSize)[h4] =
     (hash + kFix5HashSize)[hv] = p->pos;
     SKIP_FOOTER
   }
   while (--num != 0);
 }
-*/
 
 static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
 {
@@ -1046,27 +1121,26 @@ static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
   while (--num != 0);
 }
 
-/*
 static void Hc5_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
 {
   do
   {
-    UInt32 h2, h3, h4;
+    UInt32 h2, h3;
     UInt32 *hash;
     SKIP_HEADER(5)
     HASH5_CALC;
     hash = p->hash;
-    curMatch = hash + kFix5HashSize)[hv];
+    curMatch = (hash + kFix5HashSize)[hv];
     hash                  [h2] =
     (hash + kFix3HashSize)[h3] =
-    (hash + kFix4HashSize)[h4] =
+    // (hash + kFix4HashSize)[h4] =
     (hash + kFix5HashSize)[hv] = p->pos;
     p->son[p->cyclicBufferPos] = curMatch;
     MOVE_POS
   }
   while (--num != 0);
 }
-*/
+
 
 void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
 {
@@ -1089,18 +1163,16 @@ void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable)
   vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinder_GetPointerToCurrentPos;
   if (!p->btMode)
   {
-    /* if (p->numHashBytes <= 4) */
+    if (p->numHashBytes <= 4)
     {
       vTable->GetMatches = (Mf_GetMatches_Func)Hc4_MatchFinder_GetMatches;
       vTable->Skip = (Mf_Skip_Func)Hc4_MatchFinder_Skip;
     }
-    /*
     else
     {
       vTable->GetMatches = (Mf_GetMatches_Func)Hc5_MatchFinder_GetMatches;
       vTable->Skip = (Mf_Skip_Func)Hc5_MatchFinder_Skip;
     }
-    */
   }
   else if (p->numHashBytes == 2)
   {
@@ -1112,16 +1184,14 @@ void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable)
     vTable->GetMatches = (Mf_GetMatches_Func)Bt3_MatchFinder_GetMatches;
     vTable->Skip = (Mf_Skip_Func)Bt3_MatchFinder_Skip;
   }
-  else /* if (p->numHashBytes == 4) */
+  else if (p->numHashBytes == 4)
   {
     vTable->GetMatches = (Mf_GetMatches_Func)Bt4_MatchFinder_GetMatches;
     vTable->Skip = (Mf_Skip_Func)Bt4_MatchFinder_Skip;
   }
-  /*
   else
   {
     vTable->GetMatches = (Mf_GetMatches_Func)Bt5_MatchFinder_GetMatches;
     vTable->Skip = (Mf_Skip_Func)Bt5_MatchFinder_Skip;
   }
-  */
 }
diff --git a/deps/LZMA-SDK/C/LzFind.h b/deps/LZMA-SDK/C/LzFind.h
index c77added7..3e2248e7d 100644
--- a/deps/LZMA-SDK/C/LzFind.h
+++ b/deps/LZMA-SDK/C/LzFind.h
@@ -1,5 +1,5 @@
 /* LzFind.h -- Match finder for LZ algorithms
-2017-06-10 : Igor Pavlov : Public domain */
+2021-02-09 : Igor Pavlov : Public domain */
 
 #ifndef __LZ_FIND_H
 #define __LZ_FIND_H
@@ -61,7 +61,7 @@ typedef struct _CMatchFinder
         && (!(p)->directInput || (p)->directInputRem == 0))
       
 int MatchFinder_NeedMove(CMatchFinder *p);
-Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p);
+// Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p);
 void MatchFinder_MoveBlock(CMatchFinder *p);
 void MatchFinder_ReadIfRequired(CMatchFinder *p);
 
diff --git a/deps/LZMA-SDK/C/LzFindMt.c b/deps/LZMA-SDK/C/LzFindMt.c
index df32146f9..cb29a1eac 100644
--- a/deps/LZMA-SDK/C/LzFindMt.c
+++ b/deps/LZMA-SDK/C/LzFindMt.c
@@ -1,12 +1,64 @@
 /* LzFindMt.c -- multithreaded Match finder for LZ algorithms
-2018-12-29 : Igor Pavlov : Public domain */
+2021-04-01 : Igor Pavlov : Public domain */
 
 #include "Precomp.h"
 
-#include "LzHash.h"
+#include "CpuArch.h"
 
+#include "LzHash.h"
 #include "LzFindMt.h"
 
+// #define LOG_ITERS
+
+#ifdef LOG_ITERS
+#include <stdio.h>
+static UInt64 g_NumIters_Tree;
+static UInt64 g_NumIters_Loop;
+#define LOG_ITER(x) x
+#else
+#define LOG_ITER(x)
+#endif
+
+#define kMtHashBlockSize (1 << 17)
+#define kMtHashNumBlocks (1 << 1)
+#define kMtHashNumBlocksMask (kMtHashNumBlocks - 1)
+
+#define kMtBtBlockSize (1 << 16)
+#define kMtBtNumBlocks (1 << 4)
+#define kMtBtNumBlocksMask (kMtBtNumBlocks - 1)
+
+/*
+  HASH functions:
+  We use raw 8/16 bits from a[1] and a[2],
+  xored with crc(a[0]) and crc(a[3]).
+  We check a[0], a[3] only. We don't need to compare a[1] and a[2] in matches.
+  our crc() function provides one-to-one correspondence for low 8-bit values:
+    (crc[0...0xFF] & 0xFF) <-> [0...0xFF]
+*/
+
+#define MT_HASH2_CALC \
+  h2 = (p->crc[cur[0]] ^ cur[1]) & (kHash2Size - 1);
+
+#define MT_HASH3_CALC { \
+  UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+  h2 = temp & (kHash2Size - 1); \
+  h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); }
+
+/*
+#define MT_HASH3_CALC__NO_2 { \
+  UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+  h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); }
+
+#define __MT_HASH4_CALC { \
+  UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+  h2 = temp & (kHash2Size - 1); \
+  temp ^= ((UInt32)cur[2] << 8); \
+  h3 = temp & (kHash3Size - 1); \
+  h4 = (temp ^ (p->crc[cur[3]] << kLzHash_CrcShift_1)) & p->hash4Mask; }
+  // (kHash4Size - 1);
+*/
+
+
 static void MtSync_Construct(CMtSync *p)
 {
   p->wasCreated = False;
@@ -18,8 +70,11 @@ static void MtSync_Construct(CMtSync *p)
   Event_Construct(&p->wasStopped);
   Semaphore_Construct(&p->freeSemaphore);
   Semaphore_Construct(&p->filledSemaphore);
+  p->affinity = 0;
 }
 
+
+MY_NO_INLINE
 static void MtSync_GetNextBlock(CMtSync *p)
 {
   if (p->needStart)
@@ -81,8 +136,7 @@ static void MtSync_Destruct(CMtSync *p)
     p->exit = True;
     if (p->needStart)
       Event_Set(&p->canStart);
-    Thread_Wait(&p->thread);
-    Thread_Close(&p->thread);
+    Thread_Wait_Close(&p->thread);
   }
   if (p->csWasInitialized)
   {
@@ -103,6 +157,7 @@ static void MtSync_Destruct(CMtSync *p)
 
 static SRes MtSync_Create2(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj, UInt32 numBlocks)
 {
+  WRes wres;
   if (p->wasCreated)
     return SZ_OK;
 
@@ -117,8 +172,12 @@ static SRes MtSync_Create2(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj,
   RINOK_THREAD(Semaphore_Create(&p->filledSemaphore, 0, numBlocks));
 
   p->needStart = True;
-  
-  RINOK_THREAD(Thread_Create(&p->thread, startAddress, obj));
+
+  if (p->affinity != 0)
+    wres = Thread_Create_With_Affinity(&p->thread, startAddress, obj, (CAffinityMask)p->affinity);
+  else
+    wres = Thread_Create(&p->thread, startAddress, obj);
+  RINOK_THREAD(wres);
   p->wasCreated = True;
   return SZ_OK;
 }
@@ -131,23 +190,161 @@ static SRes MtSync_Create(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj,
   return res;
 }
 
-void MtSync_Init(CMtSync *p) { p->needStart = True; }
+// static void MtSync_Init(CMtSync *p) { p->needStart = True; }
 
 #define kMtMaxValForNormalize 0xFFFFFFFF
+// #define kMtMaxValForNormalize ((1 << 25) + (1 << 20))
 
-#define DEF_GetHeads2(name, v, action) \
-  static void GetHeads ## name(const Byte *p, UInt32 pos, \
-      UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc) \
-    { action; for (; numHeads != 0; numHeads--) { \
-      const UInt32 value = (v); p++; *heads++ = pos - hash[value]; hash[value] = pos++;  } }
 
+#ifdef MY_CPU_LE_UNALIGN
+  #define GetUi24hi_from32(p) ((UInt32)GetUi32(p) >> 8)
+#else
+  #define GetUi24hi_from32(p) ((p)[1] ^ ((UInt32)(p)[2] << 8) ^ ((UInt32)(p)[3] << 16))
+#endif
+
+#define GetHeads_DECL(name) \
+    static void GetHeads ## name(const Byte *p, UInt32 pos, \
+      UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc)
+
+#define GetHeads_LOOP(v) \
+    for (; numHeads != 0; numHeads--) { \
+      const UInt32 value = (v); \
+      p++; \
+      *heads++ = pos - hash[value]; \
+      hash[value] = pos++; }
+
+#define DEF_GetHeads2(name, v, action) \
+    GetHeads_DECL(name) { action \
+    GetHeads_LOOP(v) }
+ 
 #define DEF_GetHeads(name, v) DEF_GetHeads2(name, v, ;)
 
-DEF_GetHeads2(2,  (p[0] | ((UInt32)p[1] << 8)), UNUSED_VAR(hashMask); UNUSED_VAR(crc); )
-DEF_GetHeads(3,  (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8)) & hashMask)
-DEF_GetHeads(4,  (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8) ^ (crc[p[3]] << 5)) & hashMask)
-DEF_GetHeads(4b, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8) ^ ((UInt32)p[3] << 16)) & hashMask)
-/* DEF_GetHeads(5,  (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8) ^ (crc[p[3]] << 5) ^ (crc[p[4]] << 3)) & hashMask) */
+DEF_GetHeads2(2, GetUi16(p), UNUSED_VAR(hashMask); UNUSED_VAR(crc); )
+DEF_GetHeads(3,  (crc[p[0]] ^ GetUi16(p + 1)) & hashMask)
+DEF_GetHeads2(3b, GetUi16(p) ^ ((UInt32)(p)[2] << 16), UNUSED_VAR(hashMask); UNUSED_VAR(crc); )
+// BT3 is not good for crc collisions for big hashMask values.
+
+/*
+GetHeads_DECL(3b)
+{
+  UNUSED_VAR(hashMask);
+  UNUSED_VAR(crc);
+  {
+  const Byte *pLim = p + numHeads;
+  if (numHeads == 0)
+    return;
+  pLim--;
+  while (p < pLim)
+  {
+    UInt32 v1 = GetUi32(p);
+    UInt32 v0 = v1 & 0xFFFFFF;
+    UInt32 h0, h1;
+    p += 2;
+    v1 >>= 8;
+    h0 = hash[v0]; hash[v0] = pos; heads[0] = pos - h0; pos++;
+    h1 = hash[v1]; hash[v1] = pos; heads[1] = pos - h1; pos++;
+    heads += 2;
+  }
+  if (p == pLim)
+  {
+    UInt32 v0 = GetUi16(p) ^ ((UInt32)(p)[2] << 16);
+    *heads = pos - hash[v0];
+    hash[v0] = pos;
+  }
+  }
+}
+*/
+
+/*
+GetHeads_DECL(4)
+{
+  unsigned sh = 0;
+  UNUSED_VAR(crc)
+  while ((hashMask & 0x80000000) == 0)
+  {
+    hashMask <<= 1;
+    sh++;
+  }
+  GetHeads_LOOP((GetUi32(p) * 0xa54a1) >> sh)
+}
+#define GetHeads4b GetHeads4
+*/
+
+#define USE_GetHeads_LOCAL_CRC
+
+#ifdef USE_GetHeads_LOCAL_CRC
+
+GetHeads_DECL(4)
+{
+  UInt32 crc0[256];
+  UInt32 crc1[256];
+  {
+    unsigned i;
+    for (i = 0; i < 256; i++)
+    {
+      UInt32 v = crc[i];
+      crc0[i] = v & hashMask;
+      crc1[i] = (v << kLzHash_CrcShift_1) & hashMask;
+      // crc1[i] = rotlFixed(v, 8) & hashMask;
+    }
+  }
+  GetHeads_LOOP(crc0[p[0]] ^ crc1[p[3]] ^ (UInt32)GetUi16(p+1))
+}
+
+GetHeads_DECL(4b)
+{
+  UInt32 crc0[256];
+  {
+    unsigned i;
+    for (i = 0; i < 256; i++)
+      crc0[i] = crc[i] & hashMask;
+  }
+  GetHeads_LOOP(crc0[p[0]] ^ GetUi24hi_from32(p))
+}
+
+GetHeads_DECL(5)
+{
+  UInt32 crc0[256];
+  UInt32 crc1[256];
+  UInt32 crc2[256];
+  {
+    unsigned i;
+    for (i = 0; i < 256; i++)
+    {
+      UInt32 v = crc[i];
+      crc0[i] = v & hashMask;
+      crc1[i] = (v << kLzHash_CrcShift_1) & hashMask;
+      crc2[i] = (v << kLzHash_CrcShift_2) & hashMask;
+    }
+  }
+  GetHeads_LOOP(crc0[p[0]] ^ crc1[p[3]] ^ crc2[p[4]] ^ (UInt32)GetUi16(p+1))
+}
+
+GetHeads_DECL(5b)
+{
+  UInt32 crc0[256];
+  UInt32 crc1[256];
+  {
+    unsigned i;
+    for (i = 0; i < 256; i++)
+    {
+      UInt32 v = crc[i];
+      crc0[i] = v & hashMask;
+      crc1[i] = (v << kLzHash_CrcShift_1) & hashMask;
+    }
+  }
+  GetHeads_LOOP(crc0[p[0]] ^ crc1[p[4]] ^ GetUi24hi_from32(p))
+}
+
+#else
+
+DEF_GetHeads(4,  (crc[p[0]] ^ (crc[p[3]] << kLzHash_CrcShift_1) ^ (UInt32)GetUi16(p+1)) & hashMask)
+DEF_GetHeads(4b, (crc[p[0]] ^ GetUi24hi_from32(p)) & hashMask)
+DEF_GetHeads(5,  (crc[p[0]] ^ (crc[p[3]] << kLzHash_CrcShift_1) ^ (crc[p[4]] << kLzHash_CrcShift_2) ^ (UInt32)GetUi16(p + 1)) & hashMask)
+DEF_GetHeads(5b, (crc[p[0]] ^ (crc[p[4]] << kLzHash_CrcShift_1) ^ GetUi24hi_from32(p)) & hashMask)
+
+#endif
+ 
 
 static void HashThreadFunc(CMatchFinderMt *mt)
 {
@@ -244,11 +441,11 @@ static void MatchFinderMt_GetNextBlock_Hash(CMatchFinderMt *p)
 MY_NO_INLINE
 static UInt32 *GetMatchesSpecN(UInt32 lenLimit, UInt32 pos, const Byte *cur, CLzRef *son,
     size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue,
-    UInt32 *distances, UInt32 _maxLen, const UInt32 *hash, const UInt32 *limit, UInt32 size, UInt32 *posRes)
+    UInt32 *d, UInt32 _maxLen, const UInt32 *hash, const UInt32 *limit, UInt32 size, UInt32 *posRes)
 {
   do
   {
-  UInt32 *_distances = ++distances;
+  UInt32 *_distances = ++d;
   UInt32 delta = *hash++;
 
   CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
@@ -258,14 +455,15 @@ static UInt32 *GetMatchesSpecN(UInt32 lenLimit, UInt32 pos, const Byte *cur, CLz
   unsigned maxLen = (unsigned)_maxLen;
 
   /*
-  if (size > 1)
+  #define PREF_STEP 1
+  if (size > PREF_STEP)
   {
-    UInt32 delta = *hash;
+    UInt32 delta = hash[PREF_STEP - 1];
     if (delta < _cyclicBufferSize)
     {
-      UInt32 cyc1 = _cyclicBufferPos + 1;
+      size_t cyc1 = _cyclicBufferPos + PREF_STEP;
       CLzRef *pair = son + ((size_t)(cyc1 - delta + ((delta > cyc1) ? _cyclicBufferSize : 0)) << 1);
-      Byte b = *(cur + 1 - delta);
+      Byte b = *(cur + PREF_STEP - delta);
       _distances[0] = pair[0];
       _distances[1] = b;
     }
@@ -276,8 +474,9 @@ static UInt32 *GetMatchesSpecN(UInt32 lenLimit, UInt32 pos, const Byte *cur, CLz
     *ptr0 = *ptr1 = kEmptyHashValue;
   }
   else
-  for(;;)
+  for (LOG_ITER(g_NumIters_Tree++);;)
   {
+    LOG_ITER(g_NumIters_Loop++);
     {
       CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((_cyclicBufferPos < delta) ? _cyclicBufferSize : 0)) << 1);
       const Byte *pb = cur - delta;
@@ -292,8 +491,8 @@ static UInt32 *GetMatchesSpecN(UInt32 lenLimit, UInt32 pos, const Byte *cur, CLz
         if (maxLen < len)
         {
           maxLen = len;
-          *distances++ = (UInt32)len;
-          *distances++ = delta - 1;
+          *d++ = (UInt32)len;
+          *d++ = delta - 1;
           if (len == lenLimit)
           {
             UInt32 pair1 = pair[1];
@@ -333,39 +532,39 @@ static UInt32 *GetMatchesSpecN(UInt32 lenLimit, UInt32 pos, const Byte *cur, CLz
   _cyclicBufferPos++;
   cur++;
   {
-    UInt32 num = (UInt32)(distances - _distances);
+    UInt32 num = (UInt32)(d - _distances);
     _distances[-1] = num;
   }
   }
-  while (distances < limit && --size != 0);
+  while (d < limit && --size != 0);
   *posRes = pos;
-  return distances;
+  return d;
 }
 
 #endif
 
 
 
-static void BtGetMatches(CMatchFinderMt *p, UInt32 *distances)
+static void BtGetMatches(CMatchFinderMt *p, UInt32 *d)
 {
   UInt32 numProcessed = 0;
   UInt32 curPos = 2;
   UInt32 limit = kMtBtBlockSize - (p->matchMaxLen * 2); //  * 2
   
-  distances[1] = p->hashNumAvail;
+  d[1] = p->hashNumAvail;
   
   while (curPos < limit)
   {
     if (p->hashBufPos == p->hashBufPosLimit)
     {
       MatchFinderMt_GetNextBlock_Hash(p);
-      distances[1] = numProcessed + p->hashNumAvail;
+      d[1] = numProcessed + p->hashNumAvail;
       if (p->hashNumAvail >= p->numHashBytes)
         continue;
-      distances[0] = curPos + p->hashNumAvail;
-      distances += curPos;
+      d[0] = curPos + p->hashNumAvail;
+      d += curPos;
       for (; p->hashNumAvail != 0; p->hashNumAvail--)
-        *distances++ = 0;
+        *d++ = 0;
       return;
     }
     {
@@ -387,7 +586,7 @@ static void BtGetMatches(CMatchFinderMt *p, UInt32 *distances)
       #ifndef MFMT_GM_INLINE
       while (curPos < limit && size-- != 0)
       {
-        UInt32 *startDistances = distances + curPos;
+        UInt32 *startDistances = d + curPos;
         UInt32 num = (UInt32)(GetMatchesSpec1(lenLimit, pos - p->hashBuf[p->hashBufPos++],
             pos, p->buffer, p->son, cyclicBufferPos, p->cyclicBufferSize, p->cutValue,
             startDistances + 1, p->numHashBytes - 1) - startDistances);
@@ -401,9 +600,9 @@ static void BtGetMatches(CMatchFinderMt *p, UInt32 *distances)
       {
         UInt32 posRes;
         curPos = (UInt32)(GetMatchesSpecN(lenLimit, pos, p->buffer, p->son, cyclicBufferPos, p->cyclicBufferSize, p->cutValue,
-            distances + curPos, p->numHashBytes - 1, p->hashBuf + p->hashBufPos,
-            distances + limit,
-            size, &posRes) - distances);
+            d + curPos, p->numHashBytes - 1, p->hashBuf + p->hashBufPos,
+            d + limit,
+            size, &posRes) - d);
         p->hashBufPos += posRes - pos;
         cyclicBufferPos += posRes - pos;
         p->buffer += posRes - pos;
@@ -420,7 +619,7 @@ static void BtGetMatches(CMatchFinderMt *p, UInt32 *distances)
     }
   }
   
-  distances[0] = curPos;
+  d[0] = curPos;
 }
 
 static void BtFillBlock(CMatchFinderMt *p, UInt32 globalBlockIndex)
@@ -448,7 +647,7 @@ static void BtFillBlock(CMatchFinderMt *p, UInt32 globalBlockIndex)
   }
 }
 
-void BtThreadFunc(CMatchFinderMt *mt)
+static void BtThreadFunc(CMatchFinderMt *mt)
 {
   CMtSync *p = &mt->btSync;
   for (;;)
@@ -491,6 +690,14 @@ void MatchFinderMt_Destruct(CMatchFinderMt *p, ISzAllocPtr alloc)
 {
   MtSync_Destruct(&p->hashSync);
   MtSync_Destruct(&p->btSync);
+
+  LOG_ITER(
+  printf("\nTree %9d * %7d iter = %9d sum \n",
+      (UInt32)(g_NumIters_Tree / 1000),
+      (UInt32)(((UInt64)g_NumIters_Loop * 1000) / (g_NumIters_Tree + 1)),
+      (UInt32)(g_NumIters_Loop / 1000)
+      ));
+
   MatchFinderMt_FreeMem(p, alloc);
 }
 
@@ -553,6 +760,7 @@ static void MatchFinderMt_Init(CMatchFinderMt *p)
 
   p->hash = mf->hash;
   p->fixedHashSize = mf->fixedHashSize;
+  // p->hash4Mask = mf->hash4Mask;
   p->crc = mf->crc;
 
   p->son = mf->son;
@@ -572,22 +780,24 @@ void MatchFinderMt_ReleaseStream(CMatchFinderMt *p)
   /* p->MatchFinder->ReleaseStream(); */
 }
 
-static void MatchFinderMt_Normalize(CMatchFinderMt *p)
-{
-  MatchFinder_Normalize3(p->lzPos - p->historySize - 1, p->hash, p->fixedHashSize);
-  p->lzPos = p->historySize + 1;
-}
 
+MY_NO_INLINE
 static void MatchFinderMt_GetNextBlock_Bt(CMatchFinderMt *p)
 {
-  UInt32 blockIndex;
+  UInt32 blockIndex, k;
+  
   MtSync_GetNextBlock(&p->btSync);
+  
   blockIndex = ((p->btSync.numProcessedBlocks - 1) & kMtBtNumBlocksMask);
-  p->btBufPosLimit = p->btBufPos = blockIndex * kMtBtBlockSize;
-  p->btBufPosLimit += p->btBuf[p->btBufPos++];
-  p->btNumAvailBytes = p->btBuf[p->btBufPos++];
+  k = blockIndex * kMtBtBlockSize;
+  p->btBufPosLimit = k + p->btBuf[k];
+  p->btNumAvailBytes = p->btBuf[k + 1];
+  p->btBufPos = k + 2;
   if (p->lzPos >= kMtMaxValForNormalize - kMtBtBlockSize)
-    MatchFinderMt_Normalize(p);
+  {
+    MatchFinder_Normalize3(p->lzPos - p->historySize - 1, p->hash, p->fixedHashSize);
+    p->lzPos = p->historySize + 1;
+  }
 }
 
 static const Byte * MatchFinderMt_GetPointerToCurrentPos(CMatchFinderMt *p)
@@ -603,170 +813,289 @@ static UInt32 MatchFinderMt_GetNumAvailableBytes(CMatchFinderMt *p)
   return p->btNumAvailBytes;
 }
 
-static UInt32 * MixMatches2(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *distances)
+static UInt32 * MixMatches2(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
 {
-  UInt32 h2, curMatch2;
+  UInt32 h2, c2;
   UInt32 *hash = p->hash;
   const Byte *cur = p->pointerToCurPos;
-  UInt32 lzPos = p->lzPos;
+  UInt32 m = p->lzPos;
   MT_HASH2_CALC
       
-  curMatch2 = hash[h2];
-  hash[h2] = lzPos;
+  c2 = hash[h2];
+  hash[h2] = m;
 
-  if (curMatch2 >= matchMinPos)
-    if (cur[(ptrdiff_t)curMatch2 - lzPos] == cur[0])
+  if (c2 >= matchMinPos)
+    if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])
     {
-      *distances++ = 2;
-      *distances++ = lzPos - curMatch2 - 1;
+      *d++ = 2;
+      *d++ = m - c2 - 1;
     }
   
-  return distances;
+  return d;
 }
 
-static UInt32 * MixMatches3(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *distances)
+static UInt32 * MixMatches3(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
 {
-  UInt32 h2, h3, curMatch2, curMatch3;
+  UInt32 h2, h3, c2, c3;
   UInt32 *hash = p->hash;
   const Byte *cur = p->pointerToCurPos;
-  UInt32 lzPos = p->lzPos;
+  UInt32 m = p->lzPos;
   MT_HASH3_CALC
 
-  curMatch2 = hash[                h2];
-  curMatch3 = (hash + kFix3HashSize)[h3];
+  c2 = hash[h2];
+  c3 = (hash + kFix3HashSize)[h3];
   
-  hash[                h2] = lzPos;
-  (hash + kFix3HashSize)[h3] = lzPos;
+  hash[h2] = m;
+  (hash + kFix3HashSize)[h3] = m;
 
-  if (curMatch2 >= matchMinPos && cur[(ptrdiff_t)curMatch2 - lzPos] == cur[0])
+  if (c2 >= matchMinPos && cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])
   {
-    distances[1] = lzPos - curMatch2 - 1;
-    if (cur[(ptrdiff_t)curMatch2 - lzPos + 2] == cur[2])
+    d[1] = m - c2 - 1;
+    if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2])
     {
-      distances[0] = 3;
-      return distances + 2;
+      d[0] = 3;
+      return d + 2;
     }
-    distances[0] = 2;
-    distances += 2;
+    d[0] = 2;
+    d += 2;
   }
   
-  if (curMatch3 >= matchMinPos && cur[(ptrdiff_t)curMatch3 - lzPos] == cur[0])
+  if (c3 >= matchMinPos && cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0])
   {
-    *distances++ = 3;
-    *distances++ = lzPos - curMatch3 - 1;
+    *d++ = 3;
+    *d++ = m - c3 - 1;
   }
   
-  return distances;
+  return d;
 }
 
+
+#define INCREASE_LZ_POS p->lzPos++; p->pointerToCurPos++;
+
 /*
-static UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *distances)
+static
+UInt32 MatchFinderMt_GetMatches_Bt4(CMatchFinderMt *p, UInt32 *d)
 {
-  UInt32 h2, h3, h4, curMatch2, curMatch3, curMatch4;
+  UInt32 pos = p->btBufPos;
+  const UInt32 *bt = p->btBuf + pos;
+  UInt32 len = *bt++;
+  UInt32 matchMinPos;
+  const UInt32 *d_base = d;
+  UInt32 avail = p->btNumAvailBytes - 1;
+  p->btBufPos = pos + 1 + len;
+
+  {
+    UInt32 temp1 = p->historySize;
+    p->btNumAvailBytes = avail;
+
+    #define BT_HASH_BYTES_MAX 5
+
+    if (len != 0)
+      temp1 = bt[1];
+    else if (avail < (BT_HASH_BYTES_MAX - 2))
+    {
+      INCREASE_LZ_POS
+      return 0;
+    }
+    matchMinPos = p->lzPos - temp1;
+  }
+
+  for (;;)
+  {
+  
+  UInt32 h2, h3, c2, c3;
   UInt32 *hash = p->hash;
   const Byte *cur = p->pointerToCurPos;
-  UInt32 lzPos = p->lzPos;
-  MT_HASH4_CALC
-      
-  curMatch2 = hash[                h2];
-  curMatch3 = (hash + kFix3HashSize)[h3];
-  curMatch4 = (hash + kFix4HashSize)[h4];
+  UInt32 m = p->lzPos;
+  MT_HASH3_CALC
+
+  c2 = hash[h2];
+  c3 = (hash + kFix3HashSize)[h3];
+ 
+  hash[h2] = m;
+  (hash + kFix3HashSize)[h3] = m;
+
+  if (c2 >= matchMinPos && cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])
+  {
+    d[1] = m - c2 - 1;
+    if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2])
+    {
+      d[0] = 3;
+      d += 2;
+      break;
+    }
+    // else
+    {
+      d[0] = 2;
+      d += 2;
+    }
+  }
+  if (c3 >= matchMinPos && cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0])
+  {
+    *d++ = 3;
+    *d++ = m - c3 - 1;
+  }
+  break;
+  }
+
+  if (len != 0)
+  {
+    do
+    {
+      UInt32 v0 = bt[0];
+      UInt32 v1 = bt[1];
+      bt += 2;
+      d[0] = v0;
+      d[1] = v1;
+      d += 2;
+    }
+    while ((len -= 2) != 0);
+  }
+  INCREASE_LZ_POS
+  return (UInt32)(d - d_base);
+}
+*/
+
+
+static UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
+{
+  UInt32 h2, h3, /* h4, */ c2, c3 /* , c4 */;
+  UInt32 *hash = p->hash;
+  const Byte *cur = p->pointerToCurPos;
+  UInt32 m = p->lzPos;
+  MT_HASH3_CALC
+  // MT_HASH4_CALC
+  c2 = hash[h2];
+  c3 = (hash + kFix3HashSize)[h3];
+  // c4 = (hash + kFix4HashSize)[h4];
   
-  hash[                h2] = lzPos;
-  (hash + kFix3HashSize)[h3] = lzPos;
-  (hash + kFix4HashSize)[h4] = lzPos;
+  hash[h2] = m;
+  (hash + kFix3HashSize)[h3] = m;
+  // (hash + kFix4HashSize)[h4] = m;
+
+  #define _USE_H2
 
-  if (curMatch2 >= matchMinPos && cur[(ptrdiff_t)curMatch2 - lzPos] == cur[0])
+  #ifdef _USE_H2
+  if (c2 >= matchMinPos && cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])
   {
-    distances[1] = lzPos - curMatch2 - 1;
-    if (cur[(ptrdiff_t)curMatch2 - lzPos + 2] == cur[2])
+    d[1] = m - c2 - 1;
+    if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2])
     {
-      distances[0] = (cur[(ptrdiff_t)curMatch2 - lzPos + 3] == cur[3]) ? 4 : 3;
-      return distances + 2;
+      // d[0] = (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 3] == cur[3]) ? 4 : 3;
+      // return d + 2;
+
+      if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 3] == cur[3])
+      {
+        d[0] = 4;
+        return d + 2;
+      }
+      d[0] = 3;
+      d += 2;
+    
+      #ifdef _USE_H4
+      if (c4 >= matchMinPos)
+        if (
+          cur[(ptrdiff_t)c4 - (ptrdiff_t)m]     == cur[0] &&
+          cur[(ptrdiff_t)c4 - (ptrdiff_t)m + 3] == cur[3]
+          )
+      {
+        *d++ = 4;
+        *d++ = m - c4 - 1;
+      }
+      #endif
+      return d;
     }
-    distances[0] = 2;
-    distances += 2;
+    d[0] = 2;
+    d += 2;
   }
+  #endif
   
-  if (curMatch3 >= matchMinPos && cur[(ptrdiff_t)curMatch3 - lzPos] == cur[0])
+  if (c3 >= matchMinPos && cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0])
   {
-    distances[1] = lzPos - curMatch3 - 1;
-    if (cur[(ptrdiff_t)curMatch3 - lzPos + 3] == cur[3])
+    d[1] = m - c3 - 1;
+    if (cur[(ptrdiff_t)c3 - (ptrdiff_t)m + 3] == cur[3])
     {
-      distances[0] = 4;
-      return distances + 2;
+      d[0] = 4;
+      return d + 2;
     }
-    distances[0] = 3;
-    distances += 2;
+    d[0] = 3;
+    d += 2;
   }
 
-  if (curMatch4 >= matchMinPos)
+  #ifdef _USE_H4
+  if (c4 >= matchMinPos)
     if (
-      cur[(ptrdiff_t)curMatch4 - lzPos] == cur[0] &&
-      cur[(ptrdiff_t)curMatch4 - lzPos + 3] == cur[3]
+      cur[(ptrdiff_t)c4 - (ptrdiff_t)m]     == cur[0] &&
+      cur[(ptrdiff_t)c4 - (ptrdiff_t)m + 3] == cur[3]
       )
     {
-      *distances++ = 4;
-      *distances++ = lzPos - curMatch4 - 1;
+      *d++ = 4;
+      *d++ = m - c4 - 1;
     }
+  #endif
   
-  return distances;
+  return d;
 }
-*/
 
-#define INCREASE_LZ_POS p->lzPos++; p->pointerToCurPos++;
 
-static UInt32 MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *distances)
+static UInt32 MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *d)
 {
-  const UInt32 *btBuf = p->btBuf + p->btBufPos;
-  UInt32 len = *btBuf++;
+  const UInt32 *bt = p->btBuf + p->btBufPos;
+  UInt32 len = *bt++;
   p->btBufPos += 1 + len;
   p->btNumAvailBytes--;
   {
     UInt32 i;
     for (i = 0; i < len; i += 2)
     {
-      UInt32 v0 = btBuf[0];
-      UInt32 v1 = btBuf[1];
-      btBuf += 2;
-      distances[0] = v0;
-      distances[1] = v1;
-      distances += 2;
+      UInt32 v0 = bt[0];
+      UInt32 v1 = bt[1];
+      bt += 2;
+      d[0] = v0;
+      d[1] = v1;
+      d += 2;
     }
   }
   INCREASE_LZ_POS
   return len;
 }
 
-static UInt32 MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *distances)
-{
-  const UInt32 *btBuf = p->btBuf + p->btBufPos;
-  UInt32 len = *btBuf++;
-  p->btBufPos += 1 + len;
 
+
+static UInt32 MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *d)
+{
+  UInt32 pos = p->btBufPos;
+  const UInt32 *bt = p->btBuf + pos;
+  UInt32 len = *bt++;
+  UInt32 avail = p->btNumAvailBytes - 1;
+  p->btNumAvailBytes = avail;
+  p->btBufPos = pos + 1 + len;
   if (len == 0)
   {
-    /* change for bt5 ! */
-    if (p->btNumAvailBytes-- >= 4)
-      len = (UInt32)(p->MixMatchesFunc(p, p->lzPos - p->historySize, distances) - (distances));
+    #define BT_HASH_BYTES_MAX 5
+    if (avail >= (BT_HASH_BYTES_MAX - 1) - 1)
+      len = (UInt32)(p->MixMatchesFunc(p, p->lzPos - p->historySize, d) - d);
   }
   else
   {
-    /* Condition: there are matches in btBuf with length < p->numHashBytes */
-    UInt32 *distances2;
-    p->btNumAvailBytes--;
-    distances2 = p->MixMatchesFunc(p, p->lzPos - btBuf[1], distances);
+    /*
+      first match pair from BinTree: (match_len, match_dist),
+      (match_len >= numHashBytes).
+      MixMatchesFunc() inserts only hash matches that are nearer than (match_dist)
+    */
+    UInt32 *d2;
+    d2 = p->MixMatchesFunc(p, p->lzPos - bt[1], d);
     do
     {
-      UInt32 v0 = btBuf[0];
-      UInt32 v1 = btBuf[1];
-      btBuf += 2;
-      distances2[0] = v0;
-      distances2[1] = v1;
-      distances2 += 2;
+      UInt32 v0 = bt[0];
+      UInt32 v1 = bt[1];
+      bt += 2;
+      d2[0] = v0;
+      d2[1] = v1;
+      d2 += 2;
     }
     while ((len -= 2) != 0);
-    len = (UInt32)(distances2 - (distances));
+    len = (UInt32)(d2 - d);
   }
   INCREASE_LZ_POS
   return len;
@@ -802,19 +1131,18 @@ static void MatchFinderMt3_Skip(CMatchFinderMt *p, UInt32 num)
   SKIP_FOOTER_MT
 }
 
-/*
 static void MatchFinderMt4_Skip(CMatchFinderMt *p, UInt32 num)
 {
   SKIP_HEADER_MT(4)
-      UInt32 h2, h3, h4;
-      MT_HASH4_CALC
-      (hash + kFix4HashSize)[h4] =
+      UInt32 h2, h3 /*, h4 */;
+      MT_HASH3_CALC
+      // MT_HASH4_CALC
+      // (hash + kFix4HashSize)[h4] =
       (hash + kFix3HashSize)[h3] =
       hash[                h2] =
         p->lzPos;
   SKIP_FOOTER_MT
 }
-*/
 
 void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder *vTable)
 {
@@ -832,22 +1160,23 @@ void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder *vTable)
       vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt2_GetMatches;
       break;
     case 3:
-      p->GetHeadsFunc = GetHeads3;
+      p->GetHeadsFunc = p->MatchFinder->bigHash ? GetHeads3b : GetHeads3;
       p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches2;
       vTable->Skip = (Mf_Skip_Func)MatchFinderMt2_Skip;
       break;
-    default:
-    /* case 4: */
+    case 4:
       p->GetHeadsFunc = p->MatchFinder->bigHash ? GetHeads4b : GetHeads4;
+
+      // it's fast inline version of GetMatches()
+      // vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches_Bt4;
+
       p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches3;
       vTable->Skip = (Mf_Skip_Func)MatchFinderMt3_Skip;
       break;
-    /*
     default:
-      p->GetHeadsFunc = GetHeads5;
+      p->GetHeadsFunc = p->MatchFinder->bigHash ? GetHeads5b : GetHeads5;
       p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches4;
       vTable->Skip = (Mf_Skip_Func)MatchFinderMt4_Skip;
       break;
-    */
   }
 }
diff --git a/deps/LZMA-SDK/C/LzFindMt.h b/deps/LZMA-SDK/C/LzFindMt.h
index fdd17008c..888c787cb 100644
--- a/deps/LZMA-SDK/C/LzFindMt.h
+++ b/deps/LZMA-SDK/C/LzFindMt.h
@@ -1,5 +1,5 @@
 /* LzFindMt.h -- multithreaded Match finder for LZ algorithms
-2018-07-04 : Igor Pavlov : Public domain */
+2019-11-05 : Igor Pavlov : Public domain */
 
 #ifndef __LZ_FIND_MT_H
 #define __LZ_FIND_MT_H
@@ -9,14 +9,6 @@
 
 EXTERN_C_BEGIN
 
-#define kMtHashBlockSize (1 << 13)
-#define kMtHashNumBlocks (1 << 3)
-#define kMtHashNumBlocksMask (kMtHashNumBlocks - 1)
-
-#define kMtBtBlockSize (1 << 14)
-#define kMtBtNumBlocks (1 << 6)
-#define kMtBtNumBlocksMask (kMtBtNumBlocks - 1)
-
 typedef struct _CMtSync
 {
   BoolInt wasCreated;
@@ -34,6 +26,7 @@ typedef struct _CMtSync
   BoolInt csWasEntered;
   CCriticalSection cs;
   UInt32 numProcessedBlocks;
+  UInt64 affinity;
 } CMtSync;
 
 typedef UInt32 * (*Mf_Mix_Matches)(void *p, UInt32 matchMinPos, UInt32 *distances);
@@ -56,11 +49,12 @@ typedef struct _CMatchFinderMt
 
   UInt32 *hash;
   UInt32 fixedHashSize;
+  // UInt32 hash4Mask;
   UInt32 historySize;
   const UInt32 *crc;
 
   Mf_Mix_Matches MixMatchesFunc;
-  
+
   /* LZ + BT */
   CMtSync btSync;
   Byte btDummy[kMtCacheLineDummy];
diff --git a/deps/LZMA-SDK/C/LzHash.h b/deps/LZMA-SDK/C/LzHash.h
index 219144407..a682f83be 100644
--- a/deps/LZMA-SDK/C/LzHash.h
+++ b/deps/LZMA-SDK/C/LzHash.h
@@ -1,57 +1,34 @@
 /* LzHash.h -- HASH functions for LZ algorithms
-2015-04-12 : Igor Pavlov : Public domain */
+2019-10-30 : Igor Pavlov : Public domain */
 
 #ifndef __LZ_HASH_H
 #define __LZ_HASH_H
 
+/*
+  (kHash2Size >= (1 <<  8)) : Required
+  (kHash3Size >= (1 << 16)) : Required
+*/
+
 #define kHash2Size (1 << 10)
 #define kHash3Size (1 << 16)
-#define kHash4Size (1 << 20)
+// #define kHash4Size (1 << 20)
 
 #define kFix3HashSize (kHash2Size)
 #define kFix4HashSize (kHash2Size + kHash3Size)
-#define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size)
-
-#define HASH2_CALC hv = cur[0] | ((UInt32)cur[1] << 8);
-
-#define HASH3_CALC { \
-  UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
-  h2 = temp & (kHash2Size - 1); \
-  hv = (temp ^ ((UInt32)cur[2] << 8)) & p->hashMask; }
-
-#define HASH4_CALC { \
-  UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
-  h2 = temp & (kHash2Size - 1); \
-  temp ^= ((UInt32)cur[2] << 8); \
-  h3 = temp & (kHash3Size - 1); \
-  hv = (temp ^ (p->crc[cur[3]] << 5)) & p->hashMask; }
-
-#define HASH5_CALC { \
-  UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
-  h2 = temp & (kHash2Size - 1); \
-  temp ^= ((UInt32)cur[2] << 8); \
-  h3 = temp & (kHash3Size - 1); \
-  temp ^= (p->crc[cur[3]] << 5); \
-  h4 = temp & (kHash4Size - 1); \
-  hv = (temp ^ (p->crc[cur[4]] << 3)) & p->hashMask; }
-
-/* #define HASH_ZIP_CALC hv = ((cur[0] | ((UInt32)cur[1] << 8)) ^ p->crc[cur[2]]) & 0xFFFF; */
-#define HASH_ZIP_CALC hv = ((cur[2] | ((UInt32)cur[0] << 8)) ^ p->crc[cur[1]]) & 0xFFFF;
-
-
-#define MT_HASH2_CALC \
-  h2 = (p->crc[cur[0]] ^ cur[1]) & (kHash2Size - 1);
-
-#define MT_HASH3_CALC { \
-  UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
-  h2 = temp & (kHash2Size - 1); \
-  h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); }
-
-#define MT_HASH4_CALC { \
-  UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
-  h2 = temp & (kHash2Size - 1); \
-  temp ^= ((UInt32)cur[2] << 8); \
-  h3 = temp & (kHash3Size - 1); \
-  h4 = (temp ^ (p->crc[cur[3]] << 5)) & (kHash4Size - 1); }
+// #define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size)
+
+/*
+  We use up to 3 crc values for hash:
+    crc0
+    crc1 << Shift_1
+    crc2 << Shift_2
+  (Shift_1 = 5) and (Shift_2 = 10) is good tradeoff.
+  Small values for Shift are not good for collision rate.
+  Big value for Shift_2 increases the minimum size
+  of hash table, that will be slow for small files.
+*/
+
+#define kLzHash_CrcShift_1 5
+#define kLzHash_CrcShift_2 10
 
 #endif
diff --git a/deps/LZMA-SDK/C/Lzma2Dec.c b/deps/LZMA-SDK/C/Lzma2Dec.c
index 2e631051b..f9f98095d 100644
--- a/deps/LZMA-SDK/C/Lzma2Dec.c
+++ b/deps/LZMA-SDK/C/Lzma2Dec.c
@@ -1,5 +1,5 @@
 /* Lzma2Dec.c -- LZMA2 Decoder
-2019-02-02 : Igor Pavlov : Public domain */
+2021-02-09 : Igor Pavlov : Public domain */
 
 /* #define SHOW_DEBUG_INFO */
 
@@ -93,7 +93,8 @@ void Lzma2Dec_Init(CLzma2Dec *p)
   LzmaDec_Init(&p->decoder);
 }
 
-static ELzma2State Lzma2Dec_UpdateState(CLzma2Dec *p, Byte b)
+// ELzma2State
+static unsigned Lzma2Dec_UpdateState(CLzma2Dec *p, Byte b)
 {
   switch (p->state)
   {
diff --git a/deps/LZMA-SDK/C/Lzma2DecMt.c b/deps/LZMA-SDK/C/Lzma2DecMt.c
index 87d5567ad..252b5be49 100644
--- a/deps/LZMA-SDK/C/Lzma2DecMt.c
+++ b/deps/LZMA-SDK/C/Lzma2DecMt.c
@@ -1,25 +1,25 @@
 /* Lzma2DecMt.c -- LZMA2 Decoder Multi-thread
-2019-02-02 : Igor Pavlov : Public domain */
+2021-04-01 : Igor Pavlov : Public domain */
 
 #include "Precomp.h"
 
 // #define SHOW_DEBUG_INFO
 
+// #define _7ZIP_ST
+
 #ifdef SHOW_DEBUG_INFO
 #include <stdio.h>
 #endif
 
+#ifndef _7ZIP_ST
 #ifdef SHOW_DEBUG_INFO
 #define PRF(x) x
 #else
 #define PRF(x)
 #endif
-
 #define PRF_STR(s) PRF(printf("\n" s "\n"))
-#define PRF_STR_INT(s, d) PRF(printf("\n" s " %d\n", (unsigned)d))
 #define PRF_STR_INT_2(s, d1, d2) PRF(printf("\n" s " %d %d\n", (unsigned)d1, (unsigned)d2))
-
-// #define _7ZIP_ST
+#endif
 
 #include "Alloc.h"
 
@@ -28,10 +28,10 @@
 
 #ifndef _7ZIP_ST
 #include "MtDec.h"
-#endif
-
 
 #define LZMA2DECMT_OUT_BLOCK_MAX_DEFAULT (1 << 28)
+#endif
+
 
 void Lzma2DecMtProps_Init(CLzma2DecMtProps *p)
 {
@@ -255,7 +255,7 @@ static void Lzma2DecMt_MtCallback_Parse(void *obj, unsigned coderIndex, CMtDecCa
         const unsigned kNumAlignBits = 12;
         const unsigned kNumCacheLineBits = 7; /* <= kNumAlignBits */
         t->alloc.numAlignBits = kNumAlignBits;
-        t->alloc.offset = ((UInt32)coderIndex * ((1 << 11) + (1 << 8) + (1 << 6))) & ((1 << kNumAlignBits) - (1 << kNumCacheLineBits));
+        t->alloc.offset = ((UInt32)coderIndex * (((unsigned)1 << 11) + (1 << 8) + (1 << 6))) & (((unsigned)1 << kNumAlignBits) - ((unsigned)1 << kNumCacheLineBits));
         t->alloc.baseAlloc = me->alignOffsetAlloc.baseAlloc;
       }
     }
@@ -527,7 +527,7 @@ static SRes Lzma2DecMt_MtCallback_Code(void *pp, unsigned coderIndex,
 
 static SRes Lzma2DecMt_MtCallback_Write(void *pp, unsigned coderIndex,
     BoolInt needWriteToStream,
-    const Byte *src, size_t srcSize,
+    const Byte *src, size_t srcSize, BoolInt isCross,
     BoolInt *needContinue, BoolInt *canRecode)
 {
   CLzma2DecMt *me = (CLzma2DecMt *)pp;
@@ -536,12 +536,14 @@ static SRes Lzma2DecMt_MtCallback_Write(void *pp, unsigned coderIndex,
   const Byte *data = t->outBuf;
   BoolInt needContinue2 = True;
 
+  UNUSED_VAR(src)
+  UNUSED_VAR(srcSize)
+  UNUSED_VAR(isCross)
+
   PRF_STR_INT_2("Write", coderIndex, srcSize);
 
   *needContinue = False;
   *canRecode = True;
-  UNUSED_VAR(src)
-  UNUSED_VAR(srcSize)
 
   if (
       // t->parseStatus == LZMA_STATUS_FINISHED_WITH_MARK
@@ -696,7 +698,7 @@ static SRes Lzma2Dec_Decode_ST(CLzma2DecMt *p
         inPos = 0;
         inLim = p->inBufSize;
         inData = p->inBuf;
-        p->readRes = ISeqInStream_Read(p->inStream, (void *)inData, &inLim);
+        p->readRes = ISeqInStream_Read(p->inStream, (void *)(p->inBuf), &inLim);
         // p->readProcessed += inLim;
         // inLim -= 5; p->readWasFinished = True; // for test
         if (inLim == 0 || p->readRes != SZ_OK)
@@ -838,6 +840,7 @@ SRes Lzma2DecMt_Decode(CLzma2DecMtHandle pp,
   p->inProcessed = 0;
 
   p->readWasFinished = False;
+  p->readRes = SZ_OK;
 
   *isMT = False;
 
@@ -856,7 +859,7 @@ SRes Lzma2DecMt_Decode(CLzma2DecMtHandle pp,
 
   if (p->props.numThreads > 1)
   {
-    IMtDecCallback vt;
+    IMtDecCallback2 vt;
 
     Lzma2DecMt_FreeSt(p);
 
@@ -955,7 +958,12 @@ SRes Lzma2DecMt_Decode(CLzma2DecMtHandle pp,
     *inProcessed = p->inProcessed;
 
     // res = SZ_OK; // for test
-    if (res == SZ_OK && p->readRes != SZ_OK)
+    if (res == SZ_ERROR_INPUT_EOF)
+    {
+      if (p->readRes != SZ_OK)
+        res = p->readRes;
+    }
+    else if (res == SZ_OK && p->readRes != SZ_OK)
       res = p->readRes;
     
     /*
diff --git a/deps/LZMA-SDK/C/Lzma2Enc.c b/deps/LZMA-SDK/C/Lzma2Enc.c
index d54147752..c8b114cb4 100644
--- a/deps/LZMA-SDK/C/Lzma2Enc.c
+++ b/deps/LZMA-SDK/C/Lzma2Enc.c
@@ -1,5 +1,5 @@
 /* Lzma2Enc.c -- LZMA2 Encoder
-2018-07-04 : Igor Pavlov : Public domain */
+2021-02-09 : Igor Pavlov : Public domain */
 
 #include "Precomp.h"
 
@@ -330,7 +330,7 @@ void Lzma2EncProps_Normalize(CLzma2EncProps *p)
         numBlocks++;
       if (numBlocks < (unsigned)t2)
       {
-        t2r = (unsigned)numBlocks;
+        t2r = (int)numBlocks;
         if (t2r == 0)
           t2r = 1;
         t3 = t1 * t2r;
@@ -632,15 +632,15 @@ static SRes Lzma2Enc_EncodeMt1(
       {
         if (outBuf)
         {
-          size_t destPos = *outBufSize;
+          const size_t destPos = *outBufSize;
           if (destPos >= outLim)
             return SZ_ERROR_OUTPUT_EOF;
-          outBuf[destPos] = 0;
+          outBuf[destPos] = LZMA2_CONTROL_EOF; // 0
           *outBufSize = destPos + 1;
         }
         else
         {
-          Byte b = 0;
+          const Byte b = LZMA2_CONTROL_EOF; // 0;
           if (ISeqOutStream_Write(outStream, &b, 1) != 1)
             return SZ_ERROR_WRITE;
         }
@@ -780,13 +780,13 @@ SRes Lzma2Enc_Encode2(CLzma2EncHandle pp,
       p->outBufSize = destBlockSize;
     }
 
-    p->mtCoder.numThreadsMax = p->props.numBlockThreads_Max;
+    p->mtCoder.numThreadsMax = (unsigned)p->props.numBlockThreads_Max;
     p->mtCoder.expectedDataSize = p->expectedDataSize;
     
     {
       SRes res = MtCoder_Code(&p->mtCoder);
       if (!outStream)
-        *outBufSize = p->outBuf - outBuf;
+        *outBufSize = (size_t)(p->outBuf - outBuf);
       return res;
     }
   }
diff --git a/deps/LZMA-SDK/C/Lzma86Enc.c b/deps/LZMA-SDK/C/Lzma86Enc.c
index 8d35e6dc5..99397bc5e 100644
--- a/deps/LZMA-SDK/C/Lzma86Enc.c
+++ b/deps/LZMA-SDK/C/Lzma86Enc.c
@@ -11,8 +11,6 @@
 #include "Bra.h"
 #include "LzmaEnc.h"
 
-#define SZE_OUT_OVERFLOW SZE_DATA_ERROR
-
 int Lzma86_Encode(Byte *dest, size_t *destLen, const Byte *src, size_t srcLen,
     int level, UInt32 dictSize, int filterMode)
 {
diff --git a/deps/LZMA-SDK/C/LzmaDec.c b/deps/LZMA-SDK/C/LzmaDec.c
index 4d1576419..80b70a9ee 100644
--- a/deps/LZMA-SDK/C/LzmaDec.c
+++ b/deps/LZMA-SDK/C/LzmaDec.c
@@ -1,5 +1,5 @@
 /* LzmaDec.c -- LZMA Decoder
-2018-07-04 : Igor Pavlov : Public domain */
+2021-04-01 : Igor Pavlov : Public domain */
 
 #include "Precomp.h"
 
@@ -13,10 +13,12 @@
 
 #define kNumBitModelTotalBits 11
 #define kBitModelTotal (1 << kNumBitModelTotalBits)
-#define kNumMoveBits 5
 
 #define RC_INIT_SIZE 5
 
+#ifndef _LZMA_DEC_OPT
+
+#define kNumMoveBits 5
 #define NORMALIZE if (range < kTopValue) { range <<= 8; code = (code << 8) | (*buf++); }
 
 #define IF_BIT_0(p) ttt = *(p); NORMALIZE; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound)
@@ -62,9 +64,10 @@
   probLit = prob + (offs + bit + symbol); \
   GET_BIT2(probLit, symbol, offs ^= bit; , ;)
 
+#endif // _LZMA_DEC_OPT
 
 
-#define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_ERROR; range <<= 8; code = (code << 8) | (*buf++); }
+#define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_INPUT_EOF; range <<= 8; code = (code << 8) | (*buf++); }
 
 #define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound)
 #define UPDATE_0_CHECK range = bound;
@@ -114,6 +117,9 @@
 #define kMatchMinLen 2
 #define kMatchSpecLenStart (kMatchMinLen + kLenNumLowSymbols * 2 + kLenNumHighSymbols)
 
+#define kMatchSpecLen_Error_Data (1 << 9)
+#define kMatchSpecLen_Error_Fail (kMatchSpecLen_Error_Data - 1)
+
 /* External ASM code needs same CLzmaProb array layout. So don't change it. */
 
 /* (probs_1664) is faster and better for code size at some platforms */
@@ -166,10 +172,12 @@
 
 /*
 p->remainLen : shows status of LZMA decoder:
-    < kMatchSpecLenStart : normal remain
-    = kMatchSpecLenStart : finished
-    = kMatchSpecLenStart + 1 : need init range coder
-    = kMatchSpecLenStart + 2 : need init range coder and state
+    < kMatchSpecLenStart  : the number of bytes to be copied with (p->rep0) offset
+    = kMatchSpecLenStart  : the LZMA stream was finished with end mark
+    = kMatchSpecLenStart + 1  : need init range coder
+    = kMatchSpecLenStart + 2  : need init range coder and state
+    = kMatchSpecLen_Error_Fail                : Internal Code Failure
+    = kMatchSpecLen_Error_Data + [0 ... 273]  : LZMA Data Error
 */
 
 /* ---------- LZMA_DECODE_REAL ---------- */
@@ -188,23 +196,31 @@ In:
   {
     LzmaDec_TryDummy() was called before to exclude LITERAL and MATCH-REP cases.
     So first symbol can be only MATCH-NON-REP. And if that MATCH-NON-REP symbol
-    is not END_OF_PAYALOAD_MARKER, then function returns error code.
+    is not END_OF_PAYLOAD_MARKER, then the function doesn't write any byte to dictionary,
+    the function returns SZ_OK, and the caller can use (p->remainLen) and (p->reps[0]) later.
   }
 
 Processing:
-  first LZMA symbol will be decoded in any case
-  All checks for limits are at the end of main loop,
-  It will decode new LZMA-symbols while (p->buf < bufLimit && dicPos < limit),
+  The first LZMA symbol will be decoded in any case.
+  All main checks for limits are at the end of main loop,
+  It decodes additional LZMA-symbols while (p->buf < bufLimit && dicPos < limit),
   RangeCoder is still without last normalization when (p->buf < bufLimit) is being checked.
+  But if (p->buf < bufLimit), the caller provided at least (LZMA_REQUIRED_INPUT_MAX + 1) bytes for
+  next iteration  before limit (bufLimit + LZMA_REQUIRED_INPUT_MAX),
+  that is enough for worst case LZMA symbol with one additional RangeCoder normalization for one bit.
+  So that function never reads bufLimit [LZMA_REQUIRED_INPUT_MAX] byte.
 
 Out:
   RangeCoder is normalized
   Result:
     SZ_OK - OK
-    SZ_ERROR_DATA - Error
-  p->remainLen:
-    < kMatchSpecLenStart : normal remain
-    = kMatchSpecLenStart : finished
+      p->remainLen:
+        < kMatchSpecLenStart : the number of bytes to be copied with (p->reps[0]) offset
+        = kMatchSpecLenStart : the LZMA stream was finished with end mark
+
+    SZ_ERROR_DATA - error, when the MATCH-Symbol refers out of dictionary
+      p->remainLen : undefined
+      p->reps[*]    : undefined
 */
 
 
@@ -316,11 +332,6 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
       else
       {
         UPDATE_1(prob);
-        /*
-        // that case was checked before with kBadRepCode
-        if (checkDicSize == 0 && processedPos == 0)
-          return SZ_ERROR_DATA;
-        */
         prob = probs + IsRepG0 + state;
         IF_BIT_0(prob)
         {
@@ -329,6 +340,13 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
           IF_BIT_0(prob)
           {
             UPDATE_0(prob);
+  
+            // that case was checked before with kBadRepCode
+            // if (checkDicSize == 0 && processedPos == 0) { len = kMatchSpecLen_Error_Data + 1; break; }
+            // The caller doesn't allow (dicPos == limit) case here
+            // so we don't need the following check:
+            // if (dicPos == limit) { state = state < kNumLitStates ? 9 : 11; len = 1; break; }
+            
             dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
             dicPos++;
             processedPos++;
@@ -518,8 +536,10 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
         state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3;
         if (distance >= (checkDicSize == 0 ? processedPos: checkDicSize))
         {
-          p->dicPos = dicPos;
-          return SZ_ERROR_DATA;
+          len += kMatchSpecLen_Error_Data + kMatchMinLen;
+          // len = kMatchSpecLen_Error_Data;
+          // len += kMatchMinLen;
+          break;
         }
       }
 
@@ -532,8 +552,13 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
         
         if ((rem = limit - dicPos) == 0)
         {
-          p->dicPos = dicPos;
-          return SZ_ERROR_DATA;
+          /*
+          We stop decoding and return SZ_OK, and we can resume decoding later.
+          Any error conditions can be tested later in caller code.
+          For more strict mode we can stop decoding with error
+          // len += kMatchSpecLen_Error_Data;
+          */
+          break;
         }
         
         curLen = ((rem < len) ? (unsigned)rem : len);
@@ -572,7 +597,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
   p->buf = buf;
   p->range = range;
   p->code = code;
-  p->remainLen = (UInt32)len;
+  p->remainLen = (UInt32)len; // & (kMatchSpecLen_Error_Data - 1); // we can write real length for error matches too.
   p->dicPos = dicPos;
   p->processedPos = processedPos;
   p->reps[0] = rep0;
@@ -580,40 +605,61 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
   p->reps[2] = rep2;
   p->reps[3] = rep3;
   p->state = (UInt32)state;
-
+  if (len >= kMatchSpecLen_Error_Data)
+    return SZ_ERROR_DATA;
   return SZ_OK;
 }
 #endif
 
+
+
 static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit)
 {
-  if (p->remainLen != 0 && p->remainLen < kMatchSpecLenStart)
+  unsigned len = (unsigned)p->remainLen;
+  if (len == 0 /* || len >= kMatchSpecLenStart */)
+    return;
   {
-    Byte *dic = p->dic;
     SizeT dicPos = p->dicPos;
-    SizeT dicBufSize = p->dicBufSize;
-    unsigned len = (unsigned)p->remainLen;
-    SizeT rep0 = p->reps[0]; /* we use SizeT to avoid the BUG of VC14 for AMD64 */
-    SizeT rem = limit - dicPos;
-    if (rem < len)
-      len = (unsigned)(rem);
+    Byte *dic;
+    SizeT dicBufSize;
+    SizeT rep0;   /* we use SizeT to avoid the BUG of VC14 for AMD64 */
+    {
+      SizeT rem = limit - dicPos;
+      if (rem < len)
+      {
+        len = (unsigned)(rem);
+        if (len == 0)
+          return;
+      }
+    }
 
     if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= len)
       p->checkDicSize = p->prop.dicSize;
 
     p->processedPos += (UInt32)len;
     p->remainLen -= (UInt32)len;
-    while (len != 0)
+    dic = p->dic;
+    rep0 = p->reps[0];
+    dicBufSize = p->dicBufSize;
+    do
     {
-      len--;
       dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
       dicPos++;
     }
+    while (--len);
     p->dicPos = dicPos;
   }
 }
 
 
+/*
+At the start of a new stream we have one of the following symbols:
+  - Literal        - is allowed
+  - Non-Rep-Match  - is allowed only if it's end marker symbol
+  - Rep-Match      - is not allowed
+We use early check of (RangeCoder:Code) over kBadRepCode to simplify main decoding code
+*/
+
 #define kRange0 0xFFFFFFFF
 #define kBound0 ((kRange0 >> kNumBitModelTotalBits) << (kNumBitModelTotalBits - 1))
 #define kBadRepCode (kBound0 + (((kRange0 - kBound0) >> kNumBitModelTotalBits) << (kNumBitModelTotalBits - 1)))
@@ -621,69 +667,77 @@ static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit)
   #error Stop_Compiling_Bad_LZMA_Check
 #endif
 
+
+/*
+LzmaDec_DecodeReal2():
+  It calls LZMA_DECODE_REAL() and adjusts limit according to (p->checkDicSize).
+
+We correct (p->checkDicSize) after LZMA_DECODE_REAL() and in LzmaDec_WriteRem(),
+and we support the following state of (p->checkDicSize):
+  if (total_processed < p->prop.dicSize) then
+  {
+    (total_processed == p->processedPos)
+    (p->checkDicSize == 0)
+  }
+  else
+    (p->checkDicSize == p->prop.dicSize)
+*/
+
 static int MY_FAST_CALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
 {
-  do
+  if (p->checkDicSize == 0)
   {
-    SizeT limit2 = limit;
-    if (p->checkDicSize == 0)
-    {
-      UInt32 rem = p->prop.dicSize - p->processedPos;
-      if (limit - p->dicPos > rem)
-        limit2 = p->dicPos + rem;
-
-      if (p->processedPos == 0)
-        if (p->code >= kBadRepCode)
-          return SZ_ERROR_DATA;
-    }
-
-    RINOK(LZMA_DECODE_REAL(p, limit2, bufLimit));
-    
+    UInt32 rem = p->prop.dicSize - p->processedPos;
+    if (limit - p->dicPos > rem)
+      limit = p->dicPos + rem;
+  }
+  {
+    int res = LZMA_DECODE_REAL(p, limit, bufLimit);
     if (p->checkDicSize == 0 && p->processedPos >= p->prop.dicSize)
       p->checkDicSize = p->prop.dicSize;
-    
-    LzmaDec_WriteRem(p, limit);
+    return res;
   }
-  while (p->dicPos < limit && p->buf < bufLimit && p->remainLen < kMatchSpecLenStart);
-
-  return 0;
 }
 
+
+
 typedef enum
 {
-  DUMMY_ERROR, /* unexpected end of input stream */
+  DUMMY_INPUT_EOF, /* need more input data */
   DUMMY_LIT,
   DUMMY_MATCH,
   DUMMY_REP
 } ELzmaDummy;
 
-static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inSize)
+
+#define IS_DUMMY_END_MARKER_POSSIBLE(dummyRes) ((dummyRes) == DUMMY_MATCH)
+
+static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byte **bufOut)
 {
   UInt32 range = p->range;
   UInt32 code = p->code;
-  const Byte *bufLimit = buf + inSize;
+  const Byte *bufLimit = *bufOut;
   const CLzmaProb *probs = GET_PROBS;
   unsigned state = (unsigned)p->state;
   ELzmaDummy res;
 
+  for (;;)
   {
     const CLzmaProb *prob;
     UInt32 bound;
     unsigned ttt;
-    unsigned posState = CALC_POS_STATE(p->processedPos, (1 << p->prop.pb) - 1);
+    unsigned posState = CALC_POS_STATE(p->processedPos, ((unsigned)1 << p->prop.pb) - 1);
 
     prob = probs + IsMatch + COMBINED_PS_STATE;
     IF_BIT_0_CHECK(prob)
     {
       UPDATE_0_CHECK
 
-      /* if (bufLimit - buf >= 7) return DUMMY_LIT; */
-
       prob = probs + Literal;
       if (p->checkDicSize != 0 || p->processedPos != 0)
         prob += ((UInt32)LZMA_LIT_SIZE *
-            ((((p->processedPos) & ((1 << (p->prop.lp)) - 1)) << p->prop.lc) +
-            (p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc))));
+            ((((p->processedPos) & (((unsigned)1 << (p->prop.lp)) - 1)) << p->prop.lc) +
+            ((unsigned)p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc))));
 
       if (state < kNumLitStates)
       {
@@ -735,8 +789,7 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inS
           IF_BIT_0_CHECK(prob)
           {
             UPDATE_0_CHECK;
-            NORMALIZE_CHECK;
-            return DUMMY_REP;
+            break;
           }
           else
           {
@@ -812,8 +865,6 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inS
         {
           unsigned numDirectBits = ((posSlot >> 1) - 1);
 
-          /* if (bufLimit - buf >= 8) return DUMMY_MATCH; */
-
           if (posSlot < kEndPosModelIndex)
           {
             prob = probs + SpecPos + ((2 | (posSlot & 1)) << numDirectBits);
@@ -844,12 +895,15 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inS
         }
       }
     }
+    break;
   }
   NORMALIZE_CHECK;
+
+  *bufOut = buf;
   return res;
 }
 
-
+void LzmaDec_InitDicAndState(CLzmaDec *p, BoolInt initDic, BoolInt initState);
 void LzmaDec_InitDicAndState(CLzmaDec *p, BoolInt initDic, BoolInt initState)
 {
   p->remainLen = kMatchSpecLenStart + 1;
@@ -872,16 +926,41 @@ void LzmaDec_Init(CLzmaDec *p)
 }
 
 
+/*
+LZMA supports optional end_marker.
+So the decoder can lookahead for one additional LZMA-Symbol to check end_marker.
+That additional LZMA-Symbol can require up to LZMA_REQUIRED_INPUT_MAX bytes in input stream.
+When the decoder reaches dicLimit, it looks at the (finishMode) parameter:
+  if (finishMode == LZMA_FINISH_ANY), the decoder doesn't look ahead
+  if (finishMode != LZMA_FINISH_ANY), the decoder looks ahead, if end_marker is possible for current position
+
+When the decoder looks ahead, and the lookahead symbol is not end_marker, we have two ways:
+  1) Strict mode (default) : the decoder returns SZ_ERROR_DATA.
+  2) The relaxed mode (alternative mode) : we could return SZ_OK, and the caller
+     must check (status) value. The caller can show the error,
+     if the end of stream is expected, and the (status) is not
+     LZMA_STATUS_FINISHED_WITH_MARK or LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK.
+*/
+
+
+#define RETURN__NOT_FINISHED__FOR_FINISH \
+  *status = LZMA_STATUS_NOT_FINISHED; \
+  return SZ_ERROR_DATA; // for strict mode
+  // return SZ_OK; // for relaxed mode
+
+
 SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *srcLen,
     ELzmaFinishMode finishMode, ELzmaStatus *status)
 {
   SizeT inSize = *srcLen;
   (*srcLen) = 0;
-  
   *status = LZMA_STATUS_NOT_SPECIFIED;
 
   if (p->remainLen > kMatchSpecLenStart)
   {
+    if (p->remainLen > kMatchSpecLenStart + 2)
+      return p->remainLen == kMatchSpecLen_Error_Fail ? SZ_ERROR_FAIL : SZ_ERROR_DATA;
+
     for (; inSize > 0 && p->tempBufSize < RC_INIT_SIZE; (*srcLen)++, inSize--)
       p->tempBuf[p->tempBufSize++] = *src++;
     if (p->tempBufSize != 0 && p->tempBuf[0] != 0)
@@ -896,6 +975,12 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr
       | ((UInt32)p->tempBuf[2] << 16)
       | ((UInt32)p->tempBuf[3] << 8)
       | ((UInt32)p->tempBuf[4]);
+
+    if (p->checkDicSize == 0
+        && p->processedPos == 0
+        && p->code >= kBadRepCode)
+      return SZ_ERROR_DATA;
+
     p->range = 0xFFFFFFFF;
     p->tempBufSize = 0;
 
@@ -913,10 +998,21 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr
     p->remainLen = 0;
   }
 
-  LzmaDec_WriteRem(p, dicLimit);
-
-  while (p->remainLen != kMatchSpecLenStart)
+  for (;;)
   {
+    if (p->remainLen == kMatchSpecLenStart)
+    {
+      if (p->code != 0)
+        return SZ_ERROR_DATA;
+      *status = LZMA_STATUS_FINISHED_WITH_MARK;
+      return SZ_OK;
+    }
+
+    LzmaDec_WriteRem(p, dicLimit);
+
+    {
+      // (p->remainLen == 0 || p->dicPos == dicLimit)
+
       int checkEndMarkNow = 0;
 
       if (p->dicPos >= dicLimit)
@@ -933,92 +1029,174 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr
         }
         if (p->remainLen != 0)
         {
-          *status = LZMA_STATUS_NOT_FINISHED;
-          return SZ_ERROR_DATA;
+          RETURN__NOT_FINISHED__FOR_FINISH;
         }
         checkEndMarkNow = 1;
       }
 
+      // (p->remainLen == 0)
+
       if (p->tempBufSize == 0)
       {
-        SizeT processed;
         const Byte *bufLimit;
+        int dummyProcessed = -1;
+        
         if (inSize < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow)
         {
-          int dummyRes = LzmaDec_TryDummy(p, src, inSize);
-          if (dummyRes == DUMMY_ERROR)
+          const Byte *bufOut = src + inSize;
+          
+          ELzmaDummy dummyRes = LzmaDec_TryDummy(p, src, &bufOut);
+          
+          if (dummyRes == DUMMY_INPUT_EOF)
           {
-            memcpy(p->tempBuf, src, inSize);
-            p->tempBufSize = (unsigned)inSize;
+            size_t i;
+            if (inSize >= LZMA_REQUIRED_INPUT_MAX)
+              break;
             (*srcLen) += inSize;
+            p->tempBufSize = (unsigned)inSize;
+            for (i = 0; i < inSize; i++)
+              p->tempBuf[i] = src[i];
             *status = LZMA_STATUS_NEEDS_MORE_INPUT;
             return SZ_OK;
           }
-          if (checkEndMarkNow && dummyRes != DUMMY_MATCH)
+ 
+          dummyProcessed = (int)(bufOut - src);
+          if ((unsigned)dummyProcessed > LZMA_REQUIRED_INPUT_MAX)
+            break;
+          
+          if (checkEndMarkNow && !IS_DUMMY_END_MARKER_POSSIBLE(dummyRes))
           {
-            *status = LZMA_STATUS_NOT_FINISHED;
-            return SZ_ERROR_DATA;
+            unsigned i;
+            (*srcLen) += (unsigned)dummyProcessed;
+            p->tempBufSize = (unsigned)dummyProcessed;
+            for (i = 0; i < (unsigned)dummyProcessed; i++)
+              p->tempBuf[i] = src[i];
+            // p->remainLen = kMatchSpecLen_Error_Data;
+            RETURN__NOT_FINISHED__FOR_FINISH;
           }
+          
           bufLimit = src;
+          // we will decode only one iteration
         }
         else
           bufLimit = src + inSize - LZMA_REQUIRED_INPUT_MAX;
+
         p->buf = src;
-        if (LzmaDec_DecodeReal2(p, dicLimit, bufLimit) != 0)
-          return SZ_ERROR_DATA;
-        processed = (SizeT)(p->buf - src);
-        (*srcLen) += processed;
-        src += processed;
-        inSize -= processed;
+        
+        {
+          int res = LzmaDec_DecodeReal2(p, dicLimit, bufLimit);
+          
+          SizeT processed = (SizeT)(p->buf - src);
+
+          if (dummyProcessed < 0)
+          {
+            if (processed > inSize)
+              break;
+          }
+          else if ((unsigned)dummyProcessed != processed)
+            break;
+
+          src += processed;
+          inSize -= processed;
+          (*srcLen) += processed;
+
+          if (res != SZ_OK)
+          {
+            p->remainLen = kMatchSpecLen_Error_Data;
+            return SZ_ERROR_DATA;
+          }
+        }
+        continue;
       }
-      else
+
       {
-        unsigned rem = p->tempBufSize, lookAhead = 0;
-        while (rem < LZMA_REQUIRED_INPUT_MAX && lookAhead < inSize)
-          p->tempBuf[rem++] = src[lookAhead++];
-        p->tempBufSize = rem;
+        // we have some data in (p->tempBuf)
+        // in strict mode: tempBufSize is not enough for one Symbol decoding.
+        // in relaxed mode: tempBufSize not larger than required for one Symbol decoding.
+
+        unsigned rem = p->tempBufSize;
+        unsigned ahead = 0;
+        int dummyProcessed = -1;
+        
+        while (rem < LZMA_REQUIRED_INPUT_MAX && ahead < inSize)
+          p->tempBuf[rem++] = src[ahead++];
+        
+        // ahead - the size of new data copied from (src) to (p->tempBuf)
+        // rem   - the size of temp buffer including new data from (src)
+        
         if (rem < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow)
         {
-          int dummyRes = LzmaDec_TryDummy(p, p->tempBuf, (SizeT)rem);
-          if (dummyRes == DUMMY_ERROR)
+          const Byte *bufOut = p->tempBuf + rem;
+        
+          ELzmaDummy dummyRes = LzmaDec_TryDummy(p, p->tempBuf, &bufOut);
+          
+          if (dummyRes == DUMMY_INPUT_EOF)
           {
-            (*srcLen) += (SizeT)lookAhead;
+            if (rem >= LZMA_REQUIRED_INPUT_MAX)
+              break;
+            p->tempBufSize = rem;
+            (*srcLen) += (SizeT)ahead;
             *status = LZMA_STATUS_NEEDS_MORE_INPUT;
             return SZ_OK;
           }
-          if (checkEndMarkNow && dummyRes != DUMMY_MATCH)
+          
+          dummyProcessed = (int)(bufOut - p->tempBuf);
+
+          if ((unsigned)dummyProcessed < p->tempBufSize)
+            break;
+
+          if (checkEndMarkNow && !IS_DUMMY_END_MARKER_POSSIBLE(dummyRes))
           {
-            *status = LZMA_STATUS_NOT_FINISHED;
-            return SZ_ERROR_DATA;
+            (*srcLen) += (unsigned)dummyProcessed - p->tempBufSize;
+            p->tempBufSize = (unsigned)dummyProcessed;
+            // p->remainLen = kMatchSpecLen_Error_Data;
+            RETURN__NOT_FINISHED__FOR_FINISH;
           }
         }
+
         p->buf = p->tempBuf;
-        if (LzmaDec_DecodeReal2(p, dicLimit, p->buf) != 0)
-          return SZ_ERROR_DATA;
         
         {
-          unsigned kkk = (unsigned)(p->buf - p->tempBuf);
-          if (rem < kkk)
-            return SZ_ERROR_FAIL; /* some internal error */
-          rem -= kkk;
-          if (lookAhead < rem)
-            return SZ_ERROR_FAIL; /* some internal error */
-          lookAhead -= rem;
+          // we decode one symbol from (p->tempBuf) here, so the (bufLimit) is equal to (p->buf)
+          int res = LzmaDec_DecodeReal2(p, dicLimit, p->buf);
+
+          SizeT processed = (SizeT)(p->buf - p->tempBuf);
+          rem = p->tempBufSize;
+          
+          if (dummyProcessed < 0)
+          {
+            if (processed > LZMA_REQUIRED_INPUT_MAX)
+              break;
+            if (processed < rem)
+              break;
+          }
+          else if ((unsigned)dummyProcessed != processed)
+            break;
+          
+          processed -= rem;
+
+          src += processed;
+          inSize -= processed;
+          (*srcLen) += processed;
+          p->tempBufSize = 0;
+          
+          if (res != SZ_OK)
+          {
+            p->remainLen = kMatchSpecLen_Error_Data;
+            return SZ_ERROR_DATA;
+          }
         }
-        (*srcLen) += (SizeT)lookAhead;
-        src += lookAhead;
-        inSize -= (SizeT)lookAhead;
-        p->tempBufSize = 0;
       }
+    }
   }
-  
-  if (p->code != 0)
-    return SZ_ERROR_DATA;
-  *status = LZMA_STATUS_FINISHED_WITH_MARK;
-  return SZ_OK;
+
+  /*  Some unexpected error: internal error of code, memory corruption or hardware failure */
+  p->remainLen = kMatchSpecLen_Error_Fail;
+  return SZ_ERROR_FAIL;
 }
 
 
+
 SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status)
 {
   SizeT outSize = *destLen;
diff --git a/deps/LZMA-SDK/C/LzmaDec.h b/deps/LZMA-SDK/C/LzmaDec.h
index 28ce60c3e..6194b7d12 100644
--- a/deps/LZMA-SDK/C/LzmaDec.h
+++ b/deps/LZMA-SDK/C/LzmaDec.h
@@ -1,5 +1,5 @@
 /* LzmaDec.h -- LZMA Decoder
-2018-04-21 : Igor Pavlov : Public domain */
+2020-03-19 : Igor Pavlov : Public domain */
 
 #ifndef __LZMA_DEC_H
 #define __LZMA_DEC_H
@@ -181,6 +181,7 @@ Returns:
       LZMA_STATUS_NEEDS_MORE_INPUT
       LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
   SZ_ERROR_DATA - Data error
+  SZ_ERROR_FAIL - Some unexpected error: internal error of code, memory corruption or hardware failure
 */
 
 SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit,
@@ -223,6 +224,7 @@ Returns:
   SZ_ERROR_MEM  - Memory allocation error
   SZ_ERROR_UNSUPPORTED - Unsupported properties
   SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src).
+  SZ_ERROR_FAIL - Some unexpected error: internal error of code, memory corruption or hardware failure
 */
 
 SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
diff --git a/deps/LZMA-SDK/C/LzmaEnc.c b/deps/LZMA-SDK/C/LzmaEnc.c
index 14086fc4f..86dcb1963 100644
--- a/deps/LZMA-SDK/C/LzmaEnc.c
+++ b/deps/LZMA-SDK/C/LzmaEnc.c
@@ -1,5 +1,5 @@
 /* LzmaEnc.c -- LZMA Encoder
-2019-01-10: Igor Pavlov : Public domain */
+2021-04-01: Igor Pavlov : Public domain */
 
 #include "Precomp.h"
 
@@ -19,6 +19,19 @@
 #include "LzFindMt.h"
 #endif
 
+/* The following LzmaEnc_* declarations are the internal LZMA interface for the LZMA2 encoder */
+
+SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp, ISeqInStream *inStream, UInt32 keepWindowSize,
+    ISzAllocPtr alloc, ISzAllocPtr allocBig);
+SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte *src, SizeT srcLen,
+    UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig);
+SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, BoolInt reInit,
+    Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize);
+const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp);
+void LzmaEnc_Finish(CLzmaEncHandle pp);
+void LzmaEnc_SaveState(CLzmaEncHandle pp);
+void LzmaEnc_RestoreState(CLzmaEncHandle pp);
+
 #ifdef SHOW_STAT
 static unsigned g_STAT_OFFSET = 0;
 #endif
@@ -36,7 +49,7 @@ static unsigned g_STAT_OFFSET = 0;
 
 #define kNumMoveReducingBits 4
 #define kNumBitPriceShiftBits 4
-#define kBitPrice (1 << kNumBitPriceShiftBits)
+// #define kBitPrice (1 << kNumBitPriceShiftBits)
 
 #define REP_LEN_COUNT 64
 
@@ -47,6 +60,7 @@ void LzmaEncProps_Init(CLzmaEncProps *p)
   p->reduceSize = (UInt64)(Int64)-1;
   p->lc = p->lp = p->pb = p->algo = p->fb = p->btMode = p->numHashBytes = p->numThreads = -1;
   p->writeEndMark = 0;
+  p->affinity = 0;
 }
 
 void LzmaEncProps_Normalize(CLzmaEncProps *p)
@@ -55,7 +69,13 @@ void LzmaEncProps_Normalize(CLzmaEncProps *p)
   if (level < 0) level = 5;
   p->level = level;
   
-  if (p->dictSize == 0) p->dictSize = (level <= 5 ? (1 << (level * 2 + 14)) : (level <= 7 ? (1 << 25) : (1 << 26)));
+  if (p->dictSize == 0)
+    p->dictSize =
+      ( level <= 3 ? ((UInt32)1 << (level * 2 + 16)) :
+      ( level <= 6 ? ((UInt32)1 << (level + 19)) :
+      ( level <= 7 ? ((UInt32)1 << 25) : ((UInt32)1 << 26)
+      )));
+
   if (p->dictSize > p->reduceSize)
   {
     unsigned i;
@@ -74,8 +94,8 @@ void LzmaEncProps_Normalize(CLzmaEncProps *p)
   if (p->algo < 0) p->algo = (level < 5 ? 0 : 1);
   if (p->fb < 0) p->fb = (level < 7 ? 32 : 64);
   if (p->btMode < 0) p->btMode = (p->algo == 0 ? 0 : 1);
-  if (p->numHashBytes < 0) p->numHashBytes = 4;
-  if (p->mc == 0) p->mc = (16 + (p->fb >> 1)) >> (p->btMode ? 0 : 1);
+  if (p->numHashBytes < 0) p->numHashBytes = (p->btMode ? 4 : 5);
+  if (p->mc == 0) p->mc = (16 + ((unsigned)p->fb >> 1)) >> (p->btMode ? 0 : 1);
   
   if (p->numThreads < 0)
     p->numThreads =
@@ -93,7 +113,7 @@ UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2)
   return props.dictSize;
 }
 
-#if (_MSC_VER >= 1400)
+#if defined(_MSC_VER) && (_MSC_VER >= 1400)
 /* BSR code is fast for some new CPUs */
 /* #define LZMA_LOG_BSR */
 #endif
@@ -193,7 +213,7 @@ typedef struct
 
 #define kNumLenToPosStates 4
 #define kNumPosSlotBits 6
-#define kDicLogSizeMin 0
+// #define kDicLogSizeMin 0
 #define kDicLogSizeMax 32
 #define kDistTableSizeMax (kDicLogSizeMax * 2)
 
@@ -462,16 +482,16 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2)
 
   p->dictSize = props.dictSize;
   {
-    unsigned fb = props.fb;
+    unsigned fb = (unsigned)props.fb;
     if (fb < 5)
       fb = 5;
     if (fb > LZMA_MATCH_LEN_MAX)
       fb = LZMA_MATCH_LEN_MAX;
     p->numFastBytes = fb;
   }
-  p->lc = props.lc;
-  p->lp = props.lp;
-  p->pb = props.pb;
+  p->lc = (unsigned)props.lc;
+  p->lp = (unsigned)props.lp;
+  p->pb = (unsigned)props.pb;
   p->fastMode = (props.algo == 0);
   // p->_maxMode = True;
   p->matchFinderBase.btMode = (Byte)(props.btMode ? 1 : 0);
@@ -479,17 +499,17 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2)
     unsigned numHashBytes = 4;
     if (props.btMode)
     {
-      if (props.numHashBytes < 2)
-        numHashBytes = 2;
-      else if (props.numHashBytes < 4)
-        numHashBytes = props.numHashBytes;
+           if (props.numHashBytes <  2) numHashBytes = 2;
+      else if (props.numHashBytes <  4) numHashBytes = (unsigned)props.numHashBytes;
     }
+    if (props.numHashBytes >= 5) numHashBytes = 5;
+
     p->matchFinderBase.numHashBytes = numHashBytes;
   }
 
   p->matchFinderBase.cutValue = props.mc;
 
-  p->writeEndMark = props.writeEndMark;
+  p->writeEndMark = (BoolInt)props.writeEndMark;
 
   #ifndef _7ZIP_ST
   /*
@@ -500,6 +520,8 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2)
   }
   */
   p->multiThread = (props.numThreads > 1);
+  p->matchFinderMt.btSync.affinity =
+  p->matchFinderMt.hashSync.affinity = props.affinity;
   #endif
 
   return SZ_OK;
@@ -536,8 +558,8 @@ static void RangeEnc_Construct(CRangeEnc *p)
   p->bufBase = NULL;
 }
 
-#define RangeEnc_GetProcessed(p)       ((p)->processed + ((p)->buf - (p)->bufBase) + (p)->cacheSize)
-#define RangeEnc_GetProcessed_sizet(p) ((size_t)(p)->processed + ((p)->buf - (p)->bufBase) + (size_t)(p)->cacheSize)
+#define RangeEnc_GetProcessed(p)       (        (p)->processed + (size_t)((p)->buf - (p)->bufBase) +         (p)->cacheSize)
+#define RangeEnc_GetProcessed_sizet(p) ((size_t)(p)->processed + (size_t)((p)->buf - (p)->bufBase) + (size_t)(p)->cacheSize)
 
 #define RC_BUF_SIZE (1 << 16)
 
@@ -578,7 +600,7 @@ MY_NO_INLINE static void RangeEnc_FlushStream(CRangeEnc *p)
   size_t num;
   if (p->res != SZ_OK)
     return;
-  num = p->buf - p->bufBase;
+  num = (size_t)(p->buf - p->bufBase);
   if (num != ISeqOutStream_Write(p->outStream, p->bufBase, num))
     p->res = SZ_ERROR_WRITE;
   p->processed += num;
@@ -656,7 +678,7 @@ static void RangeEnc_FlushData(CRangeEnc *p)
   range += newBound & mask; \
   mask &= (kBitModelTotal - ((1 << kNumMoveBits) - 1)); \
   mask += ((1 << kNumMoveBits) - 1); \
-  ttt += (Int32)(mask - ttt) >> kNumMoveBits; \
+  ttt += (UInt32)((Int32)(mask - ttt) >> kNumMoveBits); \
   *(prob) = (CLzmaProb)ttt; \
   RC_NORM(p) \
   }
@@ -749,7 +771,7 @@ static void LzmaEnc_InitPriceTables(CProbPrice *ProbPrices)
         bitCount++;
       }
     }
-    ProbPrices[i] = (CProbPrice)((kNumBitModelTotalBits << kCyclesBits) - 15 - bitCount);
+    ProbPrices[i] = (CProbPrice)(((unsigned)kNumBitModelTotalBits << kCyclesBits) - 15 - bitCount);
     // printf("\n%3d: %5d", i, ProbPrices[i]);
   }
 }
@@ -1011,7 +1033,7 @@ static unsigned ReadMatchDistances(CLzmaEnc *p, unsigned *numPairsRes)
       {
         const Byte *p1 = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
         const Byte *p2 = p1 + len;
-        ptrdiff_t dif = (ptrdiff_t)-1 - p->matches[(size_t)numPairs - 1];
+        ptrdiff_t dif = (ptrdiff_t)-1 - (ptrdiff_t)p->matches[(size_t)numPairs - 1];
         const Byte *lim = p1 + numAvail;
         for (; p2 != lim && *p2 == p2[dif]; p2++)
         {}
@@ -2198,7 +2220,7 @@ MY_NO_INLINE static void FillDistancesPrices(CLzmaEnc *p)
 
 
 
-void LzmaEnc_Construct(CLzmaEnc *p)
+static void LzmaEnc_Construct(CLzmaEnc *p)
 {
   RangeEnc_Construct(&p->rc);
   MatchFinder_Construct(&p->matchFinderBase);
@@ -2233,7 +2255,7 @@ CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc)
   return p;
 }
 
-void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAllocPtr alloc)
+static void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAllocPtr alloc)
 {
   ISzAlloc_Free(alloc, p->litProbs);
   ISzAlloc_Free(alloc, p->saveState.litProbs);
@@ -2241,7 +2263,7 @@ void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAllocPtr alloc)
   p->saveState.litProbs = NULL;
 }
 
-void LzmaEnc_Destruct(CLzmaEnc *p, ISzAllocPtr alloc, ISzAllocPtr allocBig)
+static void LzmaEnc_Destruct(CLzmaEnc *p, ISzAllocPtr alloc, ISzAllocPtr allocBig)
 {
   #ifndef _7ZIP_ST
   MatchFinderMt_Destruct(&p->matchFinderMt, allocBig);
@@ -2259,6 +2281,7 @@ void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig)
 }
 
 
+MY_NO_INLINE
 static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpackSize)
 {
   UInt32 nowPos32, startPos32;
@@ -2521,12 +2544,12 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa
           // { int y; for (y = 0; y < 100; y++) {
           FillDistancesPrices(p);
           // }}
-          LenPriceEnc_UpdateTables(&p->lenEnc, 1 << p->pb, &p->lenProbs, p->ProbPrices);
+          LenPriceEnc_UpdateTables(&p->lenEnc, (unsigned)1 << p->pb, &p->lenProbs, p->ProbPrices);
         }
         if (p->repLenEncCounter <= 0)
         {
           p->repLenEncCounter = REP_LEN_COUNT;
-          LenPriceEnc_UpdateTables(&p->repLenEnc, 1 << p->pb, &p->repLenProbs, p->ProbPrices);
+          LenPriceEnc_UpdateTables(&p->repLenEnc, (unsigned)1 << p->pb, &p->repLenProbs, p->ProbPrices);
         }
       }
     
@@ -2611,7 +2634,7 @@ static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc,
   return SZ_OK;
 }
 
-void LzmaEnc_Init(CLzmaEnc *p)
+static void LzmaEnc_Init(CLzmaEnc *p)
 {
   unsigned i;
   p->state = 0;
@@ -2675,12 +2698,12 @@ void LzmaEnc_Init(CLzmaEnc *p)
 
   p->additionalOffset = 0;
 
-  p->pbMask = (1 << p->pb) - 1;
+  p->pbMask = ((unsigned)1 << p->pb) - 1;
   p->lpMask = ((UInt32)0x100 << p->lp) - ((unsigned)0x100 >> p->lc);
 }
 
 
-void LzmaEnc_InitPrices(CLzmaEnc *p)
+static void LzmaEnc_InitPrices(CLzmaEnc *p)
 {
   if (!p->fastMode)
   {
@@ -2694,8 +2717,8 @@ void LzmaEnc_InitPrices(CLzmaEnc *p)
 
   p->repLenEncCounter = REP_LEN_COUNT;
 
-  LenPriceEnc_UpdateTables(&p->lenEnc, 1 << p->pb, &p->lenProbs, p->ProbPrices);
-  LenPriceEnc_UpdateTables(&p->repLenEnc, 1 << p->pb, &p->repLenProbs, p->ProbPrices);
+  LenPriceEnc_UpdateTables(&p->lenEnc, (unsigned)1 << p->pb, &p->lenProbs, p->ProbPrices);
+  LenPriceEnc_UpdateTables(&p->repLenEnc, (unsigned)1 << p->pb, &p->repLenProbs, p->ProbPrices);
 }
 
 static SRes LzmaEnc_AllocAndInit(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig)
@@ -2788,12 +2811,13 @@ static size_t SeqOutStreamBuf_Write(const ISeqOutStream *pp, const void *data, s
 }
 
 
+/*
 UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle pp)
 {
   const CLzmaEnc *p = (CLzmaEnc *)pp;
   return p->matchFinder.GetNumAvailableBytes(p->matchFinderObj);
 }
-
+*/
 
 const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp)
 {
@@ -2841,6 +2865,7 @@ SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, BoolInt reInit,
 }
 
 
+MY_NO_INLINE
 static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgress *progress)
 {
   SRes res = SZ_OK;
@@ -2899,14 +2924,14 @@ SRes LzmaEnc_WriteProperties(CLzmaEncHandle pp, Byte *props, SizeT *size)
 
   if (dictSize >= ((UInt32)1 << 22))
   {
-    UInt32 kDictMask = ((UInt32)1 << 20) - 1;
+    const UInt32 kDictMask = ((UInt32)1 << 20) - 1;
     if (dictSize < (UInt32)0xFFFFFFFF - kDictMask)
       dictSize = (dictSize + kDictMask) & ~kDictMask;
   }
   else for (i = 11; i <= 30; i++)
   {
-    if (dictSize <= ((UInt32)2 << i)) { dictSize = (2 << i); break; }
-    if (dictSize <= ((UInt32)3 << i)) { dictSize = (3 << i); break; }
+    if (dictSize <= ((UInt32)2 << i)) { dictSize = ((UInt32)2 << i); break; }
+    if (dictSize <= ((UInt32)3 << i)) { dictSize = ((UInt32)3 << i); break; }
   }
 
   for (i = 0; i < 4; i++)
@@ -2917,7 +2942,7 @@ SRes LzmaEnc_WriteProperties(CLzmaEncHandle pp, Byte *props, SizeT *size)
 
 unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle pp)
 {
-  return ((CLzmaEnc *)pp)->writeEndMark;
+  return (unsigned)((CLzmaEnc *)pp)->writeEndMark;
 }
 
 
@@ -2974,3 +2999,15 @@ SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
   LzmaEnc_Destroy(p, alloc, allocBig);
   return res;
 }
+
+
+/*
+#ifndef _7ZIP_ST
+void LzmaEnc_GetLzThreads(CLzmaEncHandle pp, HANDLE lz_threads[2])
+{
+  const CLzmaEnc *p = (CLzmaEnc *)pp;
+  lz_threads[0] = p->matchFinderMt.hashSync.thread;
+  lz_threads[1] = p->matchFinderMt.btSync.thread;
+}
+#endif
+*/
diff --git a/deps/LZMA-SDK/C/LzmaEnc.h b/deps/LZMA-SDK/C/LzmaEnc.h
index c9938f04b..26757ba6b 100644
--- a/deps/LZMA-SDK/C/LzmaEnc.h
+++ b/deps/LZMA-SDK/C/LzmaEnc.h
@@ -1,5 +1,5 @@
 /*  LzmaEnc.h -- LZMA Encoder
-2017-07-27 : Igor Pavlov : Public domain */
+2019-10-30 : Igor Pavlov : Public domain */
 
 #ifndef __LZMA_ENC_H
 #define __LZMA_ENC_H
@@ -29,6 +29,8 @@ typedef struct _CLzmaEncProps
 
   UInt64 reduceSize; /* estimated size of data that will be compressed. default = (UInt64)(Int64)-1.
                         Encoder uses this value to reduce dictionary size */
+
+  UInt64 affinity;
 } CLzmaEncProps;
 
 void LzmaEncProps_Init(CLzmaEncProps *p);
diff --git a/deps/LZMA-SDK/C/LzmaLib.h b/deps/LZMA-SDK/C/LzmaLib.h
index 5c35e5365..4103e224a 100644
--- a/deps/LZMA-SDK/C/LzmaLib.h
+++ b/deps/LZMA-SDK/C/LzmaLib.h
@@ -1,5 +1,5 @@
 /* LzmaLib.h -- LZMA library interface
-2013-01-18 : Igor Pavlov : Public domain */
+2021-04-03 : Igor Pavlov : Public domain */
 
 #ifndef __LZMA_LIB_H
 #define __LZMA_LIB_H
@@ -40,14 +40,16 @@ outPropsSize -
 level - compression level: 0 <= level <= 9;
 
   level dictSize algo  fb
-    0:    16 KB   0    32
-    1:    64 KB   0    32
-    2:   256 KB   0    32
-    3:     1 MB   0    32
-    4:     4 MB   0    32
+    0:    64 KB   0    32
+    1:   256 KB   0    32
+    2:     1 MB   0    32
+    3:     4 MB   0    32
+    4:    16 MB   0    32
     5:    16 MB   1    32
     6:    32 MB   1    32
-    7+:   64 MB   1    64
+    7:    32 MB   1    64
+    8:    64 MB   1    64
+    9:    64 MB   1    64
  
   The default value for "level" is 5.
 
@@ -83,6 +85,11 @@ fb - Word size (the number of fast bytes).
 numThreads - The number of thereads. 1 or 2. The default value is 2.
      Fast mode (algo = 0) can use only 1 thread.
 
+In:
+  dest     - output data buffer
+  destLen  - output data buffer size
+  src      - input data
+  srcLen   - input data size
 Out:
   destLen  - processed output size
 Returns:
@@ -108,8 +115,8 @@ MY_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char
 LzmaUncompress
 --------------
 In:
-  dest     - output data
-  destLen  - output data size
+  dest     - output data buffer
+  destLen  - output data buffer size
   src      - input data
   srcLen   - input data size
 Out:
diff --git a/deps/LZMA-SDK/C/MtCoder.c b/deps/LZMA-SDK/C/MtCoder.c
index 5667f2d5b..85444f484 100644
--- a/deps/LZMA-SDK/C/MtCoder.c
+++ b/deps/LZMA-SDK/C/MtCoder.c
@@ -1,5 +1,5 @@
 /* MtCoder.c -- Multi-thread Coder
-2018-07-04 : Igor Pavlov : Public domain */
+2021-02-09 : Igor Pavlov : Public domain */
 
 #include "Precomp.h"
 
@@ -7,7 +7,7 @@
 
 #ifndef _7ZIP_ST
 
-SRes MtProgressThunk_Progress(const ICompressProgress *pp, UInt64 inSize, UInt64 outSize)
+static SRes MtProgressThunk_Progress(const ICompressProgress *pp, UInt64 inSize, UInt64 outSize)
 {
   CMtProgressThunk *thunk = CONTAINER_FROM_VTBL(pp, CMtProgressThunk, vt);
   UInt64 inSize2 = 0;
@@ -70,8 +70,7 @@ static void MtCoderThread_Destruct(CMtCoderThread *t)
   {
     t->stop = 1;
     Event_Set(&t->startEvent);
-    Thread_Wait(&t->thread);
-    Thread_Close(&t->thread);
+    Thread_Wait_Close(&t->thread);
   }
 
   Event_Close(&t->startEvent);
@@ -342,7 +341,7 @@ static THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE ThreadFunc(void *pp)
   for (;;)
   {
     if (Event_Wait(&t->startEvent) != 0)
-      return SZ_ERROR_THREAD;
+      return (THREAD_FUNC_RET_TYPE)SZ_ERROR_THREAD;
     if (t->stop)
       return 0;
     {
@@ -358,7 +357,7 @@ static THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE ThreadFunc(void *pp)
         unsigned numFinished = (unsigned)InterlockedIncrement(&mtc->numFinishedThreads);
         if (numFinished == mtc->numStartedThreads)
           if (Event_Set(&mtc->finishedEvent) != 0)
-            return SZ_ERROR_THREAD;
+            return (THREAD_FUNC_RET_TYPE)SZ_ERROR_THREAD;
       }
       #endif
     }
diff --git a/deps/LZMA-SDK/C/MtDec.c b/deps/LZMA-SDK/C/MtDec.c
index 25a8b046d..24441b3a7 100644
--- a/deps/LZMA-SDK/C/MtDec.c
+++ b/deps/LZMA-SDK/C/MtDec.c
@@ -1,16 +1,21 @@
 /* MtDec.c -- Multi-thread Decoder
-2019-02-02 : Igor Pavlov : Public domain */
+2021-02-27 : Igor Pavlov : Public domain */
 
 #include "Precomp.h"
 
 // #define SHOW_DEBUG_INFO
 
 // #include <stdio.h>
+#include <string.h>
 
 #ifdef SHOW_DEBUG_INFO
 #include <stdio.h>
 #endif
 
+#include "MtDec.h"
+
+#ifndef _7ZIP_ST
+
 #ifdef SHOW_DEBUG_INFO
 #define PRF(x) x
 #else
@@ -19,10 +24,6 @@
 
 #define PRF_STR_INT(s, d) PRF(printf("\n" s " %d\n", (unsigned)d))
 
-#include "MtDec.h"
-
-#ifndef _7ZIP_ST
-
 void MtProgress_Init(CMtProgress *p, ICompressProgress *progress)
 {
   p->progress = progress;
@@ -77,7 +78,7 @@ void MtProgress_SetError(CMtProgress *p, SRes res)
 }
 
 
-#define RINOK_THREAD(x) RINOK(x)
+#define RINOK_THREAD(x) RINOK_WRes(x)
 
 
 static WRes ArEvent_OptCreate_And_Reset(CEvent *p)
@@ -156,8 +157,7 @@ static void MtDecThread_CloseThread(CMtDecThread *t)
   {
     Event_Set(&t->canWrite); /* we can disable it. There are no threads waiting canWrite in normal cases */
     Event_Set(&t->canRead);
-    Thread_Wait(&t->thread);
-    Thread_Close(&t->thread);
+    Thread_Wait_Close(&t->thread);
   }
 
   Event_Close(&t->canRead);
@@ -289,12 +289,13 @@ static WRes ThreadFunc2(CMtDecThread *t)
     
     Byte *afterEndData = NULL;
     size_t afterEndData_Size = 0;
+    BoolInt afterEndData_IsCross = False;
 
     BoolInt canCreateNewThread = False;
     // CMtDecCallbackInfo parse;
     CMtDecThread *nextThread;
 
-    PRF_STR_INT("Event_Wait(&t->canRead)", t->index);
+    PRF_STR_INT("=============== Event_Wait(&t->canRead)", t->index);
 
     RINOK_THREAD(Event_Wait(&t->canRead));
     if (p->exitThread)
@@ -418,10 +419,12 @@ static WRes ThreadFunc2(CMtDecThread *t)
               parse.srcFinished = finish;
               parse.canCreateNewThread = True;
 
-              // PRF(printf("\nParse size = %d\n", (unsigned)size))
+              PRF(printf("\nParse size = %d\n", (unsigned)size));
 
               p->mtCallback->Parse(p->mtCallbackObject, t->index, &parse);
 
+              PRF(printf("   Parse processed = %d, state = %d \n", (unsigned)parse.srcSize, (unsigned)parse.state));
+
               needWrite = True;
               canCreateNewThread = parse.canCreateNewThread;
 
@@ -478,16 +481,12 @@ static WRes ThreadFunc2(CMtDecThread *t)
 
                 if (parse.state == MTDEC_PARSE_END)
                 {
-                  p->crossStart = 0;
-                  p->crossEnd = 0;
-
-                  if (crossSize != 0)
-                    memcpy(data + parse.srcSize, parseData + parse.srcSize, size - parse.srcSize); // we need all data
-                  afterEndData_Size = size - parse.srcSize;
                   afterEndData = parseData + parse.srcSize;
-
+                  afterEndData_Size = size - parse.srcSize;
+                  if (crossSize != 0)
+                    afterEndData_IsCross = True;
                   // we reduce data size to required bytes (parsed only)
-                  inDataSize -= (size - parse.srcSize);
+                  inDataSize -= afterEndData_Size;
                   if (!prev)
                     inDataSize_Start = parse.srcSize;
                   break;
@@ -752,13 +751,15 @@ static WRes ThreadFunc2(CMtDecThread *t)
     {
       // p->inProcessed += inCodePos;
 
+      PRF(printf("\n--Write afterSize = %d\n", (unsigned)afterEndData_Size));
+
       res = p->mtCallback->Write(p->mtCallbackObject, t->index,
           res == SZ_OK && needWriteToStream && !wasInterrupted, // needWrite
-          afterEndData, afterEndData_Size,
+          afterEndData, afterEndData_Size, afterEndData_IsCross,
           &needContinue,
           &canRecode);
-      
-      // res= E_INVALIDARG; // for test
+
+      // res = SZ_ERROR_FAIL; // for test
 
       PRF(printf("\nAfter Write needContinue = %d\n", (unsigned)needContinue));
       PRF(printf("\nprocessed = %d\n", (unsigned)p->inProcessed));
@@ -847,7 +848,7 @@ static THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE ThreadFunc1(void *pp)
   res = ThreadFunc2(t);
   p = t->mtDec;
   if (res == 0)
-    return p->exitThreadWRes;
+    return (THREAD_FUNC_RET_TYPE)(UINT_PTR)p->exitThreadWRes;
   {
     // it's unexpected situation for some threading function error
     if (p->exitThreadWRes == 0)
@@ -858,15 +859,14 @@ static THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE ThreadFunc1(void *pp)
     Event_Set(&p->threads[0].canWrite);
     MtProgress_SetError(&p->mtProgress, MY_SRes_HRESULT_FROM_WRes(res));
   }
-  return res;
+  return (THREAD_FUNC_RET_TYPE)(UINT_PTR)res;
 }
 
 static MY_NO_INLINE THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE ThreadFunc(void *pp)
 {
+  #ifdef USE_ALLOCA
   CMtDecThread *t = (CMtDecThread *)pp;
-
   // fprintf(stderr, "\n%d = %p - before", t->index, &t);
-  #ifdef USE_ALLOCA
   t->allocaPtr = alloca(t->index * 128);
   #endif
   return ThreadFunc1(pp);
@@ -1092,13 +1092,14 @@ SRes MtDec_Code(CMtDec *p)
 
   {
     WRes wres;
-    WRes sres;
+    SRes sres;
     CMtDecThread *nextThread = &p->threads[p->numStartedThreads++];
     // wres = MtDecThread_CreateAndStart(nextThread);
     wres = MtDecThread_CreateEvents(nextThread);
     if (wres == 0) { wres = Event_Set(&nextThread->canWrite);
     if (wres == 0) { wres = Event_Set(&nextThread->canRead);
-    if (wres == 0) { wres = ThreadFunc(nextThread);
+    if (wres == 0) { THREAD_FUNC_RET_TYPE res = ThreadFunc(nextThread);
+    wres = (WRes)(UINT_PTR)res;
     if (wres != 0)
     {
       p->needContinue = False;
@@ -1130,8 +1131,8 @@ SRes MtDec_Code(CMtDec *p)
       return SZ_OK;
 
     // if (sres != SZ_OK)
-      return sres;
-    // return E_FAIL;
+    return sres;
+    // return SZ_ERROR_FAIL;
   }
 }
 
diff --git a/deps/LZMA-SDK/C/MtDec.h b/deps/LZMA-SDK/C/MtDec.h
index 9864cc874..7a30b6a9e 100644
--- a/deps/LZMA-SDK/C/MtDec.h
+++ b/deps/LZMA-SDK/C/MtDec.h
@@ -1,5 +1,5 @@
 /* MtDec.h -- Multi-thread Decoder
-2018-07-04 : Igor Pavlov : Public domain */
+2020-03-05 : Igor Pavlov : Public domain */
 
 #ifndef __MT_DEC_H
 #define __MT_DEC_H
@@ -108,11 +108,12 @@ typedef struct
   */
   SRes (*Write)(void *p, unsigned coderIndex,
       BoolInt needWriteToStream,
-      const Byte *src, size_t srcSize,
+      const Byte *src, size_t srcSize, BoolInt isCross,
       // int srcFinished,
       BoolInt *needContinue,
       BoolInt *canRecode);
-} IMtDecCallback;
+
+} IMtDecCallback2;
 
 
 
@@ -132,7 +133,7 @@ typedef struct _CMtDec
   ICompressProgress *progress;
   ISzAllocPtr alloc;
 
-  IMtDecCallback *mtCallback;
+  IMtDecCallback2 *mtCallback;
   void *mtCallbackObject;
 
   
diff --git a/deps/LZMA-SDK/C/Ppmd.h b/deps/LZMA-SDK/C/Ppmd.h
index 4b9941521..ee93ecece 100644
--- a/deps/LZMA-SDK/C/Ppmd.h
+++ b/deps/LZMA-SDK/C/Ppmd.h
@@ -1,5 +1,5 @@
 /* Ppmd.h -- PPMD codec common code
-2017-04-03 : Igor Pavlov : Public domain
+2021-04-13 : Igor Pavlov : Public domain
 This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
 
 #ifndef __PPMD_H
@@ -9,7 +9,16 @@ This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
 
 EXTERN_C_BEGIN
 
-#ifdef MY_CPU_32BIT
+#if defined(MY_CPU_SIZEOF_POINTER) && (MY_CPU_SIZEOF_POINTER == 4)
+/*
+   PPMD code always uses 32-bit internal fields in PPMD structures to store internal references in main block.
+   if (PPMD_32BIT is     defined), the PPMD code stores internal pointers to 32-bit reference fields.
+   if (PPMD_32BIT is NOT defined), the PPMD code stores internal UInt32 offsets to reference fields.
+   if (pointer size is 64-bit), then (PPMD_32BIT) mode is not allowed,
+   if (pointer size is 32-bit), then (PPMD_32BIT) mode is optional,
+     and it's allowed to disable PPMD_32BIT mode even if pointer is 32-bit.
+   PPMD code works slightly faster in (PPMD_32BIT) mode.
+*/
   #define PPMD_32BIT
 #endif
 
@@ -28,7 +37,7 @@ EXTERN_C_BEGIN
 #define PPMD_N4 ((128 + 3 - 1 * PPMD_N1 - 2 * PPMD_N2 - 3 * PPMD_N3) / 4)
 #define PPMD_NUM_INDEXES (PPMD_N1 + PPMD_N2 + PPMD_N3 + PPMD_N4)
 
-#pragma pack(push, 1)
+MY_CPU_pragma_pack_push_1
 /* Most compilers works OK here even without #pragma pack(push, 1), but some GCC compilers need it. */
 
 /* SEE-contexts for PPM-contexts with masked symbols */
@@ -40,41 +49,114 @@ typedef struct
 } CPpmd_See;
 
 #define Ppmd_See_Update(p)  if ((p)->Shift < PPMD_PERIOD_BITS && --(p)->Count == 0) \
-    { (p)->Summ <<= 1; (p)->Count = (Byte)(3 << (p)->Shift++); }
+    { (p)->Summ = (UInt16)((p)->Summ << 1); (p)->Count = (Byte)(3 << (p)->Shift++); }
+
 
 typedef struct
 {
   Byte Symbol;
   Byte Freq;
-  UInt16 SuccessorLow;
-  UInt16 SuccessorHigh;
+  UInt16 Successor_0;
+  UInt16 Successor_1;
 } CPpmd_State;
 
-#pragma pack(pop)
-
-typedef
-  #ifdef PPMD_32BIT
-    CPpmd_State *
-  #else
-    UInt32
-  #endif
-  CPpmd_State_Ref;
-
-typedef
-  #ifdef PPMD_32BIT
-    void *
-  #else
-    UInt32
-  #endif
-  CPpmd_Void_Ref;
-
-typedef
-  #ifdef PPMD_32BIT
-    Byte *
-  #else
-    UInt32
-  #endif
-  CPpmd_Byte_Ref;
+typedef struct CPpmd_State2_
+{
+  Byte Symbol;
+  Byte Freq;
+} CPpmd_State2;
+
+typedef struct CPpmd_State4_
+{
+  UInt16 Successor_0;
+  UInt16 Successor_1;
+} CPpmd_State4;
+
+MY_CPU_pragma_pop
+
+/*
+   PPMD code can write full CPpmd_State structure data to CPpmd*_Context
+      at (byte offset = 2) instead of some fields of original CPpmd*_Context structure.
+   
+   If we use pointers to different types that point to the same
+   memory space, we can have an aliasing problem (strict aliasing).
+   
+   The XLC compiler in -O2 mode can change the order of memory write instructions
+   in relation to read instructions, if we use pointers to different types.
+   
+   To solve that aliasing problem we use combined CPpmd*_Context structure
+   with unions that contain the fields from both structures:
+   the original CPpmd*_Context and CPpmd_State.
+   So we can access the fields from both structures via one pointer,
+   and the compiler doesn't change the order of write instructions
+   in relation to read instructions.
+
+   If we don't use memory write instructions to shared memory in
+   some local code, and we use only reading instructions (read only),
+   then probably it's safe to use pointers to different types for reading.
+*/
+  
+
+
+#ifdef PPMD_32BIT
+
+  #define Ppmd_Ref_Type(type)   type *
+  #define Ppmd_GetRef(p, ptr)   (ptr)
+  #define Ppmd_GetPtr(p, ptr)   (ptr)
+  #define Ppmd_GetPtr_Type(p, ptr, note_type) (ptr)
+
+#else
+
+  #define Ppmd_Ref_Type(type)   UInt32
+  #define Ppmd_GetRef(p, ptr)   ((UInt32)((Byte *)(ptr) - (p)->Base))
+  #define Ppmd_GetPtr(p, offs)  ((void *)((p)->Base + (offs)))
+  #define Ppmd_GetPtr_Type(p, offs, type) ((type *)Ppmd_GetPtr(p, offs))
+
+#endif // PPMD_32BIT
+
+
+typedef Ppmd_Ref_Type(CPpmd_State) CPpmd_State_Ref;
+typedef Ppmd_Ref_Type(void)        CPpmd_Void_Ref;
+typedef Ppmd_Ref_Type(Byte)        CPpmd_Byte_Ref;
+
+
+/*
+#ifdef MY_CPU_LE_UNALIGN
+// the unaligned 32-bit access latency can be too large, if the data is not in L1 cache.
+#define Ppmd_GET_SUCCESSOR(p) ((CPpmd_Void_Ref)*(const UInt32 *)(const void *)&(p)->Successor_0)
+#define Ppmd_SET_SUCCESSOR(p, v) *(UInt32 *)(void *)(void *)&(p)->Successor_0 = (UInt32)(v)
+
+#else
+*/
+
+/*
+   We can write 16-bit halves to 32-bit (Successor) field in any selected order.
+   But the native order is the more consistent way.
+   So we use the native order, if LE/BE order can be detected here at compile time.
+*/
+
+#ifdef MY_CPU_BE
+
+  #define Ppmd_GET_SUCCESSOR(p) \
+    ( (CPpmd_Void_Ref) (((UInt32)(p)->Successor_0 << 16) | (p)->Successor_1) )
+
+  #define Ppmd_SET_SUCCESSOR(p, v) { \
+    (p)->Successor_0 = (UInt16)(((UInt32)(v) >> 16) /* & 0xFFFF */); \
+    (p)->Successor_1 = (UInt16)((UInt32)(v) /* & 0xFFFF */); }
+
+#else
+
+  #define Ppmd_GET_SUCCESSOR(p) \
+    ( (CPpmd_Void_Ref) ((p)->Successor_0 | ((UInt32)(p)->Successor_1 << 16)) )
+
+  #define Ppmd_SET_SUCCESSOR(p, v) { \
+    (p)->Successor_0 = (UInt16)((UInt32)(v) /* & 0xFFFF */); \
+    (p)->Successor_1 = (UInt16)(((UInt32)(v) >> 16) /* & 0xFFFF */); }
+
+#endif
+
+// #endif
+
 
 #define PPMD_SetAllBitsIn256Bytes(p) \
   { size_t z; for (z = 0; z < 256 / sizeof(p[0]); z += 8) { \
diff --git a/deps/LZMA-SDK/C/Ppmd7.c b/deps/LZMA-SDK/C/Ppmd7.c
index 80e7de9a6..b6ecf1430 100644
--- a/deps/LZMA-SDK/C/Ppmd7.c
+++ b/deps/LZMA-SDK/C/Ppmd7.c
@@ -1,5 +1,5 @@
 /* Ppmd7.c -- PPMdH codec
-2018-07-04 : Igor Pavlov : Public domain
+2021-04-13 : Igor Pavlov : Public domain
 This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
 
 #include "Precomp.h"
@@ -8,7 +8,12 @@ This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
 
 #include "Ppmd7.h"
 
-const Byte PPMD7_kExpEscape[16] = { 25, 14, 9, 7, 5, 5, 4, 4, 4, 3, 3, 3, 2, 2, 2, 2 };
+/* define PPMD7_ORDER_0_SUPPPORT to support order-0 mode, unsupported by the original PPMd var.H code */
+// #define PPMD7_ORDER_0_SUPPPORT
+ 
+MY_ALIGN(16)
+static const Byte PPMD7_kExpEscape[16] = { 25, 14, 9, 7, 5, 5, 4, 4, 4, 3, 3, 3, 2, 2, 2, 2 };
+MY_ALIGN(16)
 static const UInt16 kInitBinEsc[] = { 0x3CDD, 0x1F3F, 0x59BF, 0x48F3, 0x64A1, 0x5ABC, 0x6632, 0x6051};
 
 #define MAX_FREQ 124
@@ -16,13 +21,10 @@ static const UInt16 kInitBinEsc[] = { 0x3CDD, 0x1F3F, 0x59BF, 0x48F3, 0x64A1, 0x
 
 #define U2B(nu) ((UInt32)(nu) * UNIT_SIZE)
 #define U2I(nu) (p->Units2Indx[(size_t)(nu) - 1])
-#define I2U(indx) (p->Indx2Units[indx])
+#define I2U(indx) ((unsigned)p->Indx2Units[indx])
+#define I2U_UInt16(indx) ((UInt16)p->Indx2Units[indx])
 
-#ifdef PPMD_32BIT
-  #define REF(ptr) (ptr)
-#else
-  #define REF(ptr) ((UInt32)((Byte *)(ptr) - (p)->Base))
-#endif
+#define REF(ptr) Ppmd_GetRef(p, ptr)
 
 #define STATS_REF(ptr) ((CPpmd_State_Ref)REF(ptr))
 
@@ -35,13 +37,7 @@ typedef CPpmd7_Context * CTX_PTR;
 
 struct CPpmd7_Node_;
 
-typedef
-  #ifdef PPMD_32BIT
-    struct CPpmd7_Node_ *
-  #else
-    UInt32
-  #endif
-  CPpmd7_Node_Ref;
+typedef Ppmd_Ref_Type(struct CPpmd7_Node_) CPpmd7_Node_Ref;
 
 typedef struct CPpmd7_Node_
 {
@@ -51,17 +47,13 @@ typedef struct CPpmd7_Node_
   CPpmd7_Node_Ref Prev;
 } CPpmd7_Node;
 
-#ifdef PPMD_32BIT
-  #define NODE(ptr) (ptr)
-#else
-  #define NODE(offs) ((CPpmd7_Node *)(p->Base + (offs)))
-#endif
+#define NODE(r)  Ppmd_GetPtr_Type(p, r, CPpmd7_Node)
 
 void Ppmd7_Construct(CPpmd7 *p)
 {
   unsigned i, k, m;
 
-  p->Base = 0;
+  p->Base = NULL;
 
   for (i = 0, k = 0; i < PPMD_NUM_INDEXES; i++)
   {
@@ -77,6 +69,7 @@ void Ppmd7_Construct(CPpmd7 *p)
 
   for (i = 0; i < 3; i++)
     p->NS2Indx[i] = (Byte)i;
+
   for (m = i, k = 1; i < 256; i++)
   {
     p->NS2Indx[i] = (Byte)m;
@@ -84,54 +77,63 @@ void Ppmd7_Construct(CPpmd7 *p)
       k = (++m) - 2;
   }
 
-  memset(p->HB2Flag, 0, 0x40);
-  memset(p->HB2Flag + 0x40, 8, 0x100 - 0x40);
+  memcpy(p->ExpEscape, PPMD7_kExpEscape, 16);
 }
 
+
 void Ppmd7_Free(CPpmd7 *p, ISzAllocPtr alloc)
 {
   ISzAlloc_Free(alloc, p->Base);
   p->Size = 0;
-  p->Base = 0;
+  p->Base = NULL;
 }
 
+
 BoolInt Ppmd7_Alloc(CPpmd7 *p, UInt32 size, ISzAllocPtr alloc)
 {
   if (!p->Base || p->Size != size)
   {
-    size_t size2;
     Ppmd7_Free(p, alloc);
-    size2 = 0
-      #ifndef PPMD_32BIT
-      + UNIT_SIZE
-      #endif
-      ;
-    p->AlignOffset =
-      #ifdef PPMD_32BIT
-        (4 - size) & 3;
-      #else
-        4 - (size & 3);
-      #endif
-    if ((p->Base = (Byte *)ISzAlloc_Alloc(alloc, p->AlignOffset + size + size2)) == 0)
+    p->AlignOffset = (4 - size) & 3;
+    if ((p->Base = (Byte *)ISzAlloc_Alloc(alloc, p->AlignOffset + size)) == NULL)
       return False;
     p->Size = size;
   }
   return True;
 }
 
+
+
+// ---------- Internal Memory Allocator ----------
+
+/* We can use CPpmd7_Node in list of free units (as in Ppmd8)
+   But we still need one additional list walk pass in GlueFreeBlocks().
+   So we use simple CPpmd_Void_Ref instead of CPpmd7_Node in InsertNode() / RemoveNode()
+*/
+
+#define EMPTY_NODE 0
+
+
 static void InsertNode(CPpmd7 *p, void *node, unsigned indx)
 {
   *((CPpmd_Void_Ref *)node) = p->FreeList[indx];
+  // ((CPpmd7_Node *)node)->Next = (CPpmd7_Node_Ref)p->FreeList[indx];
+
   p->FreeList[indx] = REF(node);
+
 }
 
+
 static void *RemoveNode(CPpmd7 *p, unsigned indx)
 {
   CPpmd_Void_Ref *node = (CPpmd_Void_Ref *)Ppmd7_GetPtr(p, p->FreeList[indx]);
   p->FreeList[indx] = *node;
+  // CPpmd7_Node *node = NODE((CPpmd7_Node_Ref)p->FreeList[indx]);
+  // p->FreeList[indx] = node->Next;
   return node;
 }
 
+
 static void SplitBlock(CPpmd7 *p, void *ptr, unsigned oldIndx, unsigned newIndx)
 {
   unsigned i, nu = I2U(oldIndx) - I2U(newIndx);
@@ -144,123 +146,167 @@ static void SplitBlock(CPpmd7 *p, void *ptr, unsigned oldIndx, unsigned newIndx)
   InsertNode(p, ptr, i);
 }
 
-static void GlueFreeBlocks(CPpmd7 *p)
+
+/* we use CPpmd7_Node_Union union to solve XLC -O2 strict pointer aliasing problem */
+
+typedef union _CPpmd7_Node_Union
 {
-  #ifdef PPMD_32BIT
-  CPpmd7_Node headItem;
-  CPpmd7_Node_Ref head = &headItem;
-  #else
-  CPpmd7_Node_Ref head = p->AlignOffset + p->Size;
-  #endif
-  
-  CPpmd7_Node_Ref n = head;
-  unsigned i;
+  CPpmd7_Node     Node;
+  CPpmd7_Node_Ref NextRef;
+} CPpmd7_Node_Union;
+
+/* Original PPmdH (Ppmd7) code uses doubly linked list in GlueFreeBlocks()
+   we use single linked list similar to Ppmd8 code */
 
+
+static void GlueFreeBlocks(CPpmd7 *p)
+{
+  /*
+  we use first UInt16 field of 12-bytes UNITs as record type stamp
+    CPpmd_State    { Byte Symbol; Byte Freq; : Freq != 0
+    CPpmd7_Context { UInt16 NumStats;        : NumStats != 0
+    CPpmd7_Node    { UInt16 Stamp            : Stamp == 0 for free record
+                                             : Stamp == 1 for head record and guard
+    The last 12-byte UNIT in the array always contains a 12-byte order-0 CPpmd7_Context record.
+  */
+  CPpmd7_Node_Ref head, n = 0;
+ 
   p->GlueCount = 255;
 
-  /* create doubly-linked list of free blocks */
-  for (i = 0; i < PPMD_NUM_INDEXES; i++)
+  
+  /* we set guard NODE at LoUnit */
+  if (p->LoUnit != p->HiUnit)
+    ((CPpmd7_Node *)(void *)p->LoUnit)->Stamp = 1;
+
   {
-    UInt16 nu = I2U(i);
-    CPpmd7_Node_Ref next = (CPpmd7_Node_Ref)p->FreeList[i];
-    p->FreeList[i] = 0;
-    while (next != 0)
+    /* Create list of free blocks.
+       We still need one additional list walk pass before Glue. */
+    unsigned i;
+    for (i = 0; i < PPMD_NUM_INDEXES; i++)
     {
-      CPpmd7_Node *node = NODE(next);
-      node->Next = n;
-      n = NODE(n)->Prev = next;
-      next = *(const CPpmd7_Node_Ref *)node;
-      node->Stamp = 0;
-      node->NU = (UInt16)nu;
+      const UInt16 nu = I2U_UInt16(i);
+      CPpmd7_Node_Ref next = (CPpmd7_Node_Ref)p->FreeList[i];
+      p->FreeList[i] = 0;
+      while (next != 0)
+      {
+        /* Don't change the order of the following commands: */
+        CPpmd7_Node_Union *un = (CPpmd7_Node_Union *)NODE(next);
+        const CPpmd7_Node_Ref tmp = next;
+        next = un->NextRef;
+        un->Node.Stamp = EMPTY_NODE;
+        un->Node.NU = nu;
+        un->Node.Next = n;
+        n = tmp;
+      }
     }
   }
-  NODE(head)->Stamp = 1;
-  NODE(head)->Next = n;
-  NODE(n)->Prev = head;
-  if (p->LoUnit != p->HiUnit)
-    ((CPpmd7_Node *)p->LoUnit)->Stamp = 1;
-  
-  /* Glue free blocks */
-  while (n != head)
+
+  head = n;
+  /* Glue and Fill must walk the list in same direction */
   {
-    CPpmd7_Node *node = NODE(n);
-    UInt32 nu = (UInt32)node->NU;
-    for (;;)
+    /* Glue free blocks */
+    CPpmd7_Node_Ref *prev = &head;
+    while (n)
     {
-      CPpmd7_Node *node2 = NODE(n) + nu;
-      nu += node2->NU;
-      if (node2->Stamp != 0 || nu >= 0x10000)
-        break;
-      NODE(node2->Prev)->Next = node2->Next;
-      NODE(node2->Next)->Prev = node2->Prev;
-      node->NU = (UInt16)nu;
+      CPpmd7_Node *node = NODE(n);
+      UInt32 nu = node->NU;
+      n = node->Next;
+      if (nu == 0)
+      {
+        *prev = n;
+        continue;
+      }
+      prev = &node->Next;
+      for (;;)
+      {
+        CPpmd7_Node *node2 = node + nu;
+        nu += node2->NU;
+        if (node2->Stamp != EMPTY_NODE || nu >= 0x10000)
+          break;
+        node->NU = (UInt16)nu;
+        node2->NU = 0;
+      }
     }
-    n = node->Next;
   }
-  
+
   /* Fill lists of free blocks */
-  for (n = NODE(head)->Next; n != head;)
+  for (n = head; n != 0;)
   {
     CPpmd7_Node *node = NODE(n);
-    unsigned nu;
-    CPpmd7_Node_Ref next = node->Next;
-    for (nu = node->NU; nu > 128; nu -= 128, node += 128)
+    UInt32 nu = node->NU;
+    unsigned i;
+    n = node->Next;
+    if (nu == 0)
+      continue;
+    for (; nu > 128; nu -= 128, node += 128)
       InsertNode(p, node, PPMD_NUM_INDEXES - 1);
     if (I2U(i = U2I(nu)) != nu)
     {
       unsigned k = I2U(--i);
-      InsertNode(p, node + k, nu - k - 1);
+      InsertNode(p, node + k, (unsigned)nu - k - 1);
     }
     InsertNode(p, node, i);
-    n = next;
   }
 }
 
+
+MY_NO_INLINE
 static void *AllocUnitsRare(CPpmd7 *p, unsigned indx)
 {
   unsigned i;
-  void *retVal;
+  
   if (p->GlueCount == 0)
   {
     GlueFreeBlocks(p);
     if (p->FreeList[indx] != 0)
       return RemoveNode(p, indx);
   }
+  
   i = indx;
+  
   do
   {
     if (++i == PPMD_NUM_INDEXES)
     {
       UInt32 numBytes = U2B(I2U(indx));
+      Byte *us = p->UnitsStart;
       p->GlueCount--;
-      return ((UInt32)(p->UnitsStart - p->Text) > numBytes) ? (p->UnitsStart -= numBytes) : (NULL);
+      return ((UInt32)(us - p->Text) > numBytes) ? (p->UnitsStart = us - numBytes) : NULL;
     }
   }
   while (p->FreeList[i] == 0);
-  retVal = RemoveNode(p, i);
-  SplitBlock(p, retVal, i, indx);
-  return retVal;
+
+  {
+    void *block = RemoveNode(p, i);
+    SplitBlock(p, block, i, indx);
+    return block;
+  }
 }
 
+
 static void *AllocUnits(CPpmd7 *p, unsigned indx)
 {
-  UInt32 numBytes;
   if (p->FreeList[indx] != 0)
     return RemoveNode(p, indx);
-  numBytes = U2B(I2U(indx));
-  if (numBytes <= (UInt32)(p->HiUnit - p->LoUnit))
   {
-    void *retVal = p->LoUnit;
-    p->LoUnit += numBytes;
-    return retVal;
+    UInt32 numBytes = U2B(I2U(indx));
+    Byte *lo = p->LoUnit;
+    if ((UInt32)(p->HiUnit - lo) >= numBytes)
+    {
+      p->LoUnit = lo + numBytes;
+      return lo;
+    }
   }
   return AllocUnitsRare(p, indx);
 }
 
+
 #define MyMem12Cpy(dest, src, num) \
-  { UInt32 *d = (UInt32 *)dest; const UInt32 *s = (const UInt32 *)src; UInt32 n = num; \
-    do { d[0] = s[0]; d[1] = s[1]; d[2] = s[2]; s += 3; d += 3; } while (--n); }
+  { UInt32 *d = (UInt32 *)dest; const UInt32 *z = (const UInt32 *)src; UInt32 n = num; \
+    do { d[0] = z[0]; d[1] = z[1]; d[2] = z[2]; z += 3; d += 3; } while (--n); }
+
 
+/*
 static void *ShrinkUnits(CPpmd7 *p, void *oldPtr, unsigned oldNU, unsigned newNU)
 {
   unsigned i0 = U2I(oldNU);
@@ -277,20 +323,25 @@ static void *ShrinkUnits(CPpmd7 *p, void *oldPtr, unsigned oldNU, unsigned newNU
   SplitBlock(p, oldPtr, i0, i1);
   return oldPtr;
 }
+*/
 
-#define SUCCESSOR(p) ((CPpmd_Void_Ref)((p)->SuccessorLow | ((UInt32)(p)->SuccessorHigh << 16)))
 
+#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p)
 static void SetSuccessor(CPpmd_State *p, CPpmd_Void_Ref v)
 {
-  (p)->SuccessorLow = (UInt16)((UInt32)(v) & 0xFFFF);
-  (p)->SuccessorHigh = (UInt16)(((UInt32)(v) >> 16) & 0xFFFF);
+  Ppmd_SET_SUCCESSOR(p, v);
 }
 
-static void RestartModel(CPpmd7 *p)
+
+
+MY_NO_INLINE
+static
+void RestartModel(CPpmd7 *p)
 {
-  unsigned i, k, m;
+  unsigned i, k;
 
   memset(p->FreeList, 0, sizeof(p->FreeList));
+  
   p->Text = p->Base + p->AlignOffset;
   p->HiUnit = p->Text + p->Size;
   p->LoUnit = p->UnitsStart = p->HiUnit - p->Size / 8 / UNIT_SIZE * 7 * UNIT_SIZE;
@@ -300,57 +351,110 @@ static void RestartModel(CPpmd7 *p)
   p->RunLength = p->InitRL = -(Int32)((p->MaxOrder < 12) ? p->MaxOrder : 12) - 1;
   p->PrevSuccess = 0;
 
-  p->MinContext = p->MaxContext = (CTX_PTR)(p->HiUnit -= UNIT_SIZE); /* AllocContext(p); */
-  p->MinContext->Suffix = 0;
-  p->MinContext->NumStats = 256;
-  p->MinContext->SummFreq = 256 + 1;
-  p->FoundState = (CPpmd_State *)p->LoUnit; /* AllocUnits(p, PPMD_NUM_INDEXES - 1); */
-  p->LoUnit += U2B(256 / 2);
-  p->MinContext->Stats = REF(p->FoundState);
-  for (i = 0; i < 256; i++)
   {
-    CPpmd_State *s = &p->FoundState[i];
-    s->Symbol = (Byte)i;
-    s->Freq = 1;
-    SetSuccessor(s, 0);
+    CPpmd7_Context *mc = (CTX_PTR)(void *)(p->HiUnit -= UNIT_SIZE); /* AllocContext(p); */
+    CPpmd_State *s = (CPpmd_State *)p->LoUnit; /* AllocUnits(p, PPMD_NUM_INDEXES - 1); */
+    
+    p->LoUnit += U2B(256 / 2);
+    p->MaxContext = p->MinContext = mc;
+    p->FoundState = s;
+
+    mc->NumStats = 256;
+    mc->Union2.SummFreq = 256 + 1;
+    mc->Union4.Stats = REF(s);
+    mc->Suffix = 0;
+
+    for (i = 0; i < 256; i++, s++)
+    {
+      s->Symbol = (Byte)i;
+      s->Freq = 1;
+      SetSuccessor(s, 0);
+    }
+
+    #ifdef PPMD7_ORDER_0_SUPPPORT
+    if (p->MaxOrder == 0)
+    {
+      CPpmd_Void_Ref r = REF(mc);
+      s = p->FoundState;
+      for (i = 0; i < 256; i++, s++)
+        SetSuccessor(s, r);
+      return;
+    }
+    #endif
   }
 
   for (i = 0; i < 128; i++)
+    
+    
+    
     for (k = 0; k < 8; k++)
     {
+      unsigned m;
       UInt16 *dest = p->BinSumm[i] + k;
       UInt16 val = (UInt16)(PPMD_BIN_SCALE - kInitBinEsc[k] / (i + 2));
       for (m = 0; m < 64; m += 8)
         dest[m] = val;
     }
-  
+
+    
   for (i = 0; i < 25; i++)
-    for (k = 0; k < 16; k++)
+  {
+
+    CPpmd_See *s = p->See[i];
+    
+    
+    
+    unsigned summ = ((5 * i + 10) << (PPMD_PERIOD_BITS - 4));
+    for (k = 0; k < 16; k++, s++)
     {
-      CPpmd_See *s = &p->See[i][k];
-      s->Summ = (UInt16)((5 * i + 10) << (s->Shift = PPMD_PERIOD_BITS - 4));
+      s->Summ = (UInt16)summ;
+      s->Shift = (PPMD_PERIOD_BITS - 4);
       s->Count = 4;
     }
+  }
+  
+  p->DummySee.Summ = 0; /* unused */
+  p->DummySee.Shift = PPMD_PERIOD_BITS;
+  p->DummySee.Count = 64; /* unused */
 }
 
+
 void Ppmd7_Init(CPpmd7 *p, unsigned maxOrder)
 {
   p->MaxOrder = maxOrder;
+
   RestartModel(p);
-  p->DummySee.Shift = PPMD_PERIOD_BITS;
-  p->DummySee.Summ = 0; /* unused */
-  p->DummySee.Count = 64; /* unused */
 }
 
-static CTX_PTR CreateSuccessors(CPpmd7 *p, BoolInt skip)
+
+
+/*
+  CreateSuccessors()
+  It's called when (FoundState->Successor) is RAW-Successor,
+  that is the link to position in Raw text.
+  So we create Context records and write the links to
+  FoundState->Successor and to identical RAW-Successors in suffix
+  contexts of MinContext.
+  
+  The function returns:
+  if (OrderFall == 0) then MinContext is already at MAX order,
+    { return pointer to new or existing context of same MAX order }
+  else
+    { return pointer to new real context that will be (Order+1) in comparison with MinContext }
+
+  It can also return a pointer to a real context of the same order.
+*/
+
+MY_NO_INLINE
+static CTX_PTR CreateSuccessors(CPpmd7 *p)
 {
-  CPpmd_State upState;
   CTX_PTR c = p->MinContext;
   CPpmd_Byte_Ref upBranch = (CPpmd_Byte_Ref)SUCCESSOR(p->FoundState);
-  CPpmd_State *ps[PPMD7_MAX_ORDER];
+  Byte newSym, newFreq;
   unsigned numPs = 0;
-  
-  if (!skip)
+  CPpmd_State *ps[PPMD7_MAX_ORDER];
+
+  if (p->OrderFall != 0)
     ps[numPs++] = p->FoundState;
   
   while (c->Suffix)
@@ -358,44 +462,70 @@ static CTX_PTR CreateSuccessors(CPpmd7 *p, BoolInt skip)
     CPpmd_Void_Ref successor;
     CPpmd_State *s;
     c = SUFFIX(c);
+    
+
     if (c->NumStats != 1)
     {
-      for (s = STATS(c); s->Symbol != p->FoundState->Symbol; s++);
+      Byte sym = p->FoundState->Symbol;
+      for (s = STATS(c); s->Symbol != sym; s++);
+
     }
     else
+    {
       s = ONE_STATE(c);
+
+    }
     successor = SUCCESSOR(s);
     if (successor != upBranch)
     {
+      // (c) is real record Context here,
       c = CTX(successor);
       if (numPs == 0)
+      {
+        // (c) is real record MAX Order Context here,
+        // So we don't need to create any new contexts.
         return c;
+      }
       break;
     }
     ps[numPs++] = s;
   }
   
-  upState.Symbol = *(const Byte *)Ppmd7_GetPtr(p, upBranch);
-  SetSuccessor(&upState, upBranch + 1);
+  // All created contexts will have single-symbol with new RAW-Successor
+  // All new RAW-Successors will point to next position in RAW text
+  // after FoundState->Successor
+
+  newSym = *(const Byte *)Ppmd7_GetPtr(p, upBranch);
+  upBranch++;
+  
   
   if (c->NumStats == 1)
-    upState.Freq = ONE_STATE(c)->Freq;
+    newFreq = ONE_STATE(c)->Freq;
   else
   {
     UInt32 cf, s0;
     CPpmd_State *s;
-    for (s = STATS(c); s->Symbol != upState.Symbol; s++);
-    cf = s->Freq - 1;
-    s0 = c->SummFreq - c->NumStats - cf;
-    upState.Freq = (Byte)(1 + ((2 * cf <= s0) ? (5 * cf > s0) : ((2 * cf + 3 * s0 - 1) / (2 * s0))));
+    for (s = STATS(c); s->Symbol != newSym; s++);
+    cf = (UInt32)s->Freq - 1;
+    s0 = (UInt32)c->Union2.SummFreq - c->NumStats - cf;
+    /*
+      cf - is frequency of symbol that will be Successor in new context records.
+      s0 - is cumulative frequency sum of other symbols from parent context.
+      max(newFreq)= (s->Freq + 1), when (s0 == 1)
+      we have requirement (Ppmd7Context_OneState()->Freq <= 128) in BinSumm[]
+      so (s->Freq < 128) - is requirement for multi-symbol contexts
+    */
+    newFreq = (Byte)(1 + ((2 * cf <= s0) ? (5 * cf > s0) : (2 * cf + s0 - 1) / (2 * s0) + 1));
   }
 
+  // Create new single-symbol contexts from low order to high order in loop
+
   do
   {
-    /* Create Child */
-    CTX_PTR c1; /* = AllocContext(p); */
+    CTX_PTR c1;
+    /* = AllocContext(p); */
     if (p->HiUnit != p->LoUnit)
-      c1 = (CTX_PTR)(p->HiUnit -= UNIT_SIZE);
+      c1 = (CTX_PTR)(void *)(p->HiUnit -= UNIT_SIZE);
     else if (p->FreeList[0] != 0)
       c1 = (CTX_PTR)RemoveNode(p, 0);
     else
@@ -404,8 +534,11 @@ static CTX_PTR CreateSuccessors(CPpmd7 *p, BoolInt skip)
       if (!c1)
         return NULL;
     }
+    
     c1->NumStats = 1;
-    *ONE_STATE(c1) = upState;
+    ONE_STATE(c1)->Symbol = newSym;
+    ONE_STATE(c1)->Freq = newFreq;
+    SetSuccessor(ONE_STATE(c1), upBranch);
     c1->Suffix = REF(c);
     SetSuccessor(ps[--numPs], REF(c1));
     c = c1;
@@ -415,21 +548,26 @@ static CTX_PTR CreateSuccessors(CPpmd7 *p, BoolInt skip)
   return c;
 }
 
-static void SwapStates(CPpmd_State *t1, CPpmd_State *t2)
-{
-  CPpmd_State tmp = *t1;
-  *t1 = *t2;
-  *t2 = tmp;
-}
 
-static void UpdateModel(CPpmd7 *p)
+
+#define SwapStates(s) \
+  { CPpmd_State tmp = s[0]; s[0] = s[-1]; s[-1] = tmp; }
+
+
+void Ppmd7_UpdateModel(CPpmd7 *p);
+MY_NO_INLINE
+void Ppmd7_UpdateModel(CPpmd7 *p)
 {
-  CPpmd_Void_Ref successor, fSuccessor = SUCCESSOR(p->FoundState);
-  CTX_PTR c;
+  CPpmd_Void_Ref maxSuccessor, minSuccessor;
+  CTX_PTR c, mc;
   unsigned s0, ns;
-  
+
+
+
   if (p->FoundState->Freq < MAX_FREQ / 4 && p->MinContext->Suffix != 0)
   {
+    /* Update Freqs in Suffix Context */
+
     c = SUFFIX(p->MinContext);
     
     if (c->NumStats == 1)
@@ -441,27 +579,39 @@ static void UpdateModel(CPpmd7 *p)
     else
     {
       CPpmd_State *s = STATS(c);
-      if (s->Symbol != p->FoundState->Symbol)
+      Byte sym = p->FoundState->Symbol;
+      
+      if (s->Symbol != sym)
       {
-        do { s++; } while (s->Symbol != p->FoundState->Symbol);
+        do
+        {
+          // s++; if (s->Symbol == sym) break;
+          s++;
+        }
+        while (s->Symbol != sym);
+        
         if (s[0].Freq >= s[-1].Freq)
         {
-          SwapStates(&s[0], &s[-1]);
+          SwapStates(s);
           s--;
         }
       }
+
       if (s->Freq < MAX_FREQ - 9)
       {
-        s->Freq += 2;
-        c->SummFreq += 2;
+        s->Freq = (Byte)(s->Freq + 2);
+        c->Union2.SummFreq = (UInt16)(c->Union2.SummFreq + 2);
       }
     }
   }
 
+  
   if (p->OrderFall == 0)
   {
-    p->MinContext = p->MaxContext = CreateSuccessors(p, True);
-    if (p->MinContext == 0)
+    /* MAX ORDER context */
+    /* (FoundState->Successor) is RAW-Successor. */
+    p->MaxContext = p->MinContext = CreateSuccessors(p);
+    if (!p->MinContext)
     {
       RestartModel(p);
       return;
@@ -469,45 +619,93 @@ static void UpdateModel(CPpmd7 *p)
     SetSuccessor(p->FoundState, REF(p->MinContext));
     return;
   }
+
+  
+  /* NON-MAX ORDER context */
   
-  *p->Text++ = p->FoundState->Symbol;
-  successor = REF(p->Text);
-  if (p->Text >= p->UnitsStart)
   {
-    RestartModel(p);
-    return;
+    Byte *text = p->Text;
+    *text++ = p->FoundState->Symbol;
+    p->Text = text;
+    if (text >= p->UnitsStart)
+    {
+      RestartModel(p);
+      return;
+    }
+    maxSuccessor = REF(text);
   }
   
-  if (fSuccessor)
+  minSuccessor = SUCCESSOR(p->FoundState);
+
+  if (minSuccessor)
   {
-    if (fSuccessor <= successor)
+    // there is Successor for FoundState in MinContext.
+    // So the next context will be one order higher than MinContext.
+    
+    if (minSuccessor <= maxSuccessor)
     {
-      CTX_PTR cs = CreateSuccessors(p, False);
-      if (cs == NULL)
+      // minSuccessor is RAW-Successor. So we will create real contexts records:
+      CTX_PTR cs = CreateSuccessors(p);
+      if (!cs)
       {
         RestartModel(p);
         return;
       }
-      fSuccessor = REF(cs);
+      minSuccessor = REF(cs);
     }
+
+    // minSuccessor now is real Context pointer that points to existing (Order+1) context
+    
     if (--p->OrderFall == 0)
     {
-      successor = fSuccessor;
+      /*
+      if we move to MaxOrder context, then minSuccessor will be common Successor for both:
+        MinContext that is (MaxOrder - 1)
+        MaxContext that is (MaxOrder)
+      so we don't need new RAW-Successor, and we can use real minSuccessor
+      as successors for both MinContext and MaxContext.
+      */
+      maxSuccessor = minSuccessor;
+      
+      /*
+      if (MaxContext != MinContext)
+      {
+        there was order fall from MaxOrder and we don't need current symbol
+        to transfer some RAW-Successors to real contexts.
+        So we roll back pointer in raw data for one position.
+      }
+      */
       p->Text -= (p->MaxContext != p->MinContext);
     }
   }
   else
   {
-    SetSuccessor(p->FoundState, successor);
-    fSuccessor = REF(p->MinContext);
+    /*
+    FoundState has NULL-Successor here.
+    And only root 0-order context can contain NULL-Successors.
+    We change Successor in FoundState to RAW-Successor,
+    And next context will be same 0-order root Context.
+    */
+    SetSuccessor(p->FoundState, maxSuccessor);
+    minSuccessor = REF(p->MinContext);
   }
-  
-  s0 = p->MinContext->SummFreq - (ns = p->MinContext->NumStats) - (p->FoundState->Freq - 1);
-  
-  for (c = p->MaxContext; c != p->MinContext; c = SUFFIX(c))
+
+  mc = p->MinContext;
+  c = p->MaxContext;
+
+  p->MaxContext = p->MinContext = CTX(minSuccessor);
+
+  if (c == mc)
+    return;
+
+  // s0 : is pure Escape Freq
+  s0 = mc->Union2.SummFreq - (ns = mc->NumStats) - ((unsigned)p->FoundState->Freq - 1);
+
+  do
   {
     unsigned ns1;
-    UInt32 cf, sf;
+    UInt32 sum;
+    
     if ((ns1 = c->NumStats) != 1)
     {
       if ((ns1 & 1) == 0)
@@ -527,80 +725,127 @@ static void UpdateModel(CPpmd7 *p)
           oldPtr = STATS(c);
           MyMem12Cpy(ptr, oldPtr, oldNU);
           InsertNode(p, oldPtr, i);
-          c->Stats = STATS_REF(ptr);
+          c->Union4.Stats = STATS_REF(ptr);
         }
       }
-      c->SummFreq = (UInt16)(c->SummFreq + (2 * ns1 < ns) + 2 * ((4 * ns1 <= ns) & (c->SummFreq <= 8 * ns1)));
+      sum = c->Union2.SummFreq;
+      /* max increase of Escape_Freq is 3 here.
+         total increase of Union2.SummFreq for all symbols is less than 256 here */
+      sum += (UInt32)(2 * ns1 < ns) + 2 * ((unsigned)(4 * ns1 <= ns) & (sum <= 8 * ns1));
+      /* original PPMdH uses 16-bit variable for (sum) here.
+         But (sum < 0x9000). So we don't truncate (sum) to 16-bit */
+      // sum = (UInt16)sum;
     }
     else
     {
+      // instead of One-symbol context we create 2-symbol context
       CPpmd_State *s = (CPpmd_State*)AllocUnits(p, 0);
       if (!s)
       {
         RestartModel(p);
         return;
       }
-      *s = *ONE_STATE(c);
-      c->Stats = REF(s);
-      if (s->Freq < MAX_FREQ / 4 - 1)
-        s->Freq <<= 1;
-      else
-        s->Freq = MAX_FREQ - 4;
-      c->SummFreq = (UInt16)(s->Freq + p->InitEsc + (ns > 3));
-    }
-    cf = 2 * (UInt32)p->FoundState->Freq * (c->SummFreq + 6);
-    sf = (UInt32)s0 + c->SummFreq;
-    if (cf < 6 * sf)
-    {
-      cf = 1 + (cf > sf) + (cf >= 4 * sf);
-      c->SummFreq += 3;
-    }
-    else
-    {
-      cf = 4 + (cf >= 9 * sf) + (cf >= 12 * sf) + (cf >= 15 * sf);
-      c->SummFreq = (UInt16)(c->SummFreq + cf);
+      {
+        unsigned freq = c->Union2.State2.Freq;
+        // s = *ONE_STATE(c);
+        s->Symbol = c->Union2.State2.Symbol;
+        s->Successor_0 = c->Union4.State4.Successor_0;
+        s->Successor_1 = c->Union4.State4.Successor_1;
+        // SetSuccessor(s, c->Union4.Stats);  // call it only for debug purposes to check the order of
+                                              // (Successor_0 and Successor_1) in LE/BE.
+        c->Union4.Stats = REF(s);
+        if (freq < MAX_FREQ / 4 - 1)
+          freq <<= 1;
+        else
+          freq = MAX_FREQ - 4;
+        // (max(s->freq) == 120), when we convert from 1-symbol into 2-symbol context
+        s->Freq = (Byte)freq;
+        // max(InitEsc = PPMD7_kExpEscape[*]) is 25. So the max(escapeFreq) is 26 here
+        sum = freq + p->InitEsc + (ns > 3);
+      }
     }
+    
     {
       CPpmd_State *s = STATS(c) + ns1;
-      SetSuccessor(s, successor);
+      UInt32 cf = 2 * (sum + 6) * (UInt32)p->FoundState->Freq;
+      UInt32 sf = (UInt32)s0 + sum;
       s->Symbol = p->FoundState->Symbol;
-      s->Freq = (Byte)cf;
       c->NumStats = (UInt16)(ns1 + 1);
+      SetSuccessor(s, maxSuccessor);
+      
+      if (cf < 6 * sf)
+      {
+        cf = (UInt32)1 + (cf > sf) + (cf >= 4 * sf);
+        sum += 3;
+        /* It can add (0, 1, 2) to Escape_Freq */
+      }
+      else
+      {
+        cf = (UInt32)4 + (cf >= 9 * sf) + (cf >= 12 * sf) + (cf >= 15 * sf);
+        sum += cf;
+      }
+     
+      c->Union2.SummFreq = (UInt16)sum;
+      s->Freq = (Byte)cf;
     }
+    c = SUFFIX(c);
   }
-  p->MaxContext = p->MinContext = CTX(fSuccessor);
+  while (c != mc);
 }
   
+
+
+MY_NO_INLINE
 static void Rescale(CPpmd7 *p)
 {
   unsigned i, adder, sumFreq, escFreq;
   CPpmd_State *stats = STATS(p->MinContext);
   CPpmd_State *s = p->FoundState;
+
+  /* Sort the list by Freq */
+  if (s != stats)
   {
     CPpmd_State tmp = *s;
-    for (; s != stats; s--)
+    do
       s[0] = s[-1];
+    while (--s != stats);
     *s = tmp;
   }
-  escFreq = p->MinContext->SummFreq - s->Freq;
-  s->Freq += 4;
-  adder = (p->OrderFall != 0);
-  s->Freq = (Byte)((s->Freq + adder) >> 1);
+
   sumFreq = s->Freq;
+  escFreq = p->MinContext->Union2.SummFreq - sumFreq;
+  
+  /*
+  if (p->OrderFall == 0), adder = 0 : it's     allowed to remove symbol from     MAX Order context
+  if (p->OrderFall != 0), adder = 1 : it's NOT allowed to remove symbol from NON-MAX Order context
+  */
+
+  adder = (p->OrderFall != 0);
+
+  #ifdef PPMD7_ORDER_0_SUPPPORT
+  adder |= (p->MaxOrder == 0); // we don't remove symbols from order-0 context
+  #endif
+
+  sumFreq = (sumFreq + 4 + adder) >> 1;
+  i = (unsigned)p->MinContext->NumStats - 1;
+  s->Freq = (Byte)sumFreq;
   
-  i = p->MinContext->NumStats - 1;
   do
   {
-    escFreq -= (++s)->Freq;
-    s->Freq = (Byte)((s->Freq + adder) >> 1);
-    sumFreq += s->Freq;
-    if (s[0].Freq > s[-1].Freq)
+    unsigned freq = (++s)->Freq;
+    escFreq -= freq;
+    freq = (freq + adder) >> 1;
+    sumFreq += freq;
+    s->Freq = (Byte)freq;
+    if (freq > s[-1].Freq)
     {
+      CPpmd_State tmp = *s;
       CPpmd_State *s1 = s;
-      CPpmd_State tmp = *s1;
       do
+      {
         s1[0] = s1[-1];
-      while (--s1 != stats && tmp.Freq > s1[-1].Freq);
+      }
+      while (--s1 != stats && freq > s1[-1].Freq);
       *s1 = tmp;
     }
   }
@@ -608,47 +853,89 @@ static void Rescale(CPpmd7 *p)
   
   if (s->Freq == 0)
   {
-    unsigned numStats = p->MinContext->NumStats;
-    unsigned n0, n1;
-    do { i++; } while ((--s)->Freq == 0);
+    /* Remove all items with Freq == 0 */
+    CPpmd7_Context *mc;
+    unsigned numStats, numStatsNew, n0, n1;
+    
+    i = 0; do { i++; } while ((--s)->Freq == 0);
+    
+    /* We increase (escFreq) for the number of removed symbols.
+       So we will have (0.5) increase for Escape_Freq on average per
+       removed symbol after Escape_Freq halving */
     escFreq += i;
-    p->MinContext->NumStats = (UInt16)(p->MinContext->NumStats - i);
-    if (p->MinContext->NumStats == 1)
+    mc = p->MinContext;
+    numStats = mc->NumStats;
+    numStatsNew = numStats - i;
+    mc->NumStats = (UInt16)(numStatsNew);
+    n0 = (numStats + 1) >> 1;
+    
+    if (numStatsNew == 1)
     {
-      CPpmd_State tmp = *stats;
+      /* Create Single-Symbol context */
+      unsigned freq = stats->Freq;
+      
       do
       {
-        tmp.Freq = (Byte)(tmp.Freq - (tmp.Freq >> 1));
         escFreq >>= 1;
+        freq = (freq + 1) >> 1;
       }
       while (escFreq > 1);
-      InsertNode(p, stats, U2I(((numStats + 1) >> 1)));
-      *(p->FoundState = ONE_STATE(p->MinContext)) = tmp;
+
+      s = ONE_STATE(mc);
+      *s = *stats;
+      s->Freq = (Byte)freq; // (freq <= 260 / 4)
+      p->FoundState = s;
+      InsertNode(p, stats, U2I(n0));
       return;
     }
-    n0 = (numStats + 1) >> 1;
-    n1 = (p->MinContext->NumStats + 1) >> 1;
+    
+    n1 = (numStatsNew + 1) >> 1;
     if (n0 != n1)
-      p->MinContext->Stats = STATS_REF(ShrinkUnits(p, stats, n0, n1));
+    {
+      // p->MinContext->Union4.Stats = STATS_REF(ShrinkUnits(p, stats, n0, n1));
+      unsigned i0 = U2I(n0);
+      unsigned i1 = U2I(n1);
+      if (i0 != i1)
+      {
+        if (p->FreeList[i1] != 0)
+        {
+          void *ptr = RemoveNode(p, i1);
+          p->MinContext->Union4.Stats = STATS_REF(ptr);
+          MyMem12Cpy(ptr, (const void *)stats, n1);
+          InsertNode(p, stats, i0);
+        }
+        else
+          SplitBlock(p, stats, i0, i1);
+      }
+    }
+  }
+  {
+    CPpmd7_Context *mc = p->MinContext;
+    mc->Union2.SummFreq = (UInt16)(sumFreq + escFreq - (escFreq >> 1));
+    // Escape_Freq halving here
+    p->FoundState = STATS(mc);
   }
-  p->MinContext->SummFreq = (UInt16)(sumFreq + escFreq - (escFreq >> 1));
-  p->FoundState = STATS(p->MinContext);
 }
 
+
 CPpmd_See *Ppmd7_MakeEscFreq(CPpmd7 *p, unsigned numMasked, UInt32 *escFreq)
 {
   CPpmd_See *see;
-  unsigned nonMasked = p->MinContext->NumStats - numMasked;
-  if (p->MinContext->NumStats != 256)
+  const CPpmd7_Context *mc = p->MinContext;
+  unsigned numStats = mc->NumStats;
+  if (numStats != 256)
   {
-    see = p->See[(unsigned)p->NS2Indx[(size_t)nonMasked - 1]] +
-        (nonMasked < (unsigned)SUFFIX(p->MinContext)->NumStats - p->MinContext->NumStats) +
-        2 * (unsigned)(p->MinContext->SummFreq < 11 * p->MinContext->NumStats) +
-        4 * (unsigned)(numMasked > nonMasked) +
+    unsigned nonMasked = numStats - numMasked;
+    see = p->See[(unsigned)p->NS2Indx[(size_t)nonMasked - 1]]
+        + (nonMasked < (unsigned)SUFFIX(mc)->NumStats - numStats)
+        + 2 * (unsigned)(mc->Union2.SummFreq < 11 * numStats)
+        + 4 * (unsigned)(numMasked > nonMasked) +
         p->HiBitsFlag;
     {
-      unsigned r = (see->Summ >> see->Shift);
-      see->Summ = (UInt16)(see->Summ - r);
+      // if (see->Summ) field is larger than 16-bit, we need only low 16 bits of Summ
+      unsigned summ = (UInt16)see->Summ; // & 0xFFFF
+      unsigned r = (summ >> see->Shift);
+      see->Summ = (UInt16)(summ - r);
       *escFreq = r + (r == 0);
     }
   }
@@ -660,53 +947,158 @@ CPpmd_See *Ppmd7_MakeEscFreq(CPpmd7 *p, unsigned numMasked, UInt32 *escFreq)
   return see;
 }
 
+
 static void NextContext(CPpmd7 *p)
 {
   CTX_PTR c = CTX(SUCCESSOR(p->FoundState));
-  if (p->OrderFall == 0 && (Byte *)c > p->Text)
-    p->MinContext = p->MaxContext = c;
+  if (p->OrderFall == 0 && (const Byte *)c > p->Text)
+    p->MaxContext = p->MinContext = c;
   else
-    UpdateModel(p);
+    Ppmd7_UpdateModel(p);
 }
 
+
 void Ppmd7_Update1(CPpmd7 *p)
 {
   CPpmd_State *s = p->FoundState;
-  s->Freq += 4;
-  p->MinContext->SummFreq += 4;
-  if (s[0].Freq > s[-1].Freq)
+  unsigned freq = s->Freq;
+  freq += 4;
+  p->MinContext->Union2.SummFreq = (UInt16)(p->MinContext->Union2.SummFreq + 4);
+  s->Freq = (Byte)freq;
+  if (freq > s[-1].Freq)
   {
-    SwapStates(&s[0], &s[-1]);
+    SwapStates(s);
     p->FoundState = --s;
-    if (s->Freq > MAX_FREQ)
+    if (freq > MAX_FREQ)
       Rescale(p);
   }
   NextContext(p);
 }
 
+
 void Ppmd7_Update1_0(CPpmd7 *p)
 {
-  p->PrevSuccess = (2 * p->FoundState->Freq > p->MinContext->SummFreq);
-  p->RunLength += p->PrevSuccess;
-  p->MinContext->SummFreq += 4;
-  if ((p->FoundState->Freq += 4) > MAX_FREQ)
+  CPpmd_State *s = p->FoundState;
+  CPpmd7_Context *mc = p->MinContext;
+  unsigned freq = s->Freq;
+  unsigned summFreq = mc->Union2.SummFreq;
+  p->PrevSuccess = (2 * freq > summFreq);
+  p->RunLength += (int)p->PrevSuccess;
+  mc->Union2.SummFreq = (UInt16)(summFreq + 4);
+  freq += 4;
+  s->Freq = (Byte)freq;
+  if (freq > MAX_FREQ)
     Rescale(p);
   NextContext(p);
 }
 
+
+/*
 void Ppmd7_UpdateBin(CPpmd7 *p)
 {
-  p->FoundState->Freq = (Byte)(p->FoundState->Freq + (p->FoundState->Freq < 128 ? 1: 0));
+  unsigned freq = p->FoundState->Freq;
+  p->FoundState->Freq = (Byte)(freq + (freq < 128));
   p->PrevSuccess = 1;
   p->RunLength++;
   NextContext(p);
 }
+*/
 
 void Ppmd7_Update2(CPpmd7 *p)
 {
-  p->MinContext->SummFreq += 4;
-  if ((p->FoundState->Freq += 4) > MAX_FREQ)
-    Rescale(p);
+  CPpmd_State *s = p->FoundState;
+  unsigned freq = s->Freq;
+  freq += 4;
   p->RunLength = p->InitRL;
-  UpdateModel(p);
+  p->MinContext->Union2.SummFreq = (UInt16)(p->MinContext->Union2.SummFreq + 4);
+  s->Freq = (Byte)freq;
+  if (freq > MAX_FREQ)
+    Rescale(p);
+  Ppmd7_UpdateModel(p);
+}
+
+
+
+/*
+PPMd Memory Map:
+{
+  [ 0 ]           contains the subset of the original raw text that is required to create context
+                  records. Some symbols are not written when the max order context was reached
+  [ Text ]        free area
+  [ UnitsStart ]  CPpmd_State vectors and CPpmd7_Context records
+  [ LoUnit ]      free area for CPpmd_State and CPpmd7_Context items
+  [ HiUnit ]      CPpmd7_Context records
+  [ Size ]        end of array
 }
+
+These addresses don't cross at any time.
+And the following conditions are true for addresses:
+  (0  <= Text < UnitsStart <= LoUnit <= HiUnit <= Size)
+
+Raw text is BYTE-aligned.
+The data in block [ UnitsStart ... Size ] consists of 12-byte aligned UNITs.
+
+Last UNIT of array at offset (Size - 12) is root order-0 CPpmd7_Context record.
+The code can free UNITs memory blocks that were allocated to store CPpmd_State vectors.
+The code doesn't free UNITs allocated for CPpmd7_Context records.
+
+The code calls RestartModel() when there is no free memory for allocation.
+RestartModel() resets the state to the original start state, with a full free block.
+
+
+The code allocates UNITs with the following order:
+
+Allocation of 1 UNIT for Context record
+  - from free space (HiUnit) down to (LoUnit)
+  - from FreeList[0]
+  - AllocUnitsRare()
+
+AllocUnits() for CPpmd_State vectors:
+  - from FreeList[i]
+  - from free space (LoUnit) up to (HiUnit)
+  - AllocUnitsRare()
+
+AllocUnitsRare()
+  - if (GlueCount == 0)
+       {  Glue lists, GlueCount = 255, allocate from FreeList[i] }
+  - loop for all higher sized FreeList[...] lists
+  - from (UnitsStart - Text), GlueCount--
+  - ERROR
+
+
+Each Record with Context contains the CPpmd_State vector, where each
+CPpmd_State contains the link to Successor.
+There are 3 types of Successor:
+  1) NULL-Successor   - NULL pointer. NULL-Successor links can be stored
+                        only in 0-order Root Context Record.
+                        We use 0 value as NULL-Successor
+  2) RAW-Successor    - the link to position in raw text,
+                        that "RAW-Successor" is being created after first
+                        occurrence of new symbol for some existing context record.
+                        (RAW-Successor > 0).
+  3) RECORD-Successor - the link to CPpmd7_Context record of (Order+1),
+                        that record is being created when we go via RAW-Successor again.
+
+For any successor at any time, the following conditions are true for Successor links:
+(NULL-Successor < RAW-Successor < UnitsStart <= RECORD-Successor)
+
+
+---------- Symbol Frequency, SummFreq and Range in Range_Coder ----------
+
+CPpmd7_Context::SummFreq = Sum(Stats[].Freq) + Escape_Freq
+
+The PPMd code tries to fulfill the condition:
+  (SummFreq <= (256 * 128 = RC::kBot))
+
+We have (Sum(Stats[].Freq) <= 256 * 124), because of (MAX_FREQ = 124)
+So (4 = 128 - 124) is average reserve for Escape_Freq for each symbol.
+If (CPpmd_State::Freq) is not aligned for 4, the reserve can be 5, 6 or 7.
+SummFreq and Escape_Freq can be changed in Rescale() and *Update*() functions.
+Rescale() can remove symbols only from max-order contexts. So Escape_Freq can increase after multiple calls of Rescale() for
+max-order context.
+
+When the PPMd code still breaks the (Total <= RC::Range) condition in the range coder,
+we have two ways to resolve that problem:
+  1) we can report error, if we want to keep compatibility with original PPMd code that has no fix for such cases.
+  2) we can reduce (Total) value to (RC::Range) by reducing (Escape_Freq) part of (Total) value.
+*/
diff --git a/deps/LZMA-SDK/C/Ppmd7.h b/deps/LZMA-SDK/C/Ppmd7.h
index cce93f120..297e35fe9 100644
--- a/deps/LZMA-SDK/C/Ppmd7.h
+++ b/deps/LZMA-SDK/C/Ppmd7.h
@@ -1,10 +1,8 @@
-/* Ppmd7.h -- PPMdH compression codec
-2018-07-04 : Igor Pavlov : Public domain
-This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
-
-/* This code supports virtual RangeDecoder and includes the implementation
-of RangeCoder from 7z, instead of RangeCoder from original PPMd var.H.
-If you need the compatibility with original PPMd var.H, you can use external RangeDecoder */
+/* Ppmd7.h -- Ppmd7 (PPMdH) compression codec
+2021-04-13 : Igor Pavlov : Public domain
+This code is based on:
+  PPMd var.H (2001): Dmitry Shkarin : Public domain */
+ 
 
 #ifndef __PPMD7_H
 #define __PPMD7_H
@@ -21,23 +19,56 @@ EXTERN_C_BEGIN
 
 struct CPpmd7_Context_;
 
-typedef
-  #ifdef PPMD_32BIT
-    struct CPpmd7_Context_ *
-  #else
-    UInt32
-  #endif
-  CPpmd7_Context_Ref;
+typedef Ppmd_Ref_Type(struct CPpmd7_Context_) CPpmd7_Context_Ref;
+
+// MY_CPU_pragma_pack_push_1
 
 typedef struct CPpmd7_Context_
 {
   UInt16 NumStats;
-  UInt16 SummFreq;
-  CPpmd_State_Ref Stats;
+
+
+  union
+  {
+    UInt16 SummFreq;
+    CPpmd_State2 State2;
+  } Union2;
+
+  union
+  {
+    CPpmd_State_Ref Stats;
+    CPpmd_State4 State4;
+  } Union4;
+
   CPpmd7_Context_Ref Suffix;
 } CPpmd7_Context;
 
-#define Ppmd7Context_OneState(p) ((CPpmd_State *)&(p)->SummFreq)
+// MY_CPU_pragma_pop
+
+#define Ppmd7Context_OneState(p) ((CPpmd_State *)&(p)->Union2)
+
+
+
+
+typedef struct
+{
+  UInt32 Range;
+  UInt32 Code;
+  UInt32 Low;
+  IByteIn *Stream;
+} CPpmd7_RangeDec;
+
+
+typedef struct
+{
+  UInt32 Range;
+  Byte Cache;
+  // Byte _dummy_[3];
+  UInt64 Low;
+  UInt64 CacheSize;
+  IByteOut *Stream;
+} CPpmd7z_RangeEnc;
+
 
 typedef struct
 {
@@ -48,17 +79,30 @@ typedef struct
 
   UInt32 Size;
   UInt32 GlueCount;
-  Byte *Base, *LoUnit, *HiUnit, *Text, *UnitsStart;
   UInt32 AlignOffset;
+  Byte *Base, *LoUnit, *HiUnit, *Text, *UnitsStart;
 
-  Byte Indx2Units[PPMD_NUM_INDEXES];
+
+  
+  
+  union
+  {
+    CPpmd7_RangeDec dec;
+    CPpmd7z_RangeEnc enc;
+  } rc;
+  
+  Byte Indx2Units[PPMD_NUM_INDEXES + 2]; // +2 for alignment
   Byte Units2Indx[128];
   CPpmd_Void_Ref FreeList[PPMD_NUM_INDEXES];
-  Byte NS2Indx[256], NS2BSIndx[256], HB2Flag[256];
+
+  Byte NS2BSIndx[256], NS2Indx[256];
+  Byte ExpEscape[16];
   CPpmd_See DummySee, See[25][16];
   UInt16 BinSumm[128][64];
+  // int LastSymbol;
 } CPpmd7;
 
+
 void Ppmd7_Construct(CPpmd7 *p);
 BoolInt Ppmd7_Alloc(CPpmd7 *p, UInt32 size, ISzAllocPtr alloc);
 void Ppmd7_Free(CPpmd7 *p, ISzAllocPtr alloc);
@@ -68,74 +112,69 @@ void Ppmd7_Init(CPpmd7 *p, unsigned maxOrder);
 
 /* ---------- Internal Functions ---------- */
 
-extern const Byte PPMD7_kExpEscape[16];
-
-#ifdef PPMD_32BIT
-  #define Ppmd7_GetPtr(p, ptr) (ptr)
-  #define Ppmd7_GetContext(p, ptr) (ptr)
-  #define Ppmd7_GetStats(p, ctx) ((ctx)->Stats)
-#else
-  #define Ppmd7_GetPtr(p, offs) ((void *)((p)->Base + (offs)))
-  #define Ppmd7_GetContext(p, offs) ((CPpmd7_Context *)Ppmd7_GetPtr((p), (offs)))
-  #define Ppmd7_GetStats(p, ctx) ((CPpmd_State *)Ppmd7_GetPtr((p), ((ctx)->Stats)))
-#endif
+#define Ppmd7_GetPtr(p, ptr)     Ppmd_GetPtr(p, ptr)
+#define Ppmd7_GetContext(p, ptr) Ppmd_GetPtr_Type(p, ptr, CPpmd7_Context)
+#define Ppmd7_GetStats(p, ctx)   Ppmd_GetPtr_Type(p, (ctx)->Union4.Stats, CPpmd_State)
 
 void Ppmd7_Update1(CPpmd7 *p);
 void Ppmd7_Update1_0(CPpmd7 *p);
 void Ppmd7_Update2(CPpmd7 *p);
-void Ppmd7_UpdateBin(CPpmd7 *p);
+
+#define PPMD7_HiBitsFlag_3(sym) ((((unsigned)sym + 0xC0) >> (8 - 3)) & (1 << 3))
+#define PPMD7_HiBitsFlag_4(sym) ((((unsigned)sym + 0xC0) >> (8 - 4)) & (1 << 4))
+// #define PPMD7_HiBitsFlag_3(sym) ((sym) < 0x40 ? 0 : (1 << 3))
+// #define PPMD7_HiBitsFlag_4(sym) ((sym) < 0x40 ? 0 : (1 << 4))
 
 #define Ppmd7_GetBinSumm(p) \
-    &p->BinSumm[(size_t)(unsigned)Ppmd7Context_OneState(p->MinContext)->Freq - 1][p->PrevSuccess + \
-    p->NS2BSIndx[(size_t)Ppmd7_GetContext(p, p->MinContext->Suffix)->NumStats - 1] + \
-    (p->HiBitsFlag = p->HB2Flag[p->FoundState->Symbol]) + \
-    2 * p->HB2Flag[(unsigned)Ppmd7Context_OneState(p->MinContext)->Symbol] + \
-    ((p->RunLength >> 26) & 0x20)]
+    &p->BinSumm[(size_t)(unsigned)Ppmd7Context_OneState(p->MinContext)->Freq - 1] \
+    [ p->PrevSuccess + ((p->RunLength >> 26) & 0x20) \
+    + p->NS2BSIndx[(size_t)Ppmd7_GetContext(p, p->MinContext->Suffix)->NumStats - 1] \
+    + PPMD7_HiBitsFlag_4(Ppmd7Context_OneState(p->MinContext)->Symbol) \
+    + (p->HiBitsFlag = PPMD7_HiBitsFlag_3(p->FoundState->Symbol)) ]
 
 CPpmd_See *Ppmd7_MakeEscFreq(CPpmd7 *p, unsigned numMasked, UInt32 *scale);
 
 
+/*
+We support two versions of Ppmd7 (PPMdH) methods that use same CPpmd7 structure:
+  1) Ppmd7a_*: original PPMdH
+  2) Ppmd7z_*: modified PPMdH with 7z Range Coder
+Ppmd7_*: the structures and functions that are common for both versions of PPMd7 (PPMdH)
+*/
+
 /* ---------- Decode ---------- */
 
-typedef struct IPpmd7_RangeDec IPpmd7_RangeDec;
+#define PPMD7_SYM_END    (-1)
+#define PPMD7_SYM_ERROR  (-2)
 
-struct IPpmd7_RangeDec
-{
-  UInt32 (*GetThreshold)(const IPpmd7_RangeDec *p, UInt32 total);
-  void (*Decode)(const IPpmd7_RangeDec *p, UInt32 start, UInt32 size);
-  UInt32 (*DecodeBit)(const IPpmd7_RangeDec *p, UInt32 size0);
-};
+/*
+You must set (CPpmd7::rc.dec.Stream) before Ppmd7*_RangeDec_Init()
 
-typedef struct
-{
-  IPpmd7_RangeDec vt;
-  UInt32 Range;
-  UInt32 Code;
-  IByteIn *Stream;
-} CPpmd7z_RangeDec;
+Ppmd7*_DecodeSymbol()
+out:
+  >= 0 : decoded byte
+    -1 : PPMD7_SYM_END   : End of payload marker
+    -2 : PPMD7_SYM_ERROR : Data error
+*/
 
-void Ppmd7z_RangeDec_CreateVTable(CPpmd7z_RangeDec *p);
-BoolInt Ppmd7z_RangeDec_Init(CPpmd7z_RangeDec *p);
-#define Ppmd7z_RangeDec_IsFinishedOK(p) ((p)->Code == 0)
+/* Ppmd7a_* : original PPMdH */
+BoolInt Ppmd7a_RangeDec_Init(CPpmd7_RangeDec *p);
+#define Ppmd7a_RangeDec_IsFinishedOK(p) ((p)->Code == 0)
+int Ppmd7a_DecodeSymbol(CPpmd7 *p);
 
-int Ppmd7_DecodeSymbol(CPpmd7 *p, const IPpmd7_RangeDec *rc);
+/* Ppmd7z_* : modified PPMdH with 7z Range Coder */
+BoolInt Ppmd7z_RangeDec_Init(CPpmd7_RangeDec *p);
+#define Ppmd7z_RangeDec_IsFinishedOK(p) ((p)->Code == 0)
+int Ppmd7z_DecodeSymbol(CPpmd7 *p);
+// Byte *Ppmd7z_DecodeSymbols(CPpmd7 *p, Byte *buf, const Byte *lim);
 
 
 /* ---------- Encode ---------- */
 
-typedef struct
-{
-  UInt64 Low;
-  UInt32 Range;
-  Byte Cache;
-  UInt64 CacheSize;
-  IByteOut *Stream;
-} CPpmd7z_RangeEnc;
-
-void Ppmd7z_RangeEnc_Init(CPpmd7z_RangeEnc *p);
-void Ppmd7z_RangeEnc_FlushData(CPpmd7z_RangeEnc *p);
-
-void Ppmd7_EncodeSymbol(CPpmd7 *p, CPpmd7z_RangeEnc *rc, int symbol);
+void Ppmd7z_Init_RangeEnc(CPpmd7 *p);
+void Ppmd7z_Flush_RangeEnc(CPpmd7 *p);
+// void Ppmd7z_EncodeSymbol(CPpmd7 *p, int symbol);
+void Ppmd7z_EncodeSymbols(CPpmd7 *p, const Byte *buf, const Byte *lim);
 
 EXTERN_C_END
  
diff --git a/deps/LZMA-SDK/C/Ppmd7Dec.c b/deps/LZMA-SDK/C/Ppmd7Dec.c
index 202640710..a18f0b873 100644
--- a/deps/LZMA-SDK/C/Ppmd7Dec.c
+++ b/deps/LZMA-SDK/C/Ppmd7Dec.c
@@ -1,6 +1,8 @@
-/* Ppmd7Dec.c -- PPMdH Decoder
-2018-07-04 : Igor Pavlov : Public domain
-This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
+/* Ppmd7Dec.c -- Ppmd7z (PPMdH with 7z Range Coder) Decoder
+2021-04-13 : Igor Pavlov : Public domain
+This code is based on:
+  PPMd var.H (2001): Dmitry Shkarin : Public domain */
+
 
 #include "Precomp.h"
 
@@ -8,184 +10,288 @@ This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
 
 #define kTopValue (1 << 24)
 
-BoolInt Ppmd7z_RangeDec_Init(CPpmd7z_RangeDec *p)
+
+#define READ_BYTE(p) IByteIn_Read((p)->Stream)
+
+BoolInt Ppmd7z_RangeDec_Init(CPpmd7_RangeDec *p)
 {
   unsigned i;
   p->Code = 0;
   p->Range = 0xFFFFFFFF;
-  if (IByteIn_Read(p->Stream) != 0)
+  if (READ_BYTE(p) != 0)
     return False;
   for (i = 0; i < 4; i++)
-    p->Code = (p->Code << 8) | IByteIn_Read(p->Stream);
+    p->Code = (p->Code << 8) | READ_BYTE(p);
   return (p->Code < 0xFFFFFFFF);
 }
 
-#define GET_Ppmd7z_RangeDec CPpmd7z_RangeDec *p = CONTAINER_FROM_VTBL(pp, CPpmd7z_RangeDec, vt);
- 
-static UInt32 Range_GetThreshold(const IPpmd7_RangeDec *pp, UInt32 total)
-{
-  GET_Ppmd7z_RangeDec
-  return p->Code / (p->Range /= total);
-}
+#define RC_NORM_BASE(p) if ((p)->Range < kTopValue) \
+  { (p)->Code = ((p)->Code << 8) | READ_BYTE(p); (p)->Range <<= 8;
 
-static void Range_Normalize(CPpmd7z_RangeDec *p)
-{
-  if (p->Range < kTopValue)
-  {
-    p->Code = (p->Code << 8) | IByteIn_Read(p->Stream);
-    p->Range <<= 8;
-    if (p->Range < kTopValue)
-    {
-      p->Code = (p->Code << 8) | IByteIn_Read(p->Stream);
-      p->Range <<= 8;
-    }
-  }
-}
+#define RC_NORM_1(p)  RC_NORM_BASE(p) }
+#define RC_NORM(p)    RC_NORM_BASE(p) RC_NORM_BASE(p) }}
 
-static void Range_Decode(const IPpmd7_RangeDec *pp, UInt32 start, UInt32 size)
-{
-  GET_Ppmd7z_RangeDec
-  p->Code -= start * p->Range;
-  p->Range *= size;
-  Range_Normalize(p);
-}
+// we must use only one type of Normalization from two: LOCAL or REMOTE
+#define RC_NORM_LOCAL(p)    // RC_NORM(p)
+#define RC_NORM_REMOTE(p)   RC_NORM(p)
 
-static UInt32 Range_DecodeBit(const IPpmd7_RangeDec *pp, UInt32 size0)
-{
-  GET_Ppmd7z_RangeDec
-  UInt32 newBound = (p->Range >> 14) * size0;
-  UInt32 symbol;
-  if (p->Code < newBound)
-  {
-    symbol = 0;
-    p->Range = newBound;
-  }
-  else
-  {
-    symbol = 1;
-    p->Code -= newBound;
-    p->Range -= newBound;
-  }
-  Range_Normalize(p);
-  return symbol;
-}
+#define R (&p->rc.dec)
 
-void Ppmd7z_RangeDec_CreateVTable(CPpmd7z_RangeDec *p)
+MY_FORCE_INLINE
+// MY_NO_INLINE
+static void RangeDec_Decode(CPpmd7 *p, UInt32 start, UInt32 size)
 {
-  p->vt.GetThreshold = Range_GetThreshold;
-  p->vt.Decode = Range_Decode;
-  p->vt.DecodeBit = Range_DecodeBit;
+
+  
+  R->Code -= start * R->Range;
+  R->Range *= size;
+  RC_NORM_LOCAL(R)
 }
 
+#define RC_Decode(start, size) RangeDec_Decode(p, start, size);
+#define RC_DecodeFinal(start, size) RC_Decode(start, size) RC_NORM_REMOTE(R)
+#define RC_GetThreshold(total) (R->Code / (R->Range /= (total)))
+
 
-#define MASK(sym) ((signed char *)charMask)[sym]
+#define CTX(ref) ((CPpmd7_Context *)Ppmd7_GetContext(p, ref))
+typedef CPpmd7_Context * CTX_PTR;
+#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p)
+void Ppmd7_UpdateModel(CPpmd7 *p);
 
-int Ppmd7_DecodeSymbol(CPpmd7 *p, const IPpmd7_RangeDec *rc)
+#define MASK(sym) ((unsigned char *)charMask)[sym]
+// MY_FORCE_INLINE
+// static
+int Ppmd7z_DecodeSymbol(CPpmd7 *p)
 {
   size_t charMask[256 / sizeof(size_t)];
+
   if (p->MinContext->NumStats != 1)
   {
     CPpmd_State *s = Ppmd7_GetStats(p, p->MinContext);
     unsigned i;
     UInt32 count, hiCnt;
-    if ((count = rc->GetThreshold(rc, p->MinContext->SummFreq)) < (hiCnt = s->Freq))
+    UInt32 summFreq = p->MinContext->Union2.SummFreq;
+
+    
+    
+    
+    count = RC_GetThreshold(summFreq);
+    hiCnt = count;
+    
+    if ((Int32)(count -= s->Freq) < 0)
     {
-      Byte symbol;
-      rc->Decode(rc, 0, s->Freq);
+      Byte sym;
+      RC_DecodeFinal(0, s->Freq);
       p->FoundState = s;
-      symbol = s->Symbol;
+      sym = s->Symbol;
       Ppmd7_Update1_0(p);
-      return symbol;
+      return sym;
     }
+  
     p->PrevSuccess = 0;
-    i = p->MinContext->NumStats - 1;
+    i = (unsigned)p->MinContext->NumStats - 1;
+    
     do
     {
-      if ((hiCnt += (++s)->Freq) > count)
+      if ((Int32)(count -= (++s)->Freq) < 0)
       {
-        Byte symbol;
-        rc->Decode(rc, hiCnt - s->Freq, s->Freq);
+        Byte sym;
+        RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq);
         p->FoundState = s;
-        symbol = s->Symbol;
+        sym = s->Symbol;
         Ppmd7_Update1(p);
-        return symbol;
+        return sym;
       }
     }
     while (--i);
-    if (count >= p->MinContext->SummFreq)
-      return -2;
-    p->HiBitsFlag = p->HB2Flag[p->FoundState->Symbol];
-    rc->Decode(rc, hiCnt, p->MinContext->SummFreq - hiCnt);
+    
+    if (hiCnt >= summFreq)
+      return PPMD7_SYM_ERROR;
+    
+    hiCnt -= count;
+    RC_Decode(hiCnt, summFreq - hiCnt);
+
+    p->HiBitsFlag = PPMD7_HiBitsFlag_3(p->FoundState->Symbol);
     PPMD_SetAllBitsIn256Bytes(charMask);
-    MASK(s->Symbol) = 0;
-    i = p->MinContext->NumStats - 1;
-    do { MASK((--s)->Symbol) = 0; } while (--i);
+    // i = p->MinContext->NumStats - 1;
+    // do { MASK((--s)->Symbol) = 0; } while (--i);
+    {
+      CPpmd_State *s2 = Ppmd7_GetStats(p, p->MinContext);
+      MASK(s->Symbol) = 0;
+      do
+      {
+        unsigned sym0 = s2[0].Symbol;
+        unsigned sym1 = s2[1].Symbol;
+        s2 += 2;
+        MASK(sym0) = 0;
+        MASK(sym1) = 0;
+      }
+      while (s2 < s);
+    }
   }
   else
   {
+    CPpmd_State *s = Ppmd7Context_OneState(p->MinContext);
     UInt16 *prob = Ppmd7_GetBinSumm(p);
-    if (rc->DecodeBit(rc, *prob) == 0)
+    UInt32 pr = *prob;
+    UInt32 size0 = (R->Range >> 14) * pr;
+    pr = PPMD_UPDATE_PROB_1(pr);
+
+    if (R->Code < size0)
     {
-      Byte symbol;
-      *prob = (UInt16)PPMD_UPDATE_PROB_0(*prob);
-      symbol = (p->FoundState = Ppmd7Context_OneState(p->MinContext))->Symbol;
-      Ppmd7_UpdateBin(p);
-      return symbol;
+      Byte sym;
+      *prob = (UInt16)(pr + (1 << PPMD_INT_BITS));
+      
+      // RangeDec_DecodeBit0(size0);
+      R->Range = size0;
+      RC_NORM_1(R)
+      /* we can use single byte normalization here because of
+         (min(BinSumm[][]) = 95) > (1 << (14 - 8)) */
+
+      // sym = (p->FoundState = Ppmd7Context_OneState(p->MinContext))->Symbol;
+      // Ppmd7_UpdateBin(p);
+      {
+        unsigned freq = s->Freq;
+        CTX_PTR c = CTX(SUCCESSOR(s));
+        sym = s->Symbol;
+        p->FoundState = s;
+        p->PrevSuccess = 1;
+        p->RunLength++;
+        s->Freq = (Byte)(freq + (freq < 128));
+        // NextContext(p);
+        if (p->OrderFall == 0 && (const Byte *)c > p->Text)
+          p->MaxContext = p->MinContext = c;
+        else
+          Ppmd7_UpdateModel(p);
+      }
+      return sym;
     }
-    *prob = (UInt16)PPMD_UPDATE_PROB_1(*prob);
-    p->InitEsc = PPMD7_kExpEscape[*prob >> 10];
+
+    *prob = (UInt16)pr;
+    p->InitEsc = p->ExpEscape[pr >> 10];
+
+    // RangeDec_DecodeBit1(size0);
+    
+    R->Code -= size0;
+    R->Range -= size0;
+    RC_NORM_LOCAL(R)
+    
     PPMD_SetAllBitsIn256Bytes(charMask);
     MASK(Ppmd7Context_OneState(p->MinContext)->Symbol) = 0;
     p->PrevSuccess = 0;
   }
+
   for (;;)
   {
-    CPpmd_State *ps[256], *s;
+    CPpmd_State *s, *s2;
     UInt32 freqSum, count, hiCnt;
+
     CPpmd_See *see;
-    unsigned i, num, numMasked = p->MinContext->NumStats;
+    CPpmd7_Context *mc;
+    unsigned numMasked;
+    RC_NORM_REMOTE(R)
+    mc = p->MinContext;
+    numMasked = mc->NumStats;
+
     do
     {
       p->OrderFall++;
-      if (!p->MinContext->Suffix)
-        return -1;
-      p->MinContext = Ppmd7_GetContext(p, p->MinContext->Suffix);
+      if (!mc->Suffix)
+        return PPMD7_SYM_END;
+      mc = Ppmd7_GetContext(p, mc->Suffix);
     }
-    while (p->MinContext->NumStats == numMasked);
-    hiCnt = 0;
-    s = Ppmd7_GetStats(p, p->MinContext);
-    i = 0;
-    num = p->MinContext->NumStats - numMasked;
-    do
+    while (mc->NumStats == numMasked);
+    
+    s = Ppmd7_GetStats(p, mc);
+
     {
-      int k = (int)(MASK(s->Symbol));
-      hiCnt += (s->Freq & k);
-      ps[i] = s++;
-      i -= k;
+      unsigned num = mc->NumStats;
+      unsigned num2 = num / 2;
+      
+      num &= 1;
+      hiCnt = (s->Freq & (unsigned)(MASK(s->Symbol))) & (0 - (UInt32)num);
+      s += num;
+      p->MinContext = mc;
+
+      do
+      {
+        unsigned sym0 = s[0].Symbol;
+        unsigned sym1 = s[1].Symbol;
+        s += 2;
+        hiCnt += (s[-2].Freq & (unsigned)(MASK(sym0)));
+        hiCnt += (s[-1].Freq & (unsigned)(MASK(sym1)));
+      }
+      while (--num2);
     }
-    while (i != num);
-    
+
     see = Ppmd7_MakeEscFreq(p, numMasked, &freqSum);
     freqSum += hiCnt;
-    count = rc->GetThreshold(rc, freqSum);
+
+
+
+
+    count = RC_GetThreshold(freqSum);
     
     if (count < hiCnt)
     {
-      Byte symbol;
-      CPpmd_State **pps = ps;
-      for (hiCnt = 0; (hiCnt += (*pps)->Freq) <= count; pps++);
-      s = *pps;
-      rc->Decode(rc, hiCnt - s->Freq, s->Freq);
+      Byte sym;
+
+      s = Ppmd7_GetStats(p, p->MinContext);
+      hiCnt = count;
+      // count -= s->Freq & (unsigned)(MASK(s->Symbol));
+      // if ((Int32)count >= 0)
+      {
+        for (;;)
+        {
+          count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break;
+          // count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break;
+        };
+      }
+      s--;
+      RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq);
+
+      // new (see->Summ) value can overflow over 16-bits in some rare cases
       Ppmd_See_Update(see);
       p->FoundState = s;
-      symbol = s->Symbol;
+      sym = s->Symbol;
       Ppmd7_Update2(p);
-      return symbol;
+      return sym;
     }
+
     if (count >= freqSum)
-      return -2;
-    rc->Decode(rc, hiCnt, freqSum - hiCnt);
+      return PPMD7_SYM_ERROR;
+    
+    RC_Decode(hiCnt, freqSum - hiCnt);
+
+    // We increase (see->Summ) for sum of Freqs of all non_Masked symbols.
+    // new (see->Summ) value can overflow over 16-bits in some rare cases
     see->Summ = (UInt16)(see->Summ + freqSum);
-    do { MASK(ps[--i]->Symbol) = 0; } while (i != 0);
+
+    s = Ppmd7_GetStats(p, p->MinContext);
+    s2 = s + p->MinContext->NumStats;
+    do
+    {
+      MASK(s->Symbol) = 0;
+      s++;
+    }
+    while (s != s2);
+  }
+}
+
+/*
+Byte *Ppmd7z_DecodeSymbols(CPpmd7 *p, Byte *buf, const Byte *lim)
+{
+  int sym = 0;
+  if (buf != lim)
+  do
+  {
+    sym = Ppmd7z_DecodeSymbol(p);
+    if (sym < 0)
+      break;
+    *buf = (Byte)sym;
   }
+  while (++buf < lim);
+  p->LastSymbol = sym;
+  return buf;
 }
+*/
diff --git a/deps/LZMA-SDK/C/Ppmd7Enc.c b/deps/LZMA-SDK/C/Ppmd7Enc.c
index a74d3002b..6af1ec15e 100644
--- a/deps/LZMA-SDK/C/Ppmd7Enc.c
+++ b/deps/LZMA-SDK/C/Ppmd7Enc.c
@@ -1,6 +1,8 @@
-/* Ppmd7Enc.c -- PPMdH Encoder
-2017-04-03 : Igor Pavlov : Public domain
-This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
+/* Ppmd7Enc.c -- Ppmd7z (PPMdH with 7z Range Coder) Encoder
+2021-04-13 : Igor Pavlov : Public domain
+This code is based on:
+  PPMd var.H (2001): Dmitry Shkarin : Public domain */
+
 
 #include "Precomp.h"
 
@@ -8,65 +10,60 @@ This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
 
 #define kTopValue (1 << 24)
 
-void Ppmd7z_RangeEnc_Init(CPpmd7z_RangeEnc *p)
+#define R (&p->rc.enc)
+
+void Ppmd7z_Init_RangeEnc(CPpmd7 *p)
 {
-  p->Low = 0;
-  p->Range = 0xFFFFFFFF;
-  p->Cache = 0;
-  p->CacheSize = 1;
+  R->Low = 0;
+  R->Range = 0xFFFFFFFF;
+  R->Cache = 0;
+  R->CacheSize = 1;
 }
 
-static void RangeEnc_ShiftLow(CPpmd7z_RangeEnc *p)
+MY_NO_INLINE
+static void RangeEnc_ShiftLow(CPpmd7 *p)
 {
-  if ((UInt32)p->Low < (UInt32)0xFF000000 || (unsigned)(p->Low >> 32) != 0)
+  if ((UInt32)R->Low < (UInt32)0xFF000000 || (unsigned)(R->Low >> 32) != 0)
   {
-    Byte temp = p->Cache;
+    Byte temp = R->Cache;
     do
     {
-      IByteOut_Write(p->Stream, (Byte)(temp + (Byte)(p->Low >> 32)));
+      IByteOut_Write(R->Stream, (Byte)(temp + (Byte)(R->Low >> 32)));
       temp = 0xFF;
     }
-    while (--p->CacheSize != 0);
-    p->Cache = (Byte)((UInt32)p->Low >> 24);
+    while (--R->CacheSize != 0);
+    R->Cache = (Byte)((UInt32)R->Low >> 24);
   }
-  p->CacheSize++;
-  p->Low = (UInt32)p->Low << 8;
+  R->CacheSize++;
+  R->Low = (UInt32)((UInt32)R->Low << 8);
 }
 
-static void RangeEnc_Encode(CPpmd7z_RangeEnc *p, UInt32 start, UInt32 size, UInt32 total)
-{
-  p->Low += start * (p->Range /= total);
-  p->Range *= size;
-  while (p->Range < kTopValue)
-  {
-    p->Range <<= 8;
-    RangeEnc_ShiftLow(p);
-  }
-}
+#define RC_NORM_BASE(p) if (R->Range < kTopValue) { R->Range <<= 8; RangeEnc_ShiftLow(p);
+#define RC_NORM_1(p) RC_NORM_BASE(p) }
+#define RC_NORM(p) RC_NORM_BASE(p) RC_NORM_BASE(p) }}
 
-static void RangeEnc_EncodeBit_0(CPpmd7z_RangeEnc *p, UInt32 size0)
-{
-  p->Range = (p->Range >> 14) * size0;
-  while (p->Range < kTopValue)
-  {
-    p->Range <<= 8;
-    RangeEnc_ShiftLow(p);
-  }
-}
+// we must use only one type of Normalization from two: LOCAL or REMOTE
+#define RC_NORM_LOCAL(p)    // RC_NORM(p)
+#define RC_NORM_REMOTE(p)   RC_NORM(p)
+
+/*
+#define RangeEnc_Encode(p, start, _size_) \
+  { UInt32 size = _size_; \
+    R->Low += start * R->Range; \
+    R->Range *= size; \
+    RC_NORM_LOCAL(p); }
+*/
 
-static void RangeEnc_EncodeBit_1(CPpmd7z_RangeEnc *p, UInt32 size0)
+MY_FORCE_INLINE
+// MY_NO_INLINE
+static void RangeEnc_Encode(CPpmd7 *p, UInt32 start, UInt32 size)
 {
-  UInt32 newBound = (p->Range >> 14) * size0;
-  p->Low += newBound;
-  p->Range -= newBound;
-  while (p->Range < kTopValue)
-  {
-    p->Range <<= 8;
-    RangeEnc_ShiftLow(p);
-  }
+  R->Low += start * R->Range;
+  R->Range *= size;
+  RC_NORM_LOCAL(p);
 }
 
-void Ppmd7z_RangeEnc_FlushData(CPpmd7z_RangeEnc *p)
+void Ppmd7z_Flush_RangeEnc(CPpmd7 *p)
 {
   unsigned i;
   for (i = 0; i < 5; i++)
@@ -74,31 +71,53 @@ void Ppmd7z_RangeEnc_FlushData(CPpmd7z_RangeEnc *p)
 }
 
 
-#define MASK(sym) ((signed char *)charMask)[sym]
 
-void Ppmd7_EncodeSymbol(CPpmd7 *p, CPpmd7z_RangeEnc *rc, int symbol)
+#define RC_Encode(start, size) RangeEnc_Encode(p, start, size);
+#define RC_EncodeFinal(start, size) RC_Encode(start, size); RC_NORM_REMOTE(p);
+
+#define CTX(ref) ((CPpmd7_Context *)Ppmd7_GetContext(p, ref))
+#define SUFFIX(ctx) CTX((ctx)->Suffix)
+typedef CPpmd7_Context * CTX_PTR;
+#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p)
+
+void Ppmd7_UpdateModel(CPpmd7 *p);
+
+#define MASK(sym) ((unsigned char *)charMask)[sym]
+
+MY_FORCE_INLINE
+static
+void Ppmd7z_EncodeSymbol(CPpmd7 *p, int symbol)
 {
   size_t charMask[256 / sizeof(size_t)];
+  
   if (p->MinContext->NumStats != 1)
   {
     CPpmd_State *s = Ppmd7_GetStats(p, p->MinContext);
     UInt32 sum;
     unsigned i;
+   
+
+    
+    
+    R->Range /= p->MinContext->Union2.SummFreq;
+    
     if (s->Symbol == symbol)
     {
-      RangeEnc_Encode(rc, 0, s->Freq, p->MinContext->SummFreq);
+      // R->Range /= p->MinContext->Union2.SummFreq;
+      RC_EncodeFinal(0, s->Freq);
       p->FoundState = s;
       Ppmd7_Update1_0(p);
       return;
     }
     p->PrevSuccess = 0;
     sum = s->Freq;
-    i = p->MinContext->NumStats - 1;
+    i = (unsigned)p->MinContext->NumStats - 1;
     do
     {
       if ((++s)->Symbol == symbol)
       {
-        RangeEnc_Encode(rc, sum, s->Freq, p->MinContext->SummFreq);
+        // R->Range /= p->MinContext->Union2.SummFreq;
+        RC_EncodeFinal(sum, s->Freq);
         p->FoundState = s;
         Ppmd7_Update1(p);
         return;
@@ -106,82 +125,199 @@ void Ppmd7_EncodeSymbol(CPpmd7 *p, CPpmd7z_RangeEnc *rc, int symbol)
       sum += s->Freq;
     }
     while (--i);
+
+    // R->Range /= p->MinContext->Union2.SummFreq;
+    RC_Encode(sum, p->MinContext->Union2.SummFreq - sum);
     
-    p->HiBitsFlag = p->HB2Flag[p->FoundState->Symbol];
+    p->HiBitsFlag = PPMD7_HiBitsFlag_3(p->FoundState->Symbol);
     PPMD_SetAllBitsIn256Bytes(charMask);
-    MASK(s->Symbol) = 0;
-    i = p->MinContext->NumStats - 1;
-    do { MASK((--s)->Symbol) = 0; } while (--i);
-    RangeEnc_Encode(rc, sum, p->MinContext->SummFreq - sum, p->MinContext->SummFreq);
+    // MASK(s->Symbol) = 0;
+    // i = p->MinContext->NumStats - 1;
+    // do { MASK((--s)->Symbol) = 0; } while (--i);
+    {
+      CPpmd_State *s2 = Ppmd7_GetStats(p, p->MinContext);
+      MASK(s->Symbol) = 0;
+      do
+      {
+        unsigned sym0 = s2[0].Symbol;
+        unsigned sym1 = s2[1].Symbol;
+        s2 += 2;
+        MASK(sym0) = 0;
+        MASK(sym1) = 0;
+      }
+      while (s2 < s);
+    }
   }
   else
   {
     UInt16 *prob = Ppmd7_GetBinSumm(p);
     CPpmd_State *s = Ppmd7Context_OneState(p->MinContext);
+    UInt32 pr = *prob;
+    UInt32 bound = (R->Range >> 14) * pr;
+    pr = PPMD_UPDATE_PROB_1(pr);
     if (s->Symbol == symbol)
     {
-      RangeEnc_EncodeBit_0(rc, *prob);
-      *prob = (UInt16)PPMD_UPDATE_PROB_0(*prob);
-      p->FoundState = s;
-      Ppmd7_UpdateBin(p);
+      *prob = (UInt16)(pr + (1 << PPMD_INT_BITS));
+      // RangeEnc_EncodeBit_0(p, bound);
+      R->Range = bound;
+      RC_NORM_1(p);
+      
+      // p->FoundState = s;
+      // Ppmd7_UpdateBin(p);
+      {
+        unsigned freq = s->Freq;
+        CTX_PTR c = CTX(SUCCESSOR(s));
+        p->FoundState = s;
+        p->PrevSuccess = 1;
+        p->RunLength++;
+        s->Freq = (Byte)(freq + (freq < 128));
+        // NextContext(p);
+        if (p->OrderFall == 0 && (const Byte *)c > p->Text)
+          p->MaxContext = p->MinContext = c;
+        else
+          Ppmd7_UpdateModel(p);
+      }
       return;
     }
-    else
-    {
-      RangeEnc_EncodeBit_1(rc, *prob);
-      *prob = (UInt16)PPMD_UPDATE_PROB_1(*prob);
-      p->InitEsc = PPMD7_kExpEscape[*prob >> 10];
-      PPMD_SetAllBitsIn256Bytes(charMask);
-      MASK(s->Symbol) = 0;
-      p->PrevSuccess = 0;
-    }
+
+    *prob = (UInt16)pr;
+    p->InitEsc = p->ExpEscape[pr >> 10];
+    // RangeEnc_EncodeBit_1(p, bound);
+    R->Low += bound;
+    R->Range -= bound;
+    RC_NORM_LOCAL(p)
+    
+    PPMD_SetAllBitsIn256Bytes(charMask);
+    MASK(s->Symbol) = 0;
+    p->PrevSuccess = 0;
   }
+
   for (;;)
   {
-    UInt32 escFreq;
     CPpmd_See *see;
     CPpmd_State *s;
-    UInt32 sum;
-    unsigned i, numMasked = p->MinContext->NumStats;
+    UInt32 sum, escFreq;
+    CPpmd7_Context *mc;
+    unsigned i, numMasked;
+    
+    RC_NORM_REMOTE(p)
+
+    mc = p->MinContext;
+    numMasked = mc->NumStats;
+
     do
     {
       p->OrderFall++;
-      if (!p->MinContext->Suffix)
+      if (!mc->Suffix)
         return; /* EndMarker (symbol = -1) */
-      p->MinContext = Ppmd7_GetContext(p, p->MinContext->Suffix);
+      mc = Ppmd7_GetContext(p, mc->Suffix);
+      i = mc->NumStats;
     }
-    while (p->MinContext->NumStats == numMasked);
+    while (i == numMasked);
+
+    p->MinContext = mc;
     
-    see = Ppmd7_MakeEscFreq(p, numMasked, &escFreq);
-    s = Ppmd7_GetStats(p, p->MinContext);
+    // see = Ppmd7_MakeEscFreq(p, numMasked, &escFreq);
+    {
+      if (i != 256)
+      {
+        unsigned nonMasked = i - numMasked;
+        see = p->See[(unsigned)p->NS2Indx[(size_t)nonMasked - 1]]
+            + p->HiBitsFlag
+            + (nonMasked < (unsigned)SUFFIX(mc)->NumStats - i)
+            + 2 * (unsigned)(mc->Union2.SummFreq < 11 * i)
+            + 4 * (unsigned)(numMasked > nonMasked);
+        {
+          // if (see->Summ) field is larger than 16-bit, we need only low 16 bits of Summ
+          unsigned summ = (UInt16)see->Summ; // & 0xFFFF
+          unsigned r = (summ >> see->Shift);
+          see->Summ = (UInt16)(summ - r);
+          escFreq = r + (r == 0);
+        }
+      }
+      else
+      {
+        see = &p->DummySee;
+        escFreq = 1;
+      }
+    }
+
+    s = Ppmd7_GetStats(p, mc);
     sum = 0;
-    i = p->MinContext->NumStats;
+    // i = mc->NumStats;
+
     do
     {
-      int cur = s->Symbol;
-      if (cur == symbol)
+      unsigned cur = s->Symbol;
+      if ((int)cur == symbol)
       {
         UInt32 low = sum;
-        CPpmd_State *s1 = s;
-        do
+        UInt32 freq = s->Freq;
+        unsigned num2;
+
+        Ppmd_See_Update(see);
+        p->FoundState = s;
+        sum += escFreq;
+
+        num2 = i / 2;
+        i &= 1;
+        sum += freq & (0 - (UInt32)i);
+        if (num2 != 0)
         {
-          sum += (s->Freq & (int)(MASK(s->Symbol)));
-          s++;
+          s += i;
+          for (;;)
+          {
+            unsigned sym0 = s[0].Symbol;
+            unsigned sym1 = s[1].Symbol;
+            s += 2;
+            sum += (s[-2].Freq & (unsigned)(MASK(sym0)));
+            sum += (s[-1].Freq & (unsigned)(MASK(sym1)));
+            if (--num2 == 0)
+              break;
+          }
         }
-        while (--i);
-        RangeEnc_Encode(rc, low, s1->Freq, sum + escFreq);
-        Ppmd_See_Update(see);
-        p->FoundState = s1;
+
+        
+        R->Range /= sum;
+        RC_EncodeFinal(low, freq);
         Ppmd7_Update2(p);
         return;
       }
-      sum += (s->Freq & (int)(MASK(cur)));
-      MASK(cur) = 0;
+      sum += (s->Freq & (unsigned)(MASK(cur)));
       s++;
     }
     while (--i);
     
-    RangeEnc_Encode(rc, sum, escFreq, sum + escFreq);
-    see->Summ = (UInt16)(see->Summ + sum + escFreq);
+    {
+      UInt32 total = sum + escFreq;
+      see->Summ = (UInt16)(see->Summ + total);
+
+      R->Range /= total;
+      RC_Encode(sum, escFreq);
+    }
+
+    {
+      CPpmd_State *s2 = Ppmd7_GetStats(p, p->MinContext);
+      s--;
+      MASK(s->Symbol) = 0;
+      do
+      {
+        unsigned sym0 = s2[0].Symbol;
+        unsigned sym1 = s2[1].Symbol;
+        s2 += 2;
+        MASK(sym0) = 0;
+        MASK(sym1) = 0;
+      }
+      while (s2 < s);
+    }
+  }
+}
+
+
+void Ppmd7z_EncodeSymbols(CPpmd7 *p, const Byte *buf, const Byte *lim) /* encodes the bytes in [buf, lim) through (p), in order */
+{
+  for (; buf < lim; buf++) /* (lim) is exclusive: nothing is encoded when buf >= lim */
+  {
+    Ppmd7z_EncodeSymbol(p, *buf); /* one call per input byte */
   }
 }
diff --git a/deps/LZMA-SDK/C/Sha256.c b/deps/LZMA-SDK/C/Sha256.c
index 90994e5ab..c03b75afe 100644
--- a/deps/LZMA-SDK/C/Sha256.c
+++ b/deps/LZMA-SDK/C/Sha256.c
@@ -1,5 +1,5 @@
-/* Crypto/Sha256.c -- SHA-256 Hash
-2017-04-03 : Igor Pavlov : Public domain
+/* Sha256.c -- SHA-256 Hash
+2021-04-01 : Igor Pavlov : Public domain
 This code is based on public domain code from Wei Dai's Crypto++ library. */
 
 #include "Precomp.h"
@@ -10,16 +10,107 @@ This code is based on public domain code from Wei Dai's Crypto++ library. */
 #include "RotateDefs.h"
 #include "Sha256.h"
 
+#if defined(_MSC_VER) && (_MSC_VER < 1900)
+// #define USE_MY_MM
+#endif
+
+#ifdef MY_CPU_X86_OR_AMD64
+  #ifdef _MSC_VER
+    #if _MSC_VER >= 1200
+      #define _SHA_SUPPORTED
+    #endif
+  #elif defined(__clang__)
+    #if (__clang_major__ >= 8) // fix that check
+      #define _SHA_SUPPORTED
+    #endif
+  #elif defined(__GNUC__)
+    #if (__GNUC__ >= 8) // fix that check
+      #define _SHA_SUPPORTED
+    #endif
+  #elif defined(__INTEL_COMPILER)
+    #if (__INTEL_COMPILER >= 1800) // fix that check
+      #define _SHA_SUPPORTED
+    #endif
+  #endif
+#elif defined(MY_CPU_ARM_OR_ARM64)
+  #ifdef _MSC_VER
+    #if _MSC_VER >= 1910
+      #define _SHA_SUPPORTED
+    #endif
+  #elif defined(__clang__)
+    #if (__clang_major__ >= 8) // fix that check
+      #define _SHA_SUPPORTED
+    #endif
+  #elif defined(__GNUC__)
+    #if (__GNUC__ >= 6) // fix that check
+      #define _SHA_SUPPORTED
+    #endif
+  #endif
+#endif
+
+void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks);
+
+#ifdef _SHA_SUPPORTED
+  void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
+
+  static SHA256_FUNC_UPDATE_BLOCKS g_FUNC_UPDATE_BLOCKS = Sha256_UpdateBlocks;
+  static SHA256_FUNC_UPDATE_BLOCKS g_FUNC_UPDATE_BLOCKS_HW;
+
+  #define UPDATE_BLOCKS(p) p->func_UpdateBlocks
+#else
+  #define UPDATE_BLOCKS(p) Sha256_UpdateBlocks
+#endif
+
+
+BoolInt Sha256_SetFunction(CSha256 *p, unsigned algo)
+{
+  SHA256_FUNC_UPDATE_BLOCKS func = Sha256_UpdateBlocks; /* software routine: used for SHA256_ALGO_SW and as the fallback */
+  /* Returns False and leaves p->func_UpdateBlocks unchanged when (algo) cannot be honoured; True otherwise. */
+  #ifdef _SHA_SUPPORTED
+    if (algo != SHA256_ALGO_SW)
+    {
+      if (algo == SHA256_ALGO_DEFAULT)
+        func = g_FUNC_UPDATE_BLOCKS; /* whatever Sha256Prepare() selected; the software routine until then */
+      else
+      {
+        if (algo != SHA256_ALGO_HW)
+          return False; /* unknown algo value */
+        func = g_FUNC_UPDATE_BLOCKS_HW; /* stays NULL unless Sha256Prepare() detected hardware support */
+        if (!func)
+          return False;
+      }
+    }
+  #else
+    if (algo > SHA256_ALGO_SW) /* only SHA256_ALGO_DEFAULT and SHA256_ALGO_SW exist without hardware code */
+      return False;
+  #endif
+
+  p->func_UpdateBlocks = func;
+  return True;
+}
+
+
 /* define it for speed optimization */
-#ifndef _SFX
-#define _SHA256_UNROLL
-#define _SHA256_UNROLL2
+
+#ifdef _SFX
+  #define STEP_PRE 1
+  #define STEP_MAIN 1
+#else
+  #define STEP_PRE 2
+  #define STEP_MAIN 4
+  // #define _SHA256_UNROLL
 #endif
 
-/* #define _SHA256_UNROLL2 */
+#if STEP_MAIN != 16
+  #define _SHA256_BIG_W
+#endif
 
-void Sha256_Init(CSha256 *p)
+
+
+
+void Sha256_InitState(CSha256 *p)
 {
+  p->count = 0;
   p->state[0] = 0x6a09e667;
   p->state[1] = 0xbb67ae85;
   p->state[2] = 0x3c6ef372;
@@ -28,7 +119,17 @@ void Sha256_Init(CSha256 *p)
   p->state[5] = 0x9b05688c;
   p->state[6] = 0x1f83d9ab;
   p->state[7] = 0x5be0cd19;
-  p->count = 0;
+}
+
+void Sha256_Init(CSha256 *p) /* selects the default block routine, then resets count and state via Sha256_InitState() */
+{
+  p->func_UpdateBlocks =
+  #ifdef _SHA_SUPPORTED
+      g_FUNC_UPDATE_BLOCKS; /* default routine; Sha256Prepare() may have switched it to the hardware one */
+  #else
+      NULL; /* not called in this configuration: UPDATE_BLOCKS(p) expands to Sha256_UpdateBlocks */
+  #endif
+  Sha256_InitState(p);
 }
 
 #define S0(x) (rotrFixed(x, 2) ^ rotrFixed(x,13) ^ rotrFixed(x, 22))
@@ -36,61 +137,100 @@ void Sha256_Init(CSha256 *p)
 #define s0(x) (rotrFixed(x, 7) ^ rotrFixed(x,18) ^ (x >> 3))
 #define s1(x) (rotrFixed(x,17) ^ rotrFixed(x,19) ^ (x >> 10))
 
-#define blk0(i) (W[i])
-#define blk2(i) (W[i] += s1(W[((i)-2)&15]) + W[((i)-7)&15] + s0(W[((i)-15)&15]))
-
 #define Ch(x,y,z) (z^(x&(y^z)))
 #define Maj(x,y,z) ((x&y)|(z&(x|y)))
 
-#ifdef _SHA256_UNROLL2
-
-#define R(a,b,c,d,e,f,g,h, i) \
-    h += S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + (j ? blk2(i) : blk0(i)); \
-    d += h; \
-    h += S0(a) + Maj(a, b, c)
 
-#define RX_8(i) \
-  R(a,b,c,d,e,f,g,h, i); \
-  R(h,a,b,c,d,e,f,g, i+1); \
-  R(g,h,a,b,c,d,e,f, i+2); \
-  R(f,g,h,a,b,c,d,e, i+3); \
-  R(e,f,g,h,a,b,c,d, i+4); \
-  R(d,e,f,g,h,a,b,c, i+5); \
-  R(c,d,e,f,g,h,a,b, i+6); \
-  R(b,c,d,e,f,g,h,a, i+7)
+#define W_PRE(i) (W[(i) + (size_t)(j)] = GetBe32(data + ((size_t)(j) + i) * 4))
 
-#define RX_16  RX_8(0); RX_8(8);
+#define blk2_main(j, i)  s1(w(j, (i)-2)) + w(j, (i)-7) + s0(w(j, (i)-15))
 
+#ifdef _SHA256_BIG_W
+    // we use +i instead of +(i) to change the order to solve CLANG compiler warning for signed/unsigned.
+    #define w(j, i)     W[(size_t)(j) + i]
+    #define blk2(j, i)  (w(j, i) = w(j, (i)-16) + blk2_main(j, i))
 #else
+    #if STEP_MAIN == 16
+        #define w(j, i)  W[(i) & 15]
+    #else
+        #define w(j, i)  W[((size_t)(j) + (i)) & 15]
+    #endif
+    #define blk2(j, i)  (w(j, i) += blk2_main(j, i))
+#endif
 
-#define a(i) T[(0-(i))&7]
-#define b(i) T[(1-(i))&7]
-#define c(i) T[(2-(i))&7]
-#define d(i) T[(3-(i))&7]
-#define e(i) T[(4-(i))&7]
-#define f(i) T[(5-(i))&7]
-#define g(i) T[(6-(i))&7]
-#define h(i) T[(7-(i))&7]
-
-#define R(i) \
-    h(i) += S1(e(i)) + Ch(e(i),f(i),g(i)) + K[(i)+(size_t)(j)] + (j ? blk2(i) : blk0(i)); \
-    d(i) += h(i); \
-    h(i) += S0(a(i)) + Maj(a(i), b(i), c(i)) \
+#define W_MAIN(i)  blk2(j, i)
 
-#ifdef _SHA256_UNROLL
 
-#define RX_8(i)  R(i+0); R(i+1); R(i+2); R(i+3); R(i+4); R(i+5); R(i+6); R(i+7);
-#define RX_16  RX_8(0); RX_8(8);
+#define T1(wx, i) \
+    tmp = h + S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + wx(i); \
+    h = g; \
+    g = f; \
+    f = e; \
+    e = d + tmp; \
+    tmp += S0(a) + Maj(a, b, c); \
+    d = c; \
+    c = b; \
+    b = a; \
+    a = tmp; \
 
-#else
+#define R1_PRE(i)  T1( W_PRE, i)
+#define R1_MAIN(i) T1( W_MAIN, i)
 
-#define RX_16  unsigned i; for (i = 0; i < 16; i++) { R(i); }
+#if (!defined(_SHA256_UNROLL) || STEP_MAIN < 8) && (STEP_MAIN >= 4)
+#define R2_MAIN(i) \
+    R1_MAIN(i) \
+    R1_MAIN(i + 1) \
 
 #endif
 
+
+
+#if defined(_SHA256_UNROLL) && STEP_MAIN >= 8
+
+#define T4( a,b,c,d,e,f,g,h, wx, i) \
+    h += S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + wx(i); \
+    tmp = h; \
+    h += d; \
+    d = tmp + S0(a) + Maj(a, b, c); \
+
+#define R4( wx, i) \
+    T4 ( a,b,c,d,e,f,g,h, wx, (i  )); \
+    T4 ( d,a,b,c,h,e,f,g, wx, (i+1)); \
+    T4 ( c,d,a,b,g,h,e,f, wx, (i+2)); \
+    T4 ( b,c,d,a,f,g,h,e, wx, (i+3)); \
+
+#define R4_PRE(i)  R4( W_PRE, i)
+#define R4_MAIN(i) R4( W_MAIN, i)
+
+
+#define T8( a,b,c,d,e,f,g,h, wx, i) \
+    h += S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + wx(i); \
+    d += h; \
+    h += S0(a) + Maj(a, b, c); \
+
+#define R8( wx, i) \
+    T8 ( a,b,c,d,e,f,g,h, wx, i  ); \
+    T8 ( h,a,b,c,d,e,f,g, wx, i+1); \
+    T8 ( g,h,a,b,c,d,e,f, wx, i+2); \
+    T8 ( f,g,h,a,b,c,d,e, wx, i+3); \
+    T8 ( e,f,g,h,a,b,c,d, wx, i+4); \
+    T8 ( d,e,f,g,h,a,b,c, wx, i+5); \
+    T8 ( c,d,e,f,g,h,a,b, wx, i+6); \
+    T8 ( b,c,d,e,f,g,h,a, wx, i+7); \
+
+#define R8_PRE(i)  R8( W_PRE, i)
+#define R8_MAIN(i) R8( W_MAIN, i)
+
 #endif
 
-static const UInt32 K[64] = {
+void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
+
+// static
+extern MY_ALIGN(64)
+const UInt32 SHA256_K_ARRAY[64];
+
+MY_ALIGN(64)
+const UInt32 SHA256_K_ARRAY[64] = {
   0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
   0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
   0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
@@ -109,30 +249,27 @@ static const UInt32 K[64] = {
   0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
 };
 
-static void Sha256_WriteByteBlock(CSha256 *p)
-{
-  UInt32 W[16];
-  unsigned j;
-  UInt32 *state;
+#define K SHA256_K_ARRAY
 
-  #ifdef _SHA256_UNROLL2
-  UInt32 a,b,c,d,e,f,g,h;
+
+MY_NO_INLINE
+void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks)
+{
+  UInt32 W
+  #ifdef _SHA256_BIG_W
+      [64];
   #else
-  UInt32 T[8];
+      [16];
   #endif
 
-  for (j = 0; j < 16; j += 4)
-  {
-    const Byte *ccc = p->buffer + j * 4;
-    W[j    ] = GetBe32(ccc);
-    W[j + 1] = GetBe32(ccc + 4);
-    W[j + 2] = GetBe32(ccc + 8);
-    W[j + 3] = GetBe32(ccc + 12);
-  }
+  unsigned j;
 
-  state = p->state;
+  UInt32 a,b,c,d,e,f,g,h;
 
-  #ifdef _SHA256_UNROLL2
+  #if !defined(_SHA256_UNROLL) || (STEP_MAIN <= 4) || (STEP_PRE <= 4)
+  UInt32 tmp;
+  #endif
+  
   a = state[0];
   b = state[1];
   c = state[2];
@@ -141,39 +278,96 @@ static void Sha256_WriteByteBlock(CSha256 *p)
   f = state[5];
   g = state[6];
   h = state[7];
-  #else
-  for (j = 0; j < 8; j++)
-    T[j] = state[j];
-  #endif
 
-  for (j = 0; j < 64; j += 16)
+  while (numBlocks)
   {
-    RX_16
+
+  for (j = 0; j < 16; j += STEP_PRE)
+  {
+    #if STEP_PRE > 4
+
+      #if STEP_PRE < 8
+      R4_PRE(0);
+      #else
+      R8_PRE(0);
+      #if STEP_PRE == 16
+      R8_PRE(8);
+      #endif
+      #endif
+
+    #else
+
+      R1_PRE(0);
+      #if STEP_PRE >= 2
+      R1_PRE(1);
+      #if STEP_PRE >= 4
+      R1_PRE(2);
+      R1_PRE(3);
+      #endif
+      #endif
+    
+    #endif
+  }
+
+  for (j = 16; j < 64; j += STEP_MAIN)
+  {
+    #if defined(_SHA256_UNROLL) && STEP_MAIN >= 8
+
+      #if STEP_MAIN < 8
+      R4_MAIN(0);
+      #else
+      R8_MAIN(0);
+      #if STEP_MAIN == 16
+      R8_MAIN(8);
+      #endif
+      #endif
+
+    #else
+      
+      R1_MAIN(0);
+      #if STEP_MAIN >= 2
+      R1_MAIN(1);
+      #if STEP_MAIN >= 4
+      R2_MAIN(2);
+      #if STEP_MAIN >= 8
+      R2_MAIN(4);
+      R2_MAIN(6);
+      #if STEP_MAIN >= 16
+      R2_MAIN(8);
+      R2_MAIN(10);
+      R2_MAIN(12);
+      R2_MAIN(14);
+      #endif
+      #endif
+      #endif
+      #endif
+    #endif
+  }
+
+  a += state[0]; state[0] = a;
+  b += state[1]; state[1] = b;
+  c += state[2]; state[2] = c;
+  d += state[3]; state[3] = d;
+  e += state[4]; state[4] = e;
+  f += state[5]; state[5] = f;
+  g += state[6]; state[6] = g;
+  h += state[7]; state[7] = h;
+
+  data += 64;
+  numBlocks--;
   }
 
-  #ifdef _SHA256_UNROLL2
-  state[0] += a;
-  state[1] += b;
-  state[2] += c;
-  state[3] += d;
-  state[4] += e;
-  state[5] += f;
-  state[6] += g;
-  state[7] += h;
-  #else
-  for (j = 0; j < 8; j++)
-    state[j] += T[j];
-  #endif
-  
   /* Wipe variables */
   /* memset(W, 0, sizeof(W)); */
-  /* memset(T, 0, sizeof(T)); */
 }
 
 #undef S0
 #undef S1
 #undef s0
 #undef s1
+#undef K
+
+#define Sha256_UpdateBlock(p) UPDATE_BLOCKS(p)(p->state, p->buffer, 1)
 
 void Sha256_Update(CSha256 *p, const Byte *data, size_t size)
 {
@@ -193,25 +387,26 @@ void Sha256_Update(CSha256 *p, const Byte *data, size_t size)
       return;
     }
     
-    size -= num;
-    memcpy(p->buffer + pos, data, num);
-    data += num;
+    if (pos != 0)
+    {
+      size -= num;
+      memcpy(p->buffer + pos, data, num);
+      data += num;
+      Sha256_UpdateBlock(p);
+    }
   }
-
-  for (;;)
   {
-    Sha256_WriteByteBlock(p);
-    if (size < 64)
-      break;
-    size -= 64;
-    memcpy(p->buffer, data, 64);
-    data += 64;
-  }
-
-  if (size != 0)
+    size_t numBlocks = size >> 6;
+    UPDATE_BLOCKS(p)(p->state, data, numBlocks);
+    size &= 0x3F;
+    if (size == 0)
+      return;
+    data += (numBlocks << 6);
     memcpy(p->buffer, data, size);
+  }
 }
 
+
 void Sha256_Final(CSha256 *p, Byte *digest)
 {
   unsigned pos = (unsigned)p->count & 0x3F;
@@ -219,13 +414,30 @@ void Sha256_Final(CSha256 *p, Byte *digest)
   
   p->buffer[pos++] = 0x80;
   
-  while (pos != (64 - 8))
+  if (pos > (64 - 8))
+  {
+    while (pos != 64) { p->buffer[pos++] = 0; }
+    // memset(&p->buf.buffer[pos], 0, 64 - pos);
+    Sha256_UpdateBlock(p);
+    pos = 0;
+  }
+
+  /*
+  if (pos & 3)
   {
-    pos &= 0x3F;
-    if (pos == 0)
-      Sha256_WriteByteBlock(p);
-    p->buffer[pos++] = 0;
+    p->buffer[pos] = 0;
+    p->buffer[pos + 1] = 0;
+    p->buffer[pos + 2] = 0;
+    pos += 3;
+    pos &= ~3;
   }
+  {
+    for (; pos < 64 - 8; pos += 4)
+      *(UInt32 *)(&p->buffer[pos]) = 0;
+  }
+  */
+
+  memset(&p->buffer[pos], 0, (64 - 8) - pos);
 
   {
     UInt64 numBits = (p->count << 3);
@@ -233,16 +445,42 @@ void Sha256_Final(CSha256 *p, Byte *digest)
     SetBe32(p->buffer + 64 - 4, (UInt32)(numBits));
   }
   
-  Sha256_WriteByteBlock(p);
+  Sha256_UpdateBlock(p);
 
   for (i = 0; i < 8; i += 2)
   {
     UInt32 v0 = p->state[i];
-    UInt32 v1 = p->state[i + 1];
+    UInt32 v1 = p->state[(size_t)i + 1];
     SetBe32(digest    , v0);
     SetBe32(digest + 4, v1);
     digest += 8;
   }
   
-  Sha256_Init(p);
+  Sha256_InitState(p);
+}
+
+
+void Sha256Prepare(void) /* fills g_FUNC_UPDATE_BLOCKS / g_FUNC_UPDATE_BLOCKS_HW; does nothing without _SHA_SUPPORTED */
+{
+  #ifdef _SHA_SUPPORTED
+  SHA256_FUNC_UPDATE_BLOCKS f, f_hw;
+  f = Sha256_UpdateBlocks; /* default: software routine */
+  f_hw = NULL; /* NULL makes Sha256_SetFunction(p, SHA256_ALGO_HW) return False */
+  #ifdef MY_CPU_X86_OR_AMD64
+  #ifndef USE_MY_MM
+  if (CPU_IsSupported_SHA()
+      && CPU_IsSupported_SSSE3()
+      // && CPU_IsSupported_SSE41()
+      )
+  #endif
+  #else
+  if (CPU_IsSupported_SHA2())
+  #endif
+  {
+    // printf("\n========== HW SHA256 ======== \n");
+    f = f_hw = Sha256_UpdateBlocks_HW; /* hardware routine becomes both the default and the explicit HW choice */
+  }
+  g_FUNC_UPDATE_BLOCKS    = f;
+  g_FUNC_UPDATE_BLOCKS_HW = f_hw;
+  #endif
 }
diff --git a/deps/LZMA-SDK/C/Sha256.h b/deps/LZMA-SDK/C/Sha256.h
index 7f17ccf9c..f52933986 100644
--- a/deps/LZMA-SDK/C/Sha256.h
+++ b/deps/LZMA-SDK/C/Sha256.h
@@ -1,26 +1,76 @@
 /* Sha256.h -- SHA-256 Hash
-2013-01-18 : Igor Pavlov : Public domain */
+2021-01-01 : Igor Pavlov : Public domain */
 
-#ifndef __CRYPTO_SHA256_H
-#define __CRYPTO_SHA256_H
+#ifndef __7Z_SHA256_H
+#define __7Z_SHA256_H
 
 #include "7zTypes.h"
 
 EXTERN_C_BEGIN
 
-#define SHA256_DIGEST_SIZE 32
+#define SHA256_NUM_BLOCK_WORDS  16
+#define SHA256_NUM_DIGEST_WORDS  8
+
+#define SHA256_BLOCK_SIZE   (SHA256_NUM_BLOCK_WORDS * 4)
+#define SHA256_DIGEST_SIZE  (SHA256_NUM_DIGEST_WORDS * 4)
+
+typedef void (MY_FAST_CALL *SHA256_FUNC_UPDATE_BLOCKS)(UInt32 state[8], const Byte *data, size_t numBlocks);
+
+/*
+  if (the system supports different SHA256 code implementations)
+  {
+    (CSha256::func_UpdateBlocks) will be used
+    (CSha256::func_UpdateBlocks) can be set by
+       Sha256_Init()        - to default (fastest)
+       Sha256_SetFunction() - to any algo
+  }
+  else
+  {
+    (CSha256::func_UpdateBlocks) is ignored.
+  }
+*/
 
 typedef struct
 {
-  UInt32 state[8];
+  SHA256_FUNC_UPDATE_BLOCKS func_UpdateBlocks; /* block routine; set by Sha256_Init() or Sha256_SetFunction() */
   UInt64 count;
-  Byte buffer[64];
+  UInt64 __pad_2[2]; /* presumably padding that places (state) and (buffer) on aligned offsets -- TODO confirm */
+  UInt32 state[SHA256_NUM_DIGEST_WORDS]; /* running hash value, 8 words */
+
+  Byte buffer[SHA256_BLOCK_SIZE]; /* bytes of the current, not yet complete, 64-byte block */
 } CSha256;
 
+
+#define SHA256_ALGO_DEFAULT 0
+#define SHA256_ALGO_SW      1
+#define SHA256_ALGO_HW      2
+
+/*
+Sha256_SetFunction()
+return:
+  0 - (algo) value is not supported, and func_UpdateBlocks was not changed
+  1 - func_UpdateBlocks was set according to the (algo) value.
+*/
+
+BoolInt Sha256_SetFunction(CSha256 *p, unsigned algo);
+
+void Sha256_InitState(CSha256 *p);
 void Sha256_Init(CSha256 *p);
 void Sha256_Update(CSha256 *p, const Byte *data, size_t size);
 void Sha256_Final(CSha256 *p, Byte *digest);
 
+
+
+
+// void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks);
+
+/*
+call Sha256Prepare() once at program start.
+It prepares all supported implementations, and detects the fastest implementation.
+*/
+
+void Sha256Prepare(void);
+
 EXTERN_C_END
 
 #endif
diff --git a/deps/LZMA-SDK/C/Sha256Opt.c b/deps/LZMA-SDK/C/Sha256Opt.c
new file mode 100644
index 000000000..cc8c53e1b
--- /dev/null
+++ b/deps/LZMA-SDK/C/Sha256Opt.c
@@ -0,0 +1,373 @@
+/* Sha256Opt.c -- SHA-256 optimized code for SHA-256 hardware instructions
+2021-04-01 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#if defined(_MSC_VER)
+#if (_MSC_VER < 1900) && (_MSC_VER >= 1200)
+// #define USE_MY_MM
+#endif
+#endif
+
+#include "CpuArch.h"
+
+#ifdef MY_CPU_X86_OR_AMD64
+  #if defined(__clang__)
+    #if (__clang_major__ >= 8) // fix that check
+      #define USE_HW_SHA
+      #ifndef __SHA__
+        #define ATTRIB_SHA __attribute__((__target__("sha,ssse3")))
+        #if defined(_MSC_VER)
+          // SSSE3: for clang-cl:
+          #include <tmmintrin.h>
+          #define __SHA__
+        #endif
+      #endif
+
+    #endif
+  #elif defined(__GNUC__)
+    #if (__GNUC__ >= 8) // fix that check
+      #define USE_HW_SHA
+      #ifndef __SHA__
+        #define ATTRIB_SHA __attribute__((__target__("sha,ssse3")))
+        // #pragma GCC target("sha,ssse3")
+      #endif
+    #endif
+  #elif defined(__INTEL_COMPILER)
+    #if (__INTEL_COMPILER >= 1800) // fix that check
+      #define USE_HW_SHA
+    #endif
+  #elif defined(_MSC_VER)
+    #ifdef USE_MY_MM
+      #define USE_VER_MIN 1300
+    #else
+      #define USE_VER_MIN 1910
+    #endif
+    #if _MSC_VER >= USE_VER_MIN
+      #define USE_HW_SHA
+    #endif
+  #endif
+// #endif // MY_CPU_X86_OR_AMD64
+
+#ifdef USE_HW_SHA
+
+// #pragma message("Sha256 HW")
+// #include <wmmintrin.h>
+
+#if !defined(_MSC_VER) || (_MSC_VER >= 1900)
+#include <immintrin.h>
+#else
+#include <emmintrin.h>
+
+#if defined(_MSC_VER) && (_MSC_VER >= 1600)
+// #include <intrin.h>
+#endif
+
+#ifdef USE_MY_MM
+#include "My_mm.h"
+#endif
+
+#endif
+
+/*
+SHA256 uses:
+SSE2:
+  _mm_loadu_si128
+  _mm_storeu_si128
+  _mm_set_epi32
+  _mm_add_epi32
+  _mm_shuffle_epi32 / pshufd
+
+
+  
+SSSE3:
+  _mm_shuffle_epi8 / pshufb
+  _mm_alignr_epi8
+SHA:
+  _mm_sha256*
+*/
+
+// K array must be aligned to at least 16 bytes.
+// The compiler can see the align attribute and select:
+//   movdqu - for code without align attribute
+//   movdqa - for code with    align attribute
+extern
+MY_ALIGN(64)
+const UInt32 SHA256_K_ARRAY[64];
+
+#define K SHA256_K_ARRAY
+
+
+#define ADD_EPI32(dest, src) dest = _mm_add_epi32(dest, src);
+#define SHA256_MSG1(dest, src) dest = _mm_sha256msg1_epu32(dest, src);
+#define SHA256_MSG2(dest, src) dest = _mm_sha256msg2_epu32(dest, src);
+// The three helpers above overwrite (dest); their expansions already end with ';'.
+
+#define LOAD_SHUFFLE(m, k) \
+    m = _mm_loadu_si128((const __m128i *)(const void *)(data + (k) * 16)); \
+    m = _mm_shuffle_epi8(m, mask); \
+
+#define SM1(g0, g1, g2, g3) \
+    SHA256_MSG1(g3, g0); \
+
+#define SM2(g0, g1, g2, g3) \
+    tmp = _mm_alignr_epi8(g1, g0, 4); \
+    ADD_EPI32(g2, tmp); \
+    SHA256_MSG2(g2, g1); \
+
+// #define LS0(k, g0, g1, g2, g3) LOAD_SHUFFLE(g0, k)
+// #define LS1(k, g0, g1, g2, g3) LOAD_SHUFFLE(g1, k+1)
+
+
+#define NNN(g0, g1, g2, g3)
+
+
+#define RND2(t0, t1) \
+    t0 = _mm_sha256rnds2_epu32(t0, t1, msg);
+
+#define RND2_0(m, k) \
+    msg = _mm_add_epi32(m, *(const __m128i *) (const void *) &K[(k) * 4]); \
+    RND2(state0, state1); \
+    msg = _mm_shuffle_epi32(msg, 0x0E); \
+
+
+#define RND2_1 \
+    RND2(state1, state0); \
+
+
+// We use a scheme with 3 rounds ahead for SHA256_MSG1 / 2 rounds ahead for SHA256_MSG2
+
+#define R4(k, g0, g1, g2, g3, OP0, OP1) \
+    RND2_0(g0, k); \
+    OP0(g0, g1, g2, g3); \
+    RND2_1; \
+    OP1(g0, g1, g2, g3); \
+
+#define R16(k, OP0, OP1, OP2, OP3, OP4, OP5, OP6, OP7) \
+    R4 ( (k)*4+0, m0, m1, m2, m3, OP0, OP1 ) \
+    R4 ( (k)*4+1, m1, m2, m3, m0, OP2, OP3 ) \
+    R4 ( (k)*4+2, m2, m3, m0, m1, OP4, OP5 ) \
+    R4 ( (k)*4+3, m3, m0, m1, m2, OP6, OP7 ) \
+
+#define PREPARE_STATE \
+    tmp    = _mm_shuffle_epi32(state0, 0x1B); /* abcd */ \
+    state0 = _mm_shuffle_epi32(state1, 0x1B); /* efgh */ \
+    state1 = state0; \
+    state0 = _mm_unpacklo_epi64(state0, tmp); /* cdgh */ \
+    state1 = _mm_unpackhi_epi64(state1, tmp); /* abef */ \
+
+
+void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
+#ifdef ATTRIB_SHA
+ATTRIB_SHA
+#endif
+void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks) // updates state[8] with (numBlocks) 64-byte blocks read from (data)
+{
+  const __m128i mask = _mm_set_epi32(0x0c0d0e0f, 0x08090a0b, 0x04050607, 0x00010203); // pshufb control: reverses the bytes of each 32-bit word
+  __m128i tmp;
+  __m128i state0, state1;
+
+  if (numBlocks == 0) // required: the do/while below always runs at least once
+    return;
+
+  state0 = _mm_loadu_si128((const __m128i *) (const void *) &state[0]);
+  state1 = _mm_loadu_si128((const __m128i *) (const void *) &state[4]);
+  // PREPARE_STATE regroups the two halves (see the abcd/efgh -> cdgh/abef notes in the macro)
+  PREPARE_STATE
+
+  do
+  {
+    __m128i state0_save, state1_save;
+    __m128i m0, m1, m2, m3;
+    __m128i msg;
+    // #define msg tmp
+
+    state0_save = state0;
+    state1_save = state1;
+    // load the 64-byte block as four 16-byte vectors, byte-swapping each 32-bit word
+    LOAD_SHUFFLE (m0, 0)
+    LOAD_SHUFFLE (m1, 1)
+    LOAD_SHUFFLE (m2, 2)
+    LOAD_SHUFFLE (m3, 3)
+
+
+
+    R16 ( 0, NNN, NNN, SM1, NNN, SM1, SM2, SM1, SM2 ); // each R16 expands to four R4 steps; NNN expands to nothing
+    R16 ( 1, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 );
+    R16 ( 2, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 );
+    R16 ( 3, SM1, SM2, NNN, SM2, NNN, NNN, NNN, NNN );
+    // add the values saved at the top of the iteration back into the working state
+    ADD_EPI32(state0, state0_save);
+    ADD_EPI32(state1, state1_save);
+    // advance to the next 64-byte block
+    data += 64;
+  }
+  while (--numBlocks);
+
+  PREPARE_STATE // second application, before the halves are stored back to state[]
+
+  _mm_storeu_si128((__m128i *) (void *) &state[0], state0);
+  _mm_storeu_si128((__m128i *) (void *) &state[4], state1);
+}
+
+#endif // USE_HW_SHA
+
+#elif defined(MY_CPU_ARM_OR_ARM64)
+
+  #if defined(__clang__)
+    #if (__clang_major__ >= 8) // fix that check
+      #define USE_HW_SHA
+    #endif
+  #elif defined(__GNUC__)
+    #if (__GNUC__ >= 6) // fix that check
+      #define USE_HW_SHA
+    #endif
+  #elif defined(_MSC_VER)
+    #if _MSC_VER >= 1910
+      #define USE_HW_SHA
+    #endif
+  #endif
+
+#ifdef USE_HW_SHA
+
+// #pragma message("=== Sha256 HW === ")
+
+#if defined(__clang__) || defined(__GNUC__)
+  #ifdef MY_CPU_ARM64
+    #define ATTRIB_SHA __attribute__((__target__("+crypto")))
+  #else
+    #define ATTRIB_SHA __attribute__((__target__("fpu=crypto-neon-fp-armv8")))
+  #endif
+#else
+  // _MSC_VER
+  // for arm32
+  #define _ARM_USE_NEW_NEON_INTRINSICS
+#endif
+
+#if defined(_MSC_VER) && defined(MY_CPU_ARM64)
+#include <arm64_neon.h>
+#else
+#include <arm_neon.h>
+#endif
+
+typedef uint32x4_t v128;
+// typedef __n128 v128; // MSVC
+
+#ifdef MY_CPU_BE
+  #define MY_rev32_for_LE(x)
+#else
+  #define MY_rev32_for_LE(x) x = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(x)))
+#endif
+
+#define LOAD_128(_p)      (*(const v128 *)(const void *)(_p))
+#define STORE_128(_p, _v) *(v128 *)(void *)(_p) = (_v)
+
+#define LOAD_SHUFFLE(m, k) \
+    m = LOAD_128((data + (k) * 16)); \
+    MY_rev32_for_LE(m); \
+
+// K array must be aligned to at least 16 bytes.
+extern
+MY_ALIGN(64)
+const UInt32 SHA256_K_ARRAY[64];
+
+#define K SHA256_K_ARRAY
+
+
+#define SHA256_SU0(dest, src)        dest = vsha256su0q_u32(dest, src);
+#define SHA256_SU1(dest, src2, src3) dest = vsha256su1q_u32(dest, src2, src3);
+// SM1 / SM2 / NNN are the operations passed to R4 and R16 as the OP arguments.
+#define SM1(g0, g1, g2, g3)  SHA256_SU0(g3, g0)
+#define SM2(g0, g1, g2, g3)  SHA256_SU1(g2, g0, g1)
+#define NNN(g0, g1, g2, g3)
+
+
+#define R4(k, g0, g1, g2, g3, OP0, OP1) \
+    msg = vaddq_u32(g0, *(const v128 *) (const void *) &K[(k) * 4]); \
+    tmp = state0; \
+    state0 = vsha256hq_u32( state0, state1, msg ); \
+    state1 = vsha256h2q_u32( state1, tmp, msg ); \
+    OP0(g0, g1, g2, g3); \
+    OP1(g0, g1, g2, g3); \
+
+
+#define R16(k, OP0, OP1, OP2, OP3, OP4, OP5, OP6, OP7) \
+    R4 ( (k)*4+0, m0, m1, m2, m3, OP0, OP1 ) \
+    R4 ( (k)*4+1, m1, m2, m3, m0, OP2, OP3 ) \
+    R4 ( (k)*4+2, m2, m3, m0, m1, OP4, OP5 ) \
+    R4 ( (k)*4+3, m3, m0, m1, m2, OP6, OP7 ) \
+
+
+void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
+#ifdef ATTRIB_SHA
+ATTRIB_SHA
+#endif
+void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks) // updates state[8] with (numBlocks) 64-byte blocks read from (data)
+{
+  v128 state0, state1;
+
+  if (numBlocks == 0) // required: the do/while below always runs at least once
+    return;
+
+  state0 = LOAD_128(&state[0]); // LOAD_128 is a plain dereference through a (const v128 *) cast
+  state1 = LOAD_128(&state[4]);
+  // one loop iteration consumes one 64-byte block
+  do
+  {
+    v128 state0_save, state1_save;
+    v128 m0, m1, m2, m3;
+    v128 msg, tmp;
+
+    state0_save = state0;
+    state1_save = state1;
+    // NOTE(review): LOAD_SHUFFLE reads (data) through a v128 pointer cast; confirm unaligned (data) is acceptable on every ARM target
+    LOAD_SHUFFLE (m0, 0) // MY_rev32_for_LE byte-swaps each 32-bit word unless MY_CPU_BE is defined
+    LOAD_SHUFFLE (m1, 1)
+    LOAD_SHUFFLE (m2, 2)
+    LOAD_SHUFFLE (m3, 3)
+
+    R16 ( 0, NNN, NNN, SM1, NNN, SM1, SM2, SM1, SM2 ); // each R16 expands to four R4 steps; NNN expands to nothing
+    R16 ( 1, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 );
+    R16 ( 2, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 );
+    R16 ( 3, SM1, SM2, NNN, SM2, NNN, NNN, NNN, NNN );
+    // add the values saved at the top of the iteration back into the working state
+    state0 = vaddq_u32(state0, state0_save);
+    state1 = vaddq_u32(state1, state1_save);
+    // advance to the next 64-byte block
+    data += 64;
+  }
+  while (--numBlocks);
+
+  STORE_128(&state[0], state0);
+  STORE_128(&state[4], state1);
+}
+
+#endif // USE_HW_SHA
+
+#endif // MY_CPU_ARM_OR_ARM64
+
+
+#ifndef USE_HW_SHA
+
+// #error Stop_Compiling_UNSUPPORTED_SHA
+// #include <stdlib.h>
+
+// #include "Sha256.h"
+void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks);
+
+#pragma message("Sha256 HW-SW stub was used")
+
+void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
+void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks) // stub compiled only when USE_HW_SHA is not defined
+{
+  Sha256_UpdateBlocks(state, data, numBlocks); // forwards to the software routine so the symbol still resolves and gives correct results
+  /*
+  UNUSED_VAR(state);
+  UNUSED_VAR(data);
+  UNUSED_VAR(numBlocks);
+  exit(1);
+  return;
+  */
+}
+
+#endif
diff --git a/deps/LZMA-SDK/C/Threads.c b/deps/LZMA-SDK/C/Threads.c
index 8fd86f224..402abab01 100644
--- a/deps/LZMA-SDK/C/Threads.c
+++ b/deps/LZMA-SDK/C/Threads.c
@@ -1,8 +1,10 @@
 /* Threads.c -- multithreading library
-2017-06-26 : Igor Pavlov : Public domain */
+2021-04-25 : Igor Pavlov : Public domain */
 
 #include "Precomp.h"
 
+#ifdef _WIN32
+
 #ifndef UNDER_CE
 #include <process.h>
 #endif
@@ -29,28 +31,103 @@ WRes HandlePtr_Close(HANDLE *p)
   return 0;
 }
 
-WRes Handle_WaitObject(HANDLE h) { return (WRes)WaitForSingleObject(h, INFINITE); }
+WRes Handle_WaitObject(HANDLE h) // waits without timeout; returns 0 on success, otherwise a non-zero code
+{
+  DWORD dw = WaitForSingleObject(h, INFINITE);
+  /*
+    (dw) result:
+    WAIT_OBJECT_0  // 0
+    WAIT_ABANDONED // 0x00000080 : is not compatible with Win32 Error space
+    WAIT_TIMEOUT   // 0x00000102 : is     compatible with Win32 Error space
+    WAIT_FAILED    // 0xFFFFFFFF
+  */
+  if (dw == WAIT_FAILED)
+  {
+    dw = GetLastError(); // report the system error code instead of WAIT_FAILED
+    if (dw == 0)
+      return WAIT_FAILED; // no error code available: keep the result non-zero so the failure is not lost
+  }
+  return (WRes)dw; // other results (including WAIT_ABANDONED) are returned unchanged
+}
+
+#define Thread_Wait(p) Handle_WaitObject(*(p))
+
+WRes Thread_Wait_Close(CThread *p)
+{
+  WRes res = Thread_Wait(p);
+  WRes res2 = Thread_Close(p);
+  return (res != 0 ? res : res2);
+}
 
 WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param)
 {
   /* Windows Me/98/95: threadId parameter may not be NULL in _beginthreadex/CreateThread functions */
-  
+
   #ifdef UNDER_CE
-  
+
   DWORD threadId;
   *p = CreateThread(0, 0, func, param, 0, &threadId);
-
+  
   #else
-
+  
   unsigned threadId;
-  *p = (HANDLE)_beginthreadex(NULL, 0, func, param, 0, &threadId);
-   
+  *p = (HANDLE)(_beginthreadex(NULL, 0, func, param, 0, &threadId));
+  
   #endif
 
   /* maybe we must use errno here, but probably GetLastError() is also OK. */
   return HandleToWRes(*p);
 }
 
+// Creates the thread suspended, applies (affinity) with SetThreadAffinityMask(), then resumes it. Under UNDER_CE the mask is ignored.
+WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, CAffinityMask affinity)
+{
+  #ifdef UNDER_CE
+
+  UNUSED_VAR(affinity)
+  return Thread_Create(p, func, param);
+  
+  #else
+  
+  /* Windows Me/98/95: threadId parameter may not be NULL in _beginthreadex/CreateThread functions */
+  HANDLE h;
+  WRes wres;
+  unsigned threadId;
+  h = (HANDLE)(_beginthreadex(NULL, 0, func, param, CREATE_SUSPENDED, &threadId)); // suspended, so the mask is set before func runs
+  *p = h;
+  wres = HandleToWRes(h);
+  if (h)
+  {
+    {
+      // DWORD_PTR prevMask =
+      SetThreadAffinityMask(h, (DWORD_PTR)affinity); // result is deliberately ignored (see the note below)
+      /*
+      if (prevMask == 0)
+      {
+        // affinity change is non-critical error, so we can ignore it
+        // wres = GetError();
+      }
+      */
+    }
+    {
+      DWORD prevSuspendCount = ResumeThread(h);
+      /* ResumeThread() returns:
+         0 : was_not_suspended
+         1 : was_resumed
+        -1 : error
+      */
+      if (prevSuspendCount == (DWORD)-1)
+        wres = GetError(); // NOTE(review): *p still holds the handle of the suspended thread in this case
+    }
+  }
+
+  /* maybe we must use errno here, but probably GetLastError() is also OK. */
+  return wres;
+
+  #endif
+}
+
+
 static WRes Event_Create(CEvent *p, BOOL manualReset, int signaled)
 {
   *p = CreateEvent(NULL, manualReset, (signaled ? TRUE : FALSE), NULL);
@@ -68,6 +145,7 @@ WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p) { return AutoResetEven
 
 WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount)
 {
+  // negative ((LONG)maxCount) is not supported in WIN32::CreateSemaphore()
   *p = CreateSemaphore(NULL, (LONG)initCount, (LONG)maxCount, NULL);
   return HandleToWRes(*p);
 }
@@ -93,3 +171,336 @@ WRes CriticalSection_Init(CCriticalSection *p)
   #endif
   return 0;
 }
+
+
+
+
+#else // _WIN32
+
+// ---------- POSIX ----------
+
+#ifndef __APPLE__
+#ifndef _7ZIP_AFFINITY_DISABLE
+// _GNU_SOURCE can be required for pthread_setaffinity_np() / CPU_ZERO / CPU_SET
+#define _GNU_SOURCE
+#endif
+#endif
+
+#include "Threads.h"
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#ifdef _7ZIP_AFFINITY_SUPPORTED
+// #include <sched.h>
+#endif
+
+
+// #include <stdio.h>
+// #define PRF(p) p
+#define PRF(p)
+
+#define Print(s) PRF(printf("\n%s\n", s))
+
+// #include <stdio.h>
+// Creates a joinable thread running func(param); when (cpuSet) is not NULL and affinity is supported, the set is applied through the thread attributes.
+WRes Thread_Create_With_CpuSet(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, const CCpuSet *cpuSet)
+{
+  // new thread in Posix probably inherits affinity from parent thread
+  Print("Thread_Create_With_CpuSet");
+
+  pthread_attr_t attr;
+  int ret;
+  // int ret2;
+
+  p->_created = 0; // stays 0 unless pthread_create() succeeds
+
+  RINOK(pthread_attr_init(&attr));
+
+  ret = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
+
+  if (!ret)
+  {
+    if (cpuSet)
+    {
+      #ifdef _7ZIP_AFFINITY_SUPPORTED
+      
+      /*
+      printf("\n affinity :");
+      unsigned i;
+      for (i = 0; i < sizeof(*cpuSet) && i < 8; i++)
+      {
+        Byte b = *((const Byte *)cpuSet + i);
+        char temp[32];
+        #define GET_HEX_CHAR(t) ((char)(((t < 10) ? ('0' + t) : ('A' + (t - 10)))))
+        temp[0] = GET_HEX_CHAR((b & 0xF));
+        temp[1] = GET_HEX_CHAR((b >> 4));
+        // temp[0] = GET_HEX_CHAR((b >> 4));  // big-endian
+        // temp[1] = GET_HEX_CHAR((b & 0xF));  // big-endian
+        temp[2] = 0;
+        printf("%s", temp);
+      }
+      printf("\n");
+      */
+
+      // ret2 =
+      pthread_attr_setaffinity_np(&attr, sizeof(*cpuSet), cpuSet); // a failure to set affinity is ignored
+      // if (ret2) ret = ret2;
+      #endif
+    }
+    
+    ret = pthread_create(&p->_tid, &attr, func, param);
+    
+    if (!ret)
+    {
+      p->_created = 1;
+      /*
+      if (cpuSet)
+      {
+        // ret2 =
+        pthread_setaffinity_np(p->_tid, sizeof(*cpuSet), cpuSet);
+        // if (ret2) ret = ret2;
+      }
+      */
+    }
+  }
+  // ret2 =
+  pthread_attr_destroy(&attr); // attr is released on success and on failure; its own result is ignored
+  // if (ret2 != 0) ret = ret2;
+  return ret;
+}
+
+// Creates a thread without an affinity request (NULL cpu set).
+WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param)
+{
+  return Thread_Create_With_CpuSet(p, func, param, NULL);
+}
+
+
+WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, CAffinityMask affinity)
+{
+  Print("Thread_Create_WithAffinity");
+  // Expand the bit mask into a CCpuSet (bit N of (affinity) selects CPU N), then create the thread with it.
+  CCpuSet cpuSet;
+  unsigned cpu = 0;
+  CpuSet_Zero(&cpuSet);
+  while (affinity != 0 && cpu < sizeof(affinity) * 8)
+  {
+    if ((affinity & 1) != 0)
+    {
+      CpuSet_Set(&cpuSet, cpu);
+    }
+    affinity >>= 1;
+    cpu++;
+  }
+  return Thread_Create_With_CpuSet(p, func, param, &cpuSet);
+}
+
+
+WRes Thread_Close(CThread *p)
+{
+  // Detaches the thread if one was created and resets the descriptor; returns 0 when there is nothing to close.
+  int detachRes = 0;
+  if (p->_created)
+  {
+    detachRes = pthread_detach(p->_tid);
+    p->_tid = 0;
+    p->_created = 0;
+  }
+  return detachRes;
+}
+
+
+WRes Thread_Wait_Close(CThread *p)
+{
+  // Joins the thread and resets the descriptor; returns EINVAL when no thread was created.
+  void *exitValue;
+  int joinRes = EINVAL;
+  if (p->_created)
+  {
+    joinRes = pthread_join(p->_tid, &exitValue);
+    // probably (_tid) can't be used after pthread_join(), so the descriptor is closed here
+    p->_created = 0;
+    p->_tid = 0;
+  }
+  return joinRes;
+}
+
+
+// Initializes the mutex and condition variable of an event and records its reset mode and initial state.
+static WRes Event_Create(CEvent *p, int manualReset, int signaled)
+{
+  RINOK(pthread_mutex_init(&p->_mutex, NULL));
+  RINOK(pthread_cond_init(&p->_cond, NULL)); // NOTE(review): if this fails, the mutex initialized above is presumably left undestroyed
+  p->_manual_reset = manualReset;
+  p->_state = (signaled ? True : False);
+  p->_created = 1; // Event_Close() releases the resources only when this flag is set
+  return 0;
+}
+
+WRes ManualResetEvent_Create(CManualResetEvent *p, int signaled)
+  { return Event_Create(p, True, signaled); } // manual-reset: Event_Wait() leaves the event signaled
+WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent *p)
+  { return ManualResetEvent_Create(p, 0); } // same, initially non-signaled
+WRes AutoResetEvent_Create(CAutoResetEvent *p, int signaled)
+  { return Event_Create(p, False, signaled); } // auto-reset: Event_Wait() clears the event before returning
+WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p)
+  { return AutoResetEvent_Create(p, 0); } // same, initially non-signaled
+
+// Puts the event into the signaled state and wakes every thread waiting on its condition variable.
+WRes Event_Set(CEvent *p)
+{
+  RINOK(pthread_mutex_lock(&p->_mutex));
+  p->_state = True;
+  int res1 = pthread_cond_broadcast(&p->_cond);
+  int res2 = pthread_mutex_unlock(&p->_mutex); // the unlock runs even when the broadcast failed
+  return (res2 ? res2 : res1); // an unlock error takes precedence over a broadcast error
+}
+
+WRes Event_Reset(CEvent *p) // puts the event into the non-signaled state
+{
+  RINOK(pthread_mutex_lock(&p->_mutex));
+  p->_state = False; // changed only while the mutex is held
+  return pthread_mutex_unlock(&p->_mutex);
+}
+ 
+WRes Event_Wait(CEvent *p)
+{
+  // Blocks until the event is signaled. An auto-reset event is switched back
+  // to the non-signaled state by the waiter before the mutex is released.
+  RINOK(pthread_mutex_lock(&p->_mutex));
+
+  // the result of pthread_cond_wait() is not checked: the loop is left
+  // only when (_state) is observed as signaled
+  while (p->_state == False)
+    pthread_cond_wait(&p->_cond, &p->_mutex);
+
+  if (p->_manual_reset == False)
+    p->_state = False;
+
+  return pthread_mutex_unlock(&p->_mutex);
+}
+
+WRes Event_Close(CEvent *p)
+{
+  // Destroys the mutex and the condition variable of a created event; does nothing for a non-created one.
+  int mutexRes, condRes;
+  if (!p->_created)
+    return 0;
+  p->_created = 0;
+  mutexRes = pthread_mutex_destroy(&p->_mutex);
+  condRes = pthread_cond_destroy(&p->_cond);
+  return (mutexRes != 0 ? mutexRes : condRes);
+}
+
+// Initializes a counting semaphore (mutex + condition variable + counter); requires maxCount >= 1 and initCount <= maxCount.
+WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount)
+{
+  if (initCount > maxCount || maxCount < 1)
+    return EINVAL;
+  RINOK(pthread_mutex_init(&p->_mutex, NULL));
+  RINOK(pthread_cond_init(&p->_cond, NULL)); // NOTE(review): if this fails, the mutex initialized above is presumably left undestroyed
+  p->_count = initCount;
+  p->_maxCount = maxCount;
+  p->_created = 1; // Semaphore_Close() releases the resources only when this flag is set
+  return 0;
+}
+
+WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 releaseCount)
+{
+  // Adds (releaseCount) units and wakes all waiters; EINVAL for 0 units, ERROR_TOO_MANY_POSTS if (_maxCount) would be exceeded.
+  int ret;
+
+  if (releaseCount < 1)
+    return EINVAL;
+
+  RINOK(pthread_mutex_lock(&p->_mutex));
+  // Compare against the remaining room instead of computing (_count + releaseCount): that sum can wrap
+  // around in UInt32 and pass the limit check. (_count <= _maxCount always holds, so no underflow here.)
+  if (releaseCount > p->_maxCount - p->_count)
+    ret = ERROR_TOO_MANY_POSTS; // EINVAL;
+  else
+  {
+    p->_count += releaseCount;
+    ret = pthread_cond_broadcast(&p->_cond);
+  }
+  RINOK(pthread_mutex_unlock(&p->_mutex));
+  return ret;
+}
+
+WRes Semaphore_Wait(CSemaphore *p)
+{
+  // Blocks until the counter is non-zero, then takes one unit.
+  RINOK(pthread_mutex_lock(&p->_mutex));
+  // the counter is re-checked after every wake-up; the result of pthread_cond_wait() is not checked
+  while (p->_count == 0)
+    pthread_cond_wait(&p->_cond, &p->_mutex);
+  p->_count -= 1;
+  return pthread_mutex_unlock(&p->_mutex);
+}
+
+WRes Semaphore_Close(CSemaphore *p)
+{
+  // Destroys the mutex and the condition variable of a created semaphore; does nothing for a non-created one.
+  int mutexRes, condRes;
+  if (!p->_created)
+    return 0;
+  p->_created = 0;
+  mutexRes = pthread_mutex_destroy(&p->_mutex);
+  condRes = pthread_cond_destroy(&p->_cond);
+  return (mutexRes != 0 ? mutexRes : condRes);
+}
+
+
+
+WRes CriticalSection_Init(CCriticalSection *p)
+{
+  // Initializes the mutex with default attributes; a NULL section is reported as EINTR.
+  if (p != NULL)
+    return pthread_mutex_init(&p->_mutex, NULL);
+  return EINTR;
+}
+
+void CriticalSection_Enter(CCriticalSection *p)
+{
+  // Locks the mutex of the section; a NULL section is silently ignored.
+  // The result of pthread_mutex_lock() is not checked.
+  if (p == NULL)
+    return;
+
+  pthread_mutex_lock(&p->_mutex);
+}
+
+void CriticalSection_Leave(CCriticalSection *p)
+{
+  // Unlocks the mutex of the section; a NULL section is silently ignored.
+  // The result of pthread_mutex_unlock() is not checked.
+  if (p == NULL)
+    return;
+
+  pthread_mutex_unlock(&p->_mutex);
+}
+
+void CriticalSection_Delete(CCriticalSection *p)
+{
+  // Destroys the mutex of the section; a NULL section is silently ignored.
+  // The result of pthread_mutex_destroy() is not checked.
+  if (p == NULL)
+    return;
+
+  pthread_mutex_destroy(&p->_mutex);
+}
+
+LONG InterlockedIncrement(LONG volatile *addend)
+{
+  // Increments *addend by 1 and returns the new value (non-Windows counterpart of the Win32 function name).
+  #ifdef USE_HACK_UNSAFE_ATOMIC
+    LONG val = *addend + 1; // plain read-modify-write: not atomic, as the macro name says
+    *addend = val;
+    return val;
+  #else
+    return __sync_add_and_fetch(addend, 1);
+  #endif
+}
+
+#endif // _WIN32
diff --git a/deps/LZMA-SDK/C/Threads.h b/deps/LZMA-SDK/C/Threads.h
index f913241ae..8d4a10df9 100644
--- a/deps/LZMA-SDK/C/Threads.h
+++ b/deps/LZMA-SDK/C/Threads.h
@@ -1,38 +1,106 @@
 /* Threads.h -- multithreading library
-2017-06-18 : Igor Pavlov : Public domain */
+2021-04-25 : Igor Pavlov : Public domain */
 
 #ifndef __7Z_THREADS_H
 #define __7Z_THREADS_H
 
 #ifdef _WIN32
-#include <windows.h>
+#include <Windows.h>
+#else
+
+#if !defined(__APPLE__) && !defined(_AIX)
+#ifndef _7ZIP_AFFINITY_DISABLE
+#define _7ZIP_AFFINITY_SUPPORTED
+// #define _GNU_SOURCE
+#endif
+#endif
+
+#include <pthread.h>
 #endif
 
 #include "7zTypes.h"
 
 EXTERN_C_BEGIN
 
+#ifdef _WIN32
+
 WRes HandlePtr_Close(HANDLE *h);
 WRes Handle_WaitObject(HANDLE h);
 
 typedef HANDLE CThread;
-#define Thread_Construct(p) *(p) = NULL
+
+#define Thread_Construct(p) { *(p) = NULL; }
 #define Thread_WasCreated(p) (*(p) != NULL)
 #define Thread_Close(p) HandlePtr_Close(p)
-#define Thread_Wait(p) Handle_WaitObject(*(p))
+// #define Thread_Wait(p) Handle_WaitObject(*(p))
 
 typedef
-#ifdef UNDER_CE
-  DWORD
+    #ifdef UNDER_CE
+      DWORD
+    #else
+      unsigned
+    #endif
+    THREAD_FUNC_RET_TYPE;
+
+typedef DWORD_PTR CAffinityMask;
+typedef DWORD_PTR CCpuSet;
+
+#define CpuSet_Zero(p) { *(p) = 0; }
+#define CpuSet_Set(p, cpu) { *(p) |= ((DWORD_PTR)1 << (cpu)); }
+
+#else //  _WIN32
+
+typedef struct _CThread
+{
+  pthread_t _tid; // thread id; cleared to 0 by Thread_Construct()
+  int _created; // non-zero while a thread is attached (tested by Thread_WasCreated)
+} CThread;
+
+#define Thread_Construct(p) { (p)->_tid = 0; (p)->_created = 0; }
+#define Thread_WasCreated(p) ((p)->_created != 0)
+WRes Thread_Close(CThread *p);
+// #define Thread_Wait Thread_Wait_Close
+
+typedef void * THREAD_FUNC_RET_TYPE;
+
+typedef UInt64 CAffinityMask;
+
+#ifdef _7ZIP_AFFINITY_SUPPORTED
+
+typedef cpu_set_t CCpuSet;
+#define CpuSet_Zero(p) CPU_ZERO(p)
+#define CpuSet_Set(p, cpu) CPU_SET(cpu, p)
+#define CpuSet_IsSet(p, cpu) CPU_ISSET(cpu, p)
+
 #else
-  unsigned
+
+typedef UInt64 CCpuSet;
+#define CpuSet_Zero(p) { *(p) = 0; }
+#define CpuSet_Set(p, cpu) { *(p) |= ((UInt64)1 << (cpu)); }
+#define CpuSet_IsSet(p, cpu) ((*(p) & ((UInt64)1 << (cpu))) != 0)
+
 #endif
-  THREAD_FUNC_RET_TYPE;
+
+
+#endif //  _WIN32
+
 
 #define THREAD_FUNC_CALL_TYPE MY_STD_CALL
 #define THREAD_FUNC_DECL THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE
 typedef THREAD_FUNC_RET_TYPE (THREAD_FUNC_CALL_TYPE * THREAD_FUNC_TYPE)(void *);
 WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param);
+WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, CAffinityMask affinity);
+WRes Thread_Wait_Close(CThread *p);
+
+#ifdef _WIN32
+#define Thread_Create_With_CpuSet(p, func, param, cs) \
+  Thread_Create_With_Affinity(p, func, param, *(cs)) /* on Windows CCpuSet is the mask itself; (cs) is parenthesized so any pointer expression can be passed */
+#else
+WRes Thread_Create_With_CpuSet(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, const CCpuSet *cpuSet);
+#endif
+
+
+#ifdef _WIN32
 
 typedef HANDLE CEvent;
 typedef CEvent CAutoResetEvent;
@@ -63,6 +131,67 @@ WRes CriticalSection_Init(CCriticalSection *p);
 #define CriticalSection_Enter(p) EnterCriticalSection(p)
 #define CriticalSection_Leave(p) LeaveCriticalSection(p)
 
+
+#else // _WIN32
+
+typedef struct _CEvent
+{
+  int _created; // 0 after Event_Construct(); tested by Event_IsCreated()
+  int _manual_reset; // selects manual-reset vs auto-reset behavior
+  int _state; // signaled flag
+  pthread_mutex_t _mutex;
+  pthread_cond_t _cond;
+} CEvent;
+
+typedef CEvent CAutoResetEvent;
+typedef CEvent CManualResetEvent;
+
+#define Event_Construct(p) ((p)->_created = 0) /* marks the event as not created; parenthesized so it is safe inside larger expressions */
+#define Event_IsCreated(p) ((p)->_created)
+
+WRes ManualResetEvent_Create(CManualResetEvent *p, int signaled);
+WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent *p);
+WRes AutoResetEvent_Create(CAutoResetEvent *p, int signaled);
+WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p);
+WRes Event_Set(CEvent *p);
+WRes Event_Reset(CEvent *p);
+WRes Event_Wait(CEvent *p);
+WRes Event_Close(CEvent *p);
+
+
+typedef struct _CSemaphore
+{
+  int _created; // 0 after Semaphore_Construct(); tested by Semaphore_IsCreated()
+  UInt32 _count; // current counter value
+  UInt32 _maxCount; // upper limit for (_count)
+  pthread_mutex_t _mutex;
+  pthread_cond_t _cond;
+} CSemaphore;
+
+#define Semaphore_Construct(p) ((p)->_created = 0) /* marks the semaphore as not created; parenthesized so it is safe inside larger expressions */
+#define Semaphore_IsCreated(p) ((p)->_created)
+
+WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount);
+WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 num);
+#define Semaphore_Release1(p) Semaphore_ReleaseN(p, 1)
+WRes Semaphore_Wait(CSemaphore *p);
+WRes Semaphore_Close(CSemaphore *p);
+
+
+typedef struct _CCriticalSection
+{
+  pthread_mutex_t _mutex;
+} CCriticalSection;
+
+WRes CriticalSection_Init(CCriticalSection *p);
+void CriticalSection_Delete(CCriticalSection *cs);
+void CriticalSection_Enter(CCriticalSection *cs);
+void CriticalSection_Leave(CCriticalSection *cs);
+
+LONG InterlockedIncrement(LONG volatile *addend);
+
+#endif  // _WIN32
+
 EXTERN_C_END
 
 #endif
diff --git a/deps/LZMA-SDK/C/Util/7z/7zMain.c b/deps/LZMA-SDK/C/Util/7z/7zMain.c
index 1c02b48ec..3ab63a3d9 100644
--- a/deps/LZMA-SDK/C/Util/7z/7zMain.c
+++ b/deps/LZMA-SDK/C/Util/7z/7zMain.c
@@ -1,5 +1,5 @@
 /* 7zMain.c - Test application for 7z Decoder
-2019-02-02 : Igor Pavlov : Public domain */
+2021-04-29 : Igor Pavlov : Public domain */
 
 #include "Precomp.h"
 
@@ -20,6 +20,13 @@
 #ifdef _WIN32
 #include <direct.h>
 #else
+#include <stdlib.h>
+#include <time.h>
+#ifdef __GNUC__
+#include <sys/time.h>
+#endif
+#include <fcntl.h>
+// #include <utime.h>
 #include <sys/stat.h>
 #include <errno.h>
 #endif
@@ -108,7 +115,7 @@ static Byte *Utf16_To_Utf8(Byte *dest, const UInt16 *src, const UInt16 *srcLim)
     
     if (val < 0x80)
     {
-      *dest++ = (char)val;
+      *dest++ = (Byte)val;
       continue;
     }
 
@@ -162,21 +169,21 @@ static SRes Utf16_To_Char(CBuf *buf, const UInt16 *s
     )
 {
   unsigned len = 0;
-  for (len = 0; s[len] != 0; len++);
+  for (len = 0; s[len] != 0; len++) {}
 
   #ifndef _USE_UTF8
   {
-    unsigned size = len * 3 + 100;
+    const unsigned size = len * 3 + 100;
     if (!Buf_EnsureSize(buf, size))
       return SZ_ERROR_MEM;
     {
       buf->data[0] = 0;
       if (len != 0)
       {
-        char defaultChar = '_';
+        const char defaultChar = '_';
         BOOL defUsed;
-        unsigned numChars = 0;
-        numChars = WideCharToMultiByte(codePage, 0, (LPCWSTR)s, len, (char *)buf->data, size, &defaultChar, &defUsed);
+        const unsigned numChars = (unsigned)WideCharToMultiByte(
+            codePage, 0, (LPCWSTR)s, (int)len, (char *)buf->data, (int)size, &defaultChar, &defUsed);
         if (numChars == 0 || numChars >= size)
           return SZ_ERROR_FAIL;
         buf->data[numChars] = 0;
@@ -192,8 +199,8 @@ static SRes Utf16_To_Char(CBuf *buf, const UInt16 *s
 #ifdef _WIN32
   #ifndef USE_WINDOWS_FILE
     static UINT g_FileCodePage = CP_ACP;
+    #define MY_FILE_CODE_PAGE_PARAM ,g_FileCodePage
   #endif
-  #define MY_FILE_CODE_PAGE_PARAM ,g_FileCodePage
 #else
   #define MY_FILE_CODE_PAGE_PARAM
 #endif
@@ -300,17 +307,142 @@ static void UIntToStr_2(char *s, unsigned value)
   s[1] = (char)('0' + (value % 10));
 }
 
+
 #define PERIOD_4 (4 * 365 + 1)
 #define PERIOD_100 (PERIOD_4 * 25 - 1)
 #define PERIOD_400 (PERIOD_100 * 4 + 1)
 
-static void ConvertFileTimeToString(const CNtfsFileTime *nt, char *s)
+
+
+#ifndef _WIN32
+
+// MS uses long for BOOL, but long is 32-bit in MS. So we use int.
+// typedef long BOOL;
+typedef int BOOL;
+
+typedef struct _FILETIME // local definition for non-Windows builds
+{
+  DWORD dwLowDateTime; // low 32 bits of the 64-bit time value (see GET_TIME_64)
+  DWORD dwHighDateTime; // high 32 bits
+} FILETIME;
+
+static LONG TIME_GetBias(void) // bias in seconds, subtracted from UTC by FileTimeToLocalFileTime(); 0 if gmtime() fails
+{
+  const time_t utc = time(NULL);
+  struct tm *ptm = localtime(&utc);
+  const int localdaylight = ptm ? ptm->tm_isdst : -1; /* daylight for local timezone; -1 lets mktime() decide */
+  ptm = gmtime(&utc);
+  if (!ptm) return 0; /* do not dereference a failed conversion */
+  ptm->tm_isdst = localdaylight; /* use local daylight, not that of Greenwich */
+  return (int)(mktime(ptm) - utc);
+}
+
+#define TICKS_PER_SEC 10000000
+
+#define GET_TIME_64(pft) ((pft)->dwLowDateTime | ((UInt64)(pft)->dwHighDateTime << 32))
+
+#define SET_FILETIME(ft, v64) /* splits a 64-bit value into the two DWORDs of (ft); (v64) is parenthesized for expression arguments */ \
+   (ft)->dwLowDateTime = (DWORD)(v64); \
+   (ft)->dwHighDateTime = (DWORD)((v64) >> 32);
+
+#define WINAPI
+#define TRUE 1
+// Non-Windows replacement: converts (fileTime) to local time by subtracting TIME_GetBias() seconds; always returns TRUE.
+static BOOL WINAPI FileTimeToLocalFileTime(const FILETIME *fileTime, FILETIME *localFileTime)
+{
+  UInt64 v = GET_TIME_64(fileTime);
+  v = (UInt64)((Int64)v - (Int64)TIME_GetBias() * TICKS_PER_SEC); // bias is in seconds, (v) in 1/TICKS_PER_SEC units
+  SET_FILETIME(localFileTime, v);
+  return TRUE;
+}
+
+static const UInt32 kNumTimeQuantumsInSecond = 10000000;
+static const UInt32 kFileTimeStartYear = 1601;
+static const UInt32 kUnixTimeStartYear = 1970;
+static const UInt64 kUnixTimeOffset =
+    (UInt64)60 * 60 * 24 * (89 + 365 * (kUnixTimeStartYear - kFileTimeStartYear));
+
+static Int64 Time_FileTimeToUnixTime64(const FILETIME *ft) // whole seconds relative to the Unix start year; may be negative
+{
+  UInt64 winTime = GET_TIME_64(ft);
+  return (Int64)(winTime / kNumTimeQuantumsInSecond) - (Int64)kUnixTimeOffset; // ticks -> seconds, then shift the start year
+}
+
+#if defined(_AIX)
+  #define MY_ST_TIMESPEC st_timespec
+#else
+  #define MY_ST_TIMESPEC timespec
+#endif
+
+static void FILETIME_To_timespec(const FILETIME *ft, struct MY_ST_TIMESPEC *ts)
+{
+  // Converts (ft) to a timespec; a NULL (ft) or a value that does not fit
+  // into time_t yields UTIME_OMIT.
+  if (ft)
+  {
+    const Int64 sec = Time_FileTimeToUnixTime64(ft);
+    const time_t sec2 = (time_t)sec;
+    if (sec2 == sec) // round-trip check: (sec) is representable as time_t
+    {
+      const UInt64 winTime = GET_TIME_64(ft);
+      ts->tv_sec = sec2;
+      // sub-second part: 1/10000000 s ticks -> nanoseconds
+      ts->tv_nsec = (long)((winTime % 10000000) * 100);
+      return;
+    }
+  }
+  // fallback for both failure cases above
+  ts->tv_sec = 0;
+  // ts.tv_nsec = UTIME_NOW; // set to the current time
+  ts->tv_nsec = UTIME_OMIT; // keep old timestamp
+}
+
+static WRes Set_File_FILETIME(const UInt16 *name, const FILETIME *mTime)
+{
+  // Sets the modification time of file (name); returns 0, the name-conversion error, or errno from utimensat().
+  struct timespec times[2];
+  const int flags = 0; // follow link (AT_SYMLINK_NOFOLLOW would not)
+  CBuf buf;
+  WRes wres;
+  Buf_Init(&buf);
+  wres = (WRes)Utf16_To_Char(&buf, name MY_FILE_CODE_PAGE_PARAM);
+  if (wres == 0)
+  {
+    FILETIME_To_timespec(NULL, &times[0]); // access time: left unchanged (UTIME_OMIT)
+    FILETIME_To_timespec(mTime, &times[1]);
+    if (utimensat(AT_FDCWD, (const char *)buf.data, times, flags) != 0)
+      wres = errno; // captured before Buf_Free(), which may disturb errno
+  }
+  Buf_Free(&buf, &g_Alloc); // reached on every path, including a failed name conversion
+  return wres;
+}
+
+#endif
+
+static void NtfsFileTime_to_FILETIME(const CNtfsFileTime *t, FILETIME *ft) // copies the Low/High halves of (t) into (ft)
+{
+  ft->dwLowDateTime = (DWORD)(t->Low);
+  ft->dwHighDateTime = (DWORD)(t->High);
+}
+
+static void ConvertFileTimeToString(const CNtfsFileTime *nTime, char *s)
 {
   unsigned year, mon, hour, min, sec;
   Byte ms[] = { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 };
   unsigned t;
   UInt32 v;
-  UInt64 v64 = nt->Low | ((UInt64)nt->High << 32);
+  // UInt64 v64 = nt->Low | ((UInt64)nt->High << 32);
+  UInt64 v64;
+  {
+    FILETIME fileTime, locTime;
+    NtfsFileTime_to_FILETIME(nTime, &fileTime);
+    if (!FileTimeToLocalFileTime(&fileTime, &locTime))
+    {
+      locTime.dwHighDateTime =
+      locTime.dwLowDateTime = 0;
+    }
+    v64 = locTime.dwLowDateTime | ((UInt64)locTime.dwHighDateTime << 32);
+  }
   v64 /= 10000000;
   sec = (unsigned)(v64 % 60); v64 /= 60;
   min = (unsigned)(v64 % 60); v64 /= 60;
@@ -354,6 +486,43 @@ static void PrintError(char *s)
   PrintLF();
 }
 
+static void PrintError_WRes(const char *message, WRes wres) // prints "ERROR: <message>", the numeric system error code and, if available, its description
+{
+  Print("\nERROR: ");
+  Print(message);
+  PrintLF();
+  {
+    char s[32];
+    UIntToStr(s, (unsigned)wres, 1);
+    Print("System error code: ");
+    Print(s);
+  }
+  // sprintf(buffer + strlen(buffer), "\nSystem error code: %d", (unsigned)wres);
+  #ifdef _WIN32
+  {
+    char *s = NULL;
+    if (FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER |
+        FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
+        NULL, wres, 0, (LPSTR) &s, 0, NULL) != 0 && s)
+    {
+      Print(" : ");
+      Print(s);
+      LocalFree(s); // the buffer was allocated by FormatMessageA (FORMAT_MESSAGE_ALLOCATE_BUFFER)
+    }
+  }
+  #else
+  {
+    const char *s = strerror(wres); // non-Windows: (wres) is passed to strerror() as an errno value
+    if (s)
+    {
+      Print(" : ");
+      Print(s);
+    }
+  }
+  #endif
+  PrintLF();
+}
+
 static void GetAttribString(UInt32 wa, BoolInt isDir, char *s)
 {
   #ifdef USE_WINDOWS_FILE
@@ -413,17 +582,22 @@ int MY_CDECL main(int numargs, char *args[])
   allocImp = g_Alloc;
   allocTempImp = g_Alloc;
 
-  #ifdef UNDER_CE
-  if (InFile_OpenW(&archiveStream.file, L"\test.7z"))
-  #else
-  if (InFile_Open(&archiveStream.file, args[2]))
-  #endif
   {
-    PrintError("can not open input file");
-    return 1;
+    WRes wres =
+    #ifdef UNDER_CE
+      InFile_OpenW(&archiveStream.file, L"\test.7z"); // change it
+    #else
+      InFile_Open(&archiveStream.file, args[2]);
+    #endif
+    if (wres != 0)
+    {
+      PrintError_WRes("cannot open input file", wres);
+      return 1;
+    }
   }
 
   FileInStream_CreateVTable(&archiveStream);
+  archiveStream.wres = 0;
   LookToRead2_CreateVTable(&lookStream, False);
   lookStream.buf = NULL;
 
@@ -483,7 +657,7 @@ int MY_CDECL main(int numargs, char *args[])
         size_t outSizeProcessed = 0;
         // const CSzFileItem *f = db.Files + i;
         size_t len;
-        unsigned isDir = SzArEx_IsDir(&db, i);
+        const BoolInt isDir = SzArEx_IsDir(&db, i);
         if (listCommand == 0 && isDir && !fullPaths)
           continue;
         len = SzArEx_GetFileNameUtf16(&db, i, NULL);
@@ -546,8 +720,8 @@ int MY_CDECL main(int numargs, char *args[])
         }
 
         Print(testCommand ?
-            "Testing    ":
-            "Extracting ");
+            "T ":
+            "- ");
         res = PrintString(temp);
         if (res != SZ_OK)
           break;
@@ -591,27 +765,37 @@ int MY_CDECL main(int numargs, char *args[])
             PrintLF();
             continue;
           }
-          else if (OutFile_OpenUtf16(&outFile, destPath))
+          else
           {
-            PrintError("can not open output file");
-            res = SZ_ERROR_FAIL;
-            break;
+            WRes wres = OutFile_OpenUtf16(&outFile, destPath);
+            if (wres != 0)
+            {
+              PrintError_WRes("cannot open output file", wres);
+              res = SZ_ERROR_FAIL;
+              break;
+            }
           }
 
           processedSize = outSizeProcessed;
           
-          if (File_Write(&outFile, outBuffer + offset, &processedSize) != 0 || processedSize != outSizeProcessed)
           {
-            PrintError("can not write output file");
-            res = SZ_ERROR_FAIL;
-            break;
+            WRes wres = File_Write(&outFile, outBuffer + offset, &processedSize);
+            if (wres != 0 || processedSize != outSizeProcessed)
+            {
+              PrintError_WRes("cannot write output file", wres);
+              res = SZ_ERROR_FAIL;
+              break;
+            }
           }
 
-          #ifdef USE_WINDOWS_FILE
           {
-            FILETIME mtime, ctime;
+            FILETIME mtime;
             FILETIME *mtimePtr = NULL;
+            
+            #ifdef USE_WINDOWS_FILE
+            FILETIME ctime;
             FILETIME *ctimePtr = NULL;
+            #endif
 
             if (SzBitWithVals_Check(&db.MTime, i))
             {
@@ -620,6 +804,8 @@ int MY_CDECL main(int numargs, char *args[])
               mtime.dwHighDateTime = (DWORD)(t->High);
               mtimePtr = &mtime;
             }
+
+            #ifdef USE_WINDOWS_FILE
             if (SzBitWithVals_Check(&db.CTime, i))
             {
               const CNtfsFileTime *t = &db.CTime.Vals[i];
@@ -627,16 +813,29 @@ int MY_CDECL main(int numargs, char *args[])
               ctime.dwHighDateTime = (DWORD)(t->High);
               ctimePtr = &ctime;
             }
+
             if (mtimePtr || ctimePtr)
               SetFileTime(outFile.handle, ctimePtr, NULL, mtimePtr);
-          }
-          #endif
+            #endif
           
-          if (File_Close(&outFile))
-          {
-            PrintError("can not close output file");
-            res = SZ_ERROR_FAIL;
-            break;
+            {
+              WRes wres = File_Close(&outFile);
+              if (wres != 0)
+              {
+                PrintError_WRes("cannot close output file", wres);
+                res = SZ_ERROR_FAIL;
+                break;
+              }
+            }
+
+            #ifndef USE_WINDOWS_FILE
+            #ifdef _WIN32
+            mtimePtr = mtimePtr;
+            #else
+            if (mtimePtr)
+              Set_File_FILETIME(destPath, mtimePtr);
+            #endif
+            #endif
           }
           
           #ifdef USE_WINDOWS_FILE
@@ -672,13 +871,15 @@ int MY_CDECL main(int numargs, char *args[])
   if (res == SZ_ERROR_UNSUPPORTED)
     PrintError("decoder doesn't support this archive");
   else if (res == SZ_ERROR_MEM)
-    PrintError("can not allocate memory");
+    PrintError("cannot allocate memory");
   else if (res == SZ_ERROR_CRC)
     PrintError("CRC error");
+  else if (res == SZ_ERROR_READ /* || archiveStream.Res != 0 */)
+    PrintError_WRes("Read Error", archiveStream.wres);
   else
   {
     char s[32];
-    UInt64ToStr(res, s, 0);
+    UInt64ToStr((unsigned)res, s, 0);
     PrintError(s);
   }
   
diff --git a/deps/LZMA-SDK/C/Util/7z/makefile.gcc b/deps/LZMA-SDK/C/Util/7z/makefile.gcc
index f707935aa..d6ef9b2a7 100644
--- a/deps/LZMA-SDK/C/Util/7z/makefile.gcc
+++ b/deps/LZMA-SDK/C/Util/7z/makefile.gcc
@@ -1,75 +1,34 @@
-PROG = 7zDec
-CXX = gcc
-LIB =
-RM = rm -f
-CFLAGS = -c -O2 -Wall
-
-OBJS = 7zMain.o 7zAlloc.o 7zArcIn.o 7zBuf.o 7zBuf2.o 7zCrc.o 7zCrcOpt.o 7zDec.o CpuArch.o Delta.o LzmaDec.o Lzma2Dec.o Bra.o Bra86.o BraIA64.o Bcj2.o Ppmd7.o Ppmd7Dec.o 7zFile.o 7zStream.o
-
-all: $(PROG)
-
-$(PROG): $(OBJS)
-	$(CXX) -o $(PROG) $(LDFLAGS) $(OBJS) $(LIB)
-
-7zMain.o: 7zMain.c
-	$(CXX) $(CFLAGS) 7zMain.c
-
-7zAlloc.o: ../../7zAlloc.c
-	$(CXX) $(CFLAGS) ../../7zAlloc.c
-
-7zArcIn.o: ../../7zArcIn.c
-	$(CXX) $(CFLAGS) ../../7zArcIn.c
-
-7zBuf.o: ../../7zBuf.c
-	$(CXX) $(CFLAGS) ../../7zBuf.c
-
-7zBuf2.o: ../../7zBuf2.c
-	$(CXX) $(CFLAGS) ../../7zBuf2.c
-
-7zCrc.o: ../../7zCrc.c
-	$(CXX) $(CFLAGS) ../../7zCrc.c
-
-7zCrcOpt.o: ../../7zCrc.c
-	$(CXX) $(CFLAGS) ../../7zCrcOpt.c
-
-7zDec.o: ../../7zDec.c
-	$(CXX) $(CFLAGS) -D_7ZIP_PPMD_SUPPPORT ../../7zDec.c
-
-CpuArch.o: ../../CpuArch.c
-	$(CXX) $(CFLAGS) ../../CpuArch.c
-
-Delta.o: ../../Delta.c
-	$(CXX) $(CFLAGS) ../../Delta.c
-
-LzmaDec.o: ../../LzmaDec.c
-	$(CXX) $(CFLAGS) ../../LzmaDec.c
-
-Lzma2Dec.o: ../../Lzma2Dec.c
-	$(CXX) $(CFLAGS) ../../Lzma2Dec.c
-
-Bra.o: ../../Bra.c
-	$(CXX) $(CFLAGS) ../../Bra.c
-
-Bra86.o: ../../Bra86.c
-	$(CXX) $(CFLAGS) ../../Bra86.c
-
-BraIA64.o: ../../BraIA64.c
-	$(CXX) $(CFLAGS) ../../BraIA64.c
-
-Bcj2.o: ../../Bcj2.c
-	$(CXX) $(CFLAGS) ../../Bcj2.c
-
-Ppmd7.o: ../../Ppmd7.c
-	$(CXX) $(CFLAGS) ../../Ppmd7.c
-
-Ppmd7Dec.o: ../../Ppmd7Dec.c
-	$(CXX) $(CFLAGS) ../../Ppmd7Dec.c
-
-7zFile.o: ../../7zFile.c
-	$(CXX) $(CFLAGS) ../../7zFile.c
-
-7zStream.o: ../../7zStream.c
-	$(CXX) $(CFLAGS) ../../7zStream.c
-
-clean:
-	-$(RM) $(PROG) $(OBJS)
+PROG = 7zdec
+
+LOCAL_FLAGS = -D_7ZIP_PPMD_SUPPPORT
+
+include ../../../CPP/7zip/LzmaDec_gcc.mak
+
+
+OBJS = \
+  $(LZMA_DEC_OPT_OBJS) \
+  $O/Bcj2.o \
+  $O/Bra.o \
+  $O/Bra86.o \
+  $O/BraIA64.o \
+  $O/CpuArch.o \
+  $O/Delta.o \
+  $O/Lzma2Dec.o \
+  $O/LzmaDec.o \
+  $O/Ppmd7.o \
+  $O/Ppmd7Dec.o \
+  $O/7zCrc.o \
+  $O/7zCrcOpt.o \
+  $O/Sha256.o \
+  $O/Sha256Opt.o \
+  $O/7zAlloc.o \
+  $O/7zArcIn.o \
+  $O/7zBuf.o \
+  $O/7zBuf2.o \
+  $O/7zDec.o \
+  $O/7zMain.o \
+  $O/7zFile.o \
+  $O/7zStream.o \
+
+
+include ../../7zip_gcc_c.mak
diff --git a/deps/LZMA-SDK/C/Util/Lzma/LzmaUtil.c b/deps/LZMA-SDK/C/Util/Lzma/LzmaUtil.c
index 82130e85d..6b4293e33 100644
--- a/deps/LZMA-SDK/C/Util/Lzma/LzmaUtil.c
+++ b/deps/LZMA-SDK/C/Util/Lzma/LzmaUtil.c
@@ -1,5 +1,5 @@
 /* LzmaUtil.c -- Test application for LZMA compression
-2018-07-04 : Igor Pavlov : Public domain */
+2021-02-15 : Igor Pavlov : Public domain */
 
 #include "../../Precomp.h"
 
@@ -15,9 +15,9 @@
 #include "../../LzmaDec.h"
 #include "../../LzmaEnc.h"
 
-static const char * const kCantReadMessage = "Can not read input file";
-static const char * const kCantWriteMessage = "Can not write output file";
-static const char * const kCantAllocateMessage = "Can not allocate memory";
+static const char * const kCantReadMessage = "Cannot read input file";
+static const char * const kCantWriteMessage = "Cannot write output file";
+static const char * const kCantAllocateMessage = "Cannot allocate memory";
 static const char * const kDataErrorMessage = "Data error";
 
 static void PrintHelp(char *buffer)
@@ -37,9 +37,25 @@ static int PrintError(char *buffer, const char *message)
   return 1;
 }
 
+static int PrintError_WRes(char *buffer, const char *message, WRes wres)
+{ /* appends "\nError: <message>", the system error code (wres) and, on non-Windows builds, its strerror() text to the string already in (buffer); always returns 1 */
+  strcat(buffer, "\nError: ");
+  strcat(buffer, message);
+  sprintf(buffer + strlen(buffer), "\nSystem error code: %u", (unsigned)wres); /* %u matches the (unsigned) argument */
+  #ifndef _WIN32
+  {
+    const char *s = strerror(wres);
+    if (s)
+      sprintf(buffer + strlen(buffer), " : %s", s);
+  }
+  #endif
+  strcat(buffer, "\n");
+  return 1;
+}
+
 static int PrintErrorNumber(char *buffer, SRes val)
 {
-  sprintf(buffer + strlen(buffer), "\nError code: %x\n", (unsigned)val);
+  sprintf(buffer + strlen(buffer), "\n7-Zip error code: %u\n", (unsigned)val); /* appends the SRes code in decimal; %u matches the (unsigned) argument */
   return 1;
 }
 
@@ -181,9 +197,11 @@ static int main2(int numArgs, const char *args[], char *rs)
 
   FileSeqInStream_CreateVTable(&inStream);
   File_Construct(&inStream.file);
+  inStream.wres = 0;
 
   FileOutStream_CreateVTable(&outStream);
   File_Construct(&outStream.file);
+  outStream.wres = 0;
 
   if (numArgs == 1)
   {
@@ -206,14 +224,19 @@ static int main2(int numArgs, const char *args[], char *rs)
       return PrintError(rs, "Incorrect UInt32 or UInt64");
   }
 
-  if (InFile_Open(&inStream.file, args[2]) != 0)
-    return PrintError(rs, "Can not open input file");
+  {
+    WRes wres = InFile_Open(&inStream.file, args[2]);
+    if (wres != 0)
+      return PrintError_WRes(rs, "Cannot open input file", wres);
+  }
 
   if (numArgs > 3)
   {
+    WRes wres;
     useOutFile = True;
-    if (OutFile_Open(&outStream.file, args[3]) != 0)
-      return PrintError(rs, "Can not open output file");
+    wres = OutFile_Open(&outStream.file, args[3]);
+    if (wres != 0)
+      return PrintError_WRes(rs, "Cannot open output file", wres);
   }
   else if (encodeMode)
     PrintUserError(rs);
@@ -221,7 +244,9 @@ static int main2(int numArgs, const char *args[], char *rs)
   if (encodeMode)
   {
     UInt64 fileSize;
-    File_GetLength(&inStream.file, &fileSize);
+    WRes wres = File_GetLength(&inStream.file, &fileSize);
+    if (wres != 0)
+      return PrintError_WRes(rs, "Cannot get file length", wres);
     res = Encode(&outStream.vt, &inStream.vt, fileSize, rs);
   }
   else
@@ -240,9 +265,9 @@ static int main2(int numArgs, const char *args[], char *rs)
     else if (res == SZ_ERROR_DATA)
       return PrintError(rs, kDataErrorMessage);
     else if (res == SZ_ERROR_WRITE)
-      return PrintError(rs, kCantWriteMessage);
+      return PrintError_WRes(rs, kCantWriteMessage, outStream.wres);
     else if (res == SZ_ERROR_READ)
-      return PrintError(rs, kCantReadMessage);
+      return PrintError_WRes(rs, kCantReadMessage, inStream.wres);
     return PrintErrorNumber(rs, res);
   }
   return 0;
diff --git a/deps/LZMA-SDK/C/Util/Lzma/makefile.gcc b/deps/LZMA-SDK/C/Util/Lzma/makefile.gcc
index 12a72bb8b..89b3e11f7 100644
--- a/deps/LZMA-SDK/C/Util/Lzma/makefile.gcc
+++ b/deps/LZMA-SDK/C/Util/Lzma/makefile.gcc
@@ -1,44 +1,19 @@
-PROG = lzma
-CXX = g++
-LIB =
-RM = rm -f
-CFLAGS = -c -O2 -Wall -D_7ZIP_ST
+PROG = 7lzma
 
-OBJS = \
-  LzmaUtil.o \
-  Alloc.o \
-  LzFind.o \
-  LzmaDec.o \
-  LzmaEnc.o \
-  7zFile.o \
-  7zStream.o \
-
-
-all: $(PROG)
-
-$(PROG): $(OBJS)
-	$(CXX) -o $(PROG) $(LDFLAGS) $(OBJS) $(LIB) $(LIB2)
-
-LzmaUtil.o: LzmaUtil.c
-	$(CXX) $(CFLAGS) LzmaUtil.c
-
-Alloc.o: ../../Alloc.c
-	$(CXX) $(CFLAGS) ../../Alloc.c
+include ../../../CPP/7zip/LzmaDec_gcc.mak
 
-LzFind.o: ../../LzFind.c
-	$(CXX) $(CFLAGS) ../../LzFind.c
 
-LzmaDec.o: ../../LzmaDec.c
-	$(CXX) $(CFLAGS) ../../LzmaDec.c
-
-LzmaEnc.o: ../../LzmaEnc.c
-	$(CXX) $(CFLAGS) ../../LzmaEnc.c
-
-7zFile.o: ../../7zFile.c
-	$(CXX) $(CFLAGS) ../../7zFile.c
-
-7zStream.o: ../../7zStream.c
-	$(CXX) $(CFLAGS) ../../7zStream.c
-
-clean:
-	-$(RM) $(PROG) $(OBJS)
+OBJS = \
+  $(LZMA_DEC_OPT_OBJS) \
+  $O/7zFile.o \
+  $O/7zStream.o \
+  $O/Alloc.o \
+  $O/LzFind.o \
+  $O/LzFindMt.o \
+  $O/LzmaDec.o \
+  $O/LzmaEnc.o \
+  $O/LzmaUtil.o \
+  $O/Threads.o \
+
+
+include ../../7zip_gcc_c.mak
diff --git a/deps/LZMA-SDK/C/Xz.c b/deps/LZMA-SDK/C/Xz.c
index 7e061d6e7..d6e2596a9 100644
--- a/deps/LZMA-SDK/C/Xz.c
+++ b/deps/LZMA-SDK/C/Xz.c
@@ -1,5 +1,5 @@
 /* Xz.c - Xz
-2017-05-12 : Igor Pavlov : Public domain */
+2021-02-09 : Igor Pavlov : Public domain */
 
 #include "Precomp.h"
 
@@ -41,7 +41,7 @@ void Xz_Free(CXzStream *p, ISzAllocPtr alloc)
 unsigned XzFlags_GetCheckSize(CXzStreamFlags f)
 {
   unsigned t = XzFlags_GetCheckType(f);
-  return (t == 0) ? 0 : (4 << ((t - 1) / 3));
+  return (t == 0) ? 0 : ((unsigned)4 << ((t - 1) / 3)); // 0 if (t == 0); otherwise 4 bytes, doubled for each next group of 3 check types
 }
 
 void XzCheck_Init(CXzCheck *p, unsigned mode)
diff --git a/deps/LZMA-SDK/C/Xz.h b/deps/LZMA-SDK/C/Xz.h
index fad56a3fb..cf9458e39 100644
--- a/deps/LZMA-SDK/C/Xz.h
+++ b/deps/LZMA-SDK/C/Xz.h
@@ -1,5 +1,5 @@
 /* Xz.h - Xz interface
-2018-07-04 : Igor Pavlov : Public domain */
+2021-04-01 : Igor Pavlov : Public domain */
 
 #ifndef __XZ_H
 #define __XZ_H
@@ -47,7 +47,7 @@ typedef struct
   CXzFilter filters[XZ_NUM_FILTERS_MAX];
 } CXzBlock;
 
-#define XzBlock_GetNumFilters(p) (((p)->flags & XZ_BF_NUM_FILTERS_MASK) + 1)
+#define XzBlock_GetNumFilters(p) (((unsigned)(p)->flags & XZ_BF_NUM_FILTERS_MASK) + 1)
 #define XzBlock_HasPackSize(p)   (((p)->flags & XZ_BF_PACK_SIZE) != 0)
 #define XzBlock_HasUnpackSize(p) (((p)->flags & XZ_BF_UNPACK_SIZE) != 0)
 #define XzBlock_HasUnsupportedFlags(p) (((p)->flags & ~(XZ_BF_NUM_FILTERS_MASK | XZ_BF_PACK_SIZE | XZ_BF_UNPACK_SIZE)) != 0)
@@ -277,7 +277,10 @@ void XzUnpacker_Free(CXzUnpacker *p);
     {
       XzUnpacker_Init()
       for()
+      {
         XzUnpacker_Code();
+      }
+      XzUnpacker_IsStreamWasFinished()
     }
     
   Interface-2 : Direct output buffer:
@@ -288,7 +291,10 @@ void XzUnpacker_Free(CXzUnpacker *p);
       XzUnpacker_Init()
       XzUnpacker_SetOutBufMode(); // to set output buffer and size
       for()
+      {
         XzUnpacker_Code(); // (dest = NULL) in XzUnpacker_Code()
+      }
+      XzUnpacker_IsStreamWasFinished()
     }
 
   Interface-3 : Direct output buffer : One call full decoding
@@ -296,6 +302,7 @@ void XzUnpacker_Free(CXzUnpacker *p);
     It uses Interface-2 internally.
     {
       XzUnpacker_CodeFull()
+      XzUnpacker_IsStreamWasFinished()
     }
 */
 
@@ -309,8 +316,12 @@ Returns:
   SZ_OK
     status:
       CODER_STATUS_NOT_FINISHED,
-      CODER_STATUS_NEEDS_MORE_INPUT - maybe there are more xz streams,
-                                      call XzUnpacker_IsStreamWasFinished to check that current stream was finished
+      CODER_STATUS_NEEDS_MORE_INPUT - the decoder can return it in two cases:
+         1) it needs more input data to finish current xz stream
+         2) xz stream was finished successfully. But the decoder supports multiple
+            concatenated xz streams. So it expects more input data for new xz streams.
+         Call XzUnpacker_IsStreamWasFinished() to check that latest xz stream was finished successfully.
+
   SZ_ERROR_MEM  - Memory allocation error
   SZ_ERROR_DATA - Data error
   SZ_ERROR_UNSUPPORTED - Unsupported method or method properties
@@ -335,12 +346,17 @@ SRes XzUnpacker_CodeFull(CXzUnpacker *p, Byte *dest, SizeT *destLen,
     const Byte *src, SizeT *srcLen,
     ECoderFinishMode finishMode, ECoderStatus *status);
 
+/*
+If you decode full xz stream(s), then you can call XzUnpacker_IsStreamWasFinished()
+after successful XzUnpacker_CodeFull() or after last call of XzUnpacker_Code().
+*/
+
 BoolInt XzUnpacker_IsStreamWasFinished(const CXzUnpacker *p);
 
 /*
-XzUnpacker_GetExtraSize() returns then number of uncofirmed bytes,
+XzUnpacker_GetExtraSize() returns the number of unconfirmed bytes,
  if it's in (XZ_STATE_STREAM_HEADER) state or in (XZ_STATE_STREAM_PADDING) state.
-These bytes can be some bytes after xz archive, or
+These bytes can be some data after xz archive, or
 it can be start of new xz stream.
  
 Call XzUnpacker_GetExtraSize() after XzUnpacker_Code() function to detect real size of
@@ -371,19 +387,46 @@ BoolInt XzUnpacker_IsBlockFinished(const CXzUnpacker *p);
 
 
 
-/* ---------- Multi Threading Decoding ---------- */
+
+
+
+/* ---- Single-Thread and Multi-Thread xz Decoding with Input/Output Streams ---- */
+
+/*
+  if (CXzDecMtProps::numThreads > 1), the decoder can try to use
+  Multi-Threading. The decoder analyses xz block header, and if
+  there are pack size and unpack size values stored in xz block header,
+  the decoder reads compressed data of block to internal buffers,
+  and then it can start parallel decoding, if there are other blocks.
+  The decoder can switch back to Single-Thread decoding after some conditions.
+
+  The sequence of calls for xz decoding with in/out Streams:
+  {
+    XzDecMt_Create()
+    XzDecMtProps_Init(XzDecMtProps) to set default values of properties
+    // then you can change some XzDecMtProps parameters with required values
+    // here you can set the number of threads and (memUseMax) - the maximum
+    Memory usage for multithreading decoding.
+    for()
+    {
+      XzDecMt_Decode() // one call per one file
+    }
+    XzDecMt_Destroy()
+  }
+*/
 
 
 typedef struct
 {
-  size_t inBufSize_ST;
-  size_t outStep_ST;
-  BoolInt ignoreErrors;
+  size_t inBufSize_ST;    // size of input buffer for Single-Thread decoding
+  size_t outStep_ST;      // size of output buffer for Single-Thread decoding
+  BoolInt ignoreErrors;   // if set to 1, the decoder can ignore some errors and it skips broken parts of data.
   
   #ifndef _7ZIP_ST
-  unsigned numThreads;
-  size_t inBufSize_MT;
-  size_t memUseMax;
+  unsigned numThreads;    // the number of threads for Multi-Thread decoding. if (numThreads == 1) it will use Single-thread decoding
+  size_t inBufSize_MT;    // size of small input data buffers for Multi-Thread decoding. Big number of such small buffers can be created
+  size_t memUseMax;       // the limit of total memory usage for Multi-Thread decoding.
+                          // it's recommended to set (memUseMax) manually to value that is smaller than the total size of RAM in computer.
   #endif
 } CXzDecMtProps;
 
@@ -393,7 +436,7 @@ void XzDecMtProps_Init(CXzDecMtProps *p);
 typedef void * CXzDecMtHandle;
 
 /*
-  alloc    : XzDecMt uses CAlignOffsetAlloc for addresses allocated by (alloc).
+  alloc    : XzDecMt uses CAlignOffsetAlloc internally for addresses allocated by (alloc).
   allocMid : for big allocations, aligned allocation is better
 */
 
@@ -407,33 +450,46 @@ typedef struct
   Byte NumStreams_Defined;
   Byte NumBlocks_Defined;
 
-  Byte DataAfterEnd;
+  Byte DataAfterEnd;      // there are some additional data after good xz streams, and that data is not new xz stream.
   Byte DecodingTruncated; // Decoding was Truncated, we need only partial output data
 
-  UInt64 InSize;  // pack size processed
+  UInt64 InSize;          // pack size processed. That value doesn't include the data after
+                          // end of xz stream, if that data was not correct
   UInt64 OutSize;
 
   UInt64 NumStreams;
   UInt64 NumBlocks;
 
-  SRes DecodeRes;
-  SRes ReadRes;
-  SRes ProgressRes;
-  SRes CombinedRes;
-  SRes CombinedRes_Type;
+  SRes DecodeRes;         // the error code of xz streams data decoding
+  SRes ReadRes;           // error code from ISeqInStream:Read()
+  SRes ProgressRes;       // error code from ICompressProgress:Progress()
 
+  SRes CombinedRes;       // Combined result error code that shows main result
+                          // = SZ_OK, if there is no error.
+                          // but check also (DataAfterEnd) that can show additional minor errors.
+ 
+  SRes CombinedRes_Type;  // = SZ_ERROR_READ,     if error from ISeqInStream
+                          // = SZ_ERROR_PROGRESS, if error from ICompressProgress
+                          // = SZ_ERROR_WRITE,    if error from ISeqOutStream
+                          // = SZ_ERROR_* codes for decoding
 } CXzStatInfo;
 
 void XzStatInfo_Clear(CXzStatInfo *p);
 
 /*
+
 XzDecMt_Decode()
-SRes:
-  SZ_OK               - OK
+SRes: it's combined decoding result. It also is equal to stat->CombinedRes.
+
+  SZ_OK               - no error
+                        check also output value in (stat->DataAfterEnd)
+                        that can show additional possible error
+
   SZ_ERROR_MEM        - Memory allocation error
   SZ_ERROR_NO_ARCHIVE - is not xz archive
   SZ_ERROR_ARCHIVE    - Headers error
   SZ_ERROR_DATA       - Data Error
+  SZ_ERROR_UNSUPPORTED - Unsupported method or method properties
   SZ_ERROR_CRC        - CRC Error
   SZ_ERROR_INPUT_EOF  - it needs more input data
   SZ_ERROR_WRITE      - ISeqOutStream error
@@ -451,8 +507,9 @@ SRes XzDecMt_Decode(CXzDecMtHandle p,
     // Byte *outBuf, size_t *outBufSize,
     ISeqInStream *inStream,
     // const Byte *inData, size_t inDataSize,
-    CXzStatInfo *stat,
-    int *isMT,                 // 0 means that ST (Single-Thread) version was used
+    CXzStatInfo *stat,         // out: decoding results and statistics
+    int *isMT,                 // out: 0 means that ST (Single-Thread) version was used
+                               //      1 means that MT (Multi-Thread) version was used
     ICompressProgress *progress);
 
 EXTERN_C_END
diff --git a/deps/LZMA-SDK/C/XzCrc64Opt.c b/deps/LZMA-SDK/C/XzCrc64Opt.c
index 9273465d4..a0637dd22 100644
--- a/deps/LZMA-SDK/C/XzCrc64Opt.c
+++ b/deps/LZMA-SDK/C/XzCrc64Opt.c
@@ -1,5 +1,5 @@
 /* XzCrc64Opt.c -- CRC64 calculation
-2017-06-30 : Igor Pavlov : Public domain */
+2021-02-09 : Igor Pavlov : Public domain */
 
 #include "Precomp.h"
 
@@ -9,6 +9,7 @@
 
 #define CRC64_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
 
+UInt64 MY_FAST_CALL XzCrc64UpdateT4(UInt64 v, const void *data, size_t size, const UInt64 *table);
 UInt64 MY_FAST_CALL XzCrc64UpdateT4(UInt64 v, const void *data, size_t size, const UInt64 *table)
 {
   const Byte *p = (const Byte *)data;
@@ -16,7 +17,7 @@ UInt64 MY_FAST_CALL XzCrc64UpdateT4(UInt64 v, const void *data, size_t size, con
     v = CRC64_UPDATE_BYTE_2(v, *p);
   for (; size >= 4; size -= 4, p += 4)
   {
-    UInt32 d = (UInt32)v ^ *(const UInt32 *)p;
+    UInt32 d = (UInt32)v ^ *(const UInt32 *)(const void *)p;
     v = (v >> 32)
         ^ (table + 0x300)[((d      ) & 0xFF)]
         ^ (table + 0x200)[((d >>  8) & 0xFF)]
@@ -45,6 +46,7 @@ UInt64 MY_FAST_CALL XzCrc64UpdateT4(UInt64 v, const void *data, size_t size, con
 
 #define CRC64_UPDATE_BYTE_2_BE(crc, b) (table[(Byte)((crc) >> 56) ^ (b)] ^ ((crc) << 8))
 
+UInt64 MY_FAST_CALL XzCrc64UpdateT1_BeT4(UInt64 v, const void *data, size_t size, const UInt64 *table);
 UInt64 MY_FAST_CALL XzCrc64UpdateT1_BeT4(UInt64 v, const void *data, size_t size, const UInt64 *table)
 {
   const Byte *p = (const Byte *)data;
@@ -54,7 +56,7 @@ UInt64 MY_FAST_CALL XzCrc64UpdateT1_BeT4(UInt64 v, const void *data, size_t size
     v = CRC64_UPDATE_BYTE_2_BE(v, *p);
   for (; size >= 4; size -= 4, p += 4)
   {
-    UInt32 d = (UInt32)(v >> 32) ^ *(const UInt32 *)p;
+    UInt32 d = (UInt32)(v >> 32) ^ *(const UInt32 *)(const void *)p;
     v = (v << 32)
         ^ (table + 0x000)[((d      ) & 0xFF)]
         ^ (table + 0x100)[((d >>  8) & 0xFF)]
diff --git a/deps/LZMA-SDK/C/XzDec.c b/deps/LZMA-SDK/C/XzDec.c
index 4f5327207..d345f68c1 100644
--- a/deps/LZMA-SDK/C/XzDec.c
+++ b/deps/LZMA-SDK/C/XzDec.c
@@ -1,5 +1,5 @@
 /* XzDec.c -- Xz Decode
-2019-02-02 : Igor Pavlov : Public domain */
+2021-04-01 : Igor Pavlov : Public domain */
 
 #include "Precomp.h"
 
@@ -240,6 +240,7 @@ static SRes BraState_Code2(void *pp,
 }
 
 
+SRes BraState_SetFromMethod(IStateCoder *p, UInt64 id, int encodeMode, ISzAllocPtr alloc);
 SRes BraState_SetFromMethod(IStateCoder *p, UInt64 id, int encodeMode, ISzAllocPtr alloc)
 {
   CBraState *decoder;
@@ -1038,7 +1039,7 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen,
             (p->outBuf ? NULL : dest), &destLen2, destFinish,
             src, &srcLen2, srcFinished2,
             finishMode2);
-        
+
         *status = p->decoder.status;
         XzCheck_Update(&p->check, (p->outBuf ? p->outBuf + p->outDataWritten : dest), destLen2);
         if (!p->outBuf)
@@ -1275,9 +1276,10 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen,
         }
         else
         {
+          const Byte *ptr = p->buf;
           p->state = XZ_STATE_STREAM_FOOTER;
           p->pos = 0;
-          if (CRC_GET_DIGEST(p->crc) != GetUi32(p->buf))
+          if (CRC_GET_DIGEST(p->crc) != GetUi32(ptr))
             return SZ_ERROR_CRC;
         }
         break;
@@ -1456,7 +1458,6 @@ typedef struct
   ISeqInStream *inStream;
   ISeqOutStream *outStream;
   ICompressProgress *progress;
-  // CXzStatInfo *stat;
 
   BoolInt finishMode;
   BoolInt outSize_Defined;
@@ -1492,8 +1493,9 @@ typedef struct
   UInt64 numBlocks;
 
   // UInt64 numBadBlocks;
-  SRes mainErrorCode;
-
+  SRes mainErrorCode;  // it's set to error code, if the size of Code() output doesn't match the size from Parsing stage
+                       // it can be = SZ_ERROR_INPUT_EOF
+                       // it can be = SZ_ERROR_DATA, in some other cases
   BoolInt isBlockHeaderState_Parse;
   BoolInt isBlockHeaderState_Write;
   UInt64 outProcessed_Parse;
@@ -1877,7 +1879,7 @@ static SRes XzDecMt_Callback_PreCode(void *pp, unsigned coderIndex)
     {
       // if (res == SZ_ERROR_MEM) return res;
       if (me->props.ignoreErrors && res != SZ_ERROR_MEM)
-        return S_OK;
+        return SZ_OK;
       return res;
     }
   }
@@ -1898,15 +1900,18 @@ static SRes XzDecMt_Callback_Code(void *pp, unsigned coderIndex,
   *outCodePos = coder->outCodeSize;
   *stop = True;
 
+  if (srcSize > coder->inPreSize - coder->inCodeSize)
+    return SZ_ERROR_FAIL;
+  
   if (coder->inCodeSize < coder->inPreHeaderSize)
   {
-    UInt64 rem = coder->inPreHeaderSize - coder->inCodeSize;
-    size_t step = srcSize;
-    if (step > rem)
-      step = (size_t)rem;
+    size_t step = coder->inPreHeaderSize - coder->inCodeSize;
+    if (step > srcSize)
+      step = srcSize;
     src += step;
     srcSize -= step;
     coder->inCodeSize += step;
+    *inCodePos = coder->inCodeSize;
     if (coder->inCodeSize < coder->inPreHeaderSize)
     {
       *stop = False;
@@ -1956,7 +1961,7 @@ static SRes XzDecMt_Callback_Code(void *pp, unsigned coderIndex,
   {
     *inCodePos = coder->inPreSize;
     *outCodePos = coder->outPreSize;
-    return S_OK;
+    return SZ_OK;
   }
   return coder->codeRes;
 }
@@ -1966,7 +1971,7 @@ static SRes XzDecMt_Callback_Code(void *pp, unsigned coderIndex,
 
 static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex,
     BoolInt needWriteToStream,
-    const Byte *src, size_t srcSize,
+    const Byte *src, size_t srcSize, BoolInt isCross,
     // int srcFinished,
     BoolInt *needContinue,
     BoolInt *canRecode)
@@ -1985,7 +1990,7 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex,
   if (!coder->dec.headerParsedOk || !coder->outBuf)
   {
     if (me->finishedDecoderIndex < 0)
-      me->finishedDecoderIndex = coderIndex;
+      me->finishedDecoderIndex = (int)coderIndex;
     return SZ_OK;
   }
 
@@ -2077,7 +2082,7 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex,
     if (coder->codeRes != SZ_OK)
       if (!me->props.ignoreErrors)
       {
-        me->finishedDecoderIndex = coderIndex;
+        me->finishedDecoderIndex = (int)coderIndex;
         return res;
       }
 
@@ -2086,7 +2091,7 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex,
     if (coder->inPreSize != coder->inCodeSize
         || coder->blockPackTotal != coder->inCodeSize)
     {
-      me->finishedDecoderIndex = coderIndex;
+      me->finishedDecoderIndex = (int)coderIndex;
       return SZ_OK;
     }
 
@@ -2125,22 +2130,41 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex,
         return SZ_OK;
       }
       
+      /*
+      We have processed all xz-blocks of stream,
+      And xz unpacker is at XZ_STATE_BLOCK_HEADER state, where
+      (src) is a pointer to xz-Index structure.
+      We finish reading of current xz-Stream, including Zero padding after xz-Stream.
+      We exit, if we reach extra byte (first byte of new-Stream or another data).
+      But we don't update input stream pointer for that new extra byte.
+      If extra byte is not correct first byte of xz-signature,
+      we have SZ_ERROR_NO_ARCHIVE error here.
+      */
+
       res = XzUnpacker_Code(dec,
           NULL, &outSizeCur,
           src, &srcProcessed,
           me->mtc.readWasFinished, // srcFinished
           CODER_FINISH_END, // CODER_FINISH_ANY,
           &status);
+
+      // res = SZ_ERROR_ARCHIVE; // for failure test
       
       me->status = status;
       me->codeRes = res;
 
+      if (isCross)
+        me->mtc.crossStart += srcProcessed;
+
       me->mtc.inProcessed += srcProcessed;
       me->mtc.mtProgress.totalInSize = me->mtc.inProcessed;
 
+      srcSize -= srcProcessed;
+      src += srcProcessed;
+
       if (res != SZ_OK)
       {
-        return S_OK;
+        return SZ_OK;
         // return res;
       }
       
@@ -2149,20 +2173,26 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex,
         *needContinue = True;
         me->isBlockHeaderState_Parse = False;
         me->isBlockHeaderState_Write = False;
+
+        if (!isCross)
         {
           Byte *crossBuf = MtDec_GetCrossBuff(&me->mtc);
           if (!crossBuf)
             return SZ_ERROR_MEM;
-          memcpy(crossBuf, src + srcProcessed, srcSize - srcProcessed);
+          if (srcSize != 0)
+            memcpy(crossBuf, src, srcSize);
+          me->mtc.crossStart = 0;
+          me->mtc.crossEnd = srcSize;
         }
-        me->mtc.crossStart = 0;
-        me->mtc.crossEnd = srcSize - srcProcessed;
+
+        PRF_STR_INT("XZ_STATE_STREAM_HEADER crossEnd = ", (unsigned)me->mtc.crossEnd);
+
         return SZ_OK;
       }
       
-      if (status != CODER_STATUS_NEEDS_MORE_INPUT)
+      if (status != CODER_STATUS_NEEDS_MORE_INPUT || srcSize != 0)
       {
-        return E_FAIL;
+        return SZ_ERROR_FAIL;
       }
       
       if (me->mtc.readWasFinished)
@@ -2174,7 +2204,7 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex,
     {
       size_t inPos;
       size_t inLim;
-      const Byte *inData;
+      // const Byte *inData;
       UInt64 inProgressPrev = me->mtc.inProcessed;
       
       // XzDecMt_Prepare_InBuf_ST(p);
@@ -2184,9 +2214,8 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex,
       
       inPos = 0;
       inLim = 0;
-      // outProcessed = 0;
       
-      inData = crossBuf;
+      // inData = crossBuf;
       
       for (;;)
       {
@@ -2201,7 +2230,7 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex,
           {
             inPos = 0;
             inLim = me->mtc.inBufSize;
-            me->mtc.readRes = ISeqInStream_Read(me->inStream, (void *)inData, &inLim);
+            me->mtc.readRes = ISeqInStream_Read(me->inStream, (void *)crossBuf, &inLim);
             me->mtc.readProcessed += inLim;
             if (inLim == 0 || me->mtc.readRes != SZ_OK)
               me->mtc.readWasFinished = True;
@@ -2213,7 +2242,7 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex,
 
         res = XzUnpacker_Code(dec,
             NULL, &outProcessed,
-            inData + inPos, &inProcessed,
+            crossBuf + inPos, &inProcessed,
             (inProcessed == 0), // srcFinished
             CODER_FINISH_END, &status);
         
@@ -2225,7 +2254,7 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex,
 
         if (res != SZ_OK)
         {
-          return S_OK;
+          return SZ_OK;
           // return res;
         }
 
@@ -2240,7 +2269,7 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex,
         }
         
         if (status != CODER_STATUS_NEEDS_MORE_INPUT)
-          return E_FAIL;
+          return SZ_ERROR_FAIL;
         
         if (me->mtc.progress)
         {
@@ -2276,13 +2305,6 @@ void XzStatInfo_Clear(CXzStatInfo *p)
   p->NumStreams_Defined = False;
   p->NumBlocks_Defined = False;
   
-  // p->IsArc = False;
-  // p->UnexpectedEnd = False;
-  // p->Unsupported = False;
-  // p->HeadersError = False;
-  // p->DataError = False;
-  // p->CrcError = False;
-
   p->DataAfterEnd = False;
   p->DecodingTruncated = False;
   
@@ -2296,6 +2318,16 @@ void XzStatInfo_Clear(CXzStatInfo *p)
 
 
 
+/*
+  XzDecMt_Decode_ST() can return SZ_OK or the following errors
+     - SZ_ERROR_MEM for memory allocation error
+     - error from XzUnpacker_Code() function
+     - SZ_ERROR_WRITE for ISeqOutStream::Write(). stat->CombinedRes_Type = SZ_ERROR_WRITE in that case
+     - ICompressProgress::Progress() error,  stat->CombinedRes_Type = SZ_ERROR_PROGRESS.
+  But XzDecMt_Decode_ST() doesn't return ISeqInStream::Read() errors.
+  ISeqInStream::Read() result is set to p->readRes.
+  also it can set stat->CombinedRes_Type to SZ_ERROR_WRITE or SZ_ERROR_PROGRESS.
+*/
 
 static SRes XzDecMt_Decode_ST(CXzDecMt *p
     #ifndef _7ZIP_ST
@@ -2384,7 +2416,7 @@ static SRes XzDecMt_Decode_ST(CXzDecMt *p
         inPos = 0;
         inLim = p->inBufSize;
         inData = p->inBuf;
-        p->readRes = ISeqInStream_Read(p->inStream, (void *)inData, &inLim);
+        p->readRes = ISeqInStream_Read(p->inStream, (void *)p->inBuf, &inLim);
         p->readProcessed += inLim;
         if (inLim == 0 || p->readRes != SZ_OK)
           p->readWasFinished = True;
@@ -2426,8 +2458,8 @@ static SRes XzDecMt_Decode_ST(CXzDecMt *p
     if (finished || outProcessed >= outSize)
       if (outPos != 0)
       {
-        size_t written = ISeqOutStream_Write(p->outStream, p->outBuf, outPos);
-        p->outProcessed += written;
+        const size_t written = ISeqOutStream_Write(p->outStream, p->outBuf, outPos);
+        // p->outProcessed += written; // 21.01: BUG fixed
         if (written != outPos)
         {
           stat->CombinedRes_Type = SZ_ERROR_WRITE;
@@ -2438,9 +2470,8 @@ static SRes XzDecMt_Decode_ST(CXzDecMt *p
 
     if (p->progress && res == SZ_OK)
     {
-      UInt64 inDelta = p->inProcessed - inPrev;
-      UInt64 outDelta = p->outProcessed - outPrev;
-      if (inDelta >= (1 << 22) || outDelta >= (1 << 22))
+      if (p->inProcessed - inPrev >= (1 << 22) ||
+          p->outProcessed - outPrev >= (1 << 22))
       {
         res = ICompressProgress_Progress(p->progress, p->inProcessed, p->outProcessed);
         if (res != SZ_OK)
@@ -2455,14 +2486,31 @@ static SRes XzDecMt_Decode_ST(CXzDecMt *p
     }
 
     if (finished)
-      return res;
+    {
+      // p->codeRes is preliminary error from XzUnpacker_Code.
+      // and it can be corrected later as final result
+      // so we return SZ_OK here instead of (res);
+      return SZ_OK;
+      // return res;
+    }
   }
 }
 
-static SRes XzStatInfo_SetStat(const CXzUnpacker *dec,
+
+
+/*
+XzStatInfo_SetStat() transforms
+    CXzUnpacker return code and status to combined CXzStatInfo results.
+    it can convert SZ_OK to SZ_ERROR_INPUT_EOF
+    it can convert SZ_ERROR_NO_ARCHIVE to SZ_OK and (DataAfterEnd = 1)
+*/
+
+static void XzStatInfo_SetStat(const CXzUnpacker *dec,
     int finishMode,
-    UInt64 readProcessed, UInt64 inProcessed,
-    SRes res, ECoderStatus status,
+    // UInt64 readProcessed,
+    UInt64 inProcessed,
+    SRes res,                     // it's result from CXzUnpacker unpacker
+    ECoderStatus status,
     BoolInt decodingTruncated,
     CXzStatInfo *stat)
 {
@@ -2484,12 +2532,20 @@ static SRes XzStatInfo_SetStat(const CXzUnpacker *dec,
     if (status == CODER_STATUS_NEEDS_MORE_INPUT)
     {
       // CODER_STATUS_NEEDS_MORE_INPUT is expected status for correct xz streams
+      // any extra data is part of correct data
       extraSize = 0;
+      // if xz stream was not finished, then we need more data
       if (!XzUnpacker_IsStreamWasFinished(dec))
         res = SZ_ERROR_INPUT_EOF;
     }
-    else if (!decodingTruncated || finishMode) // (status == CODER_STATUS_NOT_FINISHED)
-      res = SZ_ERROR_DATA;
+    else
+    {
+      // CODER_STATUS_FINISHED_WITH_MARK is not possible for multi stream xz decoding
+      // so here we have (status == CODER_STATUS_NOT_FINISHED)
+      // if (status != CODER_STATUS_FINISHED_WITH_MARK)
+      if (!decodingTruncated || finishMode)
+        res = SZ_ERROR_DATA;
+    }
   }
   else if (res == SZ_ERROR_NO_ARCHIVE)
   {
@@ -2497,24 +2553,29 @@ static SRes XzStatInfo_SetStat(const CXzUnpacker *dec,
     SZ_ERROR_NO_ARCHIVE is possible for 2 states:
       XZ_STATE_STREAM_HEADER  - if bad signature or bad CRC
       XZ_STATE_STREAM_PADDING - if non-zero padding data
-    extraSize / inProcessed don't include "bad" byte
+    extraSize and inProcessed don't include "bad" byte
     */
-    if (inProcessed != extraSize) // if good streams before error
-      if (extraSize != 0 || readProcessed != inProcessed)
+    // if (inProcessed == extraSize), there was no any good xz stream header, and we keep error
+    if (inProcessed != extraSize) // if there were good xz streams before error
+    {
+      // if (extraSize != 0 || readProcessed != inProcessed)
       {
+        // here we suppose that all xz streams were finished OK, and we have
+        // some extra data after all streams
         stat->DataAfterEnd = True;
-        // there is some good xz stream before. So we set SZ_OK
         res = SZ_OK;
       }
+    }
   }
   
-  stat->DecodeRes = res;
+  if (stat->DecodeRes == SZ_OK)
+    stat->DecodeRes = res;
 
   stat->InSize -= extraSize;
-  return res;
 }
 
 
+
 SRes XzDecMt_Decode(CXzDecMtHandle pp,
     const CXzDecMtProps *props,
     const UInt64 *outDataSize, int finishMode,
@@ -2557,8 +2618,9 @@ SRes XzDecMt_Decode(CXzDecMtHandle pp,
   p->inProcessed = 0;
   p->readProcessed = 0;
   p->readWasFinished = False;
+  p->readRes = SZ_OK;
 
-  p->codeRes = 0;
+  p->codeRes = SZ_OK;
   p->status = CODER_STATUS_NOT_SPECIFIED;
 
   XzUnpacker_Init(&p->dec);
@@ -2589,8 +2651,9 @@ SRes XzDecMt_Decode(CXzDecMtHandle pp,
 
   if (p->props.numThreads > 1)
   {
-    IMtDecCallback vt;
-
+    IMtDecCallback2 vt;
+    BoolInt needContinue;
+    SRes res;
     // we just free ST buffers here
     // but we still keep state variables, that was set in XzUnpacker_Init()
     XzDecMt_FreeSt(p);
@@ -2628,45 +2691,45 @@ SRes XzDecMt_Decode(CXzDecMtHandle pp,
     vt.Code = XzDecMt_Callback_Code;
     vt.Write = XzDecMt_Callback_Write;
 
-    {
-      BoolInt needContinue;
-      
-      SRes res = MtDec_Code(&p->mtc);
-
-      stat->InSize = p->mtc.inProcessed;
 
-      p->inProcessed = p->mtc.inProcessed;
-      p->readRes = p->mtc.readRes;
-      p->readWasFinished = p->mtc.readWasFinished;
-      p->readProcessed = p->mtc.readProcessed;
+    res = MtDec_Code(&p->mtc);
 
-      tMode = True;
-      needContinue = False;
 
-      if (res == SZ_OK)
+    stat->InSize = p->mtc.inProcessed;
+    
+    p->inProcessed = p->mtc.inProcessed;
+    p->readRes = p->mtc.readRes;
+    p->readWasFinished = p->mtc.readWasFinished;
+    p->readProcessed = p->mtc.readProcessed;
+    
+    tMode = True;
+    needContinue = False;
+    
+    if (res == SZ_OK)
+    {
+      if (p->mtc.mtProgress.res != SZ_OK)
       {
-        if (p->mtc.mtProgress.res != SZ_OK)
-        {
-          res = p->mtc.mtProgress.res;
-          stat->ProgressRes = res;
-          stat->CombinedRes_Type = SZ_ERROR_PROGRESS;
-        }
-        else
-          needContinue = p->mtc.needContinue;
+        res = p->mtc.mtProgress.res;
+        stat->ProgressRes = res;
+        stat->CombinedRes_Type = SZ_ERROR_PROGRESS;
       }
-
-      if (!needContinue)
+      else
+        needContinue = p->mtc.needContinue;
+    }
+    
+    if (!needContinue)
+    {
       {
         SRes codeRes;
         BoolInt truncated = False;
         ECoderStatus status;
-        CXzUnpacker *dec;
+        const CXzUnpacker *dec;
 
         stat->OutSize = p->outProcessed;
        
         if (p->finishedDecoderIndex >= 0)
         {
-          CXzDecMtThread *coder = &p->coders[(unsigned)p->finishedDecoderIndex];
+          const CXzDecMtThread *coder = &p->coders[(unsigned)p->finishedDecoderIndex];
           codeRes = coder->codeRes;
           dec = &coder->dec;
           status = coder->status;
@@ -2679,41 +2742,46 @@ SRes XzDecMt_Decode(CXzDecMtHandle pp,
           truncated = p->parsing_Truncated;
         }
         else
-          return E_FAIL;
+          return SZ_ERROR_FAIL;
+
+        if (p->mainErrorCode != SZ_OK)
+          stat->DecodeRes = p->mainErrorCode;
 
         XzStatInfo_SetStat(dec, p->finishMode,
-            p->mtc.readProcessed, p->mtc.inProcessed,
+            // p->mtc.readProcessed,
+            p->mtc.inProcessed,
             codeRes, status,
             truncated,
             stat);
+      }
 
-        if (res == SZ_OK)
+      if (res == SZ_OK)
+      {
+        stat->ReadRes = p->mtc.readRes;
+
+        if (p->writeRes != SZ_OK)
         {
-          if (p->writeRes != SZ_OK)
-          {
-            res = p->writeRes;
-            stat->CombinedRes_Type = SZ_ERROR_WRITE;
-          }
-          else if (p->mtc.readRes != SZ_OK && p->mtc.inProcessed == p->mtc.readProcessed)
-          {
-            res = p->mtc.readRes;
-            stat->ReadRes = res;
-            stat->CombinedRes_Type = SZ_ERROR_READ;
-          }
-          else if (p->mainErrorCode != SZ_OK)
-          {
-            res = p->mainErrorCode;
-          }
+          res = p->writeRes;
+          stat->CombinedRes_Type = SZ_ERROR_WRITE;
         }
-
-        stat->CombinedRes = res;
-        if (stat->CombinedRes_Type == SZ_OK)
-          stat->CombinedRes_Type = res;
-        return res;
+        else if (p->mtc.readRes != SZ_OK
+            // && p->mtc.inProcessed == p->mtc.readProcessed
+            && stat->DecodeRes == SZ_ERROR_INPUT_EOF)
+        {
+          res = p->mtc.readRes;
+          stat->CombinedRes_Type = SZ_ERROR_READ;
+        }
+        else if (stat->DecodeRes != SZ_OK)
+          res = stat->DecodeRes;
       }
-
-      PRF_STR("----- decoding ST -----");
+      
+      stat->CombinedRes = res;
+      if (stat->CombinedRes_Type == SZ_OK)
+        stat->CombinedRes_Type = res;
+      return res;
     }
+
+    PRF_STR("----- decoding ST -----");
   }
 
   #endif
@@ -2729,33 +2797,35 @@ SRes XzDecMt_Decode(CXzDecMtHandle pp,
         , stat
         );
 
+    #ifndef _7ZIP_ST
+    // we must set error code from MT decoding at first
+    if (p->mainErrorCode != SZ_OK)
+      stat->DecodeRes = p->mainErrorCode;
+    #endif
+
     XzStatInfo_SetStat(&p->dec,
         p->finishMode,
-        p->readProcessed, p->inProcessed,
+        // p->readProcessed,
+        p->inProcessed,
         p->codeRes, p->status,
         False, // truncated
         stat);
 
+    stat->ReadRes = p->readRes;
+
     if (res == SZ_OK)
     {
-      /*
-      if (p->writeRes != SZ_OK)
-      {
-        res = p->writeRes;
-        stat->CombinedRes_Type = SZ_ERROR_WRITE;
-      }
-      else
-      */
-      if (p->readRes != SZ_OK && p->inProcessed == p->readProcessed)
+      if (p->readRes != SZ_OK
+          // && p->inProcessed == p->readProcessed
+          && stat->DecodeRes == SZ_ERROR_INPUT_EOF)
       {
+        // we set read error as combined error, only if that error was the reason
+        // of decoding problem
         res = p->readRes;
-        stat->ReadRes = res;
         stat->CombinedRes_Type = SZ_ERROR_READ;
       }
-      #ifndef _7ZIP_ST
-      else if (p->mainErrorCode != SZ_OK)
-        res = p->mainErrorCode;
-      #endif
+      else if (stat->DecodeRes != SZ_OK)
+        res = stat->DecodeRes;
     }
 
     stat->CombinedRes = res;
diff --git a/deps/LZMA-SDK/C/XzEnc.c b/deps/LZMA-SDK/C/XzEnc.c
index 309eca949..759ba670e 100644
--- a/deps/LZMA-SDK/C/XzEnc.c
+++ b/deps/LZMA-SDK/C/XzEnc.c
@@ -1,5 +1,5 @@
 /* XzEnc.c -- Xz Encode
-2019-02-02 : Igor Pavlov : Public domain */
+2021-04-01 : Igor Pavlov : Public domain */
 
 #include "Precomp.h"
 
@@ -36,7 +36,7 @@
 
 
 #define XzBlock_ClearFlags(p)       (p)->flags = 0;
-#define XzBlock_SetNumFilters(p, n) (p)->flags |= ((n) - 1);
+#define XzBlock_SetNumFilters(p, n) (p)->flags = (Byte)((p)->flags | ((n) - 1));
 #define XzBlock_SetHasPackSize(p)   (p)->flags |= XZ_BF_PACK_SIZE;
 #define XzBlock_SetHasUnpackSize(p) (p)->flags |= XZ_BF_UNPACK_SIZE;
 
@@ -552,7 +552,7 @@ static void XzEncProps_Normalize_Fixed(CXzProps *p)
         numBlocks++;
       if (numBlocks < (unsigned)t2)
       {
-        t2r = (unsigned)numBlocks;
+        t2r = (int)numBlocks;
         if (t2r == 0)
           t2r = 1;
         t3 = t1 * t2r;
@@ -751,7 +751,8 @@ static SRes Xz_CompressBlock(
     }
     else if (fp->ipDefined)
     {
-      SetUi32(filter->props, fp->ip);
+      Byte *ptr = filter->props;
+      SetUi32(ptr, fp->ip);
       filter->propsSize = 4;
     }
   }
@@ -1196,7 +1197,7 @@ SRes XzEnc_Encode(CXzEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStr
       p->outBufSize = destBlockSize;
     }
 
-    p->mtCoder.numThreadsMax = props->numBlockThreads_Max;
+    p->mtCoder.numThreadsMax = (unsigned)props->numBlockThreads_Max;
     p->mtCoder.expectedDataSize = p->expectedDataSize;
     
     RINOK(MtCoder_Code(&p->mtCoder));
diff --git a/deps/LZMA-SDK/C/XzIn.c b/deps/LZMA-SDK/C/XzIn.c
index 792a61786..54d81c4a4 100644
--- a/deps/LZMA-SDK/C/XzIn.c
+++ b/deps/LZMA-SDK/C/XzIn.c
@@ -1,5 +1,5 @@
 /* XzIn.c - Xz input
-2018-07-04 : Igor Pavlov : Public domain */
+2021-04-01 : Igor Pavlov : Public domain */
 
 #include "Precomp.h"
 
@@ -152,7 +152,7 @@ static SRes Xz_ReadBackward(CXzStream *p, ILookInStream *stream, Int64 *startOff
 {
   UInt64 indexSize;
   Byte buf[XZ_STREAM_FOOTER_SIZE];
-  UInt64 pos = *startOffset;
+  UInt64 pos = (UInt64)*startOffset;
 
   if ((pos & 3) != 0 || pos < XZ_STREAM_FOOTER_SIZE)
     return SZ_ERROR_NO_ARCHIVE;
@@ -202,8 +202,13 @@ static SRes Xz_ReadBackward(CXzStream *p, ILookInStream *stream, Int64 *startOff
   if (!XzFlags_IsSupported(p->flags))
     return SZ_ERROR_UNSUPPORTED;
 
-  if (GetUi32(buf) != CrcCalc(buf + 4, 6))
-    return SZ_ERROR_ARCHIVE;
+  {
+    /* to eliminate GCC 6.3 warning:
+       dereferencing type-punned pointer will break strict-aliasing rules */
+    const Byte *buf_ptr = buf;
+    if (GetUi32(buf_ptr) != CrcCalc(buf + 4, 6))
+      return SZ_ERROR_ARCHIVE;
+  }
 
   indexSize = ((UInt64)GetUi32(buf + 4) + 1) << 2;
 
@@ -222,7 +227,7 @@ static SRes Xz_ReadBackward(CXzStream *p, ILookInStream *stream, Int64 *startOff
       return SZ_ERROR_ARCHIVE;
     pos -= (totalSize + XZ_STREAM_HEADER_SIZE);
     RINOK(LookInStream_SeekTo(stream, pos));
-    *startOffset = pos;
+    *startOffset = (Int64)pos;
   }
   {
     CXzStreamFlags headerFlags;
@@ -294,12 +299,12 @@ SRes Xzs_ReadBackward(CXzs *p, ILookInStream *stream, Int64 *startOffset, ICompr
     SRes res;
     Xz_Construct(&st);
     res = Xz_ReadBackward(&st, stream, startOffset, alloc);
-    st.startOffset = *startOffset;
+    st.startOffset = (UInt64)*startOffset;
     RINOK(res);
     if (p->num == p->numAllocated)
     {
-      size_t newNum = p->num + p->num / 4 + 1;
-      Byte *data = (Byte *)ISzAlloc_Alloc(alloc, newNum * sizeof(CXzStream));
+      const size_t newNum = p->num + p->num / 4 + 1;
+      void *data = ISzAlloc_Alloc(alloc, newNum * sizeof(CXzStream));
       if (!data)
         return SZ_ERROR_MEM;
       p->numAllocated = newNum;
@@ -311,8 +316,8 @@ SRes Xzs_ReadBackward(CXzs *p, ILookInStream *stream, Int64 *startOffset, ICompr
     p->streams[p->num++] = st;
     if (*startOffset == 0)
       break;
-    RINOK(LookInStream_SeekTo(stream, *startOffset));
-    if (progress && ICompressProgress_Progress(progress, endOffset - *startOffset, (UInt64)(Int64)-1) != SZ_OK)
+    RINOK(LookInStream_SeekTo(stream, (UInt64)*startOffset));
+    if (progress && ICompressProgress_Progress(progress, (UInt64)(endOffset - *startOffset), (UInt64)(Int64)-1) != SZ_OK)
       return SZ_ERROR_PROGRESS;
   }
   return SZ_OK;
diff --git a/deps/LZMA-SDK/C/var_clang.mak b/deps/LZMA-SDK/C/var_clang.mak
new file mode 100644
index 000000000..ee265698e
--- /dev/null
+++ b/deps/LZMA-SDK/C/var_clang.mak
@@ -0,0 +1,11 @@
+PLATFORM=
+O=b/c
+IS_X64=
+IS_X86=
+IS_ARM64=
+CROSS_COMPILE=
+MY_ARCH=
+USE_ASM=
+CC=$(CROSS_COMPILE)clang
+CXX=$(CROSS_COMPILE)clang++
+USE_CLANG=1
diff --git a/deps/LZMA-SDK/C/var_clang_arm64.mak b/deps/LZMA-SDK/C/var_clang_arm64.mak
new file mode 100644
index 000000000..1e82d2eb6
--- /dev/null
+++ b/deps/LZMA-SDK/C/var_clang_arm64.mak
@@ -0,0 +1,11 @@
+PLATFORM=arm64
+O=b/c_$(PLATFORM)
+IS_X64=
+IS_X86=
+IS_ARM64=1
+CROSS_COMPILE=
+MY_ARCH=
+USE_ASM=1
+CC=$(CROSS_COMPILE)clang
+CXX=$(CROSS_COMPILE)clang++
+USE_CLANG=1
diff --git a/deps/LZMA-SDK/C/var_clang_x64.mak b/deps/LZMA-SDK/C/var_clang_x64.mak
new file mode 100644
index 000000000..d9013e1cd
--- /dev/null
+++ b/deps/LZMA-SDK/C/var_clang_x64.mak
@@ -0,0 +1,12 @@
+PLATFORM=x64
+O=b/c_$(PLATFORM)
+IS_X64=1
+IS_X86=
+IS_ARM64=
+CROSS_COMPILE=
+MY_ARCH=
+USE_ASM=1
+CC=$(CROSS_COMPILE)clang
+CXX=$(CROSS_COMPILE)clang++
+USE_CLANG=1
+
diff --git a/deps/LZMA-SDK/C/var_clang_x86.mak b/deps/LZMA-SDK/C/var_clang_x86.mak
new file mode 100644
index 000000000..9ab916a70
--- /dev/null
+++ b/deps/LZMA-SDK/C/var_clang_x86.mak
@@ -0,0 +1,12 @@
+PLATFORM=x86
+O=b/c_$(PLATFORM)
+IS_X64=
+IS_X86=1
+IS_ARM64=
+CROSS_COMPILE=
+MY_ARCH=-m32
+USE_ASM=1
+CC=$(CROSS_COMPILE)clang
+CXX=$(CROSS_COMPILE)clang++
+USE_CLANG=1
+
diff --git a/deps/LZMA-SDK/C/var_gcc.mak b/deps/LZMA-SDK/C/var_gcc.mak
new file mode 100644
index 000000000..803c8de9d
--- /dev/null
+++ b/deps/LZMA-SDK/C/var_gcc.mak
@@ -0,0 +1,12 @@
+PLATFORM=
+O=b/g
+IS_X64=
+IS_X86=
+IS_ARM64=
+CROSS_COMPILE=
+MY_ARCH=
+USE_ASM=
+CC=$(CROSS_COMPILE)gcc
+CXX=$(CROSS_COMPILE)g++
+
+# -march=armv8-a+crc+crypto
diff --git a/deps/LZMA-SDK/C/var_gcc_arm64.mak b/deps/LZMA-SDK/C/var_gcc_arm64.mak
new file mode 100644
index 000000000..562cfaa6d
--- /dev/null
+++ b/deps/LZMA-SDK/C/var_gcc_arm64.mak
@@ -0,0 +1,12 @@
+PLATFORM=arm64
+O=b/g_$(PLATFORM)
+IS_X64=
+IS_X86=
+IS_ARM64=1
+CROSS_COMPILE=
+MY_ARCH=-mtune=cortex-a53
+USE_ASM=1
+CC=$(CROSS_COMPILE)gcc
+CXX=$(CROSS_COMPILE)g++
+
+# -march=armv8-a+crc+crypto
diff --git a/deps/LZMA-SDK/C/var_gcc_x64.mak b/deps/LZMA-SDK/C/var_gcc_x64.mak
new file mode 100644
index 000000000..1b965b21b
--- /dev/null
+++ b/deps/LZMA-SDK/C/var_gcc_x64.mak
@@ -0,0 +1,10 @@
+PLATFORM=x64
+O=b/g_$(PLATFORM)
+IS_X64=1
+IS_X86=
+IS_ARM64=
+CROSS_COMPILE=
+MY_ARCH=
+USE_ASM=1
+CC=$(CROSS_COMPILE)gcc
+CXX=$(CROSS_COMPILE)g++
diff --git a/deps/LZMA-SDK/C/var_gcc_x86.mak b/deps/LZMA-SDK/C/var_gcc_x86.mak
new file mode 100644
index 000000000..9eada64e1
--- /dev/null
+++ b/deps/LZMA-SDK/C/var_gcc_x86.mak
@@ -0,0 +1,11 @@
+PLATFORM=x86
+O=b/g_$(PLATFORM)
+IS_X64=
+IS_X86=1
+IS_ARM64=
+CROSS_COMPILE=
+MY_ARCH=-m32
+USE_ASM=1
+CC=$(CROSS_COMPILE)gcc
+CXX=$(CROSS_COMPILE)g++
+
diff --git a/deps/LZMA-SDK/C/var_mac_arm64.mak b/deps/LZMA-SDK/C/var_mac_arm64.mak
new file mode 100644
index 000000000..0ba414230
--- /dev/null
+++ b/deps/LZMA-SDK/C/var_mac_arm64.mak
@@ -0,0 +1,11 @@
+PLATFORM=arm64
+O=b/m_$(PLATFORM)
+IS_X64=
+IS_X86=
+IS_ARM64=1
+CROSS_COMPILE=
+MY_ARCH=-arch arm64
+USE_ASM=1
+CC=$(CROSS_COMPILE)clang
+CXX=$(CROSS_COMPILE)clang++
+USE_CLANG=1
diff --git a/deps/LZMA-SDK/C/var_mac_x64.mak b/deps/LZMA-SDK/C/var_mac_x64.mak
new file mode 100644
index 000000000..92b15c8b7
--- /dev/null
+++ b/deps/LZMA-SDK/C/var_mac_x64.mak
@@ -0,0 +1,11 @@
+PLATFORM=x64
+O=b/m_$(PLATFORM)
+IS_X64=1
+IS_X86=
+IS_ARM64=
+CROSS_COMPILE=
+MY_ARCH=-arch x86_64
+USE_ASM=
+CC=$(CROSS_COMPILE)clang
+CXX=$(CROSS_COMPILE)clang++
+USE_CLANG=1
diff --git a/deps/LZMA-SDK/C/warn_clang.mak b/deps/LZMA-SDK/C/warn_clang.mak
new file mode 100644
index 000000000..a299fbc4d
--- /dev/null
+++ b/deps/LZMA-SDK/C/warn_clang.mak
@@ -0,0 +1,37 @@
+CFLAGS_WARN_CLANG_3_8_UNIQ = \
+  -Wno-reserved-id-macro \
+  -Wno-old-style-cast \
+  -Wno-c++11-long-long \
+  -Wno-unused-macros \
+
+CFLAGS_WARN_CLANG_3_8 = \
+  $(CFLAGS_WARN_CLANG_3_8_UNIQ) \
+  -Weverything \
+  -Wno-extra-semi \
+  -Wno-sign-conversion \
+  -Wno-language-extension-token \
+  -Wno-global-constructors \
+  -Wno-non-virtual-dtor \
+  -Wno-switch-enum \
+  -Wno-covered-switch-default \
+  -Wno-cast-qual \
+  -Wno-padded \
+  -Wno-exit-time-destructors \
+  -Wno-weak-vtables \
+
+CFLAGS_WARN_CLANG_12= $(CFLAGS_WARN_CLANG_3_8) \
+  -Wno-extra-semi-stmt \
+  -Wno-zero-as-null-pointer-constant \
+  -Wno-deprecated-dynamic-exception-spec \
+  -Wno-c++98-compat-pedantic \
+  -Wno-atomic-implicit-seq-cst \
+  -Wconversion \
+  -Wno-sign-conversion \
+
+CFLAGS_WARN_1 = \
+  -Wno-deprecated-copy-dtor \
+
+
+
+
+CFLAGS_WARN = $(CFLAGS_WARN_CLANG_12) $(CFLAGS_WARN_1)
diff --git a/deps/LZMA-SDK/C/warn_clang_mac.mak b/deps/LZMA-SDK/C/warn_clang_mac.mak
new file mode 100644
index 000000000..cfbbda073
--- /dev/null
+++ b/deps/LZMA-SDK/C/warn_clang_mac.mak
@@ -0,0 +1,37 @@
+CFLAGS_WARN_CLANG_3_8_UNIQ = \
+  -Wno-reserved-id-macro \
+  -Wno-old-style-cast \
+  -Wno-c++11-long-long \
+  -Wno-unused-macros \
+
+CFLAGS_WARN_CLANG_3_8 = \
+  $(CFLAGS_WARN_CLANG_3_8_UNIQ) \
+  -Weverything \
+  -Wno-extra-semi \
+  -Wno-sign-conversion \
+  -Wno-language-extension-token \
+  -Wno-global-constructors \
+  -Wno-non-virtual-dtor \
+  -Wno-switch-enum \
+  -Wno-covered-switch-default \
+  -Wno-cast-qual \
+  -Wno-padded \
+  -Wno-exit-time-destructors \
+  -Wno-weak-vtables \
+
+CFLAGS_WARN_CLANG_12= $(CFLAGS_WARN_CLANG_3_8) \
+  -Wno-extra-semi-stmt \
+  -Wno-zero-as-null-pointer-constant \
+  -Wno-deprecated-dynamic-exception-spec \
+  -Wno-c++98-compat-pedantic \
+  -Wno-atomic-implicit-seq-cst \
+  -Wconversion \
+  -Wno-sign-conversion \
+
+CFLAGS_WARN_MAC = \
+  -Wno-poison-system-directories \
+  -Wno-c++11-long-long \
+  -Wno-atomic-implicit-seq-cst \
+
+
+CFLAGS_WARN = $(CFLAGS_WARN_CLANG_12) $(CFLAGS_WARN_MAC)
diff --git a/deps/LZMA-SDK/C/warn_gcc.mak b/deps/LZMA-SDK/C/warn_gcc.mak
new file mode 100644
index 000000000..3ae796480
--- /dev/null
+++ b/deps/LZMA-SDK/C/warn_gcc.mak
@@ -0,0 +1,53 @@
+CFLAGS_WARN_GCC_4_5 = \
+
+CFLAGS_WARN_GCC_6 = \
+  -Waddress \
+  -Waggressive-loop-optimizations \
+  -Wattributes \
+  -Wbool-compare \
+  -Wcast-align \
+  -Wcomment \
+  -Wdiv-by-zero \
+  -Wduplicated-cond \
+  -Wformat-contains-nul \
+  -Winit-self \
+  -Wint-to-pointer-cast \
+  -Wunused \
+  -Wunused-macros \
+
+#  -Wno-strict-aliasing
+
+CFLAGS_WARN_GCC_9 = \
+  -Waddress \
+  -Waddress-of-packed-member \
+  -Waggressive-loop-optimizations \
+  -Wattributes \
+  -Wbool-compare \
+  -Wbool-operation \
+  -Wcast-align \
+  -Wcast-align=strict \
+  -Wcomment \
+  -Wdangling-else \
+  -Wdiv-by-zero \
+  -Wduplicated-branches \
+  -Wduplicated-cond \
+  -Wformat-contains-nul \
+  -Wimplicit-fallthrough=5 \
+  -Winit-self \
+  -Wint-in-bool-context \
+  -Wint-to-pointer-cast \
+  -Wunused \
+  -Wunused-macros \
+  -Wconversion \
+
+#  -Wno-sign-conversion \
+
+CFLAGS_WARN_GCC_PPMD_UNALIGNED = \
+  -Wno-strict-aliasing \
+
+
+CFLAGS_WARN = $(CFLAGS_WARN_GCC_9) \
+
+#  $(CFLAGS_WARN_GCC_PPMD_UNALIGNED)
+
+  
\ No newline at end of file
diff --git a/deps/LZMA-SDK/DOC/7zFormat.txt b/deps/LZMA-SDK/DOC/7zFormat.txt
new file mode 100644
index 000000000..9239e9355
--- /dev/null
+++ b/deps/LZMA-SDK/DOC/7zFormat.txt
@@ -0,0 +1,469 @@
+7z Format description (18.06)
+----------------------------
+
+This file contains description of 7z archive format. 
+7z archive can contain files compressed with any method.
+See "Methods.txt" for description for defined compressing methods.
+
+
+Format structure Overview
+-------------------------
+
+Some fields can be optional.
+
+Archive structure
+~~~~~~~~~~~~~~~~~  
+SignatureHeader
+[PackedStreams]
+[PackedStreamsForHeaders]
+[
+  Header 
+  or 
+  {
+    Packed Header
+    HeaderInfo
+  }
+]
+
+
+
+Header structure
+~~~~~~~~~~~~~~~~  
+{
+  ArchiveProperties
+  AdditionalStreams
+  {
+    PackInfo
+    {
+      PackPos
+      NumPackStreams
+      Sizes[NumPackStreams]
+      CRCs[NumPackStreams]
+    }
+    CodersInfo
+    {
+      NumFolders
+      Folders[NumFolders]
+      {
+        NumCoders
+        CodersInfo[NumCoders]
+        {
+          ID
+          NumInStreams;
+          NumOutStreams;
+          PropertiesSize
+          Properties[PropertiesSize]
+        }
+        NumBindPairs
+        BindPairsInfo[NumBindPairs]
+        {
+          InIndex;
+          OutIndex;
+        }
+        PackedIndices
+      }
+      UnPackSize[Folders][Folders.NumOutstreams]
+      CRCs[NumFolders]
+    }
+    SubStreamsInfo
+    {
+      NumUnPackStreamsInFolders[NumFolders];
+      UnPackSizes[]
+      CRCs[]
+    }
+  }
+  MainStreamsInfo
+  {
+    (Same as in AdditionalStreams)
+  }
+  FilesInfo
+  {
+    NumFiles
+    Properties[]
+    {
+      ID
+      Size
+      Data
+    }
+  }
+}
+
+HeaderInfo structure
+~~~~~~~~~~~~~~~~~~~~
+{
+  (Same as in AdditionalStreams)
+}
+
+
+
+Notes about Notation and encoding
+---------------------------------
+
+7z uses little endian encoding.
+
+7z archive format has optional headers that are marked as
+[]
+Header
+[]
+
+REAL_UINT64 means real UINT64.
+
+UINT64 means real UINT64 encoded with the following scheme:
+
+  Size of encoding sequence depends on the first byte:
+  First_Byte  Extra_Bytes        Value
+  (binary)   
+  0xxxxxxx               : ( xxxxxxx           )
+  10xxxxxx    BYTE y[1]  : (  xxxxxx << (8 * 1)) + y
+  110xxxxx    BYTE y[2]  : (   xxxxx << (8 * 2)) + y
+  ...
+  1111110x    BYTE y[6]  : (       x << (8 * 6)) + y
+  11111110    BYTE y[7]  :                         y
+  11111111    BYTE y[8]  :                         y
+
+
+
+Property IDs
+------------
+
+0x00 = kEnd
+
+0x01 = kHeader
+
+0x02 = kArchiveProperties
+    
+0x03 = kAdditionalStreamsInfo
+0x04 = kMainStreamsInfo
+0x05 = kFilesInfo
+    
+0x06 = kPackInfo
+0x07 = kUnPackInfo
+0x08 = kSubStreamsInfo
+
+0x09 = kSize
+0x0A = kCRC
+
+0x0B = kFolder
+
+0x0C = kCodersUnPackSize
+0x0D = kNumUnPackStream
+
+0x0E = kEmptyStream
+0x0F = kEmptyFile
+0x10 = kAnti
+
+0x11 = kName
+0x12 = kCTime
+0x13 = kATime
+0x14 = kMTime
+0x15 = kWinAttributes
+0x16 = kComment
+
+0x17 = kEncodedHeader
+
+0x18 = kStartPos
+0x19 = kDummy
+
+
+7z format headers
+-----------------
+
+SignatureHeader
+~~~~~~~~~~~~~~~
+  BYTE kSignature[6] = {'7', 'z', 0xBC, 0xAF, 0x27, 0x1C};
+
+  ArchiveVersion
+  {
+    BYTE Major;   // now = 0
+    BYTE Minor;   // now = 4
+  };
+
+  UINT32 StartHeaderCRC;
+
+  StartHeader
+  {
+    REAL_UINT64 NextHeaderOffset
+    REAL_UINT64 NextHeaderSize
+    UINT32 NextHeaderCRC
+  }
+
+
+...........................
+
+
+ArchiveProperties
+~~~~~~~~~~~~~~~~~
+BYTE NID::kArchiveProperties (0x02)
+for (;;)
+{
+  BYTE PropertyType;
+  if (PropertyType == 0)
+    break;
+  UINT64 PropertySize;
+  BYTE PropertyData[PropertySize];
+}
+
+
+Digests (NumStreams)
+~~~~~~~~~~~~~~~~~~~~~
+  BYTE AllAreDefined
+  if (AllAreDefined == 0)
+  {
+    for(NumStreams)
+      BIT Defined
+  }
+  UINT32 CRCs[NumDefined]
+
+
+PackInfo
+~~~~~~~~~~~~
+  BYTE NID::kPackInfo  (0x06)
+  UINT64 PackPos
+  UINT64 NumPackStreams
+
+  []
+  BYTE NID::kSize    (0x09)
+  UINT64 PackSizes[NumPackStreams]
+  []
+
+  []
+  BYTE NID::kCRC      (0x0A)
+  PackStreamDigests[NumPackStreams]
+  []
+
+  BYTE NID::kEnd
+
+
+Folder
+~~~~~~
+  UINT64 NumCoders;
+  for (NumCoders)
+  {
+    BYTE 
+    {
+      0:3 CodecIdSize
+      4:  Is Complex Coder
+      5:  There Are Attributes
+      6:  Reserved
+      7:  There are more alternative methods. (Not used anymore, must be 0).
+    } 
+    BYTE CodecId[CodecIdSize]
+    if (Is Complex Coder)
+    {
+      UINT64 NumInStreams;
+      UINT64 NumOutStreams;
+    }
+    if (There Are Attributes)
+    {
+      UINT64 PropertiesSize
+      BYTE Properties[PropertiesSize]
+    }
+  }
+    
+  NumBindPairs = NumOutStreamsTotal - 1;
+
+  for (NumBindPairs)
+  {
+    UINT64 InIndex;
+    UINT64 OutIndex;
+  }
+
+  NumPackedStreams = NumInStreamsTotal - NumBindPairs;
+  if (NumPackedStreams > 1)
+    for(NumPackedStreams)
+    {
+      UINT64 Index;
+    };
+
+
+
+
+Coders Info
+~~~~~~~~~~~
+
+  BYTE NID::kUnPackInfo  (0x07)
+
+
+  BYTE NID::kFolder  (0x0B)
+  UINT64 NumFolders
+  BYTE External
+  switch(External)
+  {
+    case 0:
+      Folders[NumFolders]
+    case 1:
+      UINT64 DataStreamIndex
+  }
+
+
+  BYTE ID::kCodersUnPackSize  (0x0C)
+  for(Folders)
+    for(Folder.NumOutStreams)
+     UINT64 UnPackSize;
+
+
+  []
+  BYTE NID::kCRC   (0x0A)
+  UnPackDigests[NumFolders]
+  []
+
+  
+
+  BYTE NID::kEnd
+
+
+
+SubStreams Info
+~~~~~~~~~~~~~~
+  BYTE NID::kSubStreamsInfo; (0x08)
+
+  []
+  BYTE NID::kNumUnPackStream; (0x0D)
+  UINT64 NumUnPackStreamsInFolders[NumFolders];
+  []
+
+
+  []
+  BYTE NID::kSize  (0x09)
+  UINT64 UnPackSizes[]
+  []
+
+
+  []
+  BYTE NID::kCRC  (0x0A)
+  Digests[Number of streams with unknown CRC]
+  []
+
+  
+  BYTE NID::kEnd
+
+
+Streams Info
+~~~~~~~~~~~~
+
+  []
+  PackInfo
+  []
+
+
+  []
+  CodersInfo
+  []
+
+
+  []
+  SubStreamsInfo
+  []
+
+  BYTE NID::kEnd
+
+
+FilesInfo
+~~~~~~~~~
+  BYTE NID::kFilesInfo;  (0x05)
+  UINT64 NumFiles
+
+  for (;;)
+  {
+    BYTE PropertyType;
+    if (PropertyType == 0)
+      break;
+
+    UINT64 Size;
+
+    switch(PropertyType)
+    {
+      kEmptyStream:   (0x0E)
+        for(NumFiles)
+          BIT IsEmptyStream
+
+      kEmptyFile:     (0x0F)
+        for(EmptyStreams)
+          BIT IsEmptyFile
+
+      kAnti:          (0x10)
+        for(EmptyStreams)
+          BIT IsAntiFile
+      
+      case kCTime: (0x12)
+      case kATime: (0x13)
+      case kMTime: (0x14)
+        BYTE AllAreDefined
+        if (AllAreDefined == 0)
+        {
+          for(NumFiles)
+            BIT TimeDefined
+        }
+        BYTE External;
+        if(External != 0)
+          UINT64 DataIndex
+        []
+        for(Defined Items)
+          REAL_UINT64 Time
+        []
+      
+      kNames:     (0x11)
+        BYTE External;
+        if(External != 0)
+          UINT64 DataIndex
+        []
+        for(Files)
+        {
+          wchar_t Names[NameSize];
+          wchar_t 0;
+        }
+        []
+
+      kAttributes:  (0x15)
+        BYTE AllAreDefined
+        if (AllAreDefined == 0)
+        {
+          for(NumFiles)
+            BIT AttributesAreDefined
+        }
+        BYTE External;
+        if(External != 0)
+          UINT64 DataIndex
+        []
+        for(Defined Attributes)
+          UINT32 Attributes
+        []
+    }
+  }
+
+
+Header
+~~~~~~
+  BYTE NID::kHeader (0x01)
+
+  []
+  ArchiveProperties
+  []
+
+  []
+  BYTE NID::kAdditionalStreamsInfo; (0x03)
+  StreamsInfo
+  []
+
+  []
+  BYTE NID::kMainStreamsInfo;    (0x04)
+  StreamsInfo
+  []
+
+  []
+  FilesInfo
+  []
+
+  BYTE NID::kEnd
+
+
+HeaderInfo
+~~~~~~~~~~
+  []
+  BYTE NID::kEncodedHeader; (0x17)
+  StreamsInfo for Encoded Header
+  []
+
+
+---
+End of document
diff --git a/deps/LZMA-SDK/DOC/Methods.txt b/deps/LZMA-SDK/DOC/Methods.txt
new file mode 100644
index 000000000..6d0641bae
--- /dev/null
+++ b/deps/LZMA-SDK/DOC/Methods.txt
@@ -0,0 +1,173 @@
+7-Zip method IDs for 7z and xz archives
+---------------------------------------
+
+Version: 18.06
+Date: 2018-06-30
+
+Each compression or crypto method in 7z is associated with a unique binary value (ID).
+The length of the ID in bytes is arbitrary, but it cannot exceed 63 bits (8 bytes).
+
+xz and 7z formats use same ID map.
+
+If you want to add some new ID, you have two ways:
+  1) Write request for allocating IDs to 7-Zip developers.
+  2) Generate 8-bytes ID:
+
+    3F ZZ ZZ ZZ ZZ ZZ MM MM 
+
+    3F              - Prefix for random IDs (1 byte)
+    ZZ ZZ ZZ ZZ ZZ  - Developer ID (5 bytes). Use real random bytes. 
+                      
+    MM MM           - Method ID (2 bytes)
+
+    You can notify 7-Zip developers about your Developer ID / Method ID.
+
+    Note: Use new ID, if old codec can not decode data encoded with new version.
+
+
+List of defined IDs
+-------------------
+      
+00 - Copy
+
+03 - Delta
+04 - BCJ (x86)
+05 - PPC (big-endian)
+06 - IA64
+07 - ARM (little-endian)
+08 - ARMT (little-endian)
+09 - SPARC
+
+21 - LZMA2
+          
+02.. - Common
+   03 [Swap]
+      - 2 Swap2
+      - 4 Swap4
+
+03.. - 7z
+   01 - 
+      01 - LZMA
+  
+   03 - [Branch Codecs]
+      01 - [x86 Codecs]
+         03  - BCJ
+         1B  - BCJ2 (4 packed streams)
+      02 - 
+         05 - PPC (big-endian)
+      03 - 
+         01 - Alpha
+      04 - 
+         01 - IA64
+      05 - 
+         01 - ARM (little-endian)
+      06 - 
+         05 - M68 (big-endian)
+      07 - 
+         01 - ARMT (little-endian)
+      08 - 
+         05 - SPARC
+
+   04 - 
+      01 - PPMD
+
+   7F -
+      01 - experimental method.
+
+
+04.. - Misc codecs
+
+   00 - Reserved
+
+   01 - [Zip]
+      00 - Copy (not used. Use {00} instead)
+      01 - Shrink
+      06 - Implode
+      08 - Deflate
+      09 - Deflate64
+      0A - Imploding
+      0C - BZip2 (not used. Use {040202} instead)
+      0E - LZMA (LZMA-zip)
+      5F - xz
+      60 - Jpeg
+      61 - WavPack
+      62 - PPMd (PPMd-zip)
+      63 - wzAES
+
+   02 - 
+      02 - BZip2
+
+   03 - [Rar]
+      01 - Rar1
+      02 - Rar2
+      03 - Rar3
+      05 - Rar5
+
+   04 - [Arj]
+      01 - Arj(1,2,3)
+      02 - Arj4
+
+   05 - [Z]
+
+   06 - [Lzh]
+
+   07 - Reserved for 7z
+
+   08 - [Cab]
+
+   09 - [NSIS]
+      01 - DeflateNSIS
+      02 - BZip2NSIS
+
+   F7 - External codecs (that are not included to 7-Zip)
+
+      0x xx - reserved
+
+      10 xx - reserved (LZHAM)
+         01 - LZHAM
+
+      11 xx - reserved (Tino Reichardt)
+         01 - ZSTD
+         02 - BROTLI
+         04 - LZ4
+         05 - LZ5
+         06 - LIZARD
+
+      12 xx - reserved (Denis Anisimov)
+        
+         01 - WavPack2
+         FE - eSplitter 
+         FF - RawSplitter
+ 
+
+06.. - Crypto 
+
+   F0 - Ciphers without hashing algo
+
+      01 - [AES]
+         0x - AES-128
+         4x - AES-192
+         8x - AES-256
+         Cx - AES
+
+         x0 - ECB
+         x1 - CBC
+         x2 - CFB
+         x3 - OFB
+         x4 - CTR
+
+   F1 - Combine Ciphers
+
+      01 - [Zip]
+         01 - ZipCrypto (Main Zip crypto algo)
+
+      03 - [RAR]
+         02 - 
+         03 - Rar29AES (AES-128 + modified SHA-1)
+
+      07 - [7z]
+         01 - 7zAES (AES-256 + SHA-256)
+
+
+---
+End of document
diff --git a/deps/LZMA-SDK/DOC/installer.txt b/deps/LZMA-SDK/DOC/installer.txt
new file mode 100644
index 000000000..70ad7dc6a
--- /dev/null
+++ b/deps/LZMA-SDK/DOC/installer.txt
@@ -0,0 +1,166 @@
+7-Zip for installers 9.38
+-------------------------
+
+7-Zip is a file archiver for Windows NT/2000/2003/2008/XP/Vista/7/8/10. 
+
+7-Zip for installers is part of LZMA SDK.
+LZMA SDK is written and placed in the public domain by Igor Pavlov.
+
+It's allowed to join 7-Zip SFX module with another software.
+It's allowed to change resources of 7-Zip's SFX modules.
+
+
+HOW to use
+-----------
+
+7zr.exe is reduced version of 7za.exe of 7-Zip.
+7zr.exe supports only format with these codecs: LZMA, LZMA2, BCJ, BCJ2, ARM, Copy.
+
+Example of compressing command for installation packages:
+
+7zr a archive.7z files
+
+7zSD.sfx is SFX module for installers. 7zSD.sfx uses msvcrt.dll.
+
+SFX modules for installers allow to create installation program. 
+Such module extracts archive to temp folder and then runs specified program and removes 
+temp files after program finishing. Self-extract archive for installers must be created 
+as joining 3 files: SFX_Module, Installer_Config, 7z_Archive. 
+Installer_Config is optional file. You can use the following command to create installer 
+self-extract archive:
+
+copy /b 7zSD.sfx + config.txt + archive.7z archive.exe
+
+The smallest installation package size can be achieved if the installation files were 
+uncompressed before being included in the 7z archive.
+
+-y switch for installer module (at runtime) specifies quiet mode for extracting.
+
+Installer Config file format
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Config file contains commands for Installer. File begins from string 
+;!@Install@!UTF-8! and ends with ;!@InstallEnd@!. File must be written 
+in UTF-8 encoding. File contains string pairs: 
+
+ID_String="Value"
+
+ID_String          Description 
+
+Title              Title for messages 
+BeginPrompt        Begin Prompt message 
+Progress           Value can be "yes" or "no". Default value is "yes". 
+RunProgram         Command for executing. Default value is "setup.exe". 
+                   Substring %%T will be replaced with path to temporary 
+                   folder, where files were extracted 
+Directory          Directory prefix for "RunProgram". Default value is ".\\" 
+ExecuteFile        Name of file for executing 
+ExecuteParameters  Parameters for "ExecuteFile" 
+
+
+You can omit any string pair.
+
+There are two ways to run program: RunProgram and ExecuteFile. 
+Use RunProgram, if you want to run some program from .7z archive. 
+Use ExecuteFile, if you want to open some document from .7z archive or 
+if you want to execute some command from Windows.
+
+If you use RunProgram and if you specify empty directory prefix: Directory="", 
+the system searches for the executable file in the following sequence:
+
+1. The directory from which the application (installer) loaded. 
+2. The temporary folder, where files were extracted. 
+3. The Windows system directory. 
+
+
+Config file Examples
+~~~~~~~~~~~~~~~~~~~~
+
+;!@Install@!UTF-8!
+Title="7-Zip 4.00"
+BeginPrompt="Do you want to install the 7-Zip 4.00?"
+RunProgram="setup.exe"
+;!@InstallEnd@!
+
+
+
+;!@Install@!UTF-8!
+Title="7-Zip 4.00"
+BeginPrompt="Do you want to install the 7-Zip 4.00?"
+ExecuteFile="7zip.msi"
+;!@InstallEnd@!
+
+
+
+;!@Install@!UTF-8!
+Title="7-Zip 4.01 Update"
+BeginPrompt="Do you want to install the 7-Zip 4.01 Update?"
+ExecuteFile="msiexec.exe"
+ExecuteParameters="/i 7zip.msi REINSTALL=ALL REINSTALLMODE=vomus"
+;!@InstallEnd@!
+
+
+
+Small SFX modules for installers
+--------------------------------
+
+7zS2.sfx     - small SFX module (GUI version)
+7zS2con.sfx  - small SFX module (Console version)
+
+Small SFX modules support these codecs: LZMA, LZMA2, BCJ, BCJ2, ARM, COPY
+
+Small SFX module is similar to common SFX module for installers.
+The difference (what's new in small version):
+ - Smaller size (30 KB vs 100 KB)
+ - C source code instead of C++
+ - No installer Configuration file
+ - No extracting progress window
+ - It decompresses solid 7z blocks (it can be whole 7z archive) to RAM.
+   So user that calls SFX installer must have free RAM of size of largest 
+   solid 7z block (size of 7z archive at simplest case).
+
+How to use
+----------
+
+copy /b 7zS2.sfx + archive.7z sfx.exe
+
+When you run installer sfx module (sfx.exe)
+1) It creates "7zNNNNNNNN" temp folder in system temp folder.
+2) It extracts .7z archive to that folder
+3) It executes one file from "7zNNNNNNNN" temp folder. 
+4) It removes "7zNNNNNNNN" temp folder
+
+You can send parameters to installer, and installer will transfer them to extracted .exe file.
+
+Small SFX uses 3 levels of priorities to select file to execute:
+
+  1) Files in root folder have higher priority than files in subfolders.
+  2) File extension priorities (from high to low priority order): 
+       bat, cmd, exe, inf, msi, cab (under Windows CE), html, htm
+  3) File name priorities (from high to low priority order): 
+       setup, install, run, start
+
+Windows CE (ARM) version of 7zS2.sfx is included to 7-Zip for Windows Mobile package.
+
+
+Examples
+--------
+
+1) To create compressed console 7-Zip:
+
+7zr a c.7z 7z.exe 7z.dll -mx
+copy /b 7zS2con.sfx + c.7z 7zCompr.exe
+7zCompr.exe b -md22
+
+
+2) To create compressed GUI 7-Zip:
+
+7zr a g.7z 7zg.exe 7z.dll -mx
+copy /b 7zS2.sfx + g.7z 7zgCompr.exe
+7zgCompr.exe b -md22
+
+
+3) To open some file:
+
+7zr a h.7z readme.txt -mx
+copy /b 7zS2.sfx + h.7z 7zTxt.exe 
+7zTxt.exe
diff --git a/deps/LZMA-SDK/DOC/lzma-history.txt b/deps/LZMA-SDK/DOC/lzma-history.txt
new file mode 100644
index 000000000..3fc19fd8b
--- /dev/null
+++ b/deps/LZMA-SDK/DOC/lzma-history.txt
@@ -0,0 +1,484 @@
+HISTORY of the LZMA SDK
+-----------------------
+
+21.02 alpha    2021-05-06
+-------------------------
+- The command line version of 7-Zip for macOS was released.
+- The speed for LZMA and LZMA2 decompression in arm64 versions for macOS and Linux 
+  was increased by 20%-60%.
+
+
+21.01 alpha    2021-03-09
+-------------------------
+- The command line version of 7-Zip for Linux was released.
+- The improvements for speed of ARM64 version using hardware CPU instructions 
+  for AES, CRC-32, SHA-1 and SHA-256.
+- Some bugs were fixed.
+
+
+20.02 alpha    2020-08-08
+-------------------------
+- The default number of LZMA2 chunks per solid block in 7z archive was increased to 64.
+  It allows to increase the compression speed for big 7z archives, if there is a big number 
+  of CPU cores and threads.
+- The speed of PPMd compressing/decompressing was increased for 7z archives.
+- The new -ssp switch. If the switch -ssp is specified, 7-Zip doesn't allow the system 
+  to modify "Last Access Time" property of source files for archiving and hashing operations. 
+- Some bugs were fixed.
+
+
+20.00 alpha    2020-02-06
+-------------------------
+- 7-Zip now supports new optional match finders for LZMA/LZMA2 compression: bt5 and hc5, 
+  that can work faster than bt4 and hc4 match finders for the data with big redundancy.
+- The compression ratio was improved for Fast and Fastest compression levels with the 
+  following default settings:
+   - Fastest level (-mx1) : hc5 match finder with 256 KB dictionary.
+   - Fast    level (-mx3) : hc5 match finder with 4 MB dictionary.
+- Minor speed optimizations in multithreaded LZMA/LZMA2 compression for Normal/Maximum/Ultra 
+  compression levels.
+
+
+19.00          2019-02-21
+-------------------------
+- Encryption strength for 7z archives was increased:
+  the size of random initialization vector was increased from 64-bit to 128-bit,
+  and the pseudo-random number generator was improved.
+- The bug in 7zIn.c code was fixed.
+
+
+18.06          2018-12-30
+-------------------------
+- The speed for LZMA/LZMA2 compressing was increased by 3-10%,
+  and there are minor changes in compression ratio.
+- Some bugs were fixed.
+- The bug in 7-Zip 18.02-18.05 was fixed:
+  There was memory leak in multithreading xz decoder - XzDecMt_Decode(),
+  if xz stream contains only one block.
+- The changes for MSVS compiler makefiles: 
+   - the makefiles now use "PLATFORM" macroname with values (x64, x86, arm64)
+     instead of "CPU" macroname with values (AMD64, ARM64).
+   - the makefiles by default now use static version of the run-time library.
+
+
+18.05          2018-04-30
+-------------------------
+- The speed for LZMA/LZMA2 compressing was increased 
+    by 8% for fastest/fast compression levels and 
+    by 3% for normal/maximum compression levels.
+- Previous versions of 7-Zip could work incorrectly in "Large memory pages" mode in
+  Windows 10 because of some BUG with "Large Pages" in Windows 10. 
+  Now 7-Zip doesn't use "Large Pages" on Windows 10 up to revision 1709 (16299).
+- The BUG was fixed in Lzma2Enc.c
+    Lzma2Enc_Encode2() function worked incorrectly,
+      if (inStream == NULL) and the number of block threads is more than 1.
+
+
+18.03 beta     2018-03-04
+-------------------------
+- Asm\x86\LzmaDecOpt.asm: new optimized LZMA decoder written in asm 
+  for x64 with about 30% higher speed than main version of LZMA decoder written in C.
+- The speed for single-thread LZMA/LZMA2 decoder written in C was increased by 3%.
+- 7-Zip now can use multi-threading for 7z/LZMA2 decoding,
+  if there are multiple independent data chunks in LZMA2 stream.
+- 7-Zip now can use multi-threading for xz decoding,
+  if there are multiple blocks in xz stream.
+
+
+18.01          2018-01-28
+-------------------------
+- The BUG in 17.01 - 18.00 beta was fixed:
+  XzDec.c : random block unpacking and XzUnpacker_IsBlockFinished()
+  didn't work correctly for xz archives without checksum (CRC).
+
+
+18.00 beta     2018-01-10
+-------------------------
+- The BUG in xz encoder was fixed:
+  There was memory leak of 16 KB for each file compressed with 
+  xz compression method, if additional filter was used.
+
+
+17.01 beta     2017-08-28
+-------------------------
+- Minor speed optimization for LZMA2 (xz and 7z) multi-threading compression.
+  7-Zip now uses additional memory buffers for multi-block LZMA2 compression.
+  CPU utilization was slightly improved.
+- 7-zip now creates multi-block xz archives by default. Block size can be 
+  specified with -ms[Size]{m|g} switch.
+- xz decoder now can unpack random block from multi-block xz archives.
+- 7-Zip command line: @listfile now doesn't work after -- switch.
+  Use -i@listfile before -- switch instead.
+- The BUGs were fixed:
+  7-Zip 17.00 beta crashed for commands that write anti-item to 7z archive.
+
+
+17.00 beta     2017-04-29
+-------------------------
+- NewHandler.h / NewHandler.cpp: 
+    now it redefines operator new() only for old MSVC compilers (_MSC_VER < 1900).
+- C/7zTypes.h : the names of variables in interface structures were changed (vt).
+- Some bugs were fixed. 7-Zip could crash in some cases.
+- Some internal changes in code.
+
+
+16.04          2016-10-04
+-------------------------
+- The bug was fixed in DllSecur.c.
+
+
+16.03          2016-09-28
+-------------------------
+- SFX modules now use some protection against DLL preloading attack.
+- Some bugs in 7z code were fixed.
+
+
+16.02          2016-05-21
+-------------------------
+- The BUG in 16.00 - 16.01 was fixed:
+  Split Handler (SplitHandler.cpp) returned incorrect 
+  total size value (kpidSize) for split archives.
+
+
+16.01          2016-05-19
+-------------------------	
+- Some internal changes to reduce the number of compiler warnings.
+
+
+16.00          2016-05-10
+-------------------------	
+- Some bugs were fixed.
+
+
+15.12          2015-11-19
+-------------------------	
+- The BUG in C version of 7z decoder was fixed:
+  7zDec.c : SzDecodeLzma2()
+  7z decoder could mistakenly report about decoding error for some 7z archives
+  that use LZMA2 compression method.
+  The probability to get that mistaken decoding error report was about 
+  one error per 16384 solid blocks for solid blocks larger than 16 KB (compressed size). 
+- The BUG (in 9.26-15.11) in C version of 7z decoder was fixed:
+  7zArcIn.c : SzReadHeader2()
+  7z decoder worked incorrectly for 7z archives that contain 
+  empty solid blocks, that can be placed to 7z archive, if some file is 
+  unavailable for reading during archive creation.
+
+
+15.09 beta     2015-10-16
+-------------------------	
+- The BUG in LZMA / LZMA2 encoding code was fixed.
+  The BUG in LzFind.c::MatchFinder_ReadBlock() function.
+  If input data size is larger than (4 GiB - dictionary_size),
+  the following code worked incorrectly:
+  -  LZMA : LzmaEnc_MemEncode(), LzmaEncode() : LZMA encoding functions 
+     for compressing from memory to memory. 
+     That BUG is not related to LZMA encoder version that works via streams.
+  -  LZMA2 : multi-threaded version of LZMA2 encoder worked incorrectly, if 
+     default value of chunk size (CLzma2EncProps::blockSize) is changed 
+     to value larger than (4 GiB - dictionary_size).
+
+
+9.38 beta      2015-01-03
+-------------------------	
+- The BUG in 9.31-9.37 was fixed:
+  IArchiveGetRawProps interface was disabled for 7z archives.
+- The BUG in 9.26-9.36 was fixed:
+  Some code in CPP\7zip\Archive\7z\ worked correctly only under Windows.
+
+
+9.36 beta      2014-12-26
+-------------------------	
+- The BUG in command line version was fixed:
+  7-Zip created temporary archive in current folder during update archive
+  operation, if -w{Path} switch was not specified. 
+  The fixed 7-Zip creates temporary archive in folder that contains updated archive.
+- The BUG in 9.33-9.35 was fixed:
+  7-Zip silently ignored file reading errors during 7z or gz archive creation,
+  and the created archive contained only part of file that was read before error.
+  The fixed 7-Zip stops archive creation and it reports about error.
+
+
+9.35 beta      2014-12-07
+-------------------------	
+- 7zr.exe now supports AES encryption.
+- SFX modules were added to LZMA SDK
+- Some bugs were fixed.
+
+
+9.21 beta      2011-04-11
+-------------------------	
+- New class FString for file names at file systems.
+- Speed optimization in CRC code for big-endian CPUs.
+- The BUG in Lzma2Dec.c was fixed:
+    Lzma2Decode function didn't work.
+
+
+9.18 beta      2010-11-02
+-------------------------	
+- New small SFX module for installers (SfxSetup).
+
+
+9.12 beta      2010-03-24
+-------------------------
+- The BUG in LZMA SDK 9.* was fixed: LZMA2 codec didn't work,
+  if more than 10 threads were used (or more than 20 threads in some modes).
+
+
+9.11 beta      2010-03-15
+-------------------------
+- PPMd compression method support
+   
+
+9.09           2009-12-12
+-------------------------
+- The bug was fixed:
+   Utf16_To_Utf8 functions in UTFConvert.cpp and 7zMain.c
+   incorrectly converted surrogate characters (the code >= 0x10000) to UTF-8.
+- Some bugs were fixed
+
+
+9.06           2009-08-17
+-------------------------
+- Some changes in ANSI-C 7z Decoder interfaces.
+
+
+9.04           2009-05-30
+-------------------------
+- LZMA2 compression method support
+- xz format support
+
+
+4.65           2009-02-03
+-------------------------
+- Some minor fixes
+
+
+4.63           2008-12-31
+-------------------------
+- Some minor fixes
+
+
+4.61 beta      2008-11-23
+-------------------------
+- The bug in ANSI-C LZMA Decoder was fixed:
+    If encoded stream was corrupted, decoder could access memory 
+    outside of allocated range.
+- Some changes in ANSI-C 7z Decoder interfaces.
+- LZMA SDK is placed in the public domain.
+
+
+4.60 beta      2008-08-19
+-------------------------
+- Some minor fixes.
+
+
+4.59 beta      2008-08-13
+-------------------------
+- The bug was fixed:
+    LZMA Encoder in fast compression mode could access memory outside of 
+    allocated range in some rare cases.
+
+
+4.58 beta      2008-05-05
+-------------------------
+- ANSI-C LZMA Decoder was rewritten for speed optimizations.
+- ANSI-C LZMA Encoder was included to LZMA SDK.
+- C++ LZMA code now is just wrapper over ANSI-C code.
+
+
+4.57           2007-12-12
+-------------------------
+- Speed optimizations in C++ LZMA Decoder. 
+- Small changes for more compatibility with some C/C++ compilers.
+
+
+4.49 beta      2007-07-05
+-------------------------
+- .7z ANSI-C Decoder:
+     - now it supports BCJ and BCJ2 filters
+     - now it supports files larger than 4 GB.
+     - now it supports "Last Write Time" field for files.
+- C++ code for .7z archives compressing/decompressing from 7-zip 
+  was included to LZMA SDK.
+  
+
+4.43           2006-06-04
+-------------------------
+- Small changes for more compatibility with some C/C++ compilers.
+  
+
+4.42           2006-05-15
+-------------------------
+- Small changes in .h files in ANSI-C version.
+  
+
+4.39 beta      2006-04-14
+-------------------------
+- The bug in versions 4.33b:4.38b was fixed:
+  C++ version of LZMA encoder could not correctly compress 
+  files larger than 2 GB with HC4 match finder (-mfhc4).
+  
+
+4.37 beta      2006-04-06
+-------------------------
+- Fixes in C++ code: code could not be compiled if _NO_EXCEPTIONS was defined. 
+
+
+4.35 beta      2006-03-02
+-------------------------
+- The bug was fixed in C++ version of LZMA Decoder:
+    If encoded stream was corrupted, decoder could access memory 
+    outside of allocated range.
+
+
+4.34 beta      2006-02-27
+-------------------------
+- Compressing speed and memory requirements for compressing were increased
+- LZMA now can use only these match finders: HC4, BT2, BT3, BT4
+
+
+4.32           2005-12-09
+-------------------------
+- Java version of LZMA SDK was included
+
+
+4.30           2005-11-20
+-------------------------
+- Compression ratio was improved in -a2 mode
+- Speed optimizations for compressing in -a2 mode
+- -fb switch now supports values up to 273
+- The bug in 7z_C (7zIn.c) was fixed:
+  It used Alloc/Free functions from different memory pools.
+  So if program used two memory pools, it worked incorrectly.
+- 7z_C: .7z format supporting was improved
+- LZMA# SDK (C#.NET version) was included
+
+
+4.27 (Updated) 2005-09-21
+-------------------------
+- Some GUIDs/interfaces in C++ were changed.
+ IStream.h:
+   ISequentialInStream::Read now works as old ReadPart
+   ISequentialOutStream::Write now works as old WritePart
+
+
+4.27           2005-08-07
+-------------------------
+- The bug in LzmaDecodeSize.c was fixed:
+   if _LZMA_IN_CB and _LZMA_OUT_READ were defined,
+   decompressing worked incorrectly.
+
+
+4.26           2005-08-05
+-------------------------
+- Fixes in 7z_C code and LzmaTest.c:
+  previous versions could work incorrectly,
+  if malloc(0) returns 0
+
+
+4.23           2005-06-29
+-------------------------
+- Small fixes in C++ code
+
+
+4.22           2005-06-10
+-------------------------
+- Small fixes
+
+
+4.21           2005-06-08
+-------------------------
+- Interfaces for ANSI-C LZMA Decoder (LzmaDecode.c) were changed
+- New additional version of ANSI-C LZMA Decoder with zlib-like interface:
+    - LzmaStateDecode.h
+    - LzmaStateDecode.c
+    - LzmaStateTest.c
+- ANSI-C LZMA Decoder now can decompress files larger than 4 GB
+
+
+4.17           2005-04-18
+-------------------------
+- New example for RAM->RAM compressing/decompressing: 
+  LZMA + BCJ (filter for x86 code):
+    - LzmaRam.h
+    - LzmaRam.cpp
+    - LzmaRamDecode.h
+    - LzmaRamDecode.c
+    - -f86 switch for lzma.exe
+
+
+4.16           2005-03-29
+-------------------------
+- The bug was fixed in LzmaDecode.c (ANSI-C LZMA Decoder): 
+   If _LZMA_OUT_READ was defined, and if encoded stream was corrupted,
+   decoder could access memory outside of allocated range.
+- Speed optimization of ANSI-C LZMA Decoder (now it's about 20% faster).
+  Old version of LZMA Decoder now is in file LzmaDecodeSize.c. 
+  LzmaDecodeSize.c can provide slightly smaller code than LzmaDecode.c
+- Small speed optimization in LZMA C++ code
+- filter for SPARC's code was added
+- Simplified version of .7z ANSI-C Decoder was included
+
+
+4.06           2004-09-05
+-------------------------
+- The bug in v4.05 was fixed:
+    LZMA-Encoder didn't release output stream in some cases.
+
+
+4.05           2004-08-25
+-------------------------
+- Source code of filters for x86, IA-64, ARM, ARM-Thumb 
+  and PowerPC code was included to SDK
+- Some internal minor changes
+
+
+4.04           2004-07-28
+-------------------------
+- More compatibility with some C++ compilers
+
+
+4.03           2004-06-18
+-------------------------
+- "Benchmark" command was added. It measures compressing 
+  and decompressing speed and shows rating values. 
+  Also it checks hardware errors.
+
+
+4.02           2004-06-10
+-------------------------
+- C++ LZMA Encoder/Decoder code now is more portable
+  and it can be compiled by GCC on Linux.
+
+
+4.01           2004-02-15
+-------------------------
+- Some detection of data corruption was enabled.
+    LzmaDecode.c / RangeDecoderReadByte
+    .....
+    {
+      rd->ExtraBytes = 1;
+      return 0xFF;
+    }
+
+
+4.00           2004-02-13
+-------------------------
+- Original version of LZMA SDK
+
+
+
+HISTORY of the LZMA
+-------------------
+  2001-2008:  Improvements to LZMA compressing/decompressing code, 
+              keeping compatibility with original LZMA format
+  1996-2001:  Development of LZMA compression format
+
+  Some milestones:
+
+  2001-08-30: LZMA compression was added to 7-Zip
+  1999-01-02: First version of 7-Zip was released
+  
+
+End of document
diff --git a/deps/LZMA-SDK/DOC/lzma-sdk.txt b/deps/LZMA-SDK/DOC/lzma-sdk.txt
new file mode 100644
index 000000000..b11716938
--- /dev/null
+++ b/deps/LZMA-SDK/DOC/lzma-sdk.txt
@@ -0,0 +1,357 @@
+LZMA SDK 21.02
+--------------
+
+LZMA SDK provides the documentation, samples, header files,
+libraries, and tools you need to develop applications that 
+use 7z / LZMA / LZMA2 / XZ compression.
+
+LZMA is an improved version of the famous LZ77 compression algorithm. 
+It was improved to maximize the compression ratio while
+keeping high decompression speed and low memory requirements for 
+decompressing.
+
+LZMA2 is a LZMA based compression method. LZMA2 provides better 
+multithreading support for compression than LZMA and some other improvements.
+
+7z is a file format for data compression and file archiving.
+7z is a main file format for 7-Zip compression program (www.7-zip.org).
+7z format supports different compression methods: LZMA, LZMA2 and others.
+7z also supports AES-256 based encryption.
+
+XZ is a file format for data compression that uses LZMA2 compression.
+XZ format provides additional features: SHA/CRC check, filters for 
+improved compression ratio, and splitting into blocks and streams.
+
+
+
+LICENSE
+-------
+
+LZMA SDK is written and placed in the public domain by Igor Pavlov.
+
+Some code in LZMA SDK is based on public domain code from other developers:
+  1) PPMd var.H (2001): Dmitry Shkarin
+  2) SHA-256: Wei Dai (Crypto++ library)
+
+Anyone is free to copy, modify, publish, use, compile, sell, or distribute the 
+original LZMA SDK code, either in source code form or as a compiled binary, for 
+any purpose, commercial or non-commercial, and by any means.
+
+LZMA SDK code is compatible with open source licenses, for example, you can 
+include it in GNU GPL or GNU LGPL code.
+
+
+LZMA SDK Contents
+-----------------
+
+  Source code:
+
+    - C / C++ / C# / Java   - LZMA compression and decompression
+    - C / C++               - LZMA2 compression and decompression
+    - C / C++               - XZ compression and decompression
+    - C                     - 7z decompression
+    -     C++               - 7z compression and decompression
+    - C                     - small SFXs for installers (7z decompression)
+    -     C++               - SFXs and SFXs for installers (7z decompression)
+
+  Precompiled binaries:
+
+    - console programs for lzma / 7z / xz compression and decompression
+    - SFX modules for installers.
+
+
+UNIX/Linux version 
+------------------
+To compile C++ version of file->file LZMA encoding, go to directory
+CPP/7zip/Bundles/LzmaCon
+and call make to recompile it:
+  make -f makefile.gcc clean all
+
+In some UNIX/Linux versions you must compile LZMA with static libraries.
+To compile with static libraries, you can use 
+LIB = -lm -static
+
+Also you can use p7zip (port of 7-Zip for POSIX systems like Unix or Linux):
+  
+  http://p7zip.sourceforge.net/
+
+
+Files
+-----
+
+DOC/7zC.txt          - 7z ANSI-C Decoder description
+DOC/7zFormat.txt     - 7z Format description
+DOC/installer.txt    - information about 7-Zip for installers
+DOC/lzma.txt         - LZMA compression description
+DOC/lzma-sdk.txt     - LZMA SDK description (this file)
+DOC/lzma-history.txt - history of LZMA SDK
+DOC/lzma-specification.txt - Specification of LZMA
+DOC/Methods.txt      - Compression method IDs for .7z
+
+bin/installer/   - example script to create installer that uses SFX module.
+
+bin/7zdec.exe    - simplified 7z archive decoder
+bin/7zr.exe      - 7-Zip console program (reduced version)
+bin/x64/7zr.exe  - 7-Zip console program (reduced version) (x64 version)
+bin/lzma.exe     - file->file LZMA encoder/decoder for Windows
+bin/7zS2.sfx     - small SFX module for installers (GUI version)
+bin/7zS2con.sfx  - small SFX module for installers (Console version)
+bin/7zSD.sfx     - SFX module for installers.
+
+
+7zDec.exe
+---------
+7zDec.exe is simplified 7z archive decoder.
+It supports only LZMA, LZMA2, and PPMd methods.
+7zDec decodes whole solid block from 7z archive to RAM.
+The RAM consumption can be high.
+
+
+
+
+Source code structure
+---------------------
+
+
+Asm/ - asm files (optimized code for CRC calculation and Intel-AES encryption)
+
+C/  - C files (compression / decompression and other)
+  Util/
+    7z       - 7z decoder program (decoding 7z files)
+    Lzma     - LZMA program (file->file LZMA encoder/decoder).
+    LzmaLib  - LZMA library (.DLL for Windows)
+    SfxSetup - small SFX module for installers 
+
+CPP/ -- CPP files
+
+  Common  - common files for C++ projects
+  Windows - common files for Windows related code
+
+  7zip    - files related to 7-Zip
+
+    Archive - files related to archiving
+
+      Common   - common files for archive handling
+      7z       - 7z C++ Encoder/Decoder
+
+    Bundles  - Modules that are bundles of other modules (files)
+  
+      Alone7z       - 7zr.exe: Standalone 7-Zip console program (reduced version)
+      Format7zExtractR  - 7zxr.dll: Reduced version of 7z DLL: extracting from 7z/LZMA/BCJ/BCJ2.
+      Format7zR         - 7zr.dll:  Reduced version of 7z DLL: extracting/compressing to 7z/LZMA/BCJ/BCJ2
+      LzmaCon       - lzma.exe: LZMA compression/decompression
+      LzmaSpec      - example code for LZMA Specification
+      SFXCon        - 7zCon.sfx: Console 7z SFX module
+      SFXSetup      - 7zS.sfx: 7z SFX module for installers
+      SFXWin        - 7z.sfx: GUI 7z SFX module
+
+    Common   - common files for 7-Zip
+
+    Compress - files for compression/decompression
+
+    Crypto   - files for encryption / decryption
+
+    UI       - User Interface files
+         
+      Client7z - Test application for 7za.dll, 7zr.dll, 7zxr.dll
+      Common   - Common UI files
+      Console  - Code for console program (7z.exe)
+      Explorer    - Some code from 7-Zip Shell extension
+      FileManager - Some GUI code from 7-Zip File Manager
+      GUI         - Some GUI code from 7-Zip
+
+
+CS/ - C# files
+  7zip
+    Common   - some common files for 7-Zip
+    Compress - files related to compression/decompression
+      LZ     - files related to LZ (Lempel-Ziv) compression algorithm
+      LZMA         - LZMA compression/decompression
+      LzmaAlone    - file->file LZMA compression/decompression
+      RangeCoder   - Range Coder (special code of compression/decompression)
+
+Java/  - Java files
+  SevenZip
+    Compression    - files related to compression/decompression
+      LZ           - files related to LZ (Lempel-Ziv) compression algorithm
+      LZMA         - LZMA compression/decompression
+      RangeCoder   - Range Coder (special code of compression/decompression)
+
+
+Note: 
+  Asm / C / C++ source code of LZMA SDK is part of 7-Zip's source code.
+  7-Zip's source code can be downloaded from 7-Zip's SourceForge page:
+
+  http://sourceforge.net/projects/sevenzip/
+
+
+
+LZMA features
+-------------
+  - Variable dictionary size (up to 1 GB)
+  - Estimated compressing speed: about 2 MB/s on 2 GHz CPU
+  - Estimated decompressing speed: 
+      - 20-30 MB/s on modern 2 GHz cpu
+      - 1-2 MB/s on 200 MHz simple RISC cpu: (ARM, MIPS, PowerPC)
+  - Small memory requirements for decompressing (16 KB + DictionarySize)
+  - Small code size for decompressing: 5-8 KB
+
+LZMA decoder uses only integer operations and can be 
+implemented in any modern 32-bit CPU (or on 16-bit CPU with some conditions).
+
+Some critical operations that affect the speed of LZMA decompression:
+  1) 32*16 bit integer multiply
+  2) Mispredicted branches (penalty mostly depends on pipeline length)
+  3) 32-bit shift and arithmetic operations
+
+The speed of LZMA decompressing mostly depends on CPU speed.
+Memory speed has no big meaning. But if your CPU has small data cache, 
+overall weight of memory speed will slightly increase.
+
+
+How To Use
+----------
+
+Using LZMA encoder/decoder executable
+--------------------------------------
+
+Usage:  LZMA <e|d> inputFile outputFile [<switches>...]
+
+  e: encode file
+
+  d: decode file
+
+  b: Benchmark. There are two tests: compressing and decompressing 
+     with LZMA method. Benchmark shows rating in MIPS (million 
+     instructions per second). Rating value is calculated from 
+     measured speed and it is normalized with Intel's Core 2 results.
+     Also Benchmark checks possible hardware errors (RAM 
+     errors in most cases). Benchmark uses these settings:
+     (-a1, -d21, -fb32, -mfbt4). You can change only -d parameter. 
+     Also you can change the number of iterations. Example for 30 iterations:
+       LZMA b 30
+     Default number of iterations is 10.
+
+<Switches>
+  
+
+  -a{N}:  set compression mode 0 = fast, 1 = normal
+          default: 1 (normal)
+
+  -d{N}:  Sets Dictionary size - [0, 30], default: 23 (8MB)
+          The maximum value for dictionary size is 1 GB = 2^30 bytes.
+          Dictionary size is calculated as DictionarySize = 2^N bytes. 
+          For decompressing file compressed by LZMA method with dictionary 
+          size D = 2^N you need about D bytes of memory (RAM).
+
+  -fb{N}: set number of fast bytes - [5, 273], default: 128
+          Usually big number gives a little bit better compression ratio 
+          and slower compression process.
+
+  -lc{N}: set number of literal context bits - [0, 8], default: 3
+          Sometimes lc=4 gives gain for big files.
+
+  -lp{N}: set number of literal pos bits - [0, 4], default: 0
+          lp switch is intended for periodical data when period is 
+          equal 2^N. For example, for 32-bit (4 bytes) 
+          periodical data you can use lp=2. Often it's better to set lc0, 
+          if you change lp switch.
+
+  -pb{N}: set number of pos bits - [0, 4], default: 2
+          pb switch is intended for periodical data 
+          when period is equal 2^N.
+
+  -mf{MF_ID}: set Match Finder. Default: bt4. 
+              Algorithms from hc* group don't provide good compression 
+              ratio, but they often work pretty fast in combination with 
+              fast mode (-a0).
+
+              Memory requirements depend on dictionary size 
+              (parameter "d" in table below). 
+
+               MF_ID     Memory                   Description
+
+                bt2    d *  9.5 + 4MB  Binary Tree with 2 bytes hashing.
+                bt3    d * 11.5 + 4MB  Binary Tree with 3 bytes hashing.
+                bt4    d * 11.5 + 4MB  Binary Tree with 4 bytes hashing.
+                hc4    d *  7.5 + 4MB  Hash Chain with 4 bytes hashing.
+
+  -eos:   write End Of Stream marker. By default LZMA doesn't write 
+          eos marker, since LZMA decoder knows uncompressed size 
+          stored in .lzma file header.
+
+  -si:    Read data from stdin (it will write End Of Stream marker).
+  -so:    Write data to stdout
+
+
+Examples:
+
+1) LZMA e file.bin file.lzma -d16 -lc0 
+
+compresses file.bin to file.lzma with 64 KB dictionary (2^16=64K)  
+and 0 literal context bits. -lc0 allows to reduce memory requirements 
+for decompression.
+
+
+2) LZMA e file.bin file.lzma -lc0 -lp2
+
+compresses file.bin to file.lzma with settings suitable 
+for 32-bit periodical data (for example, ARM or MIPS code).
+
+3) LZMA d file.lzma file.bin
+
+decompresses file.lzma to file.bin.
+
+
+Compression ratio hints
+-----------------------
+
+Recommendations
+---------------
+
+To increase the compression ratio for LZMA compressing it's desirable 
+to have aligned data (if it's possible) and also it's desirable to locate
+data in such order, where code is grouped in one place and data is 
+grouped in other place (it's better than such mixing: code, data, code,
+data, ...).
+
+
+Filters
+-------
+You can increase the compression ratio for some data types, using
+special filters before compressing. For example, it's possible to 
+increase the compression ratio by 5-10% for code for these CPU ISAs: 
+x86, IA-64, ARM, ARM-Thumb, PowerPC, SPARC.
+
+You can find C source code of such filters in C/Bra*.* files
+
+You can check the compression ratio gain of these filters with such 
+7-Zip commands (example for ARM code):
+No filter:
+  7z a a1.7z a.bin -m0=lzma
+
+With filter for little-endian ARM code:
+  7z a a2.7z a.bin -m0=arm -m1=lzma        
+
+It works in such manner:
+Compressing    = Filter_encoding + LZMA_encoding
+Decompressing  = LZMA_decoding + Filter_decoding
+
+Compressing and decompressing speed of such filters is very high,
+so it will not increase decompressing time too much.
+Moreover, it reduces decompression time for LZMA_decoding, 
+since compression ratio with filtering is higher.
+
+These filters convert CALL (calling procedure) instructions 
+from relative offsets to absolute addresses, so such data becomes more 
+compressible.
+
+For some ISAs (for example, for MIPS) it's impossible to get gain from such filter.
+
+
+
+---
+
+http://www.7-zip.org
+http://www.7-zip.org/sdk.html
+http://www.7-zip.org/support.html
diff --git a/deps/LZMA-SDK/DOC/lzma-specification.txt b/deps/LZMA-SDK/DOC/lzma-specification.txt
new file mode 100644
index 000000000..b6796df75
--- /dev/null
+++ b/deps/LZMA-SDK/DOC/lzma-specification.txt
@@ -0,0 +1,1176 @@
+LZMA specification (DRAFT version)
+----------------------------------
+
+Author: Igor Pavlov
+Date: 2015-06-14
+
+This specification defines the format of LZMA compressed data and lzma file format.
+
+Notation 
+--------
+
+We use the syntax of C++ programming language.
+We use the following types in C++ code:
+  unsigned - unsigned integer, at least 16 bits in size
+  int      - signed integer, at least 16 bits in size
+  UInt64   - 64-bit unsigned integer
+  UInt32   - 32-bit unsigned integer
+  UInt16   - 16-bit unsigned integer
+  Byte     - 8-bit unsigned integer
+  bool     - boolean type with two possible values: false, true
+
+
+lzma file format
+================
+
+The lzma file contains the raw LZMA stream and the header with related properties.
+
+The files in that format use ".lzma" extension.
+
+The lzma file format layout:
+
+Offset Size Description
+
+  0     1   LZMA model properties (lc, lp, pb) in encoded form
+  1     4   Dictionary size (32-bit unsigned integer, little-endian)
+  5     8   Uncompressed size (64-bit unsigned integer, little-endian)
+ 13         Compressed data (LZMA stream)
+
+LZMA properties:
+
+    name  Range          Description
+
+      lc  [0, 8]         the number of "literal context" bits
+      lp  [0, 4]         the number of "literal pos" bits
+      pb  [0, 4]         the number of "pos" bits
+dictSize  [0, 2^32 - 1]  the dictionary size 
+
+The following code encodes LZMA properties:
+
+void EncodeProperties(Byte *properties)
+{
+  properties[0] = (Byte)((pb * 5 + lp) * 9 + lc);
+  Set_UInt32_LittleEndian(properties + 1, dictSize);
+}
+
+If the value of dictionary size in properties is smaller than (1 << 12),
+the LZMA decoder must set the dictionary size variable to (1 << 12).
+
+#define LZMA_DIC_MIN (1 << 12)
+
+  unsigned lc, pb, lp;
+  UInt32 dictSize;
+  UInt32 dictSizeInProperties;
+
+  void DecodeProperties(const Byte *properties)
+  {
+    unsigned d = properties[0];
+    if (d >= (9 * 5 * 5))
+      throw "Incorrect LZMA properties";
+    lc = d % 9;
+    d /= 9;
+    pb = d / 5;
+    lp = d % 5;
+    dictSizeInProperties = 0;
+    for (int i = 0; i < 4; i++)
+      dictSizeInProperties |= (UInt32)properties[i + 1] << (8 * i);
+    dictSize = dictSizeInProperties;
+    if (dictSize < LZMA_DIC_MIN)
+      dictSize = LZMA_DIC_MIN;
+  }
+
+If "Uncompressed size" field contains ones in all 64 bits, it means that
+uncompressed size is unknown and there is the "end marker" in stream,
+that indicates the end of decoding point.
+In the opposite case, if the value from "Uncompressed size" field is not
+equal to ((2^64) - 1), the LZMA stream decoding must be finished after
+specified number of bytes (Uncompressed size) is decoded. And if there 
+is the "end marker", the LZMA decoder must read that marker also.
+
+
+The new scheme to encode LZMA properties
+----------------------------------------
+
+If LZMA compression is used for some other format, it's recommended to
+use a new improved scheme to encode LZMA properties. That new scheme was
+used in xz format that uses the LZMA2 compression algorithm.
+The LZMA2 is a new compression algorithm that is based on the LZMA algorithm.
+
+The dictionary size in LZMA2 is encoded with just one byte and LZMA2 supports
+only reduced set of dictionary sizes:
+  (2 << 11), (3 << 11),
+  (2 << 12), (3 << 12),
+  ...
+  (2 << 30), (3 << 30),
+  (2 << 31) - 1
+
+The dictionary size can be extracted from encoded value with the following code:
+
+  dictSize = (p == 40) ? 0xFFFFFFFF : (((UInt32)2 | ((p) & 1)) << ((p) / 2 + 11));
+
+Also there is additional limitation (lc + lp <= 4) in LZMA2 for values of 
+"lc" and "lp" properties:
+
+  if (lc + lp > 4)
+    throw "Unsupported properties: (lc + lp) > 4";
+
+There are some advantages for LZMA decoder with such (lc + lp) value
+limitation. It reduces the maximum size of tables allocated by decoder.
+And it reduces the complexity of initialization procedure, that can be 
+important to keep high speed of decoding of big number of small LZMA streams.
+
+It's recommended to use that limitation (lc + lp <= 4) for any new format
+that uses LZMA compression. Note that the combinations of "lc" and "lp" 
+parameters, where (lc + lp > 4), can provide significant improvement in 
+compression ratio only in some rare cases.
+
+The LZMA properties can be encoded into two bytes in new scheme:
+
+Offset Size Description
+
+  0     1   The dictionary size encoded with LZMA2 scheme
+  1     1   LZMA model properties (lc, lp, pb) in encoded form
+
+
+The RAM usage 
+=============
+
+The RAM usage for LZMA decoder is determined by the following parts:
+
+1) The Sliding Window (from 4 KiB to 4 GiB).
+2) The probability model counter arrays (arrays of 16-bit variables).
+3) Some additional state variables (about 10 variables of 32-bit integers).
+
+
+The RAM usage for Sliding Window
+--------------------------------
+
+There are two main scenarios of decoding:
+
+1) The decoding of full stream to one RAM buffer.
+
+  If we decode full LZMA stream to one output buffer in RAM, the decoder 
+  can use that output buffer as sliding window. So the decoder doesn't 
+  need additional buffer allocated for sliding window.
+
+2) The decoding to some external storage.
+
+  If we decode LZMA stream to external storage, the decoder must allocate
+  the buffer for sliding window. The size of that buffer must be equal 
+  or larger than the value of dictionary size from properties of LZMA stream.
+
+In this specification we describe the code for decoding to some external
+storage. The optimized version of code for decoding of full stream to one
+output RAM buffer can require some minor changes in code.
+
+
+The RAM usage for the probability model counters
+------------------------------------------------
+
+The size of the probability model counter arrays is calculated with the 
+following formula:
+
+size_of_prob_arrays = 1846 + 768 * (1 << (lp + lc))
+
+Each probability model counter is 11-bit unsigned integer.
+If we use 16-bit integer variables (2-byte integers) for these probability 
+model counters, the RAM usage required by probability model counter arrays 
+can be estimated with the following formula:
+
+  RAM = 4 KiB + 1.5 KiB * (1 << (lp + lc))
+
+For example, for default LZMA parameters (lp = 0 and lc = 3), the RAM usage is
+
+  RAM_lc3_lp0 = 4 KiB + 1.5 KiB * 8 = 16 KiB
+
+The maximum RAM state usage is required for decoding the stream with lp = 4 
+and lc = 8:
+
+  RAM_lc8_lp4 = 4 KiB + 1.5 KiB * 4096 = 6148 KiB
+
+If the decoder uses LZMA2's limited property condition 
+(lc + lp <= 4), the RAM usage will be not larger than
+
+  RAM_lc_lp_4 = 4 KiB + 1.5 KiB * 16 = 28 KiB
+
+
+The RAM usage for encoder
+-------------------------
+
+There are many variants for LZMA encoding code.
+These variants have different values for memory consumption.
+Note that memory consumption for LZMA Encoder can not be 
+smaller than memory consumption of LZMA Decoder for same stream.
+
+The RAM usage required by modern effective implementation of 
+LZMA Encoder can be estimated with the following formula:
+
+  Encoder_RAM_Usage = 4 MiB + 11 * dictionarySize.
+
+But there are some modes of the encoder that require less memory.
+
+
+LZMA Decoding
+=============
+
+The LZMA compression algorithm uses LZ-based compression with Sliding Window
+and Range Encoding as entropy coding method.
+
+
+Sliding Window
+--------------
+
+LZMA uses Sliding Window compression similar to LZ77 algorithm.
+
+LZMA stream must be decoded to the sequence that consists
+of MATCHES and LITERALS:
+  
+  - a LITERAL is an 8-bit character (one byte).
+    The decoder just puts that LITERAL to the uncompressed stream.
+  
+  - a MATCH is a pair of two numbers (DISTANCE-LENGTH pair).
+    The decoder takes one byte exactly "DISTANCE" characters behind
+    current position in the uncompressed stream and puts it to 
+    uncompressed stream. The decoder must repeat it "LENGTH" times.
+
+The "DISTANCE" can not be larger than dictionary size.
+And the "DISTANCE" can not be larger than the number of bytes in
+the uncompressed stream that were decoded before that match.
+
+In this specification we use cyclic buffer to implement Sliding Window
+for LZMA decoder:
+
+class COutWindow
+{
+  Byte *Buf;
+  UInt32 Pos;
+  UInt32 Size;
+  bool IsFull;
+
+public:
+  unsigned TotalPos;
+  COutStream OutStream;
+
+  COutWindow(): Buf(NULL) {}
+  ~COutWindow() { delete []Buf; }
+ 
+  void Create(UInt32 dictSize)
+  {
+    Buf = new Byte[dictSize];
+    Pos = 0;
+    Size = dictSize;
+    IsFull = false;
+    TotalPos = 0;
+  }
+
+  void PutByte(Byte b)
+  {
+    TotalPos++;
+    Buf[Pos++] = b;
+    if (Pos == Size)
+    {
+      Pos = 0;
+      IsFull = true;
+    }
+    OutStream.WriteByte(b);
+  }
+
+  Byte GetByte(UInt32 dist) const
+  {
+    return Buf[dist <= Pos ? Pos - dist : Size - dist + Pos];
+  }
+
+  void CopyMatch(UInt32 dist, unsigned len)
+  {
+    for (; len > 0; len--)
+      PutByte(GetByte(dist));
+  }
+
+  bool CheckDistance(UInt32 dist) const
+  {
+    return dist <= Pos || IsFull;
+  }
+
+  bool IsEmpty() const
+  {
+    return Pos == 0 && !IsFull;
+  }
+};
+
+
+In another implementation it's possible to use one buffer that contains 
+Sliding Window and the whole data stream after uncompressing.
+
+
+Range Decoder
+-------------
+
+LZMA algorithm uses Range Encoding (1) as entropy coding method.
+
+LZMA stream contains just one very big number in big-endian encoding.
+LZMA decoder uses the Range Decoder to extract a sequence of binary
+symbols from that big number.
+
+The state of the Range Decoder:
+
+struct CRangeDecoder
+{
+  UInt32 Range; 
+  UInt32 Code;
+  InputStream *InStream;
+
+  bool Corrupted;
+}
+
+The notes about UInt32 type for the "Range" and "Code" variables:
+
+  It's possible to use 64-bit (unsigned or signed) integer type
+  for the "Range" and the "Code" variables instead of 32-bit unsigned,
+  but some additional code must be used to truncate the values to 
+  low 32-bits after some operations.
+
+  If the programming language does not support 32-bit unsigned integer type 
+  (like in case of JAVA language), it's possible to use 32-bit signed integer, 
+  but some code must be changed. For example, it's required to change the code
+  that uses comparison operations for UInt32 variables in this specification.
+
+The Range Decoder can be in some states that can be treated as 
+"Corruption" in LZMA stream. The Range Decoder uses the variable "Corrupted":
+
+  (Corrupted == false), if the Range Decoder has not detected any corruption.
+  (Corrupted == true), if the Range Decoder has detected some corruption.
+
+The reference LZMA Decoder ignores the value of the "Corrupted" variable.
+So it continues to decode the stream, even if the corruption can be detected
+in the Range Decoder. To provide the full compatibility with output of the 
+reference LZMA Decoder, other LZMA Decoder implementations must also
+ignore the value of the "Corrupted" variable.
+
+The LZMA Encoder is required to create only such LZMA streams, that will not 
+lead the Range Decoder to states, where the "Corrupted" variable is set to true.
+
+The Range Decoder reads first 5 bytes from input stream to initialize
+the state:
+
+bool CRangeDecoder::Init()
+{
+  Corrupted = false;
+  Range = 0xFFFFFFFF;
+  Code = 0;
+
+  Byte b = InStream->ReadByte();
+  
+  for (int i = 0; i < 4; i++)
+    Code = (Code << 8) | InStream->ReadByte();
+  
+  if (b != 0 || Code == Range)
+    Corrupted = true;
+  return b == 0;
+}
+
+The LZMA Encoder always writes ZERO in initial byte of compressed stream.
+That scheme allows to simplify the code of the Range Encoder in the 
+LZMA Encoder. If initial byte is not equal to ZERO, the LZMA Decoder must
+stop decoding and report error.
+
+After the last bit of data was decoded by Range Decoder, the value of the
+"Code" variable must be equal to 0. The LZMA Decoder must check it by 
+calling the IsFinishedOK() function:
+
+  bool IsFinishedOK() const { return Code == 0; }
+
+If there is corruption in data stream, there is big probability that
+the "Code" value will not be equal to 0 when decoding is finished. So that
+check in the IsFinishedOK() function provides a very good feature for
+corruption detection.
+
+The value of the "Range" variable before each bit decoding can not be smaller 
+than ((UInt32)1 << 24). The Normalize() function keeps the "Range" value in 
+described range.
+
+#define kTopValue ((UInt32)1 << 24)
+
+void CRangeDecoder::Normalize()
+{
+  if (Range < kTopValue)
+  {
+    Range <<= 8;
+    Code = (Code << 8) | InStream->ReadByte();
+  }
+}
+
+Notes: if the size of the "Code" variable is larger than 32 bits, it's
+required to keep only low 32 bits of the "Code" variable after the change
+in Normalize() function.
+
+If the LZMA Stream is not corrupted, the value of the "Code" variable is
+always smaller than value of the "Range" variable.
+But the Range Decoder ignores some types of corruptions, so the value of
+the "Code" variable can be equal or larger than value of the "Range" variable
+for some "Corrupted" archives.
+
+
+LZMA uses Range Encoding only with binary symbols of two types:
+  1) binary symbols with fixed and equal probabilities (direct bits)
+  2) binary symbols with predicted probabilities
+
+The DecodeDirectBits() function decodes the sequence of direct bits:
+
+UInt32 CRangeDecoder::DecodeDirectBits(unsigned numBits)
+{
+  UInt32 res = 0;
+  do
+  {
+    Range >>= 1;
+    Code -= Range;
+    UInt32 t = 0 - ((UInt32)Code >> 31);
+    Code += Range & t;
+    
+    if (Code == Range)
+      Corrupted = true;
+    
+    Normalize();
+    res <<= 1;
+    res += t + 1;
+  }
+  while (--numBits);
+  return res;
+}
+
+
+The Bit Decoding with Probability Model
+---------------------------------------
+
+The task of Bit Probability Model is to estimate probabilities of binary
+symbols. And then it provides the Range Decoder with that information.
+The better prediction provides better compression ratio.
+The Bit Probability Model uses statistical data of previous decoded
+symbols.
+
+That estimated probability is presented as 11-bit unsigned integer value
+that represents the probability of symbol "0".
+
+#define kNumBitModelTotalBits 11
+
+Mathematical probabilities can be presented with the following formulas:
+     probability(symbol_0) = prob / 2048.
+     probability(symbol_1) =  1 - probability(symbol_0) =
+                           =  1 - prob / 2048 =  
+                           =  (2048 - prob) / 2048
+where the "prob" variable contains 11-bit integer probability counter.
+
+It's recommended to use 16-bit unsigned integer type, to store these 11-bit
+probability values:
+
+typedef UInt16 CProb;
+
+Each probability value must be initialized with value ((1 << 11) / 2),
+that represents the state, where probabilities of symbols 0 and 1 
+are equal to 0.5:
+
+#define PROB_INIT_VAL ((1 << kNumBitModelTotalBits) / 2)
+
+The INIT_PROBS macro is used to initialize the array of CProb variables:
+
+#define INIT_PROBS(p) \
+ { for (unsigned i = 0; i < sizeof(p) / sizeof(p[0]); i++) p[i] = PROB_INIT_VAL; }
+
+
+The DecodeBit() function decodes one bit.
+The LZMA decoder provides the pointer to CProb variable that contains 
+information about estimated probability for symbol 0 and the Range Decoder 
+updates that CProb variable after decoding. The Range Decoder increases 
+estimated probability of the symbol that was decoded:
+
+#define kNumMoveBits 5
+
+unsigned CRangeDecoder::DecodeBit(CProb *prob)
+{
+  unsigned v = *prob;
+  UInt32 bound = (Range >> kNumBitModelTotalBits) * v;
+  unsigned symbol;
+  if (Code < bound)
+  {
+    v += ((1 << kNumBitModelTotalBits) - v) >> kNumMoveBits;
+    Range = bound;
+    symbol = 0;
+  }
+  else
+  {
+    v -= v >> kNumMoveBits;
+    Code -= bound;
+    Range -= bound;
+    symbol = 1;
+  }
+  *prob = (CProb)v;
+  Normalize();
+  return symbol;
+}
+
+
+The Binary Tree of bit model counters
+-------------------------------------
+
+LZMA uses a tree of Bit model variables to decode symbol that needs
+several bits for storing. There are two versions of such trees in LZMA:
+  1) the tree that decodes bits from high bit to low bit (the normal scheme).
+  2) the tree that decodes bits from low bit to high bit (the reverse scheme).
+
+Each binary tree structure supports different size of decoded symbol
+(the size of binary sequence that contains value of symbol).
+If that size of decoded symbol is "NumBits" bits, the tree structure
+uses the array of (1 << NumBits) counters of CProb type.
+But only ((1 << NumBits) - 1) items are used by encoder and decoder.
+The first item (the item with index equal to 0) in array is unused.
+That scheme with unused array's item allows to simplify the code.
+
+unsigned BitTreeReverseDecode(CProb *probs, unsigned numBits, CRangeDecoder *rc)
+{
+  unsigned m = 1;
+  unsigned symbol = 0;
+  for (unsigned i = 0; i < numBits; i++)
+  {
+    unsigned bit = rc->DecodeBit(&probs[m]);
+    m <<= 1;
+    m += bit;
+    symbol |= (bit << i);
+  }
+  return symbol;
+}
+
+template <unsigned NumBits>
+class CBitTreeDecoder
+{
+  CProb Probs[(unsigned)1 << NumBits];
+
+public:
+
+  void Init()
+  {
+    INIT_PROBS(Probs);
+  }
+
+  unsigned Decode(CRangeDecoder *rc)
+  {
+    unsigned m = 1;
+    for (unsigned i = 0; i < NumBits; i++)
+      m = (m << 1) + rc->DecodeBit(&Probs[m]);
+    return m - ((unsigned)1 << NumBits);
+  }
+
+  unsigned ReverseDecode(CRangeDecoder *rc)
+  {
+    return BitTreeReverseDecode(Probs, NumBits, rc);
+  }
+};
+
+
+LZ part of LZMA 
+---------------
+
+LZ part of LZMA describes details about the decoding of MATCHES and LITERALS.
+
+
+The Literal Decoding
+--------------------
+
+The LZMA Decoder uses (1 << (lc + lp)) tables with CProb values, where 
+each table contains 0x300 CProb values:
+
+  CProb *LitProbs;
+
+  void CreateLiterals()
+  {
+    LitProbs = new CProb[(UInt32)0x300 << (lc + lp)];
+  }
+  
+  void InitLiterals()
+  {
+    UInt32 num = (UInt32)0x300 << (lc + lp);
+    for (UInt32 i = 0; i < num; i++)
+      LitProbs[i] = PROB_INIT_VAL;
+  }
+
+To select the table for decoding it uses the context that consists of
+(lc) high bits from previous literal and (lp) low bits from value that
+represents current position in outputStream.
+
+If (state >= 7), the Literal Decoder also uses "matchByte" that represents
+the byte in OutputStream at position that is the DISTANCE bytes before
+current position, where the DISTANCE is the distance in DISTANCE-LENGTH pair
+of latest decoded match.
+
+The following code decodes one literal and puts it to Sliding Window buffer:
+
+  void DecodeLiteral(unsigned state, UInt32 rep0)
+  {
+    unsigned prevByte = 0;
+    if (!OutWindow.IsEmpty())
+      prevByte = OutWindow.GetByte(1);
+    
+    unsigned symbol = 1;
+    unsigned litState = ((OutWindow.TotalPos & ((1 << lp) - 1)) << lc) + (prevByte >> (8 - lc));
+    CProb *probs = &LitProbs[(UInt32)0x300 * litState];
+    
+    if (state >= 7)
+    {
+      unsigned matchByte = OutWindow.GetByte(rep0 + 1);
+      do
+      {
+        unsigned matchBit = (matchByte >> 7) & 1;
+        matchByte <<= 1;
+        unsigned bit = RangeDec.DecodeBit(&probs[((1 + matchBit) << 8) + symbol]);
+        symbol = (symbol << 1) | bit;
+        if (matchBit != bit)
+          break;
+      }
+      while (symbol < 0x100);
+    }
+    while (symbol < 0x100)
+      symbol = (symbol << 1) | RangeDec.DecodeBit(&probs[symbol]);
+    OutWindow.PutByte((Byte)(symbol - 0x100));
+  }
+
+
+The match length decoding
+-------------------------
+
+The match length decoder returns normalized (zero-based value) 
+length of match. That value can be converted to real length of the match 
+with the following code:
+
+#define kMatchMinLen 2
+
+    matchLen = len + kMatchMinLen;
+
+The match length decoder can return the values from 0 to 271.
+And the corresponding real match length values can be in the range
+from 2 to 273.
+
+The following scheme is used for the match length encoding:
+
+  Binary encoding    Binary Tree structure    Zero-based match length 
+  sequence                                    (binary + decimal):
+
+  0 xxx              LowCoder[posState]       xxx
+  1 0 yyy            MidCoder[posState]       yyy + 8
+  1 1 zzzzzzzz       HighCoder                zzzzzzzz + 16
+
+LZMA uses bit model variable "Choice" to decode the first selection bit.
+
+If the first selection bit is equal to 0, the decoder uses binary tree 
+  LowCoder[posState] to decode 3-bit zero-based match length (xxx).
+
+If the first selection bit is equal to 1, the decoder uses bit model 
+  variable "Choice2" to decode the second selection bit.
+
+  If the second selection bit is equal to 0, the decoder uses binary tree 
+    MidCoder[posState] to decode 3-bit "yyy" value, and zero-based match
+    length is equal to (yyy + 8).
+
+  If the second selection bit is equal to 1, the decoder uses binary tree 
+    HighCoder to decode 8-bit "zzzzzzzz" value, and zero-based 
+    match length is equal to (zzzzzzzz + 16).
+
+LZMA uses "posState" value as context to select the binary tree 
+from LowCoder and MidCoder binary tree arrays:
+
+    unsigned posState = OutWindow.TotalPos & ((1 << pb) - 1);
+
+The full code of the length decoder:
+
+class CLenDecoder
+{
+  CProb Choice;
+  CProb Choice2;
+  CBitTreeDecoder<3> LowCoder[1 << kNumPosBitsMax];
+  CBitTreeDecoder<3> MidCoder[1 << kNumPosBitsMax];
+  CBitTreeDecoder<8> HighCoder;
+
+public:
+
+  void Init()
+  {
+    Choice = PROB_INIT_VAL;
+    Choice2 = PROB_INIT_VAL;
+    HighCoder.Init();
+    for (unsigned i = 0; i < (1 << kNumPosBitsMax); i++)
+    {
+      LowCoder[i].Init();
+      MidCoder[i].Init();
+    }
+  }
+
+  unsigned Decode(CRangeDecoder *rc, unsigned posState)
+  {
+    if (rc->DecodeBit(&Choice) == 0)
+      return LowCoder[posState].Decode(rc);
+    if (rc->DecodeBit(&Choice2) == 0)
+      return 8 + MidCoder[posState].Decode(rc);
+    return 16 + HighCoder.Decode(rc);
+  }
+};
+
+The LZMA decoder uses two instances of CLenDecoder class.
+The first instance is for the matches of "Simple Match" type,
+and the second instance is for the matches of "Rep Match" type:
+
+  CLenDecoder LenDecoder;
+  CLenDecoder RepLenDecoder;
+
+
+The match distance decoding
+---------------------------
+
+LZMA supports dictionary sizes up to 4 GiB minus 1.
+The value of match distance (decoded by distance decoder) can be 
+from 1 to 2^32. But the distance value that is equal to 2^32 is used to
+indicate the "End of stream" marker. So real largest match distance 
+that is used for LZ-window match is (2^32 - 1).
+
+LZMA uses normalized match length (zero-based length) 
+to calculate the context state "lenState" to decode the distance value:
+
+#define kNumLenToPosStates 4
+
+    unsigned lenState = len;
+    if (lenState > kNumLenToPosStates - 1)
+      lenState = kNumLenToPosStates - 1;
+
+The distance decoder returns the "dist" value that is zero-based value 
+of match distance. The real match distance can be calculated with the
+following code:
+  
+  matchDistance = dist + 1; 
+
+The state of the distance decoder and the initialization code: 
+
+  #define kEndPosModelIndex 14
+  #define kNumFullDistances (1 << (kEndPosModelIndex >> 1))
+  #define kNumAlignBits 4
+
+  CBitTreeDecoder<6> PosSlotDecoder[kNumLenToPosStates];
+  CProb PosDecoders[1 + kNumFullDistances - kEndPosModelIndex];
+  CBitTreeDecoder<kNumAlignBits> AlignDecoder;
+
+  void InitDist()
+  {
+    for (unsigned i = 0; i < kNumLenToPosStates; i++)
+      PosSlotDecoder[i].Init();
+    AlignDecoder.Init();
+    INIT_PROBS(PosDecoders);
+  }
+
+At first stage the distance decoder decodes 6-bit "posSlot" value with bit
+tree decoder from PosSlotDecoder array. It's possible to get 2^6=64 different 
+"posSlot" values.
+
+    unsigned posSlot = PosSlotDecoder[lenState].Decode(&RangeDec);
+
+The encoding scheme for distance value is shown in the following table:
+
+posSlot (decimal) /
+      zero-based distance (binary)
+ 0    0
+ 1    1
+ 2    10
+ 3    11
+
+ 4    10 x
+ 5    11 x
+ 6    10 xx
+ 7    11 xx
+ 8    10 xxx
+ 9    11 xxx
+10    10 xxxx
+11    11 xxxx
+12    10 xxxxx
+13    11 xxxxx
+
+14    10 yy zzzz
+15    11 yy zzzz
+16    10 yyy zzzz
+17    11 yyy zzzz
+...
+62    10 yyyyyyyyyyyyyyyyyyyyyyyyyy zzzz
+63    11 yyyyyyyyyyyyyyyyyyyyyyyyyy zzzz
+
+where 
+  "x ... x" means the sequence of binary symbols encoded with binary tree and 
+      "Reverse" scheme. It uses a separate binary tree for each posSlot from 4 to 13.
+  "y" means direct bit encoded with range coder.
+  "zzzz" means the sequence of four binary symbols encoded with binary
+      tree with "Reverse" scheme, where one common binary tree "AlignDecoder"
+      is used for all posSlot values.
+
+If (posSlot < 4), the "dist" value is equal to posSlot value.
+
+If (posSlot >= 4), the decoder uses "posSlot" value to calculate the value of
+  the high bits of "dist" value and the number of the low bits.
+
+  If (4 <= posSlot < kEndPosModelIndex), the decoder uses bit tree decoders
+    (one separate bit tree decoder per posSlot value) and "Reverse" scheme.
+    In this implementation we use one CProb array "PosDecoders" that contains 
+    all CProb variables for all these bit decoders.
+  
+  if (posSlot >= kEndPosModelIndex), the middle bits are decoded as direct 
+    bits from RangeDecoder and the low 4 bits are decoded with a bit tree 
+    decoder "AlignDecoder" with "Reverse" scheme.
+
+The code to decode zero-based match distance:
+  
+  unsigned DecodeDistance(unsigned len)
+  {
+    unsigned lenState = len;
+    if (lenState > kNumLenToPosStates - 1)
+      lenState = kNumLenToPosStates - 1;
+    
+    unsigned posSlot = PosSlotDecoder[lenState].Decode(&RangeDec);
+    if (posSlot < 4)
+      return posSlot;
+    
+    unsigned numDirectBits = (unsigned)((posSlot >> 1) - 1);
+    UInt32 dist = ((2 | (posSlot & 1)) << numDirectBits);
+    if (posSlot < kEndPosModelIndex)
+      dist += BitTreeReverseDecode(PosDecoders + dist - posSlot, numDirectBits, &RangeDec);
+    else
+    {
+      dist += RangeDec.DecodeDirectBits(numDirectBits - kNumAlignBits) << kNumAlignBits;
+      dist += AlignDecoder.ReverseDecode(&RangeDec);
+    }
+    return dist;
+  }
+
+
+
+LZMA Decoding modes
+-------------------
+
+There are 2 types of LZMA streams:
+
+1) The stream with "End of stream" marker.
+2) The stream without "End of stream" marker.
+
+And the LZMA Decoder supports 3 modes of decoding:
+
+1) The unpack size is undefined. The LZMA decoder stops decoding after 
+   getting "End of stream" marker. 
+   The input variables for that case:
+    
+      markerIsMandatory = true
+      unpackSizeDefined = false
+      unpackSize contains any value
+
+2) The unpack size is defined and LZMA decoder supports both variants, 
+   where the stream can contain "End of stream" marker or the stream is
+   finished without "End of stream" marker. The LZMA decoder must detect 
+   any of these situations.
+   The input variables for that case:
+    
+      markerIsMandatory = false
+      unpackSizeDefined = true
+      unpackSize contains unpack size
+
+3) The unpack size is defined and the LZMA stream must contain 
+   "End of stream" marker
+   The input variables for that case:
+    
+      markerIsMandatory = true
+      unpackSizeDefined = true
+      unpackSize contains unpack size
+
+
+The main loop of decoder
+------------------------
+
+The main loop of LZMA decoder:
+
+Initialize the LZMA state.
+loop
+{
+  // begin of loop
+  Check "end of stream" conditions.
+  Decode Type of MATCH / LITERAL. 
+    If it's LITERAL, decode LITERAL value and put the LITERAL to Window.
+    If it's MATCH, decode the length of match and the match distance. 
+        Check error conditions, check end of stream conditions and copy
+        the sequence of match bytes from sliding window to current position
+        in window.
+  Go to begin of loop
+}
+
+The reference implementation of LZMA decoder uses "unpackSize" variable
+to keep the number of remaining bytes in output stream. So it reduces 
+"unpackSize" value after each decoded LITERAL or MATCH.
+
+The following code contains the "end of stream" condition check at the start
+of the loop:
+
+    if (unpackSizeDefined && unpackSize == 0 && !markerIsMandatory)
+      if (RangeDec.IsFinishedOK())
+        return LZMA_RES_FINISHED_WITHOUT_MARKER;
+
+LZMA uses three types of matches:
+
+1) "Simple Match" -     the match with distance value encoded with bit models.
+
+2) "Rep Match" -        the match that uses the distance from distance
+                        history table.
+
+3) "Short Rep Match" -  the match of single byte length, that uses the latest 
+                        distance from distance history table.
+
+The LZMA decoder keeps the history of latest 4 match distances that were used 
+by decoder. That set of 4 variables contains zero-based match distances and 
+these variables are initialized with zero values:
+
+  UInt32 rep0 = 0, rep1 = 0, rep2 = 0, rep3 = 0;
+
+The LZMA decoder uses binary model variables to select type of MATCH or LITERAL:
+
+#define kNumStates 12
+#define kNumPosBitsMax 4
+
+  CProb IsMatch[kNumStates << kNumPosBitsMax];
+  CProb IsRep[kNumStates];
+  CProb IsRepG0[kNumStates];
+  CProb IsRepG1[kNumStates];
+  CProb IsRepG2[kNumStates];
+  CProb IsRep0Long[kNumStates << kNumPosBitsMax];
+
+The decoder uses "state" variable value to select exact variable 
+from "IsRep", "IsRepG0", "IsRepG1" and "IsRepG2" arrays.
+The "state" variable can get the value from 0 to 11.
+Initial value for "state" variable is zero:
+
+  unsigned state = 0;
+
+The "state" variable is updated after each LITERAL or MATCH with one of the
+following functions:
+
+unsigned UpdateState_Literal(unsigned state)
+{
+  if (state < 4) return 0;
+  else if (state < 10) return state - 3;
+  else return state - 6;
+}
+unsigned UpdateState_Match   (unsigned state) { return state < 7 ? 7 : 10; }
+unsigned UpdateState_Rep     (unsigned state) { return state < 7 ? 8 : 11; }
+unsigned UpdateState_ShortRep(unsigned state) { return state < 7 ? 9 : 11; }
+
+The decoder calculates "state2" variable value to select exact variable from 
+"IsMatch" and "IsRep0Long" arrays:
+
+unsigned posState = OutWindow.TotalPos & ((1 << pb) - 1);
+unsigned state2 = (state << kNumPosBitsMax) + posState;
+
+The decoder uses the following code flow scheme to select exact 
+type of LITERAL or MATCH:
+
+IsMatch[state2] decode
+  0 - the Literal
+  1 - the Match
+    IsRep[state] decode
+      0 - Simple Match
+      1 - Rep Match
+        IsRepG0[state] decode
+          0 - the distance is rep0
+            IsRep0Long[state2] decode
+              0 - Short Rep Match
+              1 - Rep Match 0
+          1 - 
+            IsRepG1[state] decode
+              0 - Rep Match 1
+              1 - 
+                IsRepG2[state] decode
+                  0 - Rep Match 2
+                  1 - Rep Match 3
+
+
+LITERAL symbol
+--------------
+If the value "0" was decoded with IsMatch[state2] decoding, we have "LITERAL" type.
+
+At first the LZMA decoder must check that it doesn't exceed 
+specified uncompressed size:
+
+      if (unpackSizeDefined && unpackSize == 0)
+        return LZMA_RES_ERROR;
+
+Then it decodes literal value and puts it to sliding window:
+
+      DecodeLiteral(state, rep0);
+
+Then the decoder must update the "state" value and "unpackSize" value:
+
+      state = UpdateState_Literal(state);
+      unpackSize--;
+
+Then the decoder must go to the beginning of the main loop to decode the next Match or Literal.
+
+
+Simple Match
+------------
+
+If the value "1" was decoded with IsMatch[state2] decoding,
+we have the "Simple Match" type.
+
+The distance history table is updated with the following scheme:
+    
+      rep3 = rep2;
+      rep2 = rep1;
+      rep1 = rep0;
+
+The zero-based length is decoded with "LenDecoder":
+
+      len = LenDecoder.Decode(&RangeDec, posState);
+
+The state is updated with the UpdateState_Match function:
+
+      state = UpdateState_Match(state);
+
+and the new "rep0" value is decoded with DecodeDistance:
+
+      rep0 = DecodeDistance(len);
+
+That "rep0" will be used as zero-based distance for current match.
+
+If the value of "rep0" is equal to 0xFFFFFFFF, it means that we have 
+"End of stream" marker, so we can stop decoding and check finishing 
+condition in Range Decoder:
+
+      if (rep0 == 0xFFFFFFFF)
+        return RangeDec.IsFinishedOK() ?
+            LZMA_RES_FINISHED_WITH_MARKER :
+            LZMA_RES_ERROR;
+
+If uncompressed size is defined, LZMA decoder must check that it doesn't 
+exceed that specified uncompressed size:
+
+      if (unpackSizeDefined && unpackSize == 0)
+        return LZMA_RES_ERROR;
+
+Also the decoder must check that "rep0" value is not larger than dictionary size
+and is not larger than the number of already decoded bytes:
+
+      if (rep0 >= dictSize || !OutWindow.CheckDistance(rep0))
+        return LZMA_RES_ERROR;
+
+Then the decoder must copy match bytes as described in 
+"The match symbols copying" section.
+
+
+Rep Match
+---------
+
+If the LZMA decoder has decoded the value "1" with IsRep[state] variable,
+we have "Rep Match" type.
+
+At first the LZMA decoder must check that it doesn't exceed 
+specified uncompressed size:
+
+      if (unpackSizeDefined && unpackSize == 0)
+        return LZMA_RES_ERROR;
+
+Also the decoder must return error, if the LZ window is empty:
+
+      if (OutWindow.IsEmpty())
+        return LZMA_RES_ERROR;
+
+If the match type is "Rep Match", the decoder uses one of the 4 variables of
+distance history table to get the value of distance for current match.
+And there are 4 corresponding ways of decoding flow. 
+
+The decoder updates the distance history with the following scheme, 
+depending on the type of match:
+
+- "Rep Match 0" or "Short Rep Match":
+      ; LZMA doesn't update the distance history    
+
+- "Rep Match 1":
+      UInt32 dist = rep1;
+      rep1 = rep0;
+      rep0 = dist;
+
+- "Rep Match 2":
+      UInt32 dist = rep2;
+      rep2 = rep1;
+      rep1 = rep0;
+      rep0 = dist;
+
+- "Rep Match 3":
+      UInt32 dist = rep3;
+      rep3 = rep2;
+      rep2 = rep1;
+      rep1 = rep0;
+      rep0 = dist;
+
+Then the decoder decodes exact subtype of "Rep Match" using "IsRepG0", "IsRep0Long",
+"IsRepG1", "IsRepG2".
+
+If the subtype is "Short Rep Match", the decoder updates the state, copies 
+one byte from the window to the current position in the window and goes to the next 
+MATCH/LITERAL symbol (the beginning of the main loop):
+
+          state = UpdateState_ShortRep(state);
+          OutWindow.PutByte(OutWindow.GetByte(rep0 + 1));
+          unpackSize--;
+          continue;
+
+In other cases (Rep Match 0/1/2/3), it decodes the zero-based 
+length of match with "RepLenDecoder" decoder:
+
+      len = RepLenDecoder.Decode(&RangeDec, posState);
+
+Then it updates the state:
+
+      state = UpdateState_Rep(state);
+
+Then the decoder must copy match bytes as described in 
+"The match symbols copying" section.
+
+
+The match symbols copying
+-------------------------
+
+If we have the match (Simple Match or Rep Match 0/1/2/3), the decoder must
+copy the sequence of bytes with calculated match distance and match length.
+If uncompressed size is defined, LZMA decoder must check that it doesn't 
+exceed that specified uncompressed size:
+
+    len += kMatchMinLen;
+    bool isError = false;
+    if (unpackSizeDefined && unpackSize < len)
+    {
+      len = (unsigned)unpackSize;
+      isError = true;
+    }
+    OutWindow.CopyMatch(rep0 + 1, len);
+    unpackSize -= len;
+    if (isError)
+      return LZMA_RES_ERROR;
+
+Then the decoder must go to the beginning of the main loop to decode the next MATCH or LITERAL.
+
+
+
+NOTES
+-----
+
+This specification doesn't describe the variant of decoder implementation 
+that supports partial decoding. Such partial decoding case can require some 
+changes in "end of stream" condition checks code. Also such code 
+can use additional status codes, returned by decoder.
+
+This specification uses C++ code with templates to simplify the description.
+The optimized version of LZMA decoder doesn't need templates.
+Such optimized version can use just two arrays of CProb variables:
+  1) The dynamic array of CProb variables allocated for the Literal Decoder.
+  2) The one common array that contains all other CProb variables.
+
+
+References:      
+
+1. G. N. N. Martin, Range encoding: an algorithm for removing redundancy 
+   from a digitized message, Video & Data Recording Conference, 
+   Southampton, UK, July 24-27, 1979.
diff --git a/deps/LZMA-SDK/DOC/lzma.txt b/deps/LZMA-SDK/DOC/lzma.txt
new file mode 100644
index 000000000..1f92142ea
--- /dev/null
+++ b/deps/LZMA-SDK/DOC/lzma.txt
@@ -0,0 +1,328 @@
+LZMA compression
+----------------
+Version: 9.35
+
+This file describes LZMA encoding and decoding functions written in C language.
+
+LZMA is an improved version of the famous LZ77 compression algorithm. 
+It was improved to maximize the compression ratio while
+keeping high decompression speed and low memory requirements for 
+decompressing.
+
+Note: you can also read the LZMA Specification (lzma-specification.txt from LZMA SDK)
+
+You can also look at the source code for LZMA encoding and decoding:
+  C/Util/Lzma/LzmaUtil.c
+
+
+LZMA compressed file format
+---------------------------
+Offset Size Description
+  0     1   Special LZMA properties (lc, lp, pb in encoded form)
+  1     4   Dictionary size (little endian)
+  5     8   Uncompressed size (little endian). -1 means unknown size
+ 13         Compressed data
+
+
+
+ANSI-C LZMA Decoder
+~~~~~~~~~~~~~~~~~~~
+
+Please note that interfaces for ANSI-C code were changed in LZMA SDK 4.58.
+If you want to use old interfaces you can download previous version of LZMA SDK
+from sourceforge.net site.
+
+To use ANSI-C LZMA Decoder you need the following files:
+1) LzmaDec.h + LzmaDec.c + 7zTypes.h + Precomp.h + Compiler.h
+
+See the example code:
+  C/Util/Lzma/LzmaUtil.c
+
+
+Memory requirements for LZMA decoding
+-------------------------------------
+
+Stack usage of LZMA decoding function for local variables is not 
+larger than 200-400 bytes.
+
+LZMA Decoder uses dictionary buffer and internal state structure.
+Internal state structure consumes
+  state_size = (4 + (1.5 << (lc + lp))) KB
+by default (lc=3, lp=0), state_size = 16 KB.
+
+
+How To decompress data
+----------------------
+
+LZMA Decoder (ANSI-C version) now supports 2 interfaces:
+1) Single-call Decompressing
+2) Multi-call State Decompressing (zlib-like interface)
+
+You must use external allocator:
+Example:
+void *SzAlloc(void *p, size_t size) { p = p; return malloc(size); }
+void SzFree(void *p, void *address) { p = p; free(address); }
+ISzAlloc alloc = { SzAlloc, SzFree };
+
+You can use the "p = p;" statement to disable compiler warnings about the unused parameter.
+
+
+Single-call Decompressing
+-------------------------
+When to use: RAM->RAM decompressing
+Compile files: LzmaDec.h + LzmaDec.c + 7zTypes.h
+Compile defines: no defines
+Memory Requirements:
+  - Input buffer: compressed size
+  - Output buffer: uncompressed size
+  - LZMA Internal Structures: state_size (16 KB for default settings) 
+
+Interface:
+  int LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
+      const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode, 
+      ELzmaStatus *status, ISzAlloc *alloc);
+  In: 
+    dest     - output data
+    destLen  - output data size
+    src      - input data
+    srcLen   - input data size
+    propData - LZMA properties  (5 bytes)
+    propSize - size of propData buffer (5 bytes)
+    finishMode - It has meaning only if the decoding reaches output limit (*destLen).
+         LZMA_FINISH_ANY - Decode just destLen bytes.
+         LZMA_FINISH_END - Stream must be finished after (*destLen).
+                           You can use LZMA_FINISH_END, when you know that 
+                           current output buffer covers last bytes of stream. 
+    alloc    - Memory allocator.
+
+  Out: 
+    destLen  - processed output size 
+    srcLen   - processed input size 
+
+  Output:
+    SZ_OK
+      status:
+        LZMA_STATUS_FINISHED_WITH_MARK
+        LZMA_STATUS_NOT_FINISHED 
+        LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
+    SZ_ERROR_DATA - Data error
+    SZ_ERROR_MEM  - Memory allocation error
+    SZ_ERROR_UNSUPPORTED - Unsupported properties
+    SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src).
+
+  If LZMA decoder sees end_marker before reaching output limit, it returns OK result,
+  and output value of destLen will be less than output buffer size limit.
+
+  You can use multiple checks to test data integrity after full decompression:
+    1) Check Result and "status" variable.
+    2) Check that output(destLen) = uncompressedSize, if you know real uncompressedSize.
+    3) Check that output(srcLen) = compressedSize, if you know real compressedSize. 
+       You must use the correct finish mode in that case.
+
+
+Multi-call State Decompressing (zlib-like interface)
+----------------------------------------------------
+
+When to use: file->file decompressing 
+Compile files: LzmaDec.h + LzmaDec.c + 7zTypes.h
+
+Memory Requirements:
+ - Buffer for input stream: any size (for example, 16 KB)
+ - Buffer for output stream: any size (for example, 16 KB)
+ - LZMA Internal Structures: state_size (16 KB for default settings) 
+ - LZMA dictionary (dictionary size is encoded in LZMA properties header)
+
+1) read LZMA properties (5 bytes) and uncompressed size (8 bytes, little-endian) to header:
+   unsigned char header[LZMA_PROPS_SIZE + 8];
+   ReadFile(inFile, header, sizeof(header));
+
+2) Allocate CLzmaDec structures (state + dictionary) using LZMA properties
+
+  CLzmaDec state;
+  LzmaDec_Constr(&state);
+  res = LzmaDec_Allocate(&state, header, LZMA_PROPS_SIZE, &g_Alloc);
+  if (res != SZ_OK)
+    return res;
+
+3) Init LzmaDec structure before any new LZMA stream. And call LzmaDec_DecodeToBuf in loop
+
+  LzmaDec_Init(&state);
+  for (;;)
+  {
+    ... 
+    int res = LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, 
+        const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode);
+    ...
+  }
+
+
+4) Free all allocated structures
+  LzmaDec_Free(&state, &g_Alloc);
+
+See the example code:
+  C/Util/Lzma/LzmaUtil.c
+
+
+How To compress data
+--------------------
+
+Compile files: 
+  7zTypes.h
+  Threads.h	
+  LzmaEnc.h
+  LzmaEnc.c
+  LzFind.h
+  LzFind.c
+  LzFindMt.h
+  LzFindMt.c
+  LzHash.h
+
+Memory Requirements:
+  - (dictSize * 11.5 + 6 MB) + state_size
+
+Lzma Encoder can use two memory allocators:
+1) alloc - for small arrays.
+2) allocBig - for big arrays.
+
+For example, you can use Large RAM Pages (2 MB) in allocBig allocator for 
+better compression speed. Note that Windows has bad implementation for 
+Large RAM Pages. 
+It's OK to use same allocator for alloc and allocBig.
+
+
+Single-call Compression with callbacks
+--------------------------------------
+
+See the example code:
+  C/Util/Lzma/LzmaUtil.c
+
+When to use: file->file compressing 
+
+1) you must implement callback structures for interfaces:
+ISeqInStream
+ISeqOutStream
+ICompressProgress
+ISzAlloc
+
+static void *SzAlloc(void *p, size_t size) { p = p; return MyAlloc(size); }
+static void SzFree(void *p, void *address) {  p = p; MyFree(address); }
+static ISzAlloc g_Alloc = { SzAlloc, SzFree };
+
+  CFileSeqInStream inStream;
+  CFileSeqOutStream outStream;
+
+  inStream.funcTable.Read = MyRead;
+  inStream.file = inFile;
+  outStream.funcTable.Write = MyWrite;
+  outStream.file = outFile;
+
+
+2) Create CLzmaEncHandle object;
+
+  CLzmaEncHandle enc;
+
+  enc = LzmaEnc_Create(&g_Alloc);
+  if (enc == 0)
+    return SZ_ERROR_MEM;
+
+
+3) initialize CLzmaEncProps properties;
+
+  LzmaEncProps_Init(&props);
+
+  Then you can change some properties in that structure.
+
+4) Send LZMA properties to LZMA Encoder
+
+  res = LzmaEnc_SetProps(enc, &props);
+
+5) Write encoded properties to header
+
+    Byte header[LZMA_PROPS_SIZE + 8];
+    size_t headerSize = LZMA_PROPS_SIZE;
+    UInt64 fileSize;
+    int i;
+
+    res = LzmaEnc_WriteProperties(enc, header, &headerSize);
+    fileSize = MyGetFileLength(inFile);
+    for (i = 0; i < 8; i++)
+      header[headerSize++] = (Byte)(fileSize >> (8 * i));
+    MyWriteFileAndCheck(outFile, header, headerSize);
+
+6) Call encoding function:
+      res = LzmaEnc_Encode(enc, &outStream.funcTable, &inStream.funcTable, 
+        NULL, &g_Alloc, &g_Alloc);
+
+7) Destroy LZMA Encoder Object
+  LzmaEnc_Destroy(enc, &g_Alloc, &g_Alloc);
+
+
+If a callback function returns an error code, LzmaEnc_Encode also returns that code,
+or it can return a code like SZ_ERROR_READ, SZ_ERROR_WRITE or SZ_ERROR_PROGRESS.
+
+
+Single-call RAM->RAM Compression
+--------------------------------
+
+Single-call RAM->RAM Compression is similar to Compression with callbacks,
+but you provide pointers to buffers instead of pointers to stream callbacks:
+
+SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
+    const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark, 
+    ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig);
+
+Return code:
+  SZ_OK               - OK
+  SZ_ERROR_MEM        - Memory allocation error 
+  SZ_ERROR_PARAM      - Incorrect parameter
+  SZ_ERROR_OUTPUT_EOF - output buffer overflow
+  SZ_ERROR_THREAD     - errors in multithreading functions (only for Mt version)
+
+
+
+Defines
+-------
+
+_LZMA_SIZE_OPT - Enable some optimizations in LZMA Decoder to get smaller executable code.
+
+_LZMA_PROB32   - It can increase the speed on some 32-bit CPUs, but memory usage for 
+                 some structures will be doubled in that case.
+
+_LZMA_UINT32_IS_ULONG  - Define it if int is 16-bit on your compiler and long is 32-bit.
+
+_LZMA_NO_SYSTEM_SIZE_T  - Define it if you don't want to use size_t type.
+
+
+_7ZIP_PPMD_SUPPPORT - Define it if you don't want to support the PPMD method in the ANSI-C .7z decoder.
+
+
+C++ LZMA Encoder/Decoder 
+~~~~~~~~~~~~~~~~~~~~~~~~
+C++ LZMA code uses COM-like interfaces. So if you want to use it, 
+you can study basics of COM/OLE.
+C++ LZMA code is just wrapper over ANSI-C code.
+
+
+C++ Notes
+~~~~~~~~~~~~~~~~~~~~~~~~
+If you use some C++ code folders in 7-Zip (for example, C++ code for .7z handling),
+you must check that you correctly work with "new" operator.
+7-Zip can be compiled with MSVC 6.0 that doesn't throw "exception" from "new" operator.
+So 7-Zip uses "CPP\Common\NewHandler.cpp" that redefines "new" operator:
+operator new(size_t size)
+{
+  void *p = ::malloc(size);
+  if (p == 0)
+    throw CNewException();
+  return p;
+}
+If you use MSVC that throws exception for "new" operator, you can compile without 
+"NewHandler.cpp". So standard exception will be used. Actually some code of 
+7-Zip catches any exception in internal code and converts it to HRESULT code.
+So you don't need to catch CNewException, if you call COM interfaces of 7-Zip.
+
+---
+
+http://www.7-zip.org
+http://www.7-zip.org/sdk.html
+http://www.7-zip.org/support.html
diff --git a/docs/changes.txt b/docs/changes.txt
index ca08b5d9a..883677ba5 100644
--- a/docs/changes.txt
+++ b/docs/changes.txt
@@ -1,3 +1,7 @@
+* changes v6.2.1 -> v6.x.x
+
+- Dependencies: Updated LZMA SDK from 19.00 to 21.02 alpha
+
 * changes v6.2.0 -> v6.2.1
 
 ##
diff --git a/docs/license_libs/LZMA_SDK_LICENSE.txt b/docs/license_libs/LZMA_SDK_LICENSE.txt
index 4ffb20350..3d1898207 100644
--- a/docs/license_libs/LZMA_SDK_LICENSE.txt
+++ b/docs/license_libs/LZMA_SDK_LICENSE.txt
@@ -1,4 +1,4 @@
-Copyright (c) 1999-2018 Igor Pavlov
+Copyright (c) 1999-2021 Igor Pavlov
 
 LZMA SDK is written and placed in the public domain by Igor Pavlov.
 
diff --git a/include/workarounds/Windows.h b/include/workarounds/Windows.h
new file mode 100644
index 000000000..74fb132e5
--- /dev/null
+++ b/include/workarounds/Windows.h
@@ -0,0 +1,15 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+#ifndef _WINDOWS_H
+#define _WINDOWS_H
+
+// This is a workaround for files asking to include Windows.h instead of windows.h
+// The problem is that MinGW provides only windows.h
+// LZMA SDK will fail to cross compile for Windows on Linux
+
+#include <windows.h>
+
+#endif // _WINDOWS_H
diff --git a/src/Makefile b/src/Makefile
index 5ddb0d5e2..2f9f07432 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -253,6 +253,11 @@ ifeq ($(USE_SYSTEM_LZMA),1)
 LFLAGS                  += -llzmasdk
 endif
 
+## LZMA workaround for MSYS2
+ifeq ($(USE_SYSTEM_LZMA),0)
+CFLAGS                  += -Iinclude/workarounds
+endif
+
 # ZLIB
 CFLAGS                  += -I$(DEPS_ZLIB_PATH) -I$(DEPS_ZLIB_PATH)/contrib
 ifeq ($(USE_SYSTEM_ZLIB),1)