1
0
mirror of https://github.com/hashcat/hashcat.git synced 2024-11-05 23:10:00 +00:00
hashcat/deps/LZMA-SDK/C/Ppmd7.c

1105 lines
28 KiB
C

/* Ppmd7.c -- PPMdH codec
2021-04-13 : Igor Pavlov : Public domain
This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
#include "Precomp.h"
#include <string.h>
#include "Ppmd7.h"
/* define PPMD7_ORDER_0_SUPPPORT to support order-0 mode, which is unsupported by the original PPMd var.H code */
// #define PPMD7_ORDER_0_SUPPPORT
/* 16-entry table that Ppmd7_Construct() copies into CPpmd7::ExpEscape. */
MY_ALIGN(16)
static const Byte PPMD7_kExpEscape[16] = { 25, 14, 9, 7, 5, 5, 4, 4, 4, 3, 3, 3, 2, 2, 2, 2 };
/* Seeds for BinSumm[][]: RestartModel() stores
(PPMD_BIN_SCALE - kInitBinEsc[k] / (i + 2)) into row (i), columns k, k + 8, k + 16, ... */
MY_ALIGN(16)
static const UInt16 kInitBinEsc[] = { 0x3CDD, 0x1F3F, 0x59BF, 0x48F3, 0x64A1, 0x5ABC, 0x6632, 0x6051};
/* Frequency limit: the Ppmd7_Update*() functions call Rescale() when a symbol Freq exceeds it. */
#define MAX_FREQ 124
/* Size in bytes of one allocation UNIT. */
#define UNIT_SIZE 12
/* U2B: number of UNITs -> number of bytes. */
#define U2B(nu) ((UInt32)(nu) * UNIT_SIZE)
/* The macros below that mention (p) expect a (CPpmd7 *p) variable in the enclosing scope. */
/* U2I: number of UNITs -> free-list index (table filled by Ppmd7_Construct()). */
#define U2I(nu) (p->Units2Indx[(size_t)(nu) - 1])
/* I2U: free-list index -> number of UNITs in the blocks of that list. */
#define I2U(indx) ((unsigned)p->Indx2Units[indx])
/* Same as I2U, but the result is typed as UInt16. */
#define I2U_UInt16(indx) ((UInt16)p->Indx2Units[indx])
/* REF: pointer -> reference, via Ppmd_GetRef(). */
#define REF(ptr) Ppmd_GetRef(p, ptr)
/* STATS_REF: pointer to a CPpmd_State vector -> CPpmd_State_Ref. */
#define STATS_REF(ptr) ((CPpmd_State_Ref)REF(ptr))
/* CTX: context reference -> CPpmd7_Context pointer. */
#define CTX(ref) ((CPpmd7_Context *)Ppmd7_GetContext(p, ref))
/* STATS: CPpmd_State vector of a multi-symbol context. */
#define STATS(ctx) Ppmd7_GetStats(p, ctx)
/* ONE_STATE: the single CPpmd_State of a one-symbol context (NumStats == 1). */
#define ONE_STATE(ctx) Ppmd7Context_OneState(ctx)
/* SUFFIX: context that (ctx)->Suffix refers to. */
#define SUFFIX(ctx) CTX((ctx)->Suffix)
typedef CPpmd7_Context * CTX_PTR;
/* Record that GlueFreeBlocks() overlays on free UNIT blocks while it merges them. */
struct CPpmd7_Node_;
typedef Ppmd_Ref_Type(struct CPpmd7_Node_) CPpmd7_Node_Ref;
typedef struct CPpmd7_Node_
{
UInt16 Stamp; /* must be at offset 0 as CPpmd7_Context::NumStats. Stamp=0 means free */
/* Size of the free block in UNITs. GlueFreeBlocks() sets it to 0 for a block merged into the preceding block. */
UInt16 NU;
CPpmd7_Node_Ref Next; /* must be at offset >= 4 */
/* NOTE(review): Prev is not referenced by the code visible in this file. */
CPpmd7_Node_Ref Prev;
} CPpmd7_Node;
/* NODE(r): node reference -> CPpmd7_Node pointer (expects (CPpmd7 *p) in scope). */
#define NODE(r) Ppmd_GetPtr_Type(p, r, CPpmd7_Node)
/*
Ppmd7_Construct()
  Fills the constant lookup tables kept inside (p) and marks the model
  as having no memory block (Base = NULL).
  No memory is allocated here.
*/
void Ppmd7_Construct(CPpmd7 *p)
{
  unsigned idx, pos, level, run;

  p->Base = NULL;

  /* Units2Indx[] / Indx2Units[]: mapping between block size in UNITs and free-list index.
     The step between consecutive sizes is 1, 2, 3 and then 4 UNITs. */
  pos = 0;
  for (idx = 0; idx < PPMD_NUM_INDEXES; idx++)
  {
    unsigned count = (idx >= 12) ? 4 : (idx >> 2) + 1;
    while (count != 0)
    {
      p->Units2Indx[pos] = (Byte)idx;
      pos++;
      count--;
    }
    p->Indx2Units[idx] = (Byte)pos;
  }

  /* NS2BSIndx[]: values 0, 2, 4 (for 9 entries) and 6 (for the rest) */
  p->NS2BSIndx[0] = (0 << 1);
  p->NS2BSIndx[1] = (1 << 1);
  memset(p->NS2BSIndx + 2, (2 << 1), 9);
  memset(p->NS2BSIndx + 11, (3 << 1), 256 - 11);

  /* NS2Indx[]: identity for the first 3 entries, then runs of growing length */
  for (idx = 0; idx < 3; idx++)
    p->NS2Indx[idx] = (Byte)idx;

  level = idx;
  run = 1;
  for (; idx < 256; idx++)
  {
    p->NS2Indx[idx] = (Byte)level;
    run--;
    if (run == 0)
    {
      level++;
      run = level - 2;
    }
  }

  memcpy(p->ExpEscape, PPMD7_kExpEscape, 16);
}
/*
Ppmd7_Free()
  Returns the model memory block to (alloc) and leaves (p) in the
  "no memory" state: Base = NULL, Size = 0.
*/
void Ppmd7_Free(CPpmd7 *p, ISzAllocPtr alloc)
{
  Byte *block = p->Base;

  ISzAlloc_Free(alloc, block);
  p->Base = NULL;
  p->Size = 0;
}
/*
Ppmd7_Alloc()
  Makes sure that (p) owns a memory block for a model of (size) bytes.

  p     : model object
  size  : requested model memory size in bytes
  alloc : allocator used to free the old block and to allocate the new one

  Returns True, if the block of requested size is available
  (an existing block of the same size is reused as is).
  Returns False, if allocation failed or if (size) is too large.
  After a False result (p) has no memory block: Base = NULL, Size = 0.
*/
BoolInt Ppmd7_Alloc(CPpmd7 *p, UInt32 size, ISzAllocPtr alloc)
{
  /* The current block already has the requested size: nothing to do */
  if (p->Base && p->Size == size)
    return True;

  Ppmd7_Free(p, alloc);

  /* (AlignOffset + size) is a multiple of 4 */
  p->AlignOffset = (4 - size) & 3;

  /* (AlignOffset + size) is calculated in UInt32. If it wraps around, the allocated
     block would be smaller than (size), while p->Size would still report (size).
     So we reject such sizes instead of allocating a truncated block. */
  if (size > (UInt32)0xFFFFFFFF - p->AlignOffset)
    return False;

  p->Base = (Byte *)ISzAlloc_Alloc(alloc, p->AlignOffset + size);
  if (p->Base == NULL)
    return False;

  p->Size = size;
  return True;
}
// ---------- Internal Memory Allocator ----------
/* We can use CPpmd7_Node in list of free units (as in Ppmd8)
But we still need one additional list walk pass in GlueFreeBlocks().
So we use simple CPpmd_Void_Ref instead of CPpmd7_Node in InsertNode() / RemoveNode()
*/
/* Stamp value that GlueFreeBlocks() writes to CPpmd7_Node::Stamp of each free block. */
#define EMPTY_NODE 0
/*
InsertNode()
  Pushes block (node) to the head of FreeList[indx].
  The reference to the previous head of the list is written to the
  first bytes of the block.
*/
static void InsertNode(CPpmd7 *p, void *node, unsigned indx)
{
  CPpmd_Void_Ref *link = (CPpmd_Void_Ref *)node;

  *link = p->FreeList[indx];
  p->FreeList[indx] = REF(node);
}
/*
RemoveNode()
  Pops the head block from FreeList[indx] and returns the pointer to it.
  The function doesn't check for an empty list: the visible callers
  check (FreeList[indx] != 0) before the call.
*/
static void *RemoveNode(CPpmd7 *p, unsigned indx)
{
  CPpmd_Void_Ref *head = (CPpmd_Void_Ref *)Ppmd7_GetPtr(p, p->FreeList[indx]);
  const CPpmd_Void_Ref nextRef = *head;

  p->FreeList[indx] = nextRef;
  return head;
}
/*
SplitBlock()
  The block (ptr) has the size of list (oldIndx). The caller keeps the first
  I2U(newIndx) UNITs of it. The remaining tail is returned to the free lists:
  as a single block, if there is a list with exactly that size,
  or as two blocks otherwise.
*/
static void SplitBlock(CPpmd7 *p, void *ptr, unsigned oldIndx, unsigned newIndx)
{
  const unsigned keepNU = I2U(newIndx);
  const unsigned tailNU = I2U(oldIndx) - keepNU;
  Byte *tail = (Byte *)ptr + U2B(keepNU);
  unsigned tailIndx = U2I(tailNU);

  if (I2U(tailIndx) != tailNU)
  {
    /* There is no list for (tailNU) UNITs: we cut the tail into a block
       of the nearest smaller list size and a small remainder */
    unsigned firstNU;
    tailIndx--;
    firstNU = I2U(tailIndx);
    InsertNode(p, tail + U2B(firstNU), tailNU - firstNU - 1);
  }

  InsertNode(p, tail, tailIndx);
}
/* we use CPpmd7_Node_Union union to solve XLC -O2 strict pointer aliasing problem.
NextRef overlays the first bytes of Node: that is the free-list link written by InsertNode().
GlueFreeBlocks() reads NextRef before it writes the Node fields. */
typedef union _CPpmd7_Node_Union
{
CPpmd7_Node Node;
CPpmd7_Node_Ref NextRef;
} CPpmd7_Node_Union;
/* Original PPmdH (Ppmd7) code uses doubly linked list in GlueFreeBlocks()
we use single linked list similar to Ppmd8 code */
/*
GlueFreeBlocks()
Merges adjacent free blocks and rebuilds all free lists. It works in 3 passes:
1) all blocks from all FreeList[] lists are moved to one single linked list,
2) each block absorbs the free blocks that directly follow it in memory,
3) the resulting blocks are inserted back to FreeList[] lists.
It also sets (GlueCount = 255).
*/
static void GlueFreeBlocks(CPpmd7 *p)
{
/*
we use first UInt16 field of 12-bytes UNITs as record type stamp
CPpmd_State { Byte Symbol; Byte Freq; : Freq != 0
CPpmd7_Context { UInt16 NumStats; : NumStats != 0
CPpmd7_Node { UInt16 Stamp : Stamp == 0 for free record
: Stamp == 1 for head record and guard
Last 12-bytes UNIT in array always contains 12-bytes order-0 CPpmd7_Context record.
*/
CPpmd7_Node_Ref head, n = 0;
p->GlueCount = 255;
/* we set guard NODE at LoUnit */
/* the nonzero Stamp stops the merge loop below at the border of the free area [LoUnit, HiUnit) */
if (p->LoUnit != p->HiUnit)
((CPpmd7_Node *)(void *)p->LoUnit)->Stamp = 1;
{
/* Create list of free blocks.
We still need one additional list walk pass before Glue. */
unsigned i;
for (i = 0; i < PPMD_NUM_INDEXES; i++)
{
const UInt16 nu = I2U_UInt16(i);
CPpmd7_Node_Ref next = (CPpmd7_Node_Ref)p->FreeList[i];
p->FreeList[i] = 0;
while (next != 0)
{
/* Don't change the order of the following commands: */
/* (un->NextRef) shares memory with the first bytes of (un->Node),
so the old link must be read before the Node fields are written */
CPpmd7_Node_Union *un = (CPpmd7_Node_Union *)NODE(next);
const CPpmd7_Node_Ref tmp = next;
next = un->NextRef;
un->Node.Stamp = EMPTY_NODE;
un->Node.NU = nu;
un->Node.Next = n;
n = tmp;
}
}
}
head = n;
/* Glue and Fill must walk the list in same direction */
{
/* Glue free blocks */
/* (prev) points to the link that refers to the current node,
so the nodes with (NU == 0) can be unlinked from the list */
CPpmd7_Node_Ref *prev = &head;
while (n)
{
CPpmd7_Node *node = NODE(n);
UInt32 nu = node->NU;
n = node->Next;
if (nu == 0)
{
/* this block was already merged into a preceding block: remove it from the list */
*prev = n;
continue;
}
prev = &node->Next;
for (;;)
{
/* (node2) is the block that starts right after the end of (node) */
CPpmd7_Node *node2 = node + nu;
nu += node2->NU;
/* stop if (node2) is not a free block, or if the merged size doesn't fit in UInt16 (NU) */
if (node2->Stamp != EMPTY_NODE || nu >= 0x10000)
break;
node->NU = (UInt16)nu;
/* (NU == 0) marks (node2) as merged */
node2->NU = 0;
}
}
}
/* Fill lists of free blocks */
for (n = head; n != 0;)
{
CPpmd7_Node *node = NODE(n);
UInt32 nu = node->NU;
unsigned i;
n = node->Next;
/* skip the blocks that were merged after the Glue pass had already walked past them */
if (nu == 0)
continue;
/* the pieces of 128 UNITs go to the last list */
for (; nu > 128; nu -= 128, node += 128)
InsertNode(p, node, PPMD_NUM_INDEXES - 1);
/* if there is no list for exactly (nu) UNITs, the block is cut in two parts (as in SplitBlock()) */
if (I2U(i = U2I(nu)) != nu)
{
unsigned k = I2U(--i);
InsertNode(p, node + k, (unsigned)nu - k - 1);
}
InsertNode(p, node, i);
}
}
/*
AllocUnitsRare()
  Slow path of allocation for a block of list (indx). It tries in this order:
    - if (GlueCount == 0): GlueFreeBlocks(), and then FreeList[indx]
    - the first non-empty list of larger blocks: the block is split
    - the area between (Text) and (UnitsStart): UnitsStart is moved down, GlueCount--
  Returns NULL, if there is no free memory.
*/
MY_NO_INLINE
static void *AllocUnitsRare(CPpmd7 *p, unsigned indx)
{
  unsigned bigger;

  if (p->GlueCount == 0)
  {
    GlueFreeBlocks(p);
    if (p->FreeList[indx] != 0)
      return RemoveNode(p, indx);
  }

  for (bigger = indx + 1; bigger != PPMD_NUM_INDEXES; bigger++)
  {
    if (p->FreeList[bigger] != 0)
    {
      void *block = RemoveNode(p, bigger);
      SplitBlock(p, block, bigger, indx);
      return block;
    }
  }

  {
    /* All lists of larger blocks are empty */
    const UInt32 numBytes = U2B(I2U(indx));
    Byte *us = p->UnitsStart;
    p->GlueCount--;
    if ((UInt32)(us - p->Text) > numBytes)
    {
      us -= numBytes;
      p->UnitsStart = us;
      return us;
    }
    return NULL;
  }
}
/*
AllocUnits()
  Allocates a block of list (indx). It tries in this order:
    - FreeList[indx]
    - the free area: LoUnit is moved up
    - AllocUnitsRare()
  Returns NULL, if AllocUnitsRare() can't find free memory.
*/
static void *AllocUnits(CPpmd7 *p, unsigned indx)
{
  if (p->FreeList[indx] == 0)
  {
    const UInt32 numBytes = U2B(I2U(indx));
    Byte *block = p->LoUnit;

    if ((UInt32)(p->HiUnit - block) < numBytes)
      return AllocUnitsRare(p, indx);

    p->LoUnit = block + numBytes;
    return block;
  }

  return RemoveNode(p, indx);
}
/*
MyMem12Cpy(dest, src, num)
  Copies (num) records of 12 bytes (3 * UInt32) from (src) to (dest).
  (num) must be nonzero: the loop always copies at least one record.
  The macro is wrapped in do { } while (0) and its arguments are parenthesized,
  so it works as a single statement (also in if / else without braces)
  and it accepts expressions as arguments.
*/
#define MyMem12Cpy(dest, src, num) \
  do { \
    UInt32 *d = (UInt32 *)(dest); \
    const UInt32 *z = (const UInt32 *)(src); \
    UInt32 n = (num); \
    do { d[0] = z[0]; d[1] = z[1]; d[2] = z[2]; z += 3; d += 3; } while (--n); \
  } while (0)
/*
static void *ShrinkUnits(CPpmd7 *p, void *oldPtr, unsigned oldNU, unsigned newNU)
{
unsigned i0 = U2I(oldNU);
unsigned i1 = U2I(newNU);
if (i0 == i1)
return oldPtr;
if (p->FreeList[i1] != 0)
{
void *ptr = RemoveNode(p, i1);
MyMem12Cpy(ptr, oldPtr, newNU);
InsertNode(p, oldPtr, i0);
return ptr;
}
SplitBlock(p, oldPtr, i0, i1);
return oldPtr;
}
*/
/* SUCCESSOR(p): reads the Successor reference of state (p) */
#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p)
/* Writes reference (v) as Successor of state (p).
Note: (p) is a CPpmd_State here, and not the CPpmd7 model as in the other functions. */
static void SetSuccessor(CPpmd_State *p, CPpmd_Void_Ref v)
{
Ppmd_SET_SUCCESSOR(p, v);
}
/*
RestartModel()
  Resets the model to the start state:
    - all free lists are empty,
    - the memory map pointers (Text, UnitsStart, LoUnit, HiUnit) are set again,
    - there is only the root order-0 context with 256 symbols, each with (Freq = 1),
    - BinSumm[][], See[][] and DummySee get initial values.
  It uses (p->MaxOrder), and it writes to the memory block (p->Base).
*/
MY_NO_INLINE
static
void RestartModel(CPpmd7 *p)
{
  unsigned i, k;

  memset(p->FreeList, 0, sizeof(p->FreeList));

  p->Text = p->Base + p->AlignOffset;
  p->HiUnit = p->Text + p->Size;
  p->UnitsStart = p->HiUnit - p->Size / 8 / UNIT_SIZE * 7 * UNIT_SIZE;
  p->LoUnit = p->UnitsStart;
  p->GlueCount = 0;

  p->OrderFall = p->MaxOrder;
  p->InitRL = -(Int32)((p->MaxOrder < 12) ? p->MaxOrder : 12) - 1;
  p->RunLength = p->InitRL;
  p->PrevSuccess = 0;

  {
    CPpmd7_Context *root;
    CPpmd_State *states;

    /* root = AllocContext(p); */
    p->HiUnit -= UNIT_SIZE;
    root = (CTX_PTR)(void *)p->HiUnit;

    /* states = AllocUnits(p, PPMD_NUM_INDEXES - 1); */
    states = (CPpmd_State *)p->LoUnit;
    p->LoUnit += U2B(256 / 2);

    p->MinContext = root;
    p->MaxContext = root;
    p->FoundState = states;

    root->NumStats = 256;
    root->Union2.SummFreq = 256 + 1;
    root->Union4.Stats = REF(states);
    root->Suffix = 0;

    for (i = 0; i < 256; i++)
    {
      CPpmd_State *st = &states[i];
      st->Symbol = (Byte)i;
      st->Freq = 1;
      SetSuccessor(st, 0);
    }

    #ifdef PPMD7_ORDER_0_SUPPPORT
    if (p->MaxOrder == 0)
    {
      /* in order-0 mode each symbol of root context links back to root context */
      const CPpmd_Void_Ref rootRef = REF(root);
      for (i = 0; i < 256; i++)
        SetSuccessor(&states[i], rootRef);
      return;
    }
    #endif
  }

  /* BinSumm[i][k + 8 * m] gets the same value for all (m) */
  for (i = 0; i < 128; i++)
  {
    for (k = 0; k < 8; k++)
    {
      const UInt16 val = (UInt16)(PPMD_BIN_SCALE - kInitBinEsc[k] / (i + 2));
      UInt16 *column = p->BinSumm[i] + k;
      unsigned m;
      for (m = 0; m < 8; m++)
        column[m * 8] = val;
    }
  }

  /* all 16 items of See[i] get the same initial values */
  for (i = 0; i < 25; i++)
  {
    const UInt16 summ = (UInt16)((5 * i + 10) << (PPMD_PERIOD_BITS - 4));
    CPpmd_See *row = p->See[i];
    for (k = 0; k < 16; k++)
    {
      row[k].Summ = summ;
      row[k].Shift = (PPMD_PERIOD_BITS - 4);
      row[k].Count = 4;
    }
  }

  p->DummySee.Summ = 0; /* unused */
  p->DummySee.Shift = PPMD_PERIOD_BITS;
  p->DummySee.Count = 64; /* unused */
}
/*
Ppmd7_Init()
Stores (maxOrder) to (p->MaxOrder) and resets the model with RestartModel().
RestartModel() writes to the memory block (p->Base).
NOTE(review): so the block presumably must be allocated with Ppmd7_Alloc() before this call - confirm against callers.
*/
void Ppmd7_Init(CPpmd7 *p, unsigned maxOrder)
{
p->MaxOrder = maxOrder;
RestartModel(p);
}
/*
CreateSuccessors()
It's called when (FoundState->Successor) is RAW-Successor,
that is the link to position in Raw text.
So we create Context records and write the links to
FoundState->Successor and to identical RAW-Successors in suffix
contexts of MinContext.
The function returns:
if (OrderFall == 0) then MinContext is already at MAX order,
{ return pointer to new or existing context of same MAX order }
else
{ return pointer to new real context that will be (Order+1) in comparison with MinContext
also it can return pointer to real context of same order }
The function returns NULL, if there is no free memory for new context record.
*/
MY_NO_INLINE
static CTX_PTR CreateSuccessors(CPpmd7 *p)
{
CTX_PTR c = p->MinContext;
CPpmd_Byte_Ref upBranch = (CPpmd_Byte_Ref)SUCCESSOR(p->FoundState);
Byte newSym, newFreq;
unsigned numPs = 0;
/* ps[] collects the states whose Successor will be changed to link to new contexts */
CPpmd_State *ps[PPMD7_MAX_ORDER];
if (p->OrderFall != 0)
ps[numPs++] = p->FoundState;
/* walk the suffix chain while the states for FoundState->Symbol have the same RAW-Successor */
while (c->Suffix)
{
CPpmd_Void_Ref successor;
CPpmd_State *s;
c = SUFFIX(c);
if (c->NumStats != 1)
{
/* the loop has no bounds check: it expects that the symbol exists in suffix context */
Byte sym = p->FoundState->Symbol;
for (s = STATS(c); s->Symbol != sym; s++);
}
else
{
s = ONE_STATE(c);
}
successor = SUCCESSOR(s);
if (successor != upBranch)
{
// (c) is real record Context here,
c = CTX(successor);
if (numPs == 0)
{
// (c) is real record MAX Order Context here,
// So we don't need to create any new contexts.
return c;
}
break;
}
ps[numPs++] = s;
}
// All created contexts will have single-symbol with new RAW-Successor
// All new RAW-Successors will point to next position in RAW text
// after FoundState->Successor
newSym = *(const Byte *)Ppmd7_GetPtr(p, upBranch);
upBranch++;
if (c->NumStats == 1)
newFreq = ONE_STATE(c)->Freq;
else
{
UInt32 cf, s0;
CPpmd_State *s;
/* the loop has no bounds check: it expects that (newSym) exists in context (c) */
for (s = STATS(c); s->Symbol != newSym; s++);
cf = (UInt32)s->Freq - 1;
s0 = (UInt32)c->Union2.SummFreq - c->NumStats - cf;
/*
cf - is frequency of symbol that will be Successor in new context records.
s0 - is cumulative frequency sum of another symbols from parent context.
max(newFreq)= (s->Freq + 1), when (s0 == 1)
we have requirement (Ppmd7Context_OneState()->Freq <= 128) in BinSumm[]
so (s->Freq < 128) - is requirement for multi-symbol contexts
*/
newFreq = (Byte)(1 + ((2 * cf <= s0) ? (5 * cf > s0) : (2 * cf + s0 - 1) / (2 * s0) + 1));
}
// Create new single-symbol contexts from low order to high order in loop
do
{
CTX_PTR c1;
/* = AllocContext(p); */
/* 1 UNIT is taken from: free area at HiUnit, or FreeList[0], or AllocUnitsRare() */
if (p->HiUnit != p->LoUnit)
c1 = (CTX_PTR)(void *)(p->HiUnit -= UNIT_SIZE);
else if (p->FreeList[0] != 0)
c1 = (CTX_PTR)RemoveNode(p, 0);
else
{
c1 = (CTX_PTR)AllocUnitsRare(p, 0);
if (!c1)
return NULL;
}
c1->NumStats = 1;
ONE_STATE(c1)->Symbol = newSym;
ONE_STATE(c1)->Freq = newFreq;
SetSuccessor(ONE_STATE(c1), upBranch);
c1->Suffix = REF(c);
/* the state of the lower order context now links to the new context */
SetSuccessor(ps[--numPs], REF(c1));
c = c1;
}
while (numPs != 0);
return c;
}
/*
SwapStates(s)
  Swaps the state (s)[0] with the previous state (s)[-1].
  (s) is evaluated more than once, so it must have no side effects.
  The macro is wrapped in do { } while (0) and (s) is parenthesized,
  so it works as a single statement and it accepts pointer expressions.
*/
#define SwapStates(s) \
  do { CPpmd_State tmp = (s)[0]; (s)[0] = (s)[-1]; (s)[-1] = tmp; } while (0)
/*
Ppmd7_UpdateModel()
Updates the model after the symbol (FoundState->Symbol) was coded in MinContext:
- it increases the frequency of that symbol in suffix context of MinContext,
- it selects next MinContext / MaxContext, and it creates new context records, if required,
- it adds the symbol to all contexts from MaxContext down to (but not including) old MinContext.
If there is no free memory, it calls RestartModel() and returns.
*/
void Ppmd7_UpdateModel(CPpmd7 *p);
MY_NO_INLINE
void Ppmd7_UpdateModel(CPpmd7 *p)
{
CPpmd_Void_Ref maxSuccessor, minSuccessor;
CTX_PTR c, mc;
unsigned s0, ns;
if (p->FoundState->Freq < MAX_FREQ / 4 && p->MinContext->Suffix != 0)
{
/* Update Freqs in Suffix Context */
c = SUFFIX(p->MinContext);
if (c->NumStats == 1)
{
CPpmd_State *s = ONE_STATE(c);
if (s->Freq < 32)
s->Freq++;
}
else
{
CPpmd_State *s = STATS(c);
Byte sym = p->FoundState->Symbol;
if (s->Symbol != sym)
{
/* the loop has no bounds check: it expects that (sym) exists in suffix context */
do
{
// s++; if (s->Symbol == sym) break;
s++;
}
while (s->Symbol != sym);
/* move the symbol one position toward the start of the vector */
if (s[0].Freq >= s[-1].Freq)
{
SwapStates(s);
s--;
}
}
if (s->Freq < MAX_FREQ - 9)
{
s->Freq = (Byte)(s->Freq + 2);
c->Union2.SummFreq = (UInt16)(c->Union2.SummFreq + 2);
}
}
}
if (p->OrderFall == 0)
{
/* MAX ORDER context */
/* (FoundState->Successor) is RAW-Successor. */
p->MaxContext = p->MinContext = CreateSuccessors(p);
if (!p->MinContext)
{
RestartModel(p);
return;
}
SetSuccessor(p->FoundState, REF(p->MinContext));
return;
}
/* NON-MAX ORDER context */
{
/* append the symbol to raw text; restart the model, if raw text reaches UnitsStart */
Byte *text = p->Text;
*text++ = p->FoundState->Symbol;
p->Text = text;
if (text >= p->UnitsStart)
{
RestartModel(p);
return;
}
maxSuccessor = REF(text);
}
minSuccessor = SUCCESSOR(p->FoundState);
if (minSuccessor)
{
// there is Successor for FoundState in MinContext.
// So the next context will be one order higher than MinContext.
if (minSuccessor <= maxSuccessor)
{
// minSuccessor is RAW-Successor. So we will create real contexts records:
CTX_PTR cs = CreateSuccessors(p);
if (!cs)
{
RestartModel(p);
return;
}
minSuccessor = REF(cs);
}
// minSuccessor now is real Context pointer that points to existing (Order+1) context
if (--p->OrderFall == 0)
{
/*
if we move to MaxOrder context, then minSuccessor will be common Successor for both:
MinContext that is (MaxOrder - 1)
MaxContext that is (MaxOrder)
so we don't need new RAW-Successor, and we can use real minSuccessor
as successors for both MinContext and MaxContext.
*/
maxSuccessor = minSuccessor;
/*
if (MaxContext != MinContext)
{
there was order fall from MaxOrder and we don't need current symbol
to transfer some RAW-Successors to real contexts.
So we roll back pointer in raw data for one position.
}
*/
p->Text -= (p->MaxContext != p->MinContext);
}
}
else
{
/*
FoundState has NULL-Successor here.
And only root 0-order context can contain NULL-Successors.
We change Successor in FoundState to RAW-Successor,
And next context will be same 0-order root Context.
*/
SetSuccessor(p->FoundState, maxSuccessor);
minSuccessor = REF(p->MinContext);
}
mc = p->MinContext;
c = p->MaxContext;
p->MaxContext = p->MinContext = CTX(minSuccessor);
/* there was no order fall: there are no higher order contexts to update */
if (c == mc)
return;
// s0 : is pure Escape Freq
s0 = mc->Union2.SummFreq - (ns = mc->NumStats) - ((unsigned)p->FoundState->Freq - 1);
/* add the symbol to each context from old MaxContext down to (but not including) old MinContext */
do
{
unsigned ns1;
UInt32 sum;
if ((ns1 = c->NumStats) != 1)
{
/* the vector of (ns1) states uses (ns1 / 2) UNITs, if (ns1) is even.
So it has no room for one more state */
if ((ns1 & 1) == 0)
{
/* Expand for one UNIT */
unsigned oldNU = ns1 >> 1;
unsigned i = U2I(oldNU);
/* the block is moved only if (oldNU + 1) UNITs need a block from another list */
if (i != U2I((size_t)oldNU + 1))
{
void *ptr = AllocUnits(p, i + 1);
void *oldPtr;
if (!ptr)
{
RestartModel(p);
return;
}
oldPtr = STATS(c);
MyMem12Cpy(ptr, oldPtr, oldNU);
InsertNode(p, oldPtr, i);
c->Union4.Stats = STATS_REF(ptr);
}
}
sum = c->Union2.SummFreq;
/* max increase of Escape_Freq is 3 here.
total increase of Union2.SummFreq for all symbols is less than 256 here */
sum += (UInt32)(2 * ns1 < ns) + 2 * ((unsigned)(4 * ns1 <= ns) & (sum <= 8 * ns1));
/* original PPMdH uses 16-bit variable for (sum) here.
But (sum < 0x9000). So we don't truncate (sum) to 16-bit */
// sum = (UInt16)sum;
}
else
{
// instead of One-symbol context we create 2-symbol context
CPpmd_State *s = (CPpmd_State*)AllocUnits(p, 0);
if (!s)
{
RestartModel(p);
return;
}
{
unsigned freq = c->Union2.State2.Freq;
// s = *ONE_STATE(c);
s->Symbol = c->Union2.State2.Symbol;
s->Successor_0 = c->Union4.State4.Successor_0;
s->Successor_1 = c->Union4.State4.Successor_1;
// SetSuccessor(s, c->Union4.Stats); // call it only for debug purposes to check the order of
// (Successor_0 and Successor_1) in LE/BE.
c->Union4.Stats = REF(s);
if (freq < MAX_FREQ / 4 - 1)
freq <<= 1;
else
freq = MAX_FREQ - 4;
// (max(s->freq) == 120), when we convert from 1-symbol into 2-symbol context
s->Freq = (Byte)freq;
// max(InitEsc = PPMD7_kExpEscape[*]) is 25. So the max(escapeFreq) is 26 here
sum = freq + p->InitEsc + (ns > 3);
}
}
{
/* write the new state after the last existing state of context (c) */
CPpmd_State *s = STATS(c) + ns1;
UInt32 cf = 2 * (sum + 6) * (UInt32)p->FoundState->Freq;
UInt32 sf = (UInt32)s0 + sum;
s->Symbol = p->FoundState->Symbol;
c->NumStats = (UInt16)(ns1 + 1);
SetSuccessor(s, maxSuccessor);
/* (cf) is replaced with the initial Freq of new state: from 1 to 7 */
if (cf < 6 * sf)
{
cf = (UInt32)1 + (cf > sf) + (cf >= 4 * sf);
sum += 3;
/* It can add (0, 1, 2) to Escape_Freq */
}
else
{
cf = (UInt32)4 + (cf >= 9 * sf) + (cf >= 12 * sf) + (cf >= 15 * sf);
sum += cf;
}
c->Union2.SummFreq = (UInt16)sum;
s->Freq = (Byte)cf;
}
c = SUFFIX(c);
}
while (c != mc);
}
/*
Rescale()
Halves the frequencies of all symbols in MinContext:
- FoundState is moved to the first position of the vector,
- the other states get halved Freq, and the vector is kept sorted by Freq in descending order,
- the states with (Freq == 0) are removed. It's possible only if (adder == 0),
- the context is converted to single-symbol context, if only one state remains,
- SummFreq is recalculated, and FoundState is set to the first state.
The Ppmd7_Update*() functions call it, when Freq exceeds MAX_FREQ.
*/
MY_NO_INLINE
static void Rescale(CPpmd7 *p)
{
unsigned i, adder, sumFreq, escFreq;
CPpmd_State *stats = STATS(p->MinContext);
CPpmd_State *s = p->FoundState;
/* Sort the list by Freq */
/* FoundState goes to the first position: the states before it are shifted by one position */
if (s != stats)
{
CPpmd_State tmp = *s;
do
s[0] = s[-1];
while (--s != stats);
*s = tmp;
}
sumFreq = s->Freq;
escFreq = p->MinContext->Union2.SummFreq - sumFreq;
/*
if (p->OrderFall == 0), adder = 0 : it's allowed to remove symbol from MAX Order context
if (p->OrderFall != 0), adder = 1 : it's NOT allowed to remove symbol from NON-MAX Order context
*/
adder = (p->OrderFall != 0);
#ifdef PPMD7_ORDER_0_SUPPPORT
adder |= (p->MaxOrder == 0); // we don't remove symbols from order-0 context
#endif
sumFreq = (sumFreq + 4 + adder) >> 1;
/* (i) is the number of remaining states. The do-while loop below requires (NumStats >= 2) */
i = (unsigned)p->MinContext->NumStats - 1;
s->Freq = (Byte)sumFreq;
do
{
unsigned freq = (++s)->Freq;
/* after the loop (escFreq) is (SummFreq - sum of all old Freq values) */
escFreq -= freq;
freq = (freq + adder) >> 1;
sumFreq += freq;
s->Freq = (Byte)freq;
/* insertion sort step: move the state toward the start, while its Freq is larger */
if (freq > s[-1].Freq)
{
CPpmd_State tmp = *s;
CPpmd_State *s1 = s;
do
{
s1[0] = s1[-1];
}
while (--s1 != stats && freq > s1[-1].Freq);
*s1 = tmp;
}
}
while (--i);
/* the vector is sorted, so all states with (Freq == 0) are at the end of the vector */
if (s->Freq == 0)
{
/* Remove all items with Freq == 0 */
CPpmd7_Context *mc;
unsigned numStats, numStatsNew, n0, n1;
/* (i) = the number of states with (Freq == 0) */
i = 0; do { i++; } while ((--s)->Freq == 0);
/* We increase (escFreq) for the number of removed symbols.
So we will have (0.5) increase for Escape_Freq in average per
removed symbol after Escape_Freq halving */
escFreq += i;
mc = p->MinContext;
numStats = mc->NumStats;
numStatsNew = numStats - i;
mc->NumStats = (UInt16)(numStatsNew);
/* (n0) = the number of UNITs of old vector */
n0 = (numStats + 1) >> 1;
if (numStatsNew == 1)
{
/* Create Single-Symbol context */
unsigned freq = stats->Freq;
do
{
escFreq >>= 1;
freq = (freq + 1) >> 1;
}
while (escFreq > 1);
/* the remaining state is copied into the context record, and the old vector is freed */
s = ONE_STATE(mc);
*s = *stats;
s->Freq = (Byte)freq; // (freq <= 260 / 4)
p->FoundState = s;
InsertNode(p, stats, U2I(n0));
return;
}
/* (n1) = the number of UNITs of new vector */
n1 = (numStatsNew + 1) >> 1;
if (n0 != n1)
{
// p->MinContext->Union4.Stats = STATS_REF(ShrinkUnits(p, stats, n0, n1));
unsigned i0 = U2I(n0);
unsigned i1 = U2I(n1);
if (i0 != i1)
{
if (p->FreeList[i1] != 0)
{
/* move the vector to a free block of the smaller list, and free the old block */
void *ptr = RemoveNode(p, i1);
p->MinContext->Union4.Stats = STATS_REF(ptr);
MyMem12Cpy(ptr, (const void *)stats, n1);
InsertNode(p, stats, i0);
}
else
/* keep the vector in place, and free the unused tail of the block */
SplitBlock(p, stats, i0, i1);
}
}
}
{
CPpmd7_Context *mc = p->MinContext;
mc->Union2.SummFreq = (UInt16)(sumFreq + escFreq - (escFreq >> 1));
// Escape_Freq halving here
p->FoundState = STATS(mc);
}
}
/*
Ppmd7_MakeEscFreq()
  Selects the SEE record for escape estimation in MinContext.

  p         : model object
  numMasked : the number of masked symbols of MinContext
  escFreq   : (out) escape frequency, always >= 1

  Returns the pointer to selected CPpmd_See record.
  If MinContext has 256 symbols, it returns &p->DummySee and (*escFreq = 1).
  Otherwise (Summ) of selected record is decreased by (Summ >> Shift).
*/
CPpmd_See *Ppmd7_MakeEscFreq(CPpmd7 *p, unsigned numMasked, UInt32 *escFreq)
{
  const CPpmd7_Context *mc = p->MinContext;
  const unsigned numStats = mc->NumStats;
  CPpmd_See *see;
  unsigned nonMasked, summ, r;

  if (numStats == 256)
  {
    *escFreq = 1;
    return &p->DummySee;
  }

  nonMasked = numStats - numMasked;

  /* the row is selected by the number of non-masked symbols,
     the item in the row is selected by sum of the flags */
  see = p->See[(unsigned)p->NS2Indx[(size_t)nonMasked - 1]];
  see += (nonMasked < (unsigned)SUFFIX(mc)->NumStats - numStats);
  see += 2 * (unsigned)(mc->Union2.SummFreq < 11 * numStats);
  see += 4 * (unsigned)(numMasked > nonMasked);
  see += p->HiBitsFlag;

  /* if (see->Summ) field is larger than 16-bit, we need only low 16 bits of Summ */
  summ = (UInt16)see->Summ;
  r = (summ >> see->Shift);
  see->Summ = (UInt16)(summ - r);
  *escFreq = (r != 0) ? r : 1;

  return see;
}
/*
NextContext()
  Moves to the context for next symbol.
  If we are at MAX order (OrderFall == 0) and the Successor of FoundState
  points above (Text), then the Successor is used as new MinContext / MaxContext.
  Otherwise Ppmd7_UpdateModel() does the full update.
*/
static void NextContext(CPpmd7 *p)
{
  CTX_PTR next = CTX(SUCCESSOR(p->FoundState));

  if (p->OrderFall != 0 || (const Byte *)next <= p->Text)
  {
    Ppmd7_UpdateModel(p);
    return;
  }

  p->MinContext = next;
  p->MaxContext = next;
}
/*
Ppmd7_Update1()
  Adds 4 to Freq of FoundState and to SummFreq of MinContext.
  If new Freq is larger than Freq of previous state, the two states are swapped,
  and Rescale() is called if new Freq exceeds MAX_FREQ.
  Then it moves to next context.
  It reads the state before FoundState, so FoundState must not be
  the first state of the vector.
*/
void Ppmd7_Update1(CPpmd7 *p)
{
  CPpmd7_Context *ctx = p->MinContext;
  CPpmd_State *found = p->FoundState;
  const unsigned newFreq = (unsigned)found->Freq + 4;

  ctx->Union2.SummFreq = (UInt16)(ctx->Union2.SummFreq + 4);
  found->Freq = (Byte)newFreq;

  if (newFreq > found[-1].Freq)
  {
    SwapStates(found);
    found--;
    p->FoundState = found;
    if (newFreq > MAX_FREQ)
      Rescale(p);
  }

  NextContext(p);
}
/*
Ppmd7_Update1_0()
  Update for the case when FoundState needs no swap with previous state.
  PrevSuccess is set to 1, if (2 * Freq > SummFreq) for old values,
  and it's added to RunLength.
  Then 4 is added to Freq and to SummFreq, Rescale() is called
  if new Freq exceeds MAX_FREQ, and we move to next context.
*/
void Ppmd7_Update1_0(CPpmd7 *p)
{
  CPpmd7_Context *ctx = p->MinContext;
  CPpmd_State *found = p->FoundState;
  const unsigned oldSumm = ctx->Union2.SummFreq;
  const unsigned oldFreq = found->Freq;
  const unsigned newFreq = oldFreq + 4;

  p->PrevSuccess = (2 * oldFreq > oldSumm);
  p->RunLength += (int)p->PrevSuccess;

  ctx->Union2.SummFreq = (UInt16)(oldSumm + 4);
  found->Freq = (Byte)newFreq;

  if (newFreq > MAX_FREQ)
    Rescale(p);

  NextContext(p);
}
/*
void Ppmd7_UpdateBin(CPpmd7 *p)
{
unsigned freq = p->FoundState->Freq;
p->FoundState->Freq = (Byte)(freq + (freq < 128));
p->PrevSuccess = 1;
p->RunLength++;
NextContext(p);
}
*/
/*
Ppmd7_Update2()
  Adds 4 to Freq of FoundState and to SummFreq of MinContext,
  and resets RunLength to InitRL.
  Rescale() is called if new Freq exceeds MAX_FREQ.
  Then it always calls Ppmd7_UpdateModel().
*/
void Ppmd7_Update2(CPpmd7 *p)
{
  CPpmd7_Context *ctx = p->MinContext;
  CPpmd_State *found = p->FoundState;
  const unsigned newFreq = (unsigned)found->Freq + 4;

  p->RunLength = p->InitRL;
  ctx->Union2.SummFreq = (UInt16)(ctx->Union2.SummFreq + 4);
  found->Freq = (Byte)newFreq;

  if (newFreq > MAX_FREQ)
    Rescale(p);

  Ppmd7_UpdateModel(p);
}
/*
PPMd Memory Map:
{
[ 0 ] contains subset of original raw text, that is required to create context
records, Some symbols are not written, when max order context was reached
[ Text ] free area
[ UnitsStart ] CPpmd_State vectors and CPpmd7_Context records
[ LoUnit ] free area for CPpmd_State and CPpmd7_Context items
[ HiUnit ] CPpmd7_Context records
[ Size ] end of array
}
These addresses don't cross at any time.
And the following conditions are true for addresses:
(0 <= Text < UnitsStart <= LoUnit <= HiUnit <= Size)
Raw text is BYTE--aligned.
the data in block [ UnitsStart ... Size ] contains 12-bytes aligned UNITs.
Last UNIT of array at offset (Size - 12) is root order-0 CPpmd7_Context record.
The code can free UNITs memory blocks that were allocated to store CPpmd_State vectors.
The code doesn't free UNITs allocated for CPpmd7_Context records.
The code calls RestartModel(), when there is no free memory for allocation.
And RestartModel() changes the state to the original start state, with full free block.
The code allocates UNITs with the following order:
Allocation of 1 UNIT for Context record
- from free space (HiUnit) down to (LoUnit)
- from FreeList[0]
- AllocUnitsRare()
AllocUnits() for CPpmd_State vectors:
- from FreeList[i]
- from free space (LoUnit) up to (HiUnit)
- AllocUnitsRare()
AllocUnitsRare()
- if (GlueCount == 0)
{ Glue lists, GlueCount = 255, allocate from FreeList[i]] }
- loop for all higher sized FreeList[...] lists
- from (UnitsStart - Text), GlueCount--
- ERROR
Each Record with Context contains the CPpmd_State vector, where each
CPpmd_State contains the link to Successor.
There are 3 types of Successor:
1) NULL-Successor - NULL pointer. NULL-Successor links can be stored
only in 0-order Root Context Record.
We use 0 value as NULL-Successor
2) RAW-Successor - the link to position in raw text,
that "RAW-Successor" is being created after first
occurrence of new symbol for some existing context record.
(RAW-Successor > 0).
3) RECORD-Successor - the link to CPpmd7_Context record of (Order+1),
that record is being created when we go via RAW-Successor again.
For any successors at any time, the following conditions are true for Successor links:
(NULL-Successor < RAW-Successor < UnitsStart <= RECORD-Successor)
---------- Symbol Frequency, SummFreq and Range in Range_Coder ----------
CPpmd7_Context::SummFreq = Sum(Stats[].Freq) + Escape_Freq
The PPMd code tries to fulfill the condition:
(SummFreq <= (256 * 128 = RC::kBot))
We have (Sum(Stats[].Freq) <= 256 * 124), because of (MAX_FREQ = 124)
So (4 = 128 - 124) is average reserve for Escape_Freq for each symbol.
If (CPpmd_State::Freq) is not aligned for 4, the reserve can be 5, 6 or 7.
SummFreq and Escape_Freq can be changed in Rescale() and *Update*() functions.
Rescale() can remove symbols only from max-order contexts. So Escape_Freq can increase after multiple calls of Rescale() for
max-order context.
When the PPMd code still breaks the (Total <= RC::Range) condition in range coder,
we have two ways to resolve that problem:
1) we can report error, if we want to keep compatibility with original PPMd code that has no fix for such cases.
2) we can reduce (Total) value to (RC::Range) by reducing (Escape_Freq) part of (Total) value.
*/