/* Bcj2Enc.c -- BCJ2 Encoder (Converter for x86 code)
2021-02-09 : Igor Pavlov : Public domain */

#include "Precomp.h"

/* #define SHOW_STAT */

#ifdef SHOW_STAT
#include <stdio.h>
#define PRF(x) x
#else
#define PRF(x)
#endif

#include <string.h>

#include "Bcj2.h"
#include "CpuArch.h"

/* Type of one adaptive bit-probability slot in p->probs[]. */
#define CProb UInt16

/* Range-coder parameters: the range is renormalized (shifted left by 8)
   whenever it drops below kTopValue; probabilities are kNumModelBits-bit
   fixed-point values adapted with a shift of kNumMoveBits. */
#define kTopValue ((UInt32)1 << 24)
#define kNumModelBits 11
#define kBitModelTotal (1 << kNumModelBits)
#define kNumMoveBits 5

/*
  Bcj2Enc_Init: reset every encoder field to its start-of-stream value.

  Sets the state to BCJ2_ENC_STATE_OK, the finish mode to
  BCJ2_ENC_FINISH_MODE_CONTINUE, clears positions/counters, resets the
  range coder (range = 0xFFFFFFFF, low = 0, cacheSize = 1) and sets every
  probability to kBitModelTotal / 2.

  The stream pointers (p->src, p->srcLim, p->bufs[], p->lims[]) are NOT
  touched here.
*/
void Bcj2Enc_Init(CBcj2Enc *p)
{
  unsigned i;

  p->state = BCJ2_ENC_STATE_OK;
  p->finishMode = BCJ2_ENC_FINISH_MODE_CONTINUE;

  p->prevByte = 0;

  p->cache = 0;
  p->range = 0xFFFFFFFF;
  p->low = 0;
  p->cacheSize = 1;

  p->ip = 0;

  p->fileIp = 0;
  p->fileSize = 0;
  p->relatLimit = BCJ2_RELAT_LIMIT;

  p->tempPos = 0;

  p->flushPos = 0;

  for (i = 0; i < sizeof(p->probs) / sizeof(p->probs[0]); i++)
    p->probs[i] = kBitModelTotal >> 1;
}

/*
  RangeEnc_ShiftLow: shift one byte out of the range coder's `low` value.

  If the top byte of the low 32 bits is not 0xFF, or a carry is present in
  bits 32+, the cached byte and all pending 0xFF bytes are written (with the
  carry added) to the BCJ2_STREAM_RC output buffer. Otherwise the byte is
  only counted in cacheSize and emitted later.

  Returns True if the RC output buffer became full before all pending bytes
  were written; in that case p->state is set to BCJ2_STREAM_RC, the write
  pointer is saved and p->low is left unshifted. Returns False on success.
*/
static BoolInt MY_FAST_CALL RangeEnc_ShiftLow(CBcj2Enc *p)
{
  if ((UInt32)p->low < (UInt32)0xFF000000 || (UInt32)(p->low >> 32) != 0)
  {
    Byte *buf = p->bufs[BCJ2_STREAM_RC];
    do
    {
      if (buf == p->lims[BCJ2_STREAM_RC])
      {
        p->state = BCJ2_STREAM_RC;
        p->bufs[BCJ2_STREAM_RC] = buf;
        return True;
      }
      /* cache byte plus carry; every following pending byte is 0xFF + carry */
      *buf++ = (Byte)(p->cache + (Byte)(p->low >> 32));
      p->cache = 0xFF;
    }
    while (--p->cacheSize);
    p->bufs[BCJ2_STREAM_RC] = buf;
    p->cache = (Byte)((UInt32)p->low >> 24);
  }
  p->cacheSize++;
  p->low = (UInt32)p->low << 8;
  return False;
}

/*
  Bcj2Enc_Encode_2: core encoding loop over [p->src, p->srcLim).

  Copies input bytes to the MAIN stream. For each 0xE8 / 0xE9 opcode, and
  for each 0x80..0x8F byte that follows 0x0F, one bit is range-coded saying
  whether the following 4-byte little-endian value was converted. When it is
  converted, (ip + value) is written big-endian to the CALL stream (for 0xE8)
  or the JUMP stream (otherwise) and the 4 source bytes are consumed without
  being copied to MAIN.

  The function returns whenever it cannot proceed, with p->state telling why:
    - BCJ2_STREAM_MAIN / BCJ2_STREAM_CALL / BCJ2_STREAM_JUMP / BCJ2_STREAM_RC:
      that output buffer is full;
    - BCJ2_ENC_STATE_ORIG: more input is needed (or the finish mode is not
      END_STREAM and the input was fully consumed);
    - BCJ2_ENC_STATE_OK: END_STREAM mode and the range coder was flushed.
  All progress is stored in *p so the call can be repeated to resume.
*/
static void Bcj2Enc_Encode_2(CBcj2Enc *p)
{
  /* A previous call stopped because a CALL/JUMP buffer was full:
     write the saved target first, or return again if there is still no room. */
  if (BCJ2_IS_32BIT_STREAM(p->state))
  {
    Byte *cur = p->bufs[p->state];
    if (cur == p->lims[p->state])
      return;
    SetBe32(cur, p->tempTarget);
    p->bufs[p->state] = cur + 4;
  }

  p->state = BCJ2_ENC_STATE_ORIG;

  for (;;)
  {
    /* Renormalize the range coder before encoding the next bit. */
    if (p->range < kTopValue)
    {
      if (RangeEnc_ShiftLow(p))
        return;
      p->range <<= 8;
    }

    {
      {
        const Byte *src = p->src;
        const Byte *srcLim;
        Byte *dest;
        SizeT num = (SizeT)(p->srcLim - src);

        if (p->finishMode == BCJ2_ENC_FINISH_MODE_CONTINUE)
        {
          /* Keep 4 bytes of look-ahead so that an opcode found in the scanned
             range always has its 4-byte operand available. */
          if (num <= 4)
            return;
          num -= 4;
        }
        else if (num == 0)
          break;

        /* Never scan more bytes than fit in the MAIN output buffer. */
        dest = p->bufs[BCJ2_STREAM_MAIN];
        if (num > (SizeT)(p->lims[BCJ2_STREAM_MAIN] - dest))
        {
          num = (SizeT)(p->lims[BCJ2_STREAM_MAIN] - dest);
          if (num == 0)
          {
            p->state = BCJ2_STREAM_MAIN;
            return;
          }
        }

        srcLim = src + num;

        /* Scan-and-copy until a candidate opcode is found. On a hit, `src`
           points at the opcode byte, which has been stored to *dest but
           `dest` is not yet advanced. On a miss, src == srcLim. */
        if (p->prevByte == 0x0F && (src[0] & 0xF0) == 0x80)
          *dest = src[0];
        else for (;;)
        {
          Byte b = *src;
          *dest = b;
          if (b != 0x0F)
          {
            if ((b & 0xFE) == 0xE8)
              break;
            dest++;
            if (++src != srcLim)
              continue;
            break;
          }
          dest++;
          if (++src == srcLim)
            break;
          if ((*src & 0xF0) != 0x80)
            continue;
          *dest = *src;
          break;
        }

        num = (SizeT)(src - p->src);

        if (src == srcLim)
        {
          /* No candidate in this run: commit the copied bytes and loop. */
          p->prevByte = src[-1];
          p->bufs[BCJ2_STREAM_MAIN] = dest;
          p->src = src;
          p->ip += (UInt32)num;
          continue;
        }

        {
          /* Byte preceding the opcode; selects the probability for 0xE8. */
          Byte context = (Byte)(num == 0 ? p->prevByte : src[-1]);
          BoolInt needConvert;

          p->bufs[BCJ2_STREAM_MAIN] = dest + 1;
          p->ip += (UInt32)num + 1;
          src++;

          needConvert = False;

          if ((SizeT)(p->srcLim - src) >= 4)
          {
            UInt32 relatVal = GetUi32(src);
            /* Convert only if the target lies inside the file window (when
               fileSize != 0) and the value is within the relatLimit range. */
            if ((p->fileSize == 0 || (UInt32)(p->ip + 4 + relatVal - p->fileIp) < p->fileSize)
                && ((relatVal + p->relatLimit) >> 1) < p->relatLimit)
              needConvert = True;
          }

          {
            UInt32 bound;
            unsigned ttt;
            Byte b = src[-1];
            /* probs[0]: 0x0F 0x8x; probs[1]: 0xE9; probs[2 + context]: 0xE8 */
            CProb *prob = p->probs + (unsigned)(b == 0xE8 ? 2 + (unsigned)context : (b == 0xE9 ? 1 : 0));

            ttt = *prob;
            bound = (p->range >> kNumModelBits) * ttt;

            if (!needConvert)
            {
              /* Encode bit 0: operand bytes stay in the input and are copied
                 to MAIN by the following iterations. */
              p->range = bound;
              *prob = (CProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
              p->src = src;
              p->prevByte = b;
              continue;
            }

            /* Encode bit 1: operand is converted and moved to CALL/JUMP. */
            p->low += bound;
            p->range -= bound;
            *prob = (CProb)(ttt - (ttt >> kNumMoveBits));

            {
              UInt32 relatVal = GetUi32(src);
              UInt32 absVal;
              p->ip += 4;
              absVal = p->ip + relatVal;
              p->prevByte = src[3];
              src += 4;
              p->src = src;
              {
                unsigned cj = (b == 0xE8) ? BCJ2_STREAM_CALL : BCJ2_STREAM_JUMP;
                Byte *cur = p->bufs[cj];
                if (cur == p->lims[cj])
                {
                  /* Output full: remember the value; it is written at the
                     top of this function on the next call. */
                  p->state = cj;
                  p->tempTarget = absVal;
                  return;
                }
                SetBe32(cur, absVal);
                p->bufs[cj] = cur + 4;
              }
            }
          }
        }
      }
    }
  }

  if (p->finishMode != BCJ2_ENC_FINISH_MODE_END_STREAM)
    return;

  /* Flush the range coder: 5 shifts, resumable through p->flushPos when the
     RC output buffer fills up in the middle. */
  for (; p->flushPos < 5; p->flushPos++)
    if (RangeEnc_ShiftLow(p))
      return;
  p->state = BCJ2_ENC_STATE_OK;
}

/*
  Bcj2Enc_Encode: public entry point; encodes as much of [p->src, p->srcLim)
  as the output buffers allow and reports the stop reason in p->state.

  Input bytes that Bcj2Enc_Encode_2 could not process yet (it asked for more
  input) are saved in p->temp / p->tempPos and p->src is advanced past them,
  so the caller's buffer is reported as fully consumed. On the next call those
  saved bytes are processed first, extended one byte at a time from the new
  input, until processing can switch back to the caller's buffer directly.
*/
void Bcj2Enc_Encode(CBcj2Enc *p)
{
  PRF(printf("\n"));
  PRF(printf("---- ip = %8d tempPos = %8d src = %8d\n", (int)p->ip, (int)p->tempPos, (int)(p->srcLim - p->src)));

  if (p->tempPos != 0)
  {
    /* Number of bytes appended to p->temp from the caller's buffer. */
    unsigned extra = 0;

    for (;;)
    {
      const Byte *src = p->src;
      const Byte *srcLim = p->srcLim;
      EBcj2Enc_FinishMode finishMode = p->finishMode;

      /* Temporarily redirect the input to the saved bytes. */
      p->src = p->temp;
      p->srcLim = p->temp + p->tempPos;
      /* The saved bytes are not the end of the data while the caller still
         has input pending, so do not finish on them. */
      if (src != srcLim)
        p->finishMode = BCJ2_ENC_FINISH_MODE_CONTINUE;

      PRF(printf(" ip = %8d tempPos = %8d src = %8d\n", (int)p->ip, (int)p->tempPos, (int)(p->srcLim - p->src)));

      Bcj2Enc_Encode_2(p);

      {
        unsigned num = (unsigned)(p->src - p->temp);
        unsigned tempPos = p->tempPos - num;
        unsigned i;
        p->tempPos = tempPos;
        /* Drop the consumed bytes from the front of p->temp. */
        for (i = 0; i < tempPos; i++)
          p->temp[i] = p->temp[(size_t)i + num];

        p->src = src;
        p->srcLim = srcLim;
        p->finishMode = finishMode;

        /* Stop on a full output buffer / finished stream, or when the caller
           has no more input to extend p->temp with. */
        if (p->state != BCJ2_ENC_STATE_ORIG || src == srcLim)
          return;

        if (extra >= tempPos)
        {
          /* Everything left in p->temp was taken from the caller's buffer:
             rewind src over it and continue from the caller's buffer. */
          p->src = src - tempPos;
          p->tempPos = 0;
          break;
        }

        p->temp[tempPos] = src[0];
        p->tempPos = tempPos + 1;
        p->src = src + 1;
        extra++;
      }
    }
  }

  PRF(printf("++++ ip = %8d tempPos = %8d src = %8d\n", (int)p->ip, (int)p->tempPos, (int)(p->srcLim - p->src)));

  Bcj2Enc_Encode_2(p);

  if (p->state == BCJ2_ENC_STATE_ORIG)
  {
    /* More input is needed: save the unprocessed bytes and report the
       caller's buffer as consumed. */
    const Byte *src = p->src;
    unsigned rem = (unsigned)(p->srcLim - src);
    unsigned i;
    for (i = 0; i < rem; i++)
      p->temp[i] = src[i];
    p->tempPos = rem;
    p->src = src + rem;
  }
}