diff --git a/bdshemu/bdshemu.c b/bdshemu/bdshemu.c index c27c17a..1d89d73 100644 --- a/bdshemu/bdshemu.c +++ b/bdshemu/bdshemu.c @@ -655,13 +655,15 @@ ShemuSetGprValue( bool High8 ) { + uint32_t bit; + switch (Size) { case 1: if (High8) { // AH, CH, DH or BH accessed. - *((uint8_t *)(&Context->Registers.RegRax + Reg - 4) + 1) = Value & 0xFF; + *((uint8_t *)(&Context->Registers.RegRax + Reg - 4) + 1) = Value & 0xff; } else { @@ -682,6 +684,55 @@ ShemuSetGprValue( *(&Context->Registers.RegRax + Reg) = Value; break; } + + if (High8) + { + bit = Reg - 4; + } + else + { + bit = Reg; + } + + // Mark the GPR as being dirty/written. + Context->DirtyGprBitmap |= (1 << bit); +} + + +// +// ShemuCmpGprValue +// +static bool +ShemuCmpGprValue( + SHEMU_CONTEXT *Context, + uint32_t Reg, + uint32_t Size, + uint64_t Value, + bool High8 + ) +{ + switch (Size) + { + case 1: + if (High8) + { + // AH, CH, DH or BH. + return *((uint8_t *)(&Context->Registers.RegRax + Reg - 4) + 1) == (Value & 0xff); + } + else + { + return *((uint8_t *)(&Context->Registers.RegRax + Reg)) == (Value & 0xff); + } + + case 2: + return *((uint16_t *)(&Context->Registers.RegRax + Reg)) == (Value & 0xffff); + + case 4: + return *((uint32_t *)(&Context->Registers.RegRax + Reg)) == (Value & 0xffffffff); + + default: + return *(&Context->Registers.RegRax + Reg) == Value; + } } @@ -1299,6 +1350,8 @@ ShemuSetOperandValue( // Handle RIP save on the stack. if (ShemuIsStackPtr(Context, gla, MAX(op->Size, Context->Instruction.WordLength))) { + uint8_t stckstrlen = 0; + // Note: only Context->Instruction.WordLength bits are flagged as RIP, as that is the RIP size. if (Context->Instruction.Instruction == ND_INS_CALLNR || Context->Instruction.Instruction == ND_INS_CALLNI) @@ -1327,6 +1380,9 @@ ShemuSetOperandValue( // ... // PUSH strn // Other variants may exist, but all we care about are stores on the stack, and all are checked. 
+ // Note that we will ignore registers which have not been modified during emulation; those are considered + // input values for the emulated code, and may be pointers or other data. We are interested only in + // stack values built within the emulated code. for (uint32_t i = 0; i < Value->Size; i++) { unsigned char c = Value->Value.Bytes[i]; @@ -1334,18 +1390,40 @@ ShemuSetOperandValue( if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '\\' || c == '/' || c == ':' || c == ' ') { - Context->StrLength++; + stckstrlen++; + } + else + { + break; + } + } - if (Context->StrLength >= Context->StrThreshold) + if (stckstrlen == Value->Size) + { + // Make sure the value is not present inside a non-dirty GPR. + for (uint32_t i = 0; i < 16; i++) + { + if (ShemuCmpGprValue(Context, i, Value->Size, Value->Value.Qwords[0], false) && + (0 == (Context->DirtyGprBitmap & (1 << i)))) { - Context->Flags |= SHEMU_FLAG_STACK_STR; + // A register is saved on the stack, but that register wasn't written during the emulation. + stckstrlen = 0; break; } } - else - { - Context->StrLength = 0; - } + } + + Context->StrLength += stckstrlen; + + if (Context->StrLength >= Context->StrThreshold) + { + Context->Flags |= SHEMU_FLAG_STACK_STR; + } + + if (stckstrlen != Value->Size) + { + // Not a full string stored on the stack, reset the counter. + Context->StrLength = 0; } } diff --git a/inc/bdshemu/bdshemu.h b/inc/bdshemu/bdshemu.h index b6788be..31080af 100644 --- a/inc/bdshemu/bdshemu.h +++ b/inc/bdshemu/bdshemu.h @@ -130,7 +130,11 @@ typedef struct _SHEMU_CONTEXT uint64_t MmxRegisters[ND_MAX_MMX_REGS]; // SSE registers state. 32 x 64 bytes = 2048 bytes for the SSE registers. Can be provided on input, if needed. - uint8_t SseRegisters[ND_MAX_SSE_REGS * ND_MAX_REGISTER_SIZE]; + uint8_t SseRegisters[ND_MAX_SSE_REGS * ND_MAX_REGISTER_SIZE]; + + // General purpose registers write bitmap. After the first write, a register will be marked dirty in here. 
+ // Should be 0 on input. + uint16_t DirtyGprBitmap; // Operating mode (ND_CODE_16, ND_CODE_32 or ND_CODE_64). Must be provided as input. uint8_t Mode; diff --git a/inc/version.h b/inc/version.h index f158eb1..11affb4 100644 --- a/inc/version.h +++ b/inc/version.h @@ -7,6 +7,6 @@ #define DISASM_VERSION_MAJOR 1 #define DISASM_VERSION_MINOR 28 -#define DISASM_VERSION_REVISION 0 +#define DISASM_VERSION_REVISION 1 #endif // DISASM_VER_H diff --git a/pybddisasm/setup.py b/pybddisasm/setup.py index 69d9d5c..bfcde94 100644 --- a/pybddisasm/setup.py +++ b/pybddisasm/setup.py @@ -12,7 +12,7 @@ from setuptools import find_packages, setup, Command, Extension, Distribution from codecs import open VERSION = (0, 1, 2) -LIBRARY_VERSION = (1, 28, 0) +LIBRARY_VERSION = (1, 28, 1) LIBRARY_INSTRUX_SIZE = 856 packages = ['pybddisasm']