From 1d43b7b1ba04414acdc8cc3c5f50d3b29bc21788 Mon Sep 17 00:00:00 2001 From: Andrei Vlad LUTAS Date: Tue, 11 Aug 2020 09:26:48 +0300 Subject: [PATCH 1/2] Improved stack string detection heuristic: only consider registers which have been modified during emulation; registers which were provided as "input" can be ignored, as they most likely contain addresses or other data relevant to the emulated code. We are only interested in strings dynamically built during our emulation. --- bdshemu/bdshemu.c | 94 +++++++++++++++++++++++++++++++++++++++---- inc/bdshemu/bdshemu.h | 6 ++- inc/version.h | 2 +- 3 files changed, 92 insertions(+), 10 deletions(-) diff --git a/bdshemu/bdshemu.c b/bdshemu/bdshemu.c index c27c17a..1d89d73 100644 --- a/bdshemu/bdshemu.c +++ b/bdshemu/bdshemu.c @@ -655,13 +655,15 @@ ShemuSetGprValue( bool High8 ) { + uint32_t bit; + switch (Size) { case 1: if (High8) { // AH, CH, DH or BH accessed. - *((uint8_t *)(&Context->Registers.RegRax + Reg - 4) + 1) = Value & 0xFF; + *((uint8_t *)(&Context->Registers.RegRax + Reg - 4) + 1) = Value & 0xff; } else { @@ -682,6 +684,55 @@ ShemuSetGprValue( *(&Context->Registers.RegRax + Reg) = Value; break; } + + if (High8) + { + bit = Reg - 4; + } + else + { + bit = Reg; + } + + // Mark the GPR as being dirty/written. + Context->DirtyGprBitmap |= (1 << bit); +} + + +// +// ShemuCmpGprValue +// +static bool +ShemuCmpGprValue( + SHEMU_CONTEXT *Context, + uint32_t Reg, + uint32_t Size, + uint64_t Value, + bool High8 + ) +{ + switch (Size) + { + case 1: + if (High8) + { + // AH, CH, DH or BH. 
+ return *((uint8_t *)(&Context->Registers.RegRax + Reg - 4) + 1) == (Value & 0xff); + } + else + { + return *((uint8_t *)(&Context->Registers.RegRax + Reg)) == (Value & 0xff); + } + + case 2: + return *((uint16_t *)(&Context->Registers.RegRax + Reg)) == (Value & 0xffff); + + case 4: + return *((uint32_t *)(&Context->Registers.RegRax + Reg)) == (Value & 0xffffffff); + + default: + return *(&Context->Registers.RegRax + Reg) == Value; + } } @@ -1299,6 +1350,8 @@ ShemuSetOperandValue( // Handle RIP save on the stack. if (ShemuIsStackPtr(Context, gla, MAX(op->Size, Context->Instruction.WordLength))) { + uint8_t stckstrlen = 0; + // Note: only Context->Instruction.WordLength bits are flagged as RIP, as that is the RIP size. if (Context->Instruction.Instruction == ND_INS_CALLNR || Context->Instruction.Instruction == ND_INS_CALLNI) @@ -1327,6 +1380,9 @@ ShemuSetOperandValue( // ... // PUSH strn // Other variants may exist, but all we care about are stores on the stack, and all are checked. + // Note that we will ignore registers which have not been modified during emulation; those are considered + // input values for the emulated code, and may be pointers or other data. We are interested only in + // stack values built within the emulated code. for (uint32_t i = 0; i < Value->Size; i++) { unsigned char c = Value->Value.Bytes[i]; @@ -1334,18 +1390,40 @@ ShemuSetOperandValue( if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '\\' || c == '/' || c == ':' || c == ' ') { - Context->StrLength++; + stckstrlen++; + } + else + { + break; + } + } - if (Context->StrLength >= Context->StrThreshold) + if (stckstrlen == Value->Size) + { + // Make sure the value is not present inside a non-dirty GPR. 
+ for (uint32_t i = 0; i < 16; i++) + { + if (ShemuCmpGprValue(Context, i, Value->Size, Value->Value.Qwords[0], false) && + (0 == (Context->DirtyGprBitmap & (1 << i)))) { - Context->Flags |= SHEMU_FLAG_STACK_STR; + // A register is saved on the stack, but that register wasn't written during the emulation. + stckstrlen = 0; break; } } - else - { - Context->StrLength = 0; - } + } + + Context->StrLength += stckstrlen; + + if (Context->StrLength >= Context->StrThreshold) + { + Context->Flags |= SHEMU_FLAG_STACK_STR; + } + + if (stckstrlen != Value->Size) + { + // Not a full string stored on the stack, reset the counter. + Context->StrLength = 0; } } diff --git a/inc/bdshemu/bdshemu.h b/inc/bdshemu/bdshemu.h index b6788be..31080af 100644 --- a/inc/bdshemu/bdshemu.h +++ b/inc/bdshemu/bdshemu.h @@ -130,7 +130,11 @@ typedef struct _SHEMU_CONTEXT uint64_t MmxRegisters[ND_MAX_MMX_REGS]; // SSE registers state. 32 x 64 bytes = 2048 bytes for the SSE registers. Can be provided on input, if needed. - uint8_t SseRegisters[ND_MAX_SSE_REGS * ND_MAX_REGISTER_SIZE]; + uint8_t SseRegisters[ND_MAX_SSE_REGS * ND_MAX_REGISTER_SIZE]; + + // General purpose registers write bitmap. After the first write, a register will be marked dirty in here. + // Should be 0 on input. + uint16_t DirtyGprBitmap; // Operating mode (ND_CODE_16, ND_CODE_32 or ND_CODE_64). Must be provided as input. uint8_t Mode; diff --git a/inc/version.h b/inc/version.h index f158eb1..11affb4 100644 --- a/inc/version.h +++ b/inc/version.h @@ -7,6 +7,6 @@ #define DISASM_VERSION_MAJOR 1 #define DISASM_VERSION_MINOR 28 -#define DISASM_VERSION_REVISION 0 +#define DISASM_VERSION_REVISION 1 #endif // DISASM_VER_H From 79ee40b113bdb7bb2912b7e5d3bc19cf9faaf51b Mon Sep 17 00:00:00 2001 From: Andrei Vlad LUTAS Date: Tue, 11 Aug 2020 09:37:10 +0300 Subject: [PATCH 2/2] Fixed pybddisasm build. 
--- pybddisasm/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pybddisasm/setup.py b/pybddisasm/setup.py index 69d9d5c..bfcde94 100644 --- a/pybddisasm/setup.py +++ b/pybddisasm/setup.py @@ -12,7 +12,7 @@ from setuptools import find_packages, setup, Command, Extension, Distribution from codecs import open VERSION = (0, 1, 2) -LIBRARY_VERSION = (1, 28, 0) +LIBRARY_VERSION = (1, 28, 1) LIBRARY_INSTRUX_SIZE = 856 packages = ['pybddisasm']