#
# Copyright (c) 2024 Bitdefender
# SPDX-License-Identifier: Apache-2.0
#
import sys
import re
import json
import string
from typing import Optional, Tuple


# Characters allowed in the trailing word of a summary token for that word to
# be treated as the value (e.g. "0x0000000a", "10").
_VALUE_CHARS = frozenset(string.hexdigits + "x")

# "<two word chars> = 0x<16 hex digits>"; only the last two word characters
# before "=" are captured, so "X28" yields "28" and "RIP" yields "IP".
_REGISTER_RE = re.compile(r"\w\w\s*=\s*0x[0-9a-fA-F]{16}")

# A line that starts with 16 upper-case hex digits is an instruction line.
_INSTRUCTION_RE = re.compile("^[0-9A-F]{16}")


def _split_pair(token: str) -> Tuple[str, str]:
    """Split a ``key: value`` token and return both parts stripped.

    The value is the text between the first and the second ``:`` only.

    Raises:
        IndexError: if *token* contains no ``:``.
    """
    parts = token.strip().split(":")
    return parts[0].strip(), parts[1].strip()


class UniqDict(dict):
    """A ``dict`` whose item assignment refuses to overwrite an existing key."""

    def __setitem__(self, key: str, value: str) -> None:
        """Store *value* under *key*.

        Raises:
            ValueError: if *key* is already present.
        """
        if key in self:
            raise ValueError("key is already present : %s" % (key))
        return super().__setitem__(key, value)


class DecodeShemuParser(object):
    """Base class holding the raw text, a line cursor and the parsed data.

    ``_obj`` starts as the string passed in; the ``process`` methods of the
    subclasses replace it with the list of lines that ``rdline``/``rdnline``
    then index into.
    """

    def __init__(self, obj: str):
        self._obj = obj
        self._crt = 0
        self._data = UniqDict()

    def rdline(self) -> Optional[str]:
        """Return the line at the current index and advance the index.

        Returns ``None`` once the index has reached the end of the input.
        """
        if self._crt >= len(self._obj):
            return None

        out = self._obj[self._crt]
        self._crt += 1
        return out

    def rdnline(self) -> Optional[str]:
        """Return the line at the current index without advancing the index.

        Returns ``None`` once the index has reached the end of the input.
        """
        if self._crt >= len(self._obj):
            return None

        return self._obj[self._crt]


class ShemuResult(DecodeShemuParser):
    def __init__(self, obj: str):
        DecodeShemuParser.__init__(self, obj)
        self.process()

    def process(self) -> None:
        """
        Parses the emulation result generated by disasmtool and stores it in
        a dictionary as key:value pair as follows:
        {
            "Emulation terminated with status": "0x0000000a",
            "flags:": "0x80",
            "NOPs": "0",
            "NULLs": "0",
            "total instructions": "2",
            "unique instructions": "1",
            "SHEMU_FLAG-0": "SHEMU_FLAG_HEAVENS_GATE"
        }

        The first non-empty line is the comma separated summary; every
        following non-empty line is stored as ``SHEMU_FLAG-<n>``. Empty input
        leaves the dictionary empty.

        Raises:
            ValueError: if the same key is produced twice.
        """
        self._obj = [ln for ln in self._obj.split("\n") if ln]

        # Emulation terminated with status 0x0000000a, flags: 0x10, 0 NOPs,
        # 0 NULLs, 10 total instructions, 10 unique instructions
        summary = self.rdline()
        if summary is None:
            # Nothing to parse (empty or blank-only input).
            return

        for token in summary.split(","):
            token = token.strip()
            if not token:
                continue

            words = token.split(" ")
            last = words[-1]
            if last.isnumeric() or all(c in _VALUE_CHARS for c in last):
                # "<description> <value>"
                val = last
                key = " ".join(words[:-1])
            else:
                # "<value> <description>"
                val = words[0]
                key = " ".join(words[1:])

            self._data[key] = val

        cnt = 0
        line = self.rdline()
        while line:
            self._data["SHEMU_FLAG-%s" % (cnt)] = line.strip()
            cnt += 1
            line = self.rdline()


class ShemuInstrux(DecodeShemuParser):
    def __init__(self, obj: str):
        DecodeShemuParser.__init__(self, obj)
        self.process()

    def process(self) -> None:
        """
        Parses the register dump generated by disasmtool and stores it in a
        dictionary as key:value pair as follows:
        {
            "AX": "0x0000000000000000",
            "CX": "0x0000000000000000",
            ...
            "28": "0x0000000000000000",
            "31": "0x0000000000000000",
            "IP": "0x0000000000200000",
            "GS": "0x0000000000000202"
        }

        Keys are the last two word characters before ``=``. Values are
        ``0x`` followed by 16 hexadecimal digits (either case). Parsing stops
        at the first empty line or at the end of the input.

        Raises:
            ValueError: if the same key is produced twice.
        """
        self._obj = self._obj.split("\n")

        line = self.rdline()
        while line:
            # X0 = 0x0000000000000000 X1 = 0x0000000000000000 ...
            if " = " in line:
                for match in _REGISTER_RE.findall(line):
                    pieces = match.strip().split("=")
                    self._data[pieces[0].strip()] = pieces[1].strip()

            line = self.rdline()


class DecodeInstrux(DecodeShemuParser):
    def __init__(self, obj: str):
        DecodeShemuParser.__init__(self, obj)
        self.process()

    def process(self) -> None:
        """
        Parses an instruction generated by disasmtool and stores it in a
        dictionary as key:value pair as follows:
        {
            "InstructionBytes": "c4e2784900",
            "InstructionText": "LDTILECFG zmmword ptr [rax]",
            "RIP": "0000000000000000",
            "DSIZE": "32",
            "ASIZE": "64",
            "VLEN": "-",
            "ISA Set": "AMX-TILE",
            ...
            "Operand-0": {
                "Operand": "0",
                "Acc": "R-",
                "Type": "Memory",
                "Size": "64",
                "RawSize": "64",
                "Encoding": "M",
                "Segment": "3",
                "Base": "0"
            }
        }

        Repeated ``Decorator`` entries inside one operand are stored as
        ``Decorator-0``, ``Decorator-1``, ... Parsing stops at the first
        empty line or at the end of the input.

        Raises:
            ValueError: if the same key is produced twice.
            IndexError: if a token expected to be ``key: value`` has no ``:``.
        """
        self._obj = self._obj.split("\n")

        line = self.rdline()
        while line:
            # 0000000000000000 c4e2784900 LDTILECFG zmmword ptr [rax]
            # 0000000000000000 62 db 0x62 (0x80000002)
            if _INSTRUCTION_RE.search(line):
                tokens = [t for t in line.split(" ") if t.strip()]
                self._data["InstructionBytes"] = tokens[1].strip()
                self._data["InstructionText"] = " ".join(tokens[2:]).strip()
                self._data["RIP"] = tokens[0].strip()

            if "Operand:" in line:
                self._parse_operand(line)
            # EVEX Tuple Type: Tuple 1 scalar, 8 bit
            # EVEX Tuple Type: Full
            elif "EVEX" in line:
                # The value may itself contain commas, so split on ":" only.
                tokens = line.split(":")
                key = tokens[0].strip()
                val = tokens[1].strip()
                self._data[key] = val
            elif ": " in line:
                for token in line.split(","):
                    if not token.strip():
                        continue
                    key, val = _split_pair(token)
                    self._data[key] = val

            line = self.rdline()

    def _parse_operand(self, line: str) -> None:
        """Parse one operand description and store it as ``Operand-<n>``.

        *line* is the line containing ``Operand:``; following lines are
        appended to it until the next ``Operand:`` line, an empty line or the
        end of the input.
        """
        # Operand: 0, Acc: RW, Type: Register, Size: 1, RawSize: 1,
        #     RegSize: 1, RegId: 22, RegCount: 1
        upcoming = self.rdnline()
        while upcoming and "Operand:" not in upcoming:
            line += self.rdline()
            upcoming = self.rdnline()

        tokens = [t for t in line.split(",") if t.strip()]

        local = UniqDict()
        decorators = 0
        for token in tokens:
            key, val = _split_pair(token)
            if key == "Decorator":
                # Several decorators may be attached to one operand; number
                # them so they do not collide in the UniqDict.
                key = "%s-%s" % (key, decorators)
                decorators += 1
            local[key] = val

        key, val = _split_pair(tokens[0])
        self._data["%s-%s" % (key, val)] = local