mirror of
https://github.com/bitdefender/bddisasm.git
synced 2025-01-11 15:40:55 +00:00
220 lines
7.7 KiB
Python
220 lines
7.7 KiB
Python
|
#
|
||
|
# Copyright (c) 2024 Bitdefender
|
||
|
# SPDX-License-Identifier: Apache-2.0
|
||
|
#
|
||
|
import json
import re
import string
import sys
from typing import Optional
|
||
|
|
||
|
|
||
|
class UniqDict(dict):
    """
    Dictionary that refuses to overwrite an existing key through item assignment.

    Only ``d[key] = value`` is guarded. The other ``dict`` mutators (``update``,
    ``setdefault``, the constructor) are inherited unchanged and do not perform
    the uniqueness check.
    """

    def __setitem__(self, key: object, value: object) -> None:
        """
        Stores value under key.

        Raises ValueError if key is already present; the existing value is left untouched.
        """
        if key in self:
            # The key is wrapped in a 1-tuple so that a tuple key is formatted as one value
            # instead of being unpacked as several %-format arguments.
            raise ValueError("key is already present : %s" % (key,))

        super().__setitem__(key, value)
|
||
|
|
||
|
|
||
|
class DecodeShemuParser(object):
    """
    Common base for the disasmtool output parsers.

    Keeps the text to parse (self._obj), a read cursor (self._crt) and the
    dictionary in which the parsed key:value pairs are collected (self._data).
    """

    def __init__(self, obj: str):
        """
        Stores the text to be parsed, resets the cursor and creates an empty result dictionary.
        """
        self._obj = obj
        self._crt = 0
        self._data = UniqDict()

    def rdline(self) -> Optional[str]:
        """
        Reads the line found at the current index and advances the index to the next line.

        Returns None, without moving the index, when there is nothing left to read.
        """
        out = self.rdnline()
        if out is not None:
            self._crt += 1

        return out

    def rdnline(self) -> Optional[str]:
        """
        Reads the line found at the current index without advancing the index (a peek).

        Returns None when the index is at or past the end of the input.
        self._obj is indexed as-is, so if it is still a plain string (not yet split
        into a list of lines) a single character is returned.
        """
        # ">=" rather than "==" so that an index that overshot the end cannot raise IndexError.
        if self._crt >= len(self._obj):
            return None

        return self._obj[self._crt]
|
||
|
|
||
|
|
||
|
class ShemuResult(DecodeShemuParser):
    """
    Parser for the emulation summary printed by disasmtool.

    The input is parsed as soon as the object is constructed.
    """

    def __init__(self, obj: str):
        DecodeShemuParser.__init__(self, obj)

        self.process()

    def process(self) -> None:
        """
        Parses the emulation result generated by disasmtool and stores it in a dictionary as key:value pair as follows:
        {
            "Emulation terminated with status": "0x0000000a",
            "flags:": "0x80",
            "NOPs": "0",
            "NULLs": "0",
            "total instructions": "2",
            "unique instructions": "1",
            "SHEMU_FLAG-0": "SHEMU_FLAG_HEAVENS_GATE"
        }

        The first non-empty line is the comma separated summary; every following
        non-empty line is stored, stripped, under "SHEMU_FLAG-<index>".
        Empty input leaves the dictionary empty.
        A repeated key raises ValueError (raised by the UniqDict that holds the data).
        """
        # Blank lines are dropped up front, so the reads below only stop at the end of the input.
        self._obj = [entry for entry in self._obj.split("\n") if entry]

        # Emulation terminated with status 0x0000000a, flags: 0x10, 0 NOPs, 0 NULLs, 10 total instructions, 10 unique instructions
        line = self.rdline()
        if line is None:
            # Nothing to parse: produce an empty result instead of failing on None.
            return

        value_chars = string.hexdigits + "x"
        for token in line.split(","):
            stripped = token.strip()
            if not stripped:
                continue

            words = stripped.split(" ")
            last = words[-1]
            if all(c in value_chars for c in last) or last.isnumeric():
                # "<name ...> <value>", e.g. "flags: 0x10"
                val = last
                key = " ".join(words[:-1])
            else:
                # "<value> <name ...>", e.g. "10 total instructions"
                val = words[0]
                key = " ".join(words[1:])
            self._data[key] = val

        cnt = 0
        line = self.rdline()
        while line:
            self._data["SHEMU_FLAG-%s" % (cnt)] = line.strip()
            cnt += 1
            line = self.rdline()
|
||
|
|
||
|
|
||
|
class ShemuInstrux(DecodeShemuParser):
    """
    Parser for the register dump printed by disasmtool after an emulation.

    The input is parsed as soon as the object is constructed.
    """

    # Two word characters, "=", then a 16 digit hexadecimal value.
    # The digits must be matched as hexadecimal: "\d" would skip every value containing a-f.
    _REGISTER_VALUE = re.compile(r"\w\w\s*=\s*0x[0-9a-fA-F]{16}")

    def __init__(self, obj: str):
        DecodeShemuParser.__init__(self, obj)

        self.process()

    def process(self) -> None:
        """
        Parses the emulation result generated by disasmtool and stores it in a dictionary as key:value pair as follows:
        {
            "AX": "0x0000000000000000",
            "CX": "0x0000000000000000",
            "DX": "0x0000000000000000",
            "BX": "0x0000000000000000",
            "BP": "0x0000000000000000",
            "SI": "0x0000000000000000",
            ...
            "28": "0x0000000000000000",
            "29": "0x0000000000000000",
            "30": "0x0000000000000000",
            "31": "0x0000000000000000",
            "IP": "0x0000000000200000",
            "GS": "0x0000000000000202"
        }

        The key is made of the last two word characters found before the "=" sign.
        Parsing stops at the first empty line or at the end of the input.
        A repeated key raises ValueError (raised by the UniqDict that holds the data).
        """
        self._obj = self._obj.split("\n")
        line = self.rdline()
        # An empty string is falsy, so a blank line ends the loop just like the end of the input.
        while line:
            # X0 = 0x0000000000000000 X1 = 0x0000000000000000 X2 = 0x0000000000000000 X3 = 0x0000000000000000
            if " = " in line:
                for token in self._REGISTER_VALUE.findall(line):
                    # The pattern guarantees exactly one "=" in every match.
                    name, _, value = token.partition("=")
                    self._data[name.strip()] = value.strip()
            line = self.rdline()
|
||
|
|
||
|
|
||
|
class DecodeInstrux(DecodeShemuParser):
    """
    Parser for the description of one decoded instruction printed by disasmtool.

    The input is parsed as soon as the object is constructed.
    """

    def __init__(self, obj: str):
        DecodeShemuParser.__init__(self, obj)

        self.process()

    @staticmethod
    def _split_pair(token: str):
        """
        Splits a "key: value" token and returns the stripped (key, value) pair.

        Only the text between the first and the second ":" is returned as value.
        Raises IndexError if the token contains no ":".
        """
        parts = token.strip().split(":")
        return parts[0].strip(), parts[1].strip()

    def process(self) -> None:
        """
        Parses an instruction generated by disasmtool and stores it in a dictionary as key:value pair as follows:
        {
            "InstructionBytes": "c4e2784900",
            "InstructionText": "LDTILECFG zmmword ptr [rax]",
            "RIP": "0000000000000000",
            "DSIZE": "32",
            "ASIZE": "64",
            "VLEN": "-",
            "ISA Set": "AMX-TILE",
            ...
            "Operand-0": {
                "Operand": "0",
                "Acc": "R-",
                "Type": "Memory",
                "Size": "64",
                "RawSize": "64",
                "Encoding": "M",
                "Segment": "3",
                "Base": "0"
            }
        }

        Inside an operand, every "Decorator" entry is stored under "Decorator-<index>",
        with the index starting at 0 for each operand.
        Parsing stops at the first empty line or at the end of the input.
        A repeated key raises ValueError (raised by the UniqDict that holds the data).
        """
        self._obj = self._obj.split("\n")

        line = self.rdline()
        # An empty string is falsy, so a blank line ends the loop just like the end of the input.
        while line:
            # 0000000000000000 c4e2784900                      LDTILECFG zmmword ptr [rax]
            # 0000000000000000 62                              db 0x62 (0x80000002)
            if re.search("^[0-9A-F]{16}", line):
                tokens = [item for item in line.split(" ") if item.strip()]
                self._data["InstructionBytes"] = tokens[1].strip()
                self._data["InstructionText"] = " ".join(tokens[2:]).strip()
                self._data["RIP"] = tokens[0].strip()

            # Operand: 0, Acc: RW, Type: Register, Size: 1, RawSize: 1, Encoding: M, RegType: General Purpose,
            #     RegSize: 1, RegId: 22, RegCount: 1
            if "Operand:" in line:
                # An operand may span several lines: everything up to the next "Operand:" line
                # (or the end of the input / a blank line) is appended to it, with no separator added.
                while self.rdnline() and "Operand:" not in self.rdnline():
                    line += self.rdline()

                local = UniqDict()
                tokens = [item for item in line.split(",") if item.strip()]
                dc = 0
                for token in tokens:
                    key, val = self._split_pair(token)
                    # The rename must happen after the key of the current token is known,
                    # otherwise a second decorator would collide with the first one.
                    if key == "Decorator":
                        key = "%s-%s" % (key, dc)
                        dc += 1
                    local[key] = val

                # The first pair ("Operand: <n>") names the entry in the result, e.g. "Operand-0".
                key, val = self._split_pair(tokens[0])
                self._data["%s-%s" % (key, val)] = local

            # EVEX Tuple Type: Tuple 1 scalar, 8 bit
            # EVEX Tuple Type: Full
            elif "EVEX" in line:
                # The value itself may contain commas, so this line is split on ":" only.
                tokens = line.split(":")
                key = tokens[0].strip()
                val = tokens[1].strip()
                self._data[key] = val

            elif ": " in line:
                for token in line.split(","):
                    if not token.strip():
                        continue
                    key, val = self._split_pair(token)
                    self._data[key] = val

            line = self.rdline()
|