1
0
mirror of https://github.com/bitdefender/bddisasm.git synced 2024-12-04 21:48:09 +00:00
bddisasm/tests/core/objects.py
2024-09-17 17:51:06 +03:00

247 lines
8.9 KiB
Python

#
# Copyright (c) 2024 Bitdefender
# SPDX-License-Identifier: Apache-2.0
#
import json
import re
import string
import sys
from typing import Optional
class UniqDict(dict):
    """
    Dictionary that refuses to overwrite an existing key through item assignment.

    Only ``d[key] = value`` is overridden here; every other dict operation is
    inherited from ``dict`` as-is.
    """

    def __setitem__(self, key: str, value: object) -> None:
        """
        Stores ``value`` under ``key`` if the key is not present yet.

        Values are not restricted to strings: the parsers in this file also
        store nested dictionaries.

        Raises:
            ValueError: if ``key`` is already present in the dictionary.
        """
        if key in self:
            # One-element tuple so that a tuple key cannot break the formatting.
            raise ValueError("key is already present : %s" % (key,))
        super().__setitem__(key, value)
class DecodeShemuParser:
    """
    Common base for the disasmtool output parsers: a line cursor plus a result dictionary.

    Attributes:
        _obj: the input handed to the constructor. The subclasses in this file
            replace it with a list of lines (``split("\\n")``) before reading.
        _crt: index of the next element of ``_obj`` to be read.
        _data: ``UniqDict`` that collects the parsed key:value pairs.
    """

    def __init__(self, obj: str):
        self._obj = obj
        self._crt = 0
        self._data = UniqDict()

    def rdline(self) -> Optional[str]:
        """
        Reads the line at the current index and advances the index by one.

        Returns:
            The current line, or None when the index is at or past the end
            (the index is left untouched in that case).
        """
        out = self.rdnline()
        if out is not None:
            self._crt += 1
        return out

    def rdnline(self) -> Optional[str]:
        """
        Peeks at the line at the current index without advancing the index.

        Returns:
            The current line, or None when the index is at or past the end.
        """
        # ">=" rather than "==": an index that overshot the end must still
        # report end-of-input instead of raising IndexError.
        if self._crt >= len(self._obj):
            return None
        return self._obj[self._crt]
class ShemuResult(DecodeShemuParser):
    """
    Parsed form of the emulation summary printed by disasmtool.
    The text is parsed as soon as the object is constructed.
    """

    def __init__(self, obj: str):
        DecodeShemuParser.__init__(self, obj)
        self.process()

    def process(self) -> type(None):
        """
        Converts the emulation summary into key:value pairs stored in the result dictionary, e.g.:
        {
        "Emulation terminated with status": "0x0000000a",
        "flags:": "0x80",
        "NOPs": "0",
        "NULLs": "0",
        "total instructions": "2",
        "unique instructions": "1",
        "SHEMU_FLAG-0": "SHEMU_FLAG_HEAVENS_GATE"
        }
        The first non-empty line is the comma separated summary; each non-empty line that
        follows is stored, stripped, under "SHEMU_FLAG-<n>" with n counting from 0.
        """
        # Empty lines are dropped up front, so rdline() only yields None at the end.
        self._obj = [row for row in self._obj.split("\n") if row]

        # Emulation terminated with status 0x0000000a, flags: 0x10, 0 NOPs, 0 NULLs, 10 total instructions, 10 unique instructions
        summary = self.rdline()
        hex_alphabet = string.hexdigits + "x"
        for field in summary.split(","):
            if not field.strip():
                continue
            words = field.strip().split(" ")
            last = words[-1]
            if all(ch in hex_alphabet for ch in last) or last.isnumeric():
                # "<name ...> <value>": the value is the trailing word.
                key, val = " ".join(words[:-1]), last
            else:
                # "<value> <name ...>": the value is the leading word.
                key, val = " ".join(words[1:]), words[0]
            self._data[key] = val

        # Whatever remains is one flag name per line.
        for idx, flag in enumerate(iter(self.rdline, None)):
            self._data["SHEMU_FLAG-%s" % (idx)] = flag.strip()
class ShemuInstrux(DecodeShemuParser):
    """
    Parsed form of one emulated-instruction dump (register state, instruction line, flags)
    printed by disasmtool. The text is parsed as soon as the object is constructed.
    """

    def __init__(self, obj: str):
        DecodeShemuParser.__init__(self, obj)
        self.process()

    @staticmethod
    def _halves(text, sep):
        """Splits text on sep and returns its first two pieces, both stripped."""
        pieces = text.strip().split(sep)
        return pieces[0].strip(), pieces[1].strip()

    def process(self) -> type(None):
        """
        Converts the emulation dump into key:value pairs stored in the result dictionary, e.g.:
        {
        "RAX": "0x0000000000000000",
        "RCX": "0x0000000000000000",
        "RDX": "0x0000000000000000",
        "RBX": "0x0000000000000000",
        "RBP": "0x0000000000000000",
        "RSI": "0x0000000000000000",
        ...
        "R28": "0x0000000000000000",
        "R29": "0x0000000000000000",
        "R30": "0x0000000000000000",
        "R31": "0x0000000000000000",
        "RIP": "0x0000000000200000",
        "RFLAGS": "0x0000000000000202"
        }
        Besides the "NAME = value" pairs, a line containing "IP: " or "PC: " adds "<XX>-INFO",
        "InstructionBytes" and "InstructionText"; a "Detection: " line adds "<name>-<n>";
        any other line containing ":" adds its two-character "XX: d" pairs.
        Reading stops at the first empty line or at the end of the input.
        """
        self._obj = self._obj.split("\n")
        detections = 0
        current = self.rdline()
        while current:
            # Register dump: any number of "NAME = value" pairs on the line.
            if " = " in current:
                for match in re.findall(r"\w+\s*=\s*[0x]*[\da-f]{4,16}", current):
                    name, value = self._halves(match, "=")
                    self._data[name] = value

            if "IP: " in current or "PC: " in current:
                found = re.findall(r"\w\w\s*:\s*0x[\da-f]{16}", current)
                name, value = self._halves(found[0], ":")
                self._data["%s-%s" % (name, "INFO")] = value
                # Second space-separated piece is stored as the bytes, the rest as the text.
                pieces = current.split(' ')
                self._data["InstructionBytes"] = pieces[1].strip()
                self._data["InstructionText"] = " ".join(pieces[2:]).strip()
            elif "Detection: " in current:
                found = re.findall(r"\w+\s*:\s*0x\d{16}", current)
                name, value = self._halves(found[0], ":")
                # Numbered so that several detection lines do not collide.
                self._data["%s-%d" % (name, detections)] = value
                detections += 1
            elif ":" in current:
                for match in re.findall(r"\w\w\s*:\s*\d{1}", current):
                    name, value = self._halves(match, ":")
                    self._data[name] = value

            current = self.rdline()
class DecodeInstrux(DecodeShemuParser):
    """
    Parsed form of one decoded instruction printed by disasmtool.
    The text is parsed as soon as the object is constructed.
    """

    def __init__(self, obj: str):
        DecodeShemuParser.__init__(self, obj)
        self.process()

    @staticmethod
    def _fields(line: str) -> list:
        """Splits a line on ',' and drops the pieces that are empty or only whitespace."""
        return [piece for piece in line.split(",") if piece.strip()]

    @staticmethod
    def _key_value(field: str) -> tuple:
        """
        Splits "key: value" on ':' and returns (key, value), both stripped.
        Only the first two ':'-separated pieces are used; a field without ':'
        raises IndexError.
        """
        parts = field.strip().split(":")
        return parts[0].strip(), parts[1].strip()

    def _store_operand(self, line: str) -> None:
        """
        Parses one (already joined) operand description and stores it as a nested
        dictionary under "<first key>-<first value>", e.g. "Operand-0".
        """
        fields = self._fields(line)
        operand = UniqDict()
        decorators = 0
        for field in fields:
            key, val = self._key_value(field)
            # Several "Decorator" fields may appear on the same operand; number
            # them AFTER the key is known so that they do not collide in the UniqDict.
            if key == "Decorator":
                key = "%s-%s" % (key, decorators)
                decorators += 1
            operand[key] = val
        key, val = self._key_value(fields[0])
        self._data["%s-%s" % (key, val)] = operand

    def process(self) -> None:
        """
        Parses an instruction generated by disasmtool and stores it in a dictionary as key:value pairs as follows:
        {
        "InstructionBytes": "c4e2784900",
        "InstructionText": "LDTILECFG zmmword ptr [rax]",
        "RIP": "0000000000000000",
        "DSIZE": "32",
        "ASIZE": "64",
        "VLEN": "-",
        "ISA Set": "AMX-TILE",
        ...
        "Operand-0": {
        "Operand": "0",
        "Acc": "R-",
        "Type": "Memory",
        "Size": "64",
        "RawSize": "64",
        "Encoding": "M",
        "Segment": "3",
        "Base": "0"
        }
        }
        "Decorator" fields inside an operand are stored as "Decorator-0", "Decorator-1", ...
        Reading stops at the first empty line or at the end of the input.

        Raises:
            ValueError: if the same key is produced twice (see UniqDict).
            IndexError: if a field expected to look like "key: value" has no ':'.
        """
        self._obj = self._obj.split("\n")
        line = self.rdline()
        while line:
            # 0000000000000000 c4e2784900 LDTILECFG zmmword ptr [rax]
            # 0000000000000000 62 db 0x62 (0x80000002)
            if re.search("^[0-9A-F]{16}", line):
                tokens = [piece for piece in line.split(" ") if piece.strip()]
                self._data["InstructionBytes"] = tokens[1].strip()
                self._data["InstructionText"] = " ".join(tokens[2:]).strip()
                self._data["RIP"] = tokens[0].strip()

            # Operand: 0, Acc: RW, Type: Register, Size: 1, RawSize: 1, Encoding: M, RegType: General Purpose,
            # RegSize: 1, RegId: 22, RegCount: 1
            if "Operand:" in line:
                # Fold every following line into this operand until the next
                # "Operand:" line, an empty line, or the end of the input.
                upcoming = self.rdnline()
                while upcoming and "Operand:" not in upcoming:
                    line += self.rdline()
                    upcoming = self.rdnline()
                self._store_operand(line)
            # EVEX Tuple Type: Tuple 1 scalar, 8 bit
            # EVEX Tuple Type: Full
            elif "EVEX" in line:
                # The value may itself contain ',' so the line is not split into fields.
                key, val = self._key_value(line)
                self._data[key] = val
            elif ": " in line:
                for field in self._fields(line):
                    key, val = self._key_value(field)
                    self._data[key] = val
            line = self.rdline()