You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
bddisasm/isagenerator/generate_tables.py

1272 lines
39 KiB

#!/usr/bin/env python3
#
# Copyright (c) 2020 Bitdefender
# SPDX-License-Identifier: Apache-2.0
#
import os
import sys
import re
import copy
import glob
import disasmlib
# Instruction-level flag names (as found in an instruction's Flags list) mapped
# to the ND_FLAG_* identifiers that cdef_instruction writes into the C tables.
flags = {
    'MODRM': 'ND_FLAG_MODRM',
    'II64': 'ND_FLAG_I64',
    'F64': 'ND_FLAG_F64',
    'D64': 'ND_FLAG_D64',
    'O64': 'ND_FLAG_O64',
    'SSECONDB': 'ND_FLAG_SSE_CONDB',
    'COND': 'ND_FLAG_COND',
    'VSIB': 'ND_FLAG_VSIB',
    'MIB': 'ND_FLAG_MIB',
    'LIG': 'ND_FLAG_LIG',
    'WIG': 'ND_FLAG_WIG',
    '3DNOW': 'ND_FLAG_3DNOW',
    'MMASK': 'ND_FLAG_MMASK',
    'NOMZ': 'ND_FLAG_NOMZ',
    'LOCKSP': 'ND_FLAG_LOCK_SPECIAL',
    'NOL0': 'ND_FLAG_NOL0',
    'NOA16': 'ND_FLAG_NOA16',
    'NO66': 'ND_FLAG_NO66',
    'NORIPREL': 'ND_FLAG_NO_RIP_REL',
    'VECT': 'ND_FLAG_VECTOR',
    'S66': 'ND_FLAG_S66',
    'BITBASE': 'ND_FLAG_BITBASE',
    'AG': 'ND_FLAG_AG',
    'SHS': 'ND_FLAG_SHS',
    'MFR': 'ND_FLAG_MFR',
    'CETT': 'ND_FLAG_CETT',
    'SERIAL': 'ND_FLAG_SERIAL',
    'SIBMEM': 'ND_FLAG_SIBMEM',
    'I67': 'ND_FLAG_I67',
    'IER': 'ND_FLAG_IER',
}
# Accepted-prefix names (entries of an instruction's Prefmap) mapped to the
# ND_PREF_* identifiers emitted by cdef_instruction.
prefixes_map = {
    'REP': 'ND_PREF_REP',
    'REPC': 'ND_PREF_REPC',
    'HLE': 'ND_PREF_HLE',
    'BND': 'ND_PREF_BND',
    'LOCK': 'ND_PREF_LOCK',
    'BH': 'ND_PREF_BHINT',
    'XACQUIRE': 'ND_PREF_XACQUIRE',
    'XRELEASE': 'ND_PREF_XRELEASE',
    'HLEWOL': 'ND_PREF_HLE_WO_LOCK',
    'DNT': 'ND_PREF_DNT',
}
# Instruction-level decorator names (entries of an instruction's DecoFlags)
# mapped to the ND_DECO_* identifiers emitted by cdef_instruction.
decorators_map = {
    'MASK': 'ND_DECO_MASK',
    'BROADCAST': 'ND_DECO_BROADCAST',
    'ZERO': 'ND_DECO_ZERO',
    'SAE': 'ND_DECO_SAE',
    'ER': 'ND_DECO_ER',
}
# Per-operand flag names (entries of an operand's Flags) mapped to the
# ND_OPF_* identifiers emitted by cdef_operand.
opflags = {
    'OPDEF': 'ND_OPF_DEFAULT',  # Default operand. Not encoded anywhere.
    'OPSEXO1': 'ND_OPF_SEX_OP1',
    'OPSEXDW': 'ND_OPF_SEX_DWS',
}
# Operand type names (an operand's Type) mapped to the ND_OPT_* identifiers
# emitted by cdef_operand. The explicit operand types come first.
optype = {
    'A': 'ND_OPT_A',
    'B': 'ND_OPT_B',
    'C': 'ND_OPT_C',
    'D': 'ND_OPT_D',
    'E': 'ND_OPT_E',
    'F': 'ND_OPT_F',
    'G': 'ND_OPT_G',
    'H': 'ND_OPT_H',
    'I': 'ND_OPT_I',
    'J': 'ND_OPT_J',
    'K': 'ND_OPT_K',
    'L': 'ND_OPT_L',
    'M': 'ND_OPT_M',
    'N': 'ND_OPT_N',
    'O': 'ND_OPT_O',
    'P': 'ND_OPT_P',
    'Q': 'ND_OPT_Q',
    'R': 'ND_OPT_R',
    'S': 'ND_OPT_S',
    'T': 'ND_OPT_T',
    'U': 'ND_OPT_U',
    'V': 'ND_OPT_V',
    'W': 'ND_OPT_W',
    'X': 'ND_OPT_X',
    'Y': 'ND_OPT_Y',
    'Z': 'ND_OPT_Z',
    'rB': 'ND_OPT_rB',
    'mB': 'ND_OPT_mB',
    'rK': 'ND_OPT_rK',
    'vK': 'ND_OPT_vK',
    'mK': 'ND_OPT_mK',
    'aK': 'ND_OPT_aK',
    'rM': 'ND_OPT_rM',
    'mM': 'ND_OPT_mM',
    'rT': 'ND_OPT_rT',
    'mT': 'ND_OPT_mT',
    'vT': 'ND_OPT_vT',

    # Implicit operands.
    '1': 'ND_OPT_CONST_1',
    'AH': 'ND_OPT_GPR_AH',
    'rAX': 'ND_OPT_GPR_rAX',
    'rCX': 'ND_OPT_GPR_rCX',
    'rDX': 'ND_OPT_GPR_rDX',
    'rBX': 'ND_OPT_GPR_rBX',
    'rSP': 'ND_OPT_GPR_rSP',
    'rBP': 'ND_OPT_GPR_rBP',
    'rSI': 'ND_OPT_GPR_rSI',
    'rDI': 'ND_OPT_GPR_rDI',
    'rR11': 'ND_OPT_GPR_rR11',
    'rIP': 'ND_OPT_RIP',
    'CS': 'ND_OPT_SEG_CS',
    'SS': 'ND_OPT_SEG_SS',
    'DS': 'ND_OPT_SEG_DS',
    'ES': 'ND_OPT_SEG_ES',
    'FS': 'ND_OPT_SEG_FS',
    'GS': 'ND_OPT_SEG_GS',
    'ST(0)': 'ND_OPT_FPU_ST0',
    'ST(i)': 'ND_OPT_FPU_STX',
    'XMM0': 'ND_OPT_SSE_XMM0',

    # Memory operands
    'pBXAL': 'ND_OPT_MEM_rBX_AL',
    'pDI': 'ND_OPT_MEM_rDI',
    'SHS': 'ND_OPT_MEM_SHS',
    'SHS0': 'ND_OPT_MEM_SHS0',
    'SHSP': 'ND_OPT_MEM_SHSP',

    # Special immediates.
    'm2zI': 'ND_OPT_Im2z',

    # System registers, MSRs, XCRs, etc.
    'GDTR': 'ND_OPT_SYS_GDTR',
    'IDTR': 'ND_OPT_SYS_IDTR',
    'LDTR': 'ND_OPT_SYS_LDTR',
    'TR': 'ND_OPT_SYS_TR',
    'CR0': 'ND_OPT_CR_0',
    'XCR': 'ND_OPT_XCR',
    'XCR0': 'ND_OPT_XCR_0',
    'MSR': 'ND_OPT_MSR',
    'FSBASE': 'ND_OPT_MSR_FSBASE',
    'GSBASE': 'ND_OPT_MSR_GSBASE',
    'KGSBASE': 'ND_OPT_MSR_KGSBASE',
    'SCS': 'ND_OPT_MSR_SCS',
    'SEIP': 'ND_OPT_MSR_SEIP',
    'SESP': 'ND_OPT_MSR_SESP',
    'TSC': 'ND_OPT_MSR_TSC',
    'TSCAUX': 'ND_OPT_MSR_TSCAUX',
    'STAR': 'ND_OPT_MSR_STAR',
    'LSTAR': 'ND_OPT_MSR_LSTAR',
    'FMASK': 'ND_OPT_MSR_FMASK',
    'BANK': 'ND_OPT_REG_BANK',
    'X87CONTROL': 'ND_OPT_X87_CONTROL',
    'X87TAG': 'ND_OPT_X87_TAG',
    'X87STATUS': 'ND_OPT_X87_STATUS',
    'MXCSR': 'ND_OPT_MXCSR',
    'PKRU': 'ND_OPT_PKRU',
    'SSP': 'ND_OPT_SSP',
}
# Operand size names (an operand's Size) mapped to the ND_OPS_* identifiers
# emitted by cdef_operand.
opsize = {
    'a': 'ND_OPS_a',
    'b': 'ND_OPS_b',
    'c': 'ND_OPS_c',
    'd': 'ND_OPS_d',
    'dq': 'ND_OPS_dq',
    'e': 'ND_OPS_e',
    'f': 'ND_OPS_f',
    'h': 'ND_OPS_h',
    'n': 'ND_OPS_n',
    'u': 'ND_OPS_u',
    'vm32x': 'ND_OPS_vm32x',
    'vm32y': 'ND_OPS_vm32y',
    'vm32z': 'ND_OPS_vm32z',
    'vm32h': 'ND_OPS_vm32h',
    'vm32n': 'ND_OPS_vm32n',
    'vm64x': 'ND_OPS_vm64x',
    'vm64y': 'ND_OPS_vm64y',
    'vm64z': 'ND_OPS_vm64z',
    'vm64h': 'ND_OPS_vm64h',
    'vm64n': 'ND_OPS_vm64n',
    'mib': 'ND_OPS_mib',
    'v2': 'ND_OPS_v2',
    'v3': 'ND_OPS_v3',
    'v4': 'ND_OPS_v4',
    'v8': 'ND_OPS_v8',
    'oq': 'ND_OPS_oq',
    'p': 'ND_OPS_p',
    'pd': 'ND_OPS_pd',
    'ps': 'ND_OPS_ps',
    'q': 'ND_OPS_q',
    'qq': 'ND_OPS_qq',
    's': 'ND_OPS_s',
    'sd': 'ND_OPS_sd',
    'ss': 'ND_OPS_ss',
    'v': 'ND_OPS_v',
    'w': 'ND_OPS_w',
    'x': 'ND_OPS_x',
    'y': 'ND_OPS_y',
    'yf': 'ND_OPS_yf',
    'z': 'ND_OPS_z',
    '?': 'ND_OPS_unknown',
    '0': 'ND_OPS_0',
    'asz': 'ND_OPS_asz',
    'ssz': 'ND_OPS_ssz',
    'fa': 'ND_OPS_fa',
    'fw': 'ND_OPS_fw',
    'fd': 'ND_OPS_fd',
    'fq': 'ND_OPS_fq',
    'ft': 'ND_OPS_ft',
    'fe': 'ND_OPS_fe',
    'fs': 'ND_OPS_fs',
    'l': 'ND_OPS_l',
    'rx': 'ND_OPS_rx',
    'cl': 'ND_OPS_cl',
    '12': 'ND_OPS_12',
    't': 'ND_OPS_t',
}
# Per-operand decorator spellings (entries of an operand's Decorators) mapped
# to the ND_OPD_* identifiers emitted by cdef_operand.
opdecorators = {
    '{K}': 'ND_OPD_MASK',
    '{z}': 'ND_OPD_Z',
    '{sae}': 'ND_OPD_SAE',
    '{er}': 'ND_OPD_ER',
    '|B32': 'ND_OPD_B32',
    '|B64': 'ND_OPD_B64',
}
# Operand access specifiers (an operand's Access) mapped to the ND_OPA_*
# identifiers emitted by cdef_operand.
accessmap = {
    'R': 'ND_OPA_R',
    'W': 'ND_OPA_W',
    'CR': 'ND_OPA_CR',
    'CW': 'ND_OPA_CW',
    'RW': 'ND_OPA_RW',
    'RCW': 'ND_OPA_RCW',
    'CRW': 'ND_OPA_CRW',
    'CRCW': 'ND_OPA_CRCW',
    'P': 'ND_OPA_P',
    'N': 'ND_OPA_N',
}
# Tuple type names (an instruction's Tuple) mapped to ND_TUPLE_* identifiers.
# cdef_instruction consults this map only for EVEX instructions; a missing
# tuple (None) is emitted as '0'.
tuples = {
    None: '0',
    'fv': 'ND_TUPLE_FV',
    'hv': 'ND_TUPLE_HV',
    'fvm': 'ND_TUPLE_FVM',
    'hvm': 'ND_TUPLE_HVM',
    'qvm': 'ND_TUPLE_QVM',
    'ovm': 'ND_TUPLE_OVM',
    'dup': 'ND_TUPLE_DUP',
    'm128': 'ND_TUPLE_M128',
    't1s8': 'ND_TUPLE_T1S8',
    't1s16': 'ND_TUPLE_T1S16',
    't1s': 'ND_TUPLE_T1S',
    't1f': 'ND_TUPLE_T1F',
    't2': 'ND_TUPLE_T2',
    't4': 'ND_TUPLE_T4',
    't8': 'ND_TUPLE_T8',
    't1_4x': 'ND_TUPLE_T1_4X',
}
# Exception type names (an instruction's ExClass) mapped to the ND_EXT_*
# identifiers emitted by cdef_instruction.
extype = {
    None: '0',

    # SSE/AVX
    '1': 'ND_EXT_1',
    '2': 'ND_EXT_2',
    '3': 'ND_EXT_3',
    '4': 'ND_EXT_4',
    '5': 'ND_EXT_5',
    '6': 'ND_EXT_6',
    '7': 'ND_EXT_7',
    '8': 'ND_EXT_8',
    '9': 'ND_EXT_9',
    '10': 'ND_EXT_10',
    '11': 'ND_EXT_11',
    '12': 'ND_EXT_12',
    '13': 'ND_EXT_13',

    # EVEX
    'E1': 'ND_EXT_E1',
    'E1NF': 'ND_EXT_E1NF',
    'E2': 'ND_EXT_E2',
    'E3': 'ND_EXT_E3',
    'E3NF': 'ND_EXT_E3NF',
    'E4': 'ND_EXT_E4',
    'E4nb': 'ND_EXT_E4nb',
    'E4NF': 'ND_EXT_E4NF',
    'E4NFnb': 'ND_EXT_E4NFnb',
    'E5': 'ND_EXT_E5',
    'E5NF': 'ND_EXT_E5NF',
    'E6': 'ND_EXT_E6',
    'E6NF': 'ND_EXT_E6NF',
    'E7NM': 'ND_EXT_E7NM',
    'E9': 'ND_EXT_E9',
    'E9NF': 'ND_EXT_E9NF',
    'E10': 'ND_EXT_E10',
    'E10NF': 'ND_EXT_E10NF',
    'E11': 'ND_EXT_E11',
    'E12': 'ND_EXT_E12',
    'E12NP': 'ND_EXT_E12NP',

    # Opmask
    'K20': 'ND_EXT_K20',
    'K21': 'ND_EXT_K21',

    # AMX
    'AMX_E1': 'ND_EXT_AMX_E1',
    'AMX_E2': 'ND_EXT_AMX_E2',
    'AMX_E3': 'ND_EXT_AMX_E3',
    'AMX_E4': 'ND_EXT_AMX_E4',
    'AMX_E5': 'ND_EXT_AMX_E5',
    'AMX_E6': 'ND_EXT_AMX_E6',
}
# Operating mode names (entries of an instruction's Modes) mapped to ND_MOD_*
# identifiers. cdef_instruction emits ND_MOD_ANY instead when an instruction
# lists every key of this map.
modes = {
    'r0': 'ND_MOD_R0',
    'r1': 'ND_MOD_R1',
    'r2': 'ND_MOD_R2',
    'r3': 'ND_MOD_R3',
    'real': 'ND_MOD_REAL',
    'v8086': 'ND_MOD_V8086',
    'prot': 'ND_MOD_PROT',
    'compat': 'ND_MOD_COMPAT',
    'long': 'ND_MOD_LONG',
    'smm': 'ND_MOD_SMM',
    'sgx': 'ND_MOD_SGX',
    'tsx': 'ND_MOD_TSX',
    'vmxr': 'ND_MOD_VMXR',
    'vmxn': 'ND_MOD_VMXN',
    'vmxo': 'ND_MOD_VMXO',
}
# Slot index inside a redirection table for every symbolic (non-numeric) key.
# dump_translation_tree_c uses it for table types whose keys are not already
# the slot number.
indexes = {
    "root": 0,
    "None": 0,
    None: 0,

    # modrm.mod
    "mem": 0,
    "reg": 1,

    # mandatory prefixes
    "NP": 0,
    "66": 1,
    "F3": 2,
    "F2": 3,

    # other prefixes
    "rex": 1,
    "rexw": 2,
    "64": 3,
    "aF3": 4,
    "rep": 5,
    "sib": 6,

    # Mode
    "m16": 1,
    "m32": 2,
    "m64": 3,

    # Default data size
    "ds16": 1,
    "ds32": 2,
    "ds64": 3,
    "dds64": 4,
    "fds64": 5,

    # Default address size
    "as16": 1,
    "as32": 2,
    "as64": 3,

    # Vendor redirection.
    "any": 0,
    "intel": 1,
    "amd": 2,
    "geode": 3,
    "cyrix": 4,
}
# Redirection table descriptors, keyed by table type. Each value is
# (ND_ILUT_* constant stored as the table's first member,
#  number of pointer slots in the table,
#  C type name used to declare the table),
# in the order dump_translation_tree_c reads them.
ilut = {
    "root": ("ND_ILUT_ROOT", 1, "ND_TABLE"),
    "opcode": ("ND_ILUT_OPCODE", 256, "ND_TABLE_OPCODE"),
    "opcode_3dnow": ("ND_ILUT_OPCODE_3DNOW", 256, "ND_TABLE_OPCODE"),
    "modrmmod": ("ND_ILUT_MODRM_MOD", 2, "ND_TABLE_MODRM_MOD"),
    "modrmmodpost": ("ND_ILUT_MODRM_MOD", 2, "ND_TABLE_MODRM_MOD"),
    "modrmreg": ("ND_ILUT_MODRM_REG", 8, "ND_TABLE_MODRM_REG"),
    "modrmrm": ("ND_ILUT_MODRM_RM", 8, "ND_TABLE_MODRM_RM"),
    "mprefix": ("ND_ILUT_MAN_PREFIX", 4, "ND_TABLE_MPREFIX"),
    "mode": ("ND_ILUT_MODE", 4, "ND_TABLE_MODE"),
    "dsize": ("ND_ILUT_DSIZE", 6, "ND_TABLE_DSIZE"),
    "asize": ("ND_ILUT_ASIZE", 4, "ND_TABLE_ASIZE"),
    "auxiliary": ("ND_ILUT_AUXILIARY", 6, "ND_TABLE_AUXILIARY"),
    "vendor": ("ND_ILUT_VENDOR", 6, "ND_TABLE_VENDOR"),
    "mmmmm": ("ND_ILUT_VEX_MMMMM", 32, "ND_TABLE_VEX_MMMMM"),
    "pp": ("ND_ILUT_VEX_PP", 4, "ND_TABLE_VEX_PP"),
    "l": ("ND_ILUT_VEX_L", 4, "ND_TABLE_VEX_L"),
    "w": ("ND_ILUT_VEX_W", 2, "ND_TABLE_VEX_W"),
}
# Module-level state. The main script rebinds mnemonics, instructions,
# prefixes and features; cdef_instruction reads mnemonics (for the mnemonic
# index) and features (for the CPUID flag) at call time.
mnemonics = []
# Not referenced anywhere else in the visible part of this file.
mnemonics_prefix = []
instructions = []
prefixes = []
features = []
#
# Convert one operand into its C/C++ representation.
#
def cdef_operand(self):
    """Render this operand as an ``OP(...)`` initializer for the C tables.

    Installed as ``disasmlib.Operand.cdef``. Reads ``Type``, ``Size``,
    ``Flags``, ``Access``, ``Decorators`` and ``Block`` from ``self`` and
    translates them through the module-level lookup maps.

    Returns:
        A string of the form ``OP(type, size, flags, access, decorators, block)``.
        Empty flag or decorator lists are rendered as ``0``.

    Raises:
        KeyError: if any name has no entry in its lookup map.
    """
    # Lookups are done in the same order as the fields appear in the output.
    type_name = optype[self.Type]
    size_name = opsize[self.Size]
    flag_names = '|'.join([opflags[name] for name in self.Flags])
    access_name = accessmap[self.Access]
    deco_names = '|'.join([opdecorators[name] for name in self.Decorators])

    fields = (type_name, size_name, flag_names or '0', access_name, deco_names or 0, self.Block)
    return "OP(%s, %s, %s, %s, %s, %d)" % fields


disasmlib.Operand.cdef = cdef_operand
#
# Convert one instruction into its C/C++ representation.
#
def cdef_instruction(self):
    """Render this instruction as one entry of the generated instruction table.

    Installed as ``disasmlib.Instruction.cdef``. The module-level ``mnemonics``
    and ``features`` lists must be populated before this is called: the
    mnemonic is emitted as its index in ``mnemonics`` and the CPUID flag is
    looked up by name in ``features``.

    Returns:
        The C source text of the entry: a ``// Pos:...`` comment line followed
        by a braced initializer, with no trailing comma.

    Raises:
        KeyError: if a mode, prefix, decorator, flag, tuple, exception type or
            FPU flag access name has no entry in the corresponding map.
        ValueError: if ``self.Mnemonic`` is not present in ``mnemonics``.
    """
    c = ''

    c += ' // Pos:%d Instruction:"%s" Encoding:"%s"/"%s"\n' % \
        (self.Icount, self.__str__(), self.RawEnc, ''.join([x.Encoding for x in self.ExpOps]).replace('S', ''))

    c += ' {\n '

    # Add the instruction class
    c += 'ND_INS_' + self.Class + ', '

    # Add the instruction type
    c += 'ND_CAT_' + self.Category + ', '

    # Add the instruction set
    c += 'ND_SET_' + self.Set + ', '

    # Add the mnemonic index.
    c += '%d, ' % (mnemonics.index(self.Mnemonic))

    c += '\n '

    # Add the valid modes map. An instruction valid in every known mode is
    # emitted as the single ND_MOD_ANY constant.
    if all(m in self.Modes for m in modes):
        c += 'ND_MOD_ANY, '
    else:
        c += '|'.join([modes[m] for m in self.Modes]) + ', '

    c += '\n '

    # Add the prefixes map.
    c += ('|'.join([prefixes_map[x] for x in self.Prefmap]) or '0') + ', '

    # Add the decorators map.
    c += ('|'.join([decorators_map[x] for x in self.DecoFlags]) or '0') + ', '

    # Add the explicit and implicit operands count.
    c += 'ND_OPS_CNT(%d, %d), ' % (len(self.ExpOps), len(self.ImpOps))

    # Derive the exception class from the exception type name.
    # NOTE(review): '0' is accepted here as an SSE/AVX type, but extype has no
    # '0' key, so such an instruction would raise KeyError below.
    exclass = None
    if self.ExClass:
        if self.ExClass in ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13'):
            exclass = 'ND_EXC_SSE_AVX'
        elif self.ExClass in ('K20', 'K21'):
            exclass = 'ND_EXC_OPMASK'
        elif self.ExClass.startswith('AMX_'):
            exclass = 'ND_EXC_AMX'
        else:
            exclass = 'ND_EXC_EVEX'

    if self.Evex:
        # EVEX encoded instructions, store the tuple type.
        c += '%s, ' % (tuples[self.Tuple])
    else:
        c += '0, '

    # Store exception type & class, if any.
    if exclass:
        c += '%s, %s, ' % (extype[self.ExClass], exclass)
    else:
        c += '0, 0, '

    # Add the FPU flags access, if the instruction is fpu: four 2-bit fields,
    # the first entry of FpuFlags in the lowest bits.
    if self.Set == 'X87':
        value = 0
        acc = {'0': 0, '1': 1, 'm': 2, 'u': 3}
        for pos in range(4):
            value |= acc[self.FpuFlags[pos]] << (pos * 2)
        c += '0x%02x, ' % value
    else:
        c += '0, '

    # The 2 reserved fields.
    c += '0, 0, '

    # Add the instruction flags. 'nil' and the per-operand OP1..OP6 entries
    # are not instruction flags and are skipped.
    operand_prefixes = ('OP1', 'OP2', 'OP3', 'OP4', 'OP5', 'OP6')
    fs = '|'.join([flags[x] for x in self.Flags if x != 'nil' and not x.startswith(operand_prefixes)]) or '0'
    c += '%s, ' % fs

    # Store the CPUID flag, if any
    flg = "0"
    for feat in features:
        if feat.Name == self.Id:
            flg = "ND_CFF_%s" % feat.Name
    c += "%s, " % flg

    # Store the accessed flags, if any. The undefined ('u') flags are added to
    # both the set-to-1 and the cleared-to-0 masks.
    for m in ('t', 'm', '1', '0'):
        dst = self.RevFlagsAccess[m]
        if m in ('1', '0'):
            dst = dst + self.RevFlagsAccess['u']
        flg = "0" + ''.join(['|REG_RFLAG_%s' % f.upper() for f in dst])
        c += "\n %s," % flg

    # Add the instruction operands
    for op in self.ExpOps + self.ImpOps:
        c += "\n " + op.cdef() + ", "

    c += '\n }'

    return c


disasmlib.Instruction.cdef = cdef_instruction
#
# Group a list of legacy-encoded instructions by their next redirection level.
#
def group_instructions(ilist):
    """Bucket ``ilist`` by the highest-priority redirection its specs request.

    The first pass chooses the table type (stored under ``"__TYPE__"``); the
    second pass files every instruction under its key for that table type and
    strips the consumed specifier from the instruction's ``Spec``, so that a
    later call on a bucket selects the next level.

    Args:
        ilist: list of instruction objects exposing ``Spec``, ``Flags`` and
            ``ModrmRedirAfterMpref``.

    Returns:
        ``ilist[0]`` when the list holds a single instruction with nothing
        left to redirect on; otherwise a dict with ``"__TYPE__"`` plus one
        list of instructions per key.

    Raises:
        KeyError: if several instructions are given and none of them selects
            a table type.
        Exception: if the selected table type is not handled.
    """
    d = {}
    priorities = ["opcode", "vendor", "modrmmod", "modrmreg", "modrmmodpost", "modrmrm", "mprefix", "mode", "dsize",
                  "asize", "auxiliary", "_"]

    def choose(kind, tail):
        # Select `kind` unless a type outside the last `tail` priorities was already chosen.
        if "__TYPE__" not in d or d["__TYPE__"] in priorities[-tail:]:
            d["__TYPE__"] = kind

    def bucket(key, ins):
        # File `ins` under `key`, creating the bucket on first use.
        if key not in d:
            d[key] = [ins]
        else:
            d[key].append(ins)

    # Spec fields that hold a list of pending redirectors, keyed by table type.
    list_fields = {"mprefix": "mpre", "mode": "mode", "dsize": "dsize", "asize": "asize", "auxiliary": "opre"}

    # Pass 1: decide what kind of table this level is.
    for ins in ilist:
        is3dnow = '3DNOW' in ins.Flags
        spec = ins.Spec

        if spec["opcodes"]:
            # Pending opcode bytes always win, regardless of what was chosen before.
            d["__TYPE__"] = "opcode_3dnow" if is3dnow else "opcode"
        elif spec["mpre"] and ins.ModrmRedirAfterMpref:
            choose("mprefix", 1)
        elif spec["vendor"]:
            choose("vendor", 10)
        elif spec["modrm"]["mod"]:
            choose("modrmmod", 9)
        elif spec["modrm"]["reg"]:
            choose("modrmreg", 8)
        elif spec["modrm"]["modpost"]:
            choose("modrmmodpost", 7)
        elif spec["modrm"]["rm"]:
            choose("modrmrm", 6)
        elif spec["mpre"]:
            choose("mprefix", 5)
        elif spec["mode"]:
            choose("mode", 4)
        elif spec["dsize"]:
            choose("dsize", 3)
        elif spec["asize"]:
            choose("asize", 2)
        elif spec["opre"]:
            choose("auxiliary", 1)
        elif len(ilist) == 1:
            # A lone instruction with nothing left to redirect on is a leaf.
            return ilist[0]

    # Pass 2: distribute the instructions and consume the specifier used.
    for ins in ilist:
        kind = d["__TYPE__"]
        spec = ins.Spec

        if kind in ("opcode", "opcode_3dnow"):
            # Opcode redirection, add the next opcode to the hash, and remove it from the spec.
            bucket(int(spec["opcodes"][0], 16), ins)
            del spec["opcodes"][0]
        elif kind in ("modrmmod", "modrmmodpost"):
            field = "mod" if kind == "modrmmod" else "modpost"
            value = spec["modrm"][field]
            if not value:
                # No mod constraint: the instruction goes under both forms,
                # with an independent copy for the register form.
                bucket("mem", ins)
                bucket("reg", copy.deepcopy(ins))
            else:
                bucket(value, ins)
            spec["modrm"][field] = None
        elif kind in ("modrmreg", "modrmrm"):
            field = "reg" if kind == "modrmreg" else "rm"
            bucket(int(spec["modrm"][field]), ins)
            spec["modrm"][field] = None
        elif kind in list_fields:
            field = list_fields[kind]
            if not spec[field]:
                p = "None"
            else:
                p = spec[field][0]
            bucket(p, ins)
            # Remove the redirector that was just consumed.
            if p != "None":
                del spec[field][0]
        elif kind == "vendor":
            if not spec["vendor"]:
                p = "None"
            else:
                p = spec["vendor"]
            bucket(p, ins)
            # Remove the vendor redirector
            if p != "None":
                spec["vendor"] = None
        else:
            print("Don't know what to do!")
            raise Exception("WTF???")

    return d
def group_instructions_vex_xop_evex(ilist):
d = { }
for i in ilist:
if i.Spec["mmmmm"]:
d["__TYPE__"] = "mmmmm"
elif i.Spec["opcodes"]:
if "__TYPE__" not in d or d["__TYPE__"] in ["w", "l", "pp", "modrmrm", "modrmmodpost", "modrmreg", \
"modrmmod"]:
d["__TYPE__"] = "opcode"
elif i.Spec["pp"]:
if "__TYPE__" not in d or d["__TYPE__"] in ["w", "l", "modrmrm", "modrmmodpost", "modrmreg"]:
d["__TYPE__"] = "pp"
elif i.Spec["modrm"]["mod"]:
if "__TYPE__" not in d or d["__TYPE__"] in ["w", "l", "modrmrm", "modrmmodpost", "modrmreg"]:
d["__TYPE__"] = "modrmmod"
elif i.Spec["modrm"]["reg"]:
if "__TYPE__" not in d or d["__TYPE__"] in ["w", "l", "modrmrm", "modrmmodpost"]:
d["__TYPE__"] = "modrmreg"
elif i.Spec["modrm"]["modpost"]:
if "__TYPE__" not in d or d["__TYPE__"] in ["w", "l", "modrmrm"]:
d["__TYPE__"] = "modrmmodpost"
elif i.Spec["modrm"]["rm"]:
if "__TYPE__" not in d or d["__TYPE__"] in ["w", "l"]:
d["__TYPE__"] = "modrmrm"
elif i.Spec["l"]:
if "__TYPE__" not in d or d["__TYPE__"] in ["w"]:
d["__TYPE__"] = "l"
elif i.Spec["w"]:
if "__TYPE__" not in d:
d["__TYPE__"] = "w"
elif len(ilist) == 1:
return ilist[0]
for i in ilist:
if d["__TYPE__"] == "mmmmm":
if int(i.Spec["mmmmm"], 16) not in d:
d[int(i.Spec["mmmmm"], 16)] = [i]
else:
d[int(i.Spec["mmmmm"], 16)].append(i)
i.Spec["mmmmm"] = None
elif d["__TYPE__"] == "opcode":
# Opcode redirection, add the next opcode to the hash, and remove it from the spec.
if int(i.Spec["opcodes"][0], 16) not in d:
d[int(i.Spec["opcodes"][0], 16)] = [i]
else:
d[int(i.Spec["opcodes"][0], 16)].append(i)
# Remove the opcode for this instruction.
del i.Spec["opcodes"][0]
elif d["__TYPE__"] == "modrmmod":
if not i.Spec["modrm"]["mod"]:
if "mem" not in d:
d["mem"] = [i]
else:
d["mem"].append(i)
if "reg" not in d:
d["reg"] = [copy.deepcopy(i)]
else:
d["reg"].append(copy.deepcopy(i))
else:
if i.Spec["modrm"]["mod"] not in d:
d[i.Spec["modrm"]["mod"]] = [i]
else:
d[i.Spec["modrm"]["mod"]].append(i)
# Remove the mod specifier.
i.Spec["modrm"]["mod"] = None
elif d["__TYPE__"] == "modrmreg":
if int(i.Spec["modrm"]["reg"]) not in d:
d[int(i.Spec["modrm"]["reg"])] = [i]
else:
d[int(i.Spec["modrm"]["reg"])].append(i)
# Remove the reg specifier
i.Spec["modrm"]["reg"] = None
elif d["__TYPE__"] == "modrmmodpost":
if not i.Spec["modrm"]["modpost"]:
if "mem" not in d:
d["mem"] = [i]
else:
d["mem"].append(i)
if "reg" not in d:
d["reg"] = [copy.deepcopy(i)]
else:
d["reg"].append(copy.deepcopy(i))
else:
if i.Spec["modrm"]["modpost"] not in d:
d[i.Spec["modrm"]["modpost"]] = [i]
else:
d[i.Spec["modrm"]["modpost"]].append(i)
# Remove the modpost specifier.
i.Spec["modrm"]["modpost"] = None
elif d["__TYPE__"] == "modrmrm":
if int(i.Spec["modrm"]["rm"]) not in d:
d[int(i.Spec["modrm"]["rm"])] = [i]
else:
d[int(i.Spec["modrm"]["rm"])].append(i)
# Remove the reg specifier
i.Spec["modrm"]["rm"] = None
elif d["__TYPE__"] == "pp":
p = int(i.Spec["pp"])
if p not in d:
d[p] = [i]
else:
d[p].append(i)
# Remove the prefix from the list.
i.Spec["pp"] = None
elif d["__TYPE__"] == "l":
p = int(i.Spec["l"])
if p not in d:
d[p] = [i]
else:
d[p].append(i)
# Remove the prefix from the list.
i.Spec["l"] = None
elif d["__TYPE__"] == "w":
p = int(i.Spec["w"])
if p not in d:
d[p] = [i]
else:
d[p].append(i)
# Remove the prefix from the list.
i.Spec["w"] = None
else:
print("Don't know what to do!")
raise Exception("WTF???")
return d
def build_hash_tree2(t, cbk):
    """Expand every list of instructions in ``t`` into a redirection tree, in place.

    Each list value is replaced by ``cbk(list)``; when the result is a dict
    (a redirection table) it is expanded recursively. Any other value - the
    ``"__TYPE__"`` string or a single instruction returned by ``cbk`` - is a
    leaf and is left as it is.

    Args:
        t: dict to expand; its values are modified in place.
        cbk: grouping function that takes a list of instructions and returns
            either a dict of further lists or a single instruction.
    """
    # The original compared type(disasmlib.Instruction) (the metaclass) with
    # the type of each value to detect leaves; that test could never match, so
    # leaves were already handled by falling through both checks below.
    for k in t:
        if isinstance(t[k], list):
            # List, group the instructions, and recurse.
            t[k] = cbk(t[k])

        if isinstance(t[k], dict):
            build_hash_tree2(t[k], cbk)
def dump_hash_tree2(t, level = 0):
    """Print a redirection tree to stdout, one line per key or leaf.

    Dict nodes print each key together with the node's ``"__TYPE__"``, then
    recurse one level deeper; anything else is printed as a leaf. ``level``
    controls the amount of leading padding.
    """
    if type(t) is not dict:
        print(" " * level, t)
        return

    for key in t:
        if key != "__TYPE__":
            padding = " " * level
            print("%s %s (type: %s)" % (padding, key, t["__TYPE__"]))
            dump_hash_tree2(t[key], level + 1)
#
# Build the four redirection trees and write them out as C headers.
#
def generate_translations2(instructions):
    """Build the legacy, VEX, XOP and EVEX redirection trees and dump them.

    Instructions are split by encoding (``Vex``, then ``Xop``, then ``Evex``,
    otherwise legacy), each group is expanded into a tree with
    ``build_hash_tree2`` and every tree is written to its own header under
    ``../bddisasm/include/`` (relative to the current working directory).

    Args:
        instructions: list of instruction objects; their ``Spec`` is consumed
            while the trees are built.

    Returns:
        ``[hash_st, hash_vex, hash_xop, hash_evex]``, the four tree roots.
    """
    table_st = []
    table_xop = []
    table_vex = []
    table_evex = []

    # Distribute each instruction type into its own table.
    for i in instructions:
        if i.Vex:
            table_vex.append(i)
        elif i.Xop:
            table_xop.append(i)
        elif i.Evex:
            table_evex.append(i)
        else:
            table_st.append(i)

    # Build all trees before anything is written.
    hash_st = {"__TYPE__": "root", "root": table_st}
    build_hash_tree2(hash_st, group_instructions)

    hash_vex = {"__TYPE__": "root", "root": table_vex}
    build_hash_tree2(hash_vex, group_instructions_vex_xop_evex)

    hash_xop = {"__TYPE__": "root", "root": table_xop}
    build_hash_tree2(hash_xop, group_instructions_vex_xop_evex)

    hash_evex = {"__TYPE__": "root", "root": table_evex}
    build_hash_tree2(hash_evex, group_instructions_vex_xop_evex)

    # (header file name, include guard, tree, name of the root table symbol)
    outputs = [
        ('table_root.h', 'TABLE_ROOT_H', hash_st, 'gRootTable'),
        ('table_vex.h', 'TABLE_VEX_H', hash_vex, 'gVexTable'),
        ('table_xop.h', 'TABLE_XOP_H', hash_xop, 'gXopTable'),
        ('table_evex.h', 'TABLE_EVEX_H', hash_evex, 'gEvexTable'),
    ]

    # Dump'em!
    for header, guard, tree, symbol in outputs:
        print('Writing the %s file...' % header)
        # The context manager closes the header even if dumping the tree fails.
        with open(r'../bddisasm/include/' + header, 'wt') as f:
            f.write("#ifndef %s\n" % guard)
            f.write("#define %s\n\n" % guard)
            dump_translation_tree_c(tree, symbol, f)
            f.write("\n#endif\n\n")

    return [hash_st, hash_vex, hash_xop, hash_evex]
def generate_mnemonics(instructions):
    """Return the distinct ``Mnemonic`` values of ``instructions``, sorted."""
    unique = set()
    for ins in instructions:
        unique.add(ins.Mnemonic)
    return sorted(unique)
def generate_constants(lst, pre = False):
    """Return the sorted, de-duplicated constant names for ``lst``.

    With ``pre`` true each item contributes ``'ND_PRE_' + item.Mnemonic``;
    otherwise it contributes ``'ND_INS_' + item.Class``.
    """
    names = set()
    for item in lst:
        names.add('ND_PRE_' + item.Mnemonic if pre else 'ND_INS_' + item.Class)
    return sorted(names)
def generate_constants2(instructions):
    """Return ``(set_names, category_names)`` for ``instructions``.

    Both are sorted lists of distinct names: ``'ND_SET_' + Set`` and
    ``'ND_CAT_' + Category`` respectively.
    """
    set_names = set()
    category_names = set()
    for ins in instructions:
        set_names.add('ND_SET_' + ins.Set)
        category_names.add('ND_CAT_' + ins.Category)
    return sorted(set_names), sorted(category_names)
def dump_mnemonics(mnemonics, prefixes, fname):
    """Write the ``gMnemonics`` and ``gPrefixes`` string tables to ``fname``.

    Args:
        mnemonics: sequence of mnemonic strings, written in the given order.
        prefixes: sequence of prefix strings, written in the given order.
        fname: path of the header to create or overwrite.
    """
    # The context manager closes the file even if a write fails.
    with open(fname, 'wt') as f:
        f.write('#ifndef MNEMONICS_H\n')
        f.write('#define MNEMONICS_H\n')
        f.write('\n')
        f.write('const char *gMnemonics[%d] = \n' % len(mnemonics))
        f.write('{\n')
        f.write(' ')

        # Start a new line once the current one has grown past 60 characters.
        ln = 0
        for m in mnemonics:
            f.write('"%s", ' % m)
            ln += len(m) + 4
            if ln > 60:
                ln = 0
                f.write('\n ')

        f.write('\n};\n\n\n')

        f.write('const char *gPrefixes[%d] = \n' % len(prefixes))
        f.write('{\n')
        f.write(' ')

        # Prefixes are laid out eight per line.
        for count, p in enumerate(prefixes, 1):
            f.write('"%s", ' % p)
            if count % 8 == 0:
                f.write('\n ')

        f.write('\n};\n\n#endif\n\n')
def dump_constants(constants, prefixes, constants_sets, constants_types, fname):
    """Write the instruction class, set and category enums to ``fname``.

    Args:
        constants: names for the ``ND_INS_CLASS`` enum, in output order.
        prefixes: accepted for interface compatibility; not written anywhere.
        constants_sets: names for the ``ND_INS_SET`` enum, in output order.
        constants_types: names for the ``ND_INS_CATEGORY`` enum, in output order.
        fname: path of the header to create or overwrite.
    """
    # The context manager closes the file even if a write fails.
    with open(fname, 'wt') as f:
        f.write('//\n')
        f.write('// This file was auto-generated by generate_tables.py from defs.dat. DO NOT MODIFY!\n')
        f.write('//\n\n')
        f.write('#ifndef CONSTANTS_H\n')
        f.write('#define CONSTANTS_H\n\n')
        f.write('\n')

        # Each enum starts with an explicit *_INVALID = 0 member.
        f.write('typedef enum _ND_INS_CLASS\n')
        f.write('{\n')
        f.write(' ND_INS_INVALID = 0,\n')
        for c in constants:
            f.write(' %s,\n' % c)
        f.write('\n} ND_INS_CLASS;\n\n\n')

        # Now the instruction sets.
        f.write('typedef enum _ND_INS_SET\n')
        f.write('{\n')
        f.write(' ND_SET_INVALID = 0,\n')
        for c in constants_sets:
            f.write(' %s,\n' % c)
        f.write('\n} ND_INS_SET;\n\n\n')

        # Now the instruction types.
        f.write('typedef enum _ND_INS_TYPE\n')
        f.write('{\n')
        f.write(' ND_CAT_INVALID = 0,\n')
        for c in constants_types:
            f.write(' %s,\n' % c)
        f.write('\n} ND_INS_CATEGORY;\n\n\n')

        # Done!
        f.write('\n#endif\n')
def dump_tree(translations, level = 0):
    """Print a nested list structure to stdout, padded by nesting depth.

    Non-list values are printed with ``level`` spaces in front. The children
    of a list are printed one level deeper when the list has a single element
    and two levels deeper otherwise.
    """
    if type(translations) != type([]):
        print('%s%s' % (level * ' ', translations))
        return

    step = 1 if len(translations) == 1 else 2
    for child in translations:
        dump_tree(child, level + step)
def generate_master_table(instructions, fname):
    """Write the ``gInstructions`` array to ``fname``.

    Args:
        instructions: list of objects whose ``cdef()`` returns the C
            initializer text for one entry; entries are written in list order.
        fname: path of the header to create or overwrite.
    """
    # The context manager closes the file even if a cdef() call fails.
    with open(fname, 'wt') as f:
        f.write('//\n')
        f.write('// This file was auto-generated by generate_tables.py from defs.dat. DO NOT MODIFY!\n')
        f.write('//\n\n')
        f.write('#ifndef INSTRUCTIONS_H\n')
        f.write('#define INSTRUCTIONS_H\n')
        f.write('\n')

        f.write('const ND_INSTRUCTION gInstructions[%s] = \n' % len(instructions))
        f.write('{\n')

        for i in instructions:
            f.write('%s, \n\n' % i.cdef())

        f.write('\n};\n')
        f.write('\n#endif\n')
def dump_translation_tree_c(t, hname, f):
    """Write the C definition of redirection tree ``t`` to ``f``, children first.

    Args:
        t: a dict node (``"__TYPE__"`` plus one child per key) or a leaf
            instruction exposing ``Icount``.
        hname: C identifier prefix for this node; child names are derived by
            appending ``_<key>`` (two hex digits for integer keys).
        f: writable text file object.

    Returns:
        The name of the C object that represents this node: ``<hname>_<type>``
        for a table, ``<hname>_leaf`` for an instruction.

    Raises:
        KeyError: if the node type is not in ``ilut`` or a symbolic key is not
            in ``indexes``.
    """
    if not isinstance(t, dict):
        # Instruction, construct a dummy table that directly points to the instruction.
        name = '%s_leaf' % hname
        res = 'const ND_TABLE_INSTRUCTION %s = \n' % name
        res += '{\n'
        res += ' ND_ILUT_INSTRUCTION,\n'
        res += ' (const void *)&gInstructions[%d]\n' % t.Icount
        res += '};\n\n'
        f.write(res)
        return name

    ttype = t["__TYPE__"]
    ilut_const, slot_count, c_type = ilut[ttype]
    pointers = [None] * slot_count
    tname = '%s_%s' % (hname, ttype)

    # Emit the children first, so that each one is defined before this table refers to it.
    for h in t:
        if h == "__TYPE__":
            continue

        if isinstance(h, int):
            name = dump_translation_tree_c(t[h], hname + '_%02x' % h, f)
        else:
            name = dump_translation_tree_c(t[h], hname + '_%s' % h, f)

        # For these table types the key already is the slot number.
        if ttype in ("opcode", "opcode_3dnow", "mmmmm", "pp", "l", "w", "modrmreg", "modrmrm"):
            index = h
        else:
            index = indexes[h]

        try:
            pointers[index] = name
        except (IndexError, TypeError):
            # The key does not fit this table; report it and keep going.
            print(index, name)
            print("fail fail fail", index)

    res = 'const %s %s = \n' % (c_type, tname)
    res += '{\n'
    res += ' %s,\n' % ilut_const
    res += ' { \n'
    for slot, p in enumerate(pointers):
        if not p:
            res += ' /* %02x */ NULL,\n' % slot
        else:
            res += ' /* %02x */ (const void *)&%s,\n' % (slot, p)
    res += ' }\n'
    res += '};\n\n'

    if ttype == "root":
        # The root is not emitted as a table; hname becomes a pointer to the
        # last child that was written.
        f.write("const PND_TABLE %s = (const PND_TABLE)&%s;\n\n" % (hname, name))
    else:
        f.write(res)

    return tname
def generate_features(features, fname):
    """Write the ``ND_CFF_*`` CPUID feature flag macros to ``fname``.

    Args:
        features: iterable of objects exposing ``Name``, ``Leaf``,
            ``SubLeaf``, ``Reg`` and ``Bit``; one ``#define`` is written for
            each, in the given order.
        fname: path of the header to create or overwrite.
    """
    # The original never closed this file; the context manager guarantees the
    # header is flushed and closed before the function returns.
    with open(fname, 'wt') as f:
        f.write('#ifndef CPUID_FLAGS_H\n')
        f.write('#define CPUID_FLAGS_H\n')
        f.write('\n')
        f.write('#define ND_CFF_NO_LEAF 0xFFFFFFFF\n')
        f.write('#define ND_CFF_NO_SUBLEAF 0x00FFFFFF\n')
        f.write('\n')
        f.write('\n')
        f.write('#define ND_CFF(leaf, subleaf, reg, bit) ((uint64_t)(leaf) | ((uint64_t)((subleaf) & 0xFFFFFF) << 32) | ((uint64_t)(reg) << 56) | ((uint64_t)(bit) << 59))\n')
        f.write('\n')

        for c in features:
            # Pad the macro name so that the ND_CFF(...) bodies line up.
            padding = ' ' * (25 - len(c.Name))
            f.write('#define ND_CFF_%s%sND_CFF(%s, %s, %s, %s)\n' % (c.Name, padding, c.Leaf, c.SubLeaf, 'REG_' + c.Reg, c.Bit))

        f.write('\n')
        f.write('#endif // CPUID_FLAGS_H\n')
#
# =============================================================================
# Main
# =============================================================================
#
# Everything below stays at module level on purpose: cdef_instruction reads the
# module-level `mnemonics` and `features` that are assigned here.
#
if __name__ == "__main__":
    if len(sys.argv) < 2:
        print('Usage: %s defs-file' % os.path.basename(sys.argv[0]))
        sys.exit(-1)

    # sys.argv[1] is used as the directory that holds all the .dat files.
    defs_dir = sys.argv[1]

    # Extract the flags.
    print('Loading flags access templates...')
    flagsaccess = disasmlib.parse_flags_file('%s/flags.dat' % defs_dir)

    # Extract the CPUID features.
    print('Loading CPUID feature flags templates...')
    features = disasmlib.parse_cff_file('%s/cpuid.dat' % defs_dir)

    # Extract the prefixes.
    print('Loading prefixes...')
    prefixes = disasmlib.parse_pre_file('%s/prefixes.dat' % defs_dir)

    # Extract the valid modes.
    print('Loading CPU operating modes templates...')
    insmodes = disasmlib.parse_modess_file('%s/modes.dat' % defs_dir)

    # Extract the instructions. glob returns files in arbitrary order and the
    # sort below is stable, so the files are visited in sorted order to keep
    # the position of instructions with equal mnemonics reproducible.
    for fn in sorted(glob.glob('%s/table*.dat' % defs_dir)):
        print('Loading instructions from %s...' % fn)
        instructions = instructions + disasmlib.parse_ins_file(fn, flagsaccess, features, insmodes)

    # Sort the instructions.
    instructions = sorted(instructions, key = lambda x: x.Mnemonic)
    # Icount is the instruction's index in gInstructions; the translation
    # tables refer to instructions through it.
    for position, ins in enumerate(instructions):
        ins.Icount = position

    # Generate the translation tree
    translations = generate_translations2(instructions)

    # Generate the mnemonics
    mnemonics = generate_mnemonics(instructions)
    mnemonics_prefixes = generate_mnemonics(prefixes)

    # Generate the constants
    constants = generate_constants(instructions)
    constants_prefixes = generate_constants(prefixes, True)
    constants_sets, constants_types = generate_constants2(instructions)

    #
    # Dump all data to files.
    #

    # Dump the mnemonics
    print('Writing the mnemonics.h file...')
    dump_mnemonics(mnemonics, mnemonics_prefixes, r'../bddisasm/include/mnemonics.h')

    # Dump the instruction constants
    print('Writing the constants.h (instruction definitions) file...')
    dump_constants(constants, constants_prefixes, constants_sets, constants_types, r'../inc/constants.h')

    print('Writing the instructions.h (main instruction database) file...')
    generate_master_table(instructions, r'../bddisasm/include/instructions.h')

    print('Writing the cpuidflags.h (CPUID feature flags) file...')
    generate_features(features, r'../inc/cpuidflags.h')

    print('Instruction succesfully parsed & header files generated!')