123 lines
2.7 KiB
Python
123 lines
2.7 KiB
Python
|
from idc import *
|
||
|
import sys
|
||
|
import binascii
|
||
|
from sets import Set
|
||
|
import sys
|
||
|
import re
|
||
|
|
||
|
# Matches any run of whitespace (spaces, tabs, newlines, ...).
p_spaces = re.compile(r'(\s+)')


def ida_disasm_fix(insn_binary, insn_str):
    """Return *insn_str* with every whitespace run collapsed to one space.

    Tabs and repeated spaces in the disassembly text are replaced by a
    single space character.  Leading and trailing whitespace is collapsed
    as well, not removed.

    insn_binary -- raw instruction bytes; accepted for interface
                   compatibility but not used by this function.
    insn_str    -- disassembly text to normalize.
    """
    return p_spaces.sub(r' ', insn_str)
|
||
|
|
||
|
def get_insn(ea, sz):
    """Read *sz* bytes starting at address *ea* and return them as a string.

    Each byte is fetched with ``Byte()`` and converted with ``chr()``, so
    the result has one character per byte.  A non-positive *sz* yields an
    empty string.
    """
    return ''.join(chr(Byte(ea + offset)) for offset in range(sz))
|
||
|
|
||
|
def insn_write(f, insn_binary, insn_str, header):
    """Write one instruction record to *f* and flush it.

    f           -- writable file-like object.
    insn_binary -- non-empty string of raw instruction bytes.
    insn_str    -- disassembly text for the instruction.
    header      -- when true, emit a C-style initializer line of the form
                   ``{<size>, "\\xNN...", "<disasm>"},``; otherwise emit
                   ``<hex bytes> <disasm>``.

    Raises AssertionError if *insn_binary* is empty.
    """
    assert len(insn_binary) != 0

    if header:
        # One "\xNN" escape per instruction byte.
        escaped = ''.join('\\x%02x' % ord(ch) for ch in insn_binary)
        line = ('{%d, "' % len(insn_binary)) + escaped + '", "' + insn_str + '"},\n'
    else:
        line = binascii.hexlify(insn_binary) + (' %s\n' % insn_str)

    f.write(line)
    # Flush after every record so partial results survive an interrupted run.
    f.flush()
|
||
|
|
||
|
# Normalize operand (replace numeric operands with -1)
|
||
|
def normalize_operand(op_type, op_str):
    """Return a placeholder for numeric operands, else the operand text.

    Operands whose type is one of ``o_mem``, ``o_displ``, ``o_imm``,
    ``o_near`` or ``o_far`` are replaced by the string ``"-1"`` so that
    instructions differing only in such values compare equal.  Any other
    operand type returns *op_str* unchanged.
    """
    numeric_types = (o_mem, o_displ, o_imm, o_near, o_far)
    return "-1" if op_type in numeric_types else op_str
|
||
|
|
||
|
def is_unique(set, ea):
    """Report whether the instruction at *ea* has a not-yet-seen signature.

    The signature is the mnemonic followed by the first three operands,
    each passed through ``normalize_operand`` and joined with ``|``.

    set -- mutable collection of signatures seen so far (note: the
           parameter name shadows the builtin).  A new signature is added
           to it as a side effect.
    ea  -- address of the instruction to examine.

    Returns True when the signature was new, False when it was already
    present.
    """
    operand_slots = range(3)
    op_types = [GetOpType(ea, slot) for slot in operand_slots]
    op_texts = [GetOpnd(ea, slot) for slot in operand_slots]
    mnemonic = GetMnem(ea)

    normalized = [normalize_operand(kind, text)
                  for kind, text in zip(op_types, op_texts)]
    signature = "%s|%s|%s|%s" % tuple([mnemonic] + normalized)

    if signature not in set:
        set.add(signature)
        return True
    return False
|
||
|
|
||
|
def iteration(f, set, ea, n, data, prev_mnem):
    """Try one 32-bit encoding at *ea* and record it if it is new.

    The dword *data* is patched at *ea* and converted to code.  When that
    yields an instruction whose normalized signature has not been seen for
    the current mnemonic, the instruction bytes and cleaned-up disassembly
    are appended to *f*.

    f         -- output file object passed on to ``insn_write``.
    set       -- signature cache (shadows the builtin); cleared whenever
                 the mnemonic differs from *prev_mnem*.
    ea        -- address used as scratch space for the encoding.
    n         -- number of instructions written so far.
    data      -- 32-bit value to patch at *ea*.
    prev_mnem -- mnemonic seen by the previous call.

    Returns a ``(mnemonic, n)`` tuple carrying the state for the next call.
    """
    PatchDword(ea, data)
    insn_len = MakeCode(ea)
    if insn_len == 0:
        # Nothing was created at ea; state is unchanged.
        return (prev_mnem, n)

    text = GetDisasm(ea)
    current_mnem = GetMnem(ea)
    if current_mnem != prev_mnem:
        # Opcode changed, purge cache set
        set.clear()
        prev_mnem = current_mnem

    # Drop any trailing ';' comment, then spaces at both ends.
    text = text.partition(';')[0].strip(' ')

    if text == '' or not is_unique(set, ea):
        return (prev_mnem, n)

    # Add unique disasms to file
    raw_bytes = get_insn(ea, insn_len)
    insn_write(f, raw_bytes, ida_disasm_fix(raw_bytes, text), False)

    n += 1
    if n % 1000 == 0:
        print('%d opcodes processed' % n)
    return (prev_mnem, n)
|
||
|
|
||
|
def generate_arm_thumb(filename):
    """Enumerate Thumb encodings at the entry point and dump unique ones.

    Every 16-bit value is tried as the first halfword of an instruction.
    When its top five bits (bits 15:11) are 0b11101, 0b11110 or 0b11111 the
    halfword introduces a 32-bit encoding, so every 16-bit second halfword
    is tried as well; otherwise the second halfword is filled with 0xffff.
    Each candidate is handed to ``iteration``, which writes the unique
    disassemblies to *filename*.

    filename -- path of the text file to create (overwritten if present).

    The bytes at the database entry point are patched as a side effect.
    """
    seen = set()
    ea = GetEntryPoint(GetEntryOrdinal(0))

    # Overwrite the first 20 bytes at the entry point with the values
    # 0..19 before enumeration starts.
    for i in range(0, 20):
        PatchByte(ea + i, i)

    mnem = ''
    n = 0
    f = open(filename, 'wt')
    try:
        for data in range(0x10000):
            # The prefix lives in the first halfword only, so shift by
            # 16 - 5; shifting a 16-bit value by 32 - 5 would always be 0.
            prefix = data >> (16 - 5)
            if prefix in (0x1d, 0x1e, 0x1f):
                for data2 in range(0x10000):
                    (mnem, n) = iteration(f, seen, ea, n,
                                          data | (data2 << 16), mnem)
            else:
                # iteration returns (mnemonic, count); unpack both so the
                # counter advances and the mnemonic comparison stays valid.
                (mnem, n) = iteration(f, seen, ea, n,
                                      data | 0xffff0000, mnem)
    finally:
        # Close the output even if enumeration is interrupted by an error.
        f.close()

    print('Finished')
|
||
|
|
||
|
def main():
    """Generate the Thumb opcode listing into ``./thumb-opcodes.txt``."""
    output_path = "./thumb-opcodes.txt"
    generate_arm_thumb(output_path)
|
||
|
|
||
|
# Run the generator only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
|
||
|
|
||
|
|