orbital/tools/ida/analyze_user.py
2018-12-21 02:44:24 +01:00

245 lines
7.7 KiB
Python

#!/usr/bin/env python
import idaapi
import idautils
import json
import re
import sys
# Regex patterns
#
# Each pattern is matched against a format string found in the binary and
# captures the function name embedded in it as group 1. Raw strings are used
# so that regex escapes such as `\(` reach the `re` module untouched.

# Used by analyze_functions() together with rename_function_outside().
patterns_function_outside = [
    # Found in safemode.elf
    r"^([0-9A-Za-z:_]+) failed! \(result:%#x\)\n$",
    r"^([0-9A-Za-z:_]+)\([0-9A-Za-z_, ]*\) : %x\n$",
]

# Used by analyze_functions() together with rename_function_inside().
patterns_function_inside = [
    # Found in safemode.elf
    r"^([0-9A-Za-z:_]+) failed! \(standby page is NULL\)\n$",
]

# Binary patterns
#
# Byte patterns passed to idc.FindBinary(); `??` is a wildcard byte.
patterns_syscall = [
    # mov rax, 0xXXXX; mov r10, rcx; syscall; jb $+0x5
    "48 C7 C0 ?? ?? 00 00 49 89 CA 0F 05",
]
### Utilities ###
def get_file_path(name):
    """
    Return the path of `name` inside the directory of the running script.
    The script location is taken from sys.argv[0], with symlinks resolved.
    """
    # Imported locally: the module-level imports of this file do not
    # include `os`, so the function must not depend on the host
    # environment having injected it.
    import os

    script_path = os.path.realpath(sys.argv[0])
    script_base = os.path.dirname(script_path)
    return os.path.join(script_base, name)
### Helpers ###
def get_last_direct_call(block, strict=True):
    """
    Iterate the instructions of a block backwards until the first direct call.

    Returns the second code reference of that call instruction, which is
    taken here to be the call target (NOTE(review): presumably the first
    reference is the ordinary flow to the next instruction - confirm).

    Strict-mode ensures that the match is actually the last call in the
    block: if the last call carries no such reference, 0x0 (NULL) is returned
    to distinguish it from the no-call scenario. In non-strict mode the scan
    continues with earlier calls instead.

    Returns BADADDR if no matching call was found.
    """
    for head in reversed(list(Heads(block.startEA, block.endEA))):
        instr = idautils.DecodeInstruction(head)
        # DecodeInstruction returns None for heads it cannot decode;
        # such heads cannot be calls, so skip them instead of crashing.
        if instr is None:
            continue
        if instr.get_canon_mnem() != "call":
            continue
        refs = list(CodeRefsFrom(head, 1))
        if len(refs) > 1:
            return refs[1]
        if strict:
            return 0x0
    return BADADDR
def get_predecessors(blocks, blacklist=None):
    """
    Get set of predecessor blocks of a given set of blocks.
    Optionally, it can be filtered with set of blacklisted blocks.

    `blocks` is any iterable of objects exposing a `preds()` method.
    `blacklist` is any iterable of blocks to exclude; None (the default)
    or an empty collection disables filtering. Neither argument is modified.
    """
    preds = set()
    for block in blocks:
        preds.update(block.preds())
    # `preds` is a fresh local set, so in-place removal is safe here.
    if blacklist:
        preds.difference_update(blacklist)
    return preds
def _unvisited_predecessors(blocks, visited):
    """
    Map start address -> block for every predecessor of `blocks` whose
    start address is not contained in `visited`.
    """
    result = {}
    for block in blocks:
        for pred in block.preds():
            if pred.startEA not in visited:
                result[pred.startEA] = pred
    return result


def rename_function_outside(name, string_ea):
    """
    Rename the function that is called right before the string at
    `string_ea` gets referenced.

    For every xref to the string, the flow chart of the referencing function
    is walked backwards, one layer of predecessor blocks at a time, starting
    at the predecessors of the block containing the xref. The walk stops at
    the first layer in which some block contains a call (see
    get_last_direct_call) or when no unvisited predecessors remain.

    The function is renamed only if exactly one call target was collected
    across all xrefs; otherwise a diagnostic is printed and nothing changes.
    """
    functions = set()

    # Get set of functions called prior to basic blocks that xref the string
    for instr_xref in XrefsTo(string_ea):
        instr_ea = instr_xref.frm
        func = idaapi.get_func(instr_ea)
        if not func:
            continue
        cfg = idaapi.FlowChart(func, flags=ida_gdl.FC_PREDS)

        # Visited blocks are tracked by start address rather than by block
        # object, so that termination on cyclic flow charts does not depend
        # on how block objects hash/compare.
        # NOTE(review): FlowChart appears to hand out a fresh wrapper object
        # per lookup, which would defeat an object-based blacklist - confirm.
        visited = set()
        frontier = {}

        # Get predecessor block(s) of the block containing the xref
        for block in cfg:
            if block.startEA <= instr_ea < block.endEA:
                visited.add(block.startEA)
                frontier = _unvisited_predecessors([block], visited)
                break

        # Scan predecessors layer by layer for last direct calls
        while frontier:
            found = False
            for pred in frontier.values():
                target_ea = get_last_direct_call(pred)
                if target_ea == BADADDR:
                    # No call in this block: keep looking further back.
                    continue
                # A call was seen. 0x0 marks a call without a usable
                # target: it ends the search but yields no candidate.
                found = True
                if target_ea != 0x0:
                    functions.add(target_ea)
            if found:
                break
            visited.update(frontier.keys())
            frontier = _unvisited_predecessors(frontier.values(), visited)

    # Ensure we only have exactly one candidate function
    if len(functions) != 1:
        print("None or multiple candidates detected @ string:0x%X" % (string_ea))
        return

    # Rename the candidate function
    func_ea = next(iter(functions))
    print("Renaming function 0x%X to %s" % (func_ea, name))
    idc.MakeNameEx(func_ea, name, idc.SN_NOWARN)
def rename_function_inside(name, string_ea):
    """
    Rename the function that contains the reference(s) to the string at
    `string_ea`.

    The function is renamed only if all xrefs that lie inside a function
    belong to one and the same function; otherwise a diagnostic is printed
    and nothing changes.
    """
    functions = set()

    # Get set of functions that contain xrefs to the string
    for instr_xref in XrefsTo(string_ea):
        func = idaapi.get_func(instr_xref.frm)
        # Xrefs that are not part of any function yield None; skip them,
        # as rename_function_outside does.
        if not func:
            continue
        functions.add(func.startEA)

    # Ensure we only have exactly one candidate function
    if len(functions) != 1:
        print("None or multiple candidates detected @ string:0x%X" % (string_ea))
        return

    # Rename the candidate function
    func_ea = next(iter(functions))
    print("Renaming function 0x%X to %s" % (func_ea, name))
    idc.MakeNameEx(func_ea, name, idc.SN_NOWARN)
### Analysis ###
def analyze_functions():
    """
    Reconstruct function names from strings.

    Every string reported by idautils.Strings() is matched against each
    pattern list; on a match, capture group 1 is the name handed to the
    renaming routine paired with that list, along with the string address.
    """
    stages = (
        (patterns_function_outside, rename_function_outside),
        (patterns_function_inside, rename_function_inside),
    )
    for patterns, rename in stages:
        for pattern in patterns:
            # The string list is re-enumerated for every pattern.
            for string in idautils.Strings():
                match = re.match(pattern, str(string))
                if match is None:
                    continue
                rename(match.group(1), string.ea)
def analyze_syscalls():
    """
    Detect syscall wrappers and rename them using db_syscalls.json.

    The database, loaded from next to the script, is looked up with the
    decimal string of the dword read at offset 3 of each match. Only
    matches located exactly at the start of a function are considered.
    """
    db_path = get_file_path('db_syscalls.json')
    with open(db_path, 'r') as db_file:
        syscall_db = json.load(db_file)

    # Detect and rename syscall wrappers
    for pattern in patterns_syscall:
        cursor = 0x0
        while True:
            cursor = idc.FindBinary(cursor + 1, idc.SEARCH_DOWN, pattern)
            if cursor == BADADDR:
                break
            func = idaapi.get_func(cursor)
            if func and func.startEA == cursor:
                number = Dword(cursor + 0x3)
                label = syscall_db.get(str(number), None)
                if label:
                    idc.MakeNameEx(cursor, str(label), idc.SN_NOWARN)
def analyze_nids():
    """
    Placeholder for NID-based analysis; currently does nothing.
    """
    # TODO: Not yet implemented, boot userland executables don't use them.
    return None
def analyze_qwords():
    """
    Turn every potential user pointer into a qword item.

    Each segment is scanned in 8-byte steps from its start. A location is
    converted with create_qword() when its current item is smaller than
    8 bytes and the qword stored there lies within the address range
    spanned by all segments.
    """
    # Get user boundaries
    bounds = [(SegStart(ea), SegEnd(ea)) for ea in Segments()]
    if not bounds:
        return
    user_start = min(start for start, _ in bounds)
    user_stop = max(stop for _, stop in bounds)

    # Transform every potential user pointer to a qword
    for seg_start, seg_stop in bounds:
        # Plain counter instead of range(): avoids building the full address
        # list on Python 2. The `+ 8` bound keeps the last read from
        # running past the end of the segment.
        ea = seg_start
        while ea + 8 <= seg_stop:
            if get_item_size(ea) < 8:
                value = get_qword(ea)
                if user_start <= value < user_stop:
                    create_qword(ea)
            ea += 8
def analyze_prologues():
    """
    Create functions at prologue byte patterns that belong to no function.

    Every code segment is searched for the prologue pattern; for each match
    inside the segment that idaapi.get_func() does not attribute to a
    function, idc.MakeFunction() is invoked.
    """
    # Target prologue: push rbp; mov rbp, rsp
    # NOTE(review): the pattern stops after the `48 89` bytes and leaves the
    # final byte of the mov open - confirm that this is intentional.
    pattern = "55 48 89"

    # For each user code segment
    for seg_ea in Segments():
        if ida_segment.segtype(seg_ea) != SEG_CODE:
            continue
        user_start = SegStart(seg_ea)
        user_stop = SegEnd(seg_ea)

        # Find solitary prologues, beginning at the segment start itself
        search_from = user_start
        while True:
            ea = idc.FindBinary(search_from, idc.SEARCH_DOWN, pattern)
            # The segment end is exclusive, so a match at `user_stop` already
            # lies outside. This relies on the search's "not found" result
            # comparing above every segment end, as the original did.
            if ea >= user_stop:
                break
            if idaapi.get_func(ea) is None:
                idc.MakeFunction(ea)
            search_from = ea + 1
def analyze_types():
    """
    Apply function prototypes from db_types.json to named functions.

    The database, loaded from next to the script, maps function names to
    declaration strings. Functions in code segments whose name starts with
    'sub_' or is missing from the database are skipped.
    """
    db_path = get_file_path('db_types.json')
    with open(db_path, 'r') as db_file:
        type_db = json.load(db_file)

    parse_flags = 1 | 2 | 4  # PT_SIL | PT_NDC | PT_TYP

    for seg_ea in Segments():
        if ida_segment.segtype(seg_ea) != SEG_CODE:
            continue
        for func_addr in Functions(SegStart(seg_ea), SegEnd(seg_ea)):
            func_name = GetFunctionName(func_addr)
            if func_name.startswith('sub_'):
                continue
            decl = type_db.get(func_name, None)
            if not decl:
                continue
            decl = str(decl)
            parsed = parse_decl(decl, parse_flags)
            if not parsed:
                print("Failed to apply type: %s" % decl)
                continue
            # Elements 1 and 2 of the parse result are forwarded unchanged.
            ida_typeinf.apply_type(
                None, parsed[1], parsed[2], func_addr, TINFO_DEFINITE)
### Main ###
def main():
    """
    Run all analysis stages in sequence.

    The order of the stages is not arbitrary, it follows two rules:
     1. Analysis dependencies must be considered,
        e.g. first detect functions, then process functions.
     2. Analyses with a higher success rate go last,
        e.g. first do pattern based search, then rename syscalls.
    """
    stages = (
        # Detection stage
        analyze_qwords,
        analyze_prologues,
        # Assigning names
        analyze_functions,
        analyze_syscalls,
        analyze_nids,
        # Assigning types
        analyze_types,
    )
    for stage in stages:
        stage()


# Run the analysis only when executed as a script, not when imported.
if __name__ == "__main__":
    main()