mirror of
https://github.com/AlexAltea/orbital.git
synced 2025-04-02 10:32:05 -04:00
245 lines
7.7 KiB
Python
245 lines
7.7 KiB
Python
#!/usr/bin/env python
|
|
|
|
import idaapi
|
|
import idautils
|
|
import json
|
|
import re
|
|
import sys
|
|
|
|
# Regex patterns
# Each pattern captures a function name in group 1. They are raw strings so
# that regex escapes such as `\(` are not (invalid) Python string escapes.
#
# "outside" patterns are handed to rename_function_outside(), which names the
# function *called before* the code referencing the string.
patterns_function_outside = [
    # Found in safemode.elf
    r"^([0-9A-Za-z:_]+) failed! \(result:%#x\)\n$",
    r"^([0-9A-Za-z:_]+)\([0-9A-Za-z_, ]*\) : %x\n$",
]

# "inside" patterns are handed to rename_function_inside(), which names the
# function that *contains* the reference to the string.
patterns_function_inside = [
    # Found in safemode.elf
    r"^([0-9A-Za-z:_]+) failed! \(standby page is NULL\)\n$",
]

# Binary patterns
patterns_syscall = [
    # mov rax, 0xXXXX; mov r10, rcx; syscall
    # (the `jb $+0x5` that follows in the wrappers is not part of the bytes
    # matched below; `??` are wildcards for the low 16 bits of the id)
    "48 C7 C0 ?? ?? 00 00 49 89 CA 0F 05",
]
|
|
|
|
### Utilities ###
|
|
|
|
def get_file_path(name):
    """
    Return the path of `name` inside the directory that holds this script.

    The script location is taken from sys.argv[0], with symbolic links
    resolved, so the companion database files are looked up next to the
    script rather than relative to the current working directory.
    """
    # Imported here because the file's import block does not bring `os`
    # into scope; without it this function depends on the host environment
    # having leaked `os` into the script's globals.
    import os

    script_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
    return os.path.join(script_dir, name)
|
|
|
|
### Helpers ###
|
|
|
|
def get_last_direct_call(block, strict=True):
    """
    Return the target of the last direct call in a basic block.

    The heads in [block.startEA, block.endEA) are walked backwards until a
    `call` instruction is met.

    Returns:
      - the second code reference of that call when it has more than one
        (the first one being ordinary flow, since flow refs are requested),
      - 0x0 (NULL) in strict mode when the last call has no such reference,
        so that callers can tell it apart from the no-call scenario,
      - BADADDR when no suitable call exists in the block.

    With strict=False, calls without a target are skipped and the search
    continues with earlier instructions of the block.
    """
    for head in reversed(list(Heads(block.startEA, block.endEA))):
        instr = idautils.DecodeInstruction(head)
        # A head that does not decode as an instruction (e.g. a data item)
        # yields None; skip it instead of failing on the attribute access.
        if instr is None:
            continue
        if instr.get_canon_mnem() != "call":
            continue
        refs = list(CodeRefsFrom(head, 1))
        if len(refs) > 1:
            return refs[1]
        if strict:
            return 0x0
    return BADADDR
|
|
|
|
def get_predecessors(blocks, blacklist=None):
    """
    Get set of predecessor blocks of a given set of blocks.

    `blocks` is any iterable of objects exposing a `preds()` method that
    returns an iterable of hashable predecessors.

    Optionally, the result can be filtered with an iterable of blacklisted
    blocks; anything contained in `blacklist` is removed from the result.
    `None` (the default) or an empty blacklist filters nothing.
    """
    preds = set()
    for block in blocks:
        preds.update(block.preds())
    # `None` replaces the former shared mutable default (`set()`).
    if blacklist:
        preds.difference_update(blacklist)
    return preds
|
|
|
|
def _take_unvisited(blocks, visited):
    """
    Return the blocks whose start address is not yet in `visited`, and
    record the start address of every returned block in `visited`.
    """
    fresh = []
    for block in blocks:
        if block.startEA not in visited:
            visited.add(block.startEA)
            fresh.append(block)
    return fresh


def rename_function_outside(name, string_ea):
    """
    Rename the function that is called right before `string_ea` is used.

    For every cross-reference to the string, the control-flow graph of the
    referencing function is walked backwards, one layer of predecessor
    blocks at a time, starting from the block that contains the reference.
    The walk stops at the first layer in which some block contains a call;
    direct call targets found in that layer become candidates.

    The rename is applied only when all cross-references together yield
    exactly one candidate; otherwise a diagnostic is printed and nothing is
    renamed.
    """
    functions = set()
    # Get set of functions called prior to basic blocks that xref the string
    for instr_xref in XrefsTo(string_ea):
        instr_ea = instr_xref.frm
        func = idaapi.get_func(instr_ea)
        if not func:
            continue
        cfg = idaapi.FlowChart(func, flags=ida_gdl.FC_PREDS)

        # Visited blocks are tracked by start address rather than by block
        # object. NOTE(review): the flow-chart block objects appear to be
        # created anew on each access and to hash by identity, in which case
        # an object-keyed blacklist never matches and a call-free loop in
        # the CFG would be walked forever -- confirm against ida_gdl.
        visited = set()
        preds = []
        for block in cfg:
            if block.startEA <= instr_ea < block.endEA:
                visited.add(block.startEA)
                preds = _take_unvisited(block.preds(), visited)
                break

        # Scan predecessors layer by layer for last direct calls
        while preds:
            found = False
            for pred in preds:
                target_ea = get_last_direct_call(pred)
                if target_ea == BADADDR:
                    # No call at all in this block
                    continue
                # Any call ends the search at this layer; 0x0 marks a call
                # without a direct target, which yields no candidate.
                found = True
                if target_ea != 0x0:
                    functions.add(target_ea)
            if found:
                break
            # Move on to the not-yet-visited predecessors of this layer
            preds = _take_unvisited(get_predecessors(preds), visited)

    # Ensure we only have exactly one candidate function
    if len(functions) != 1:
        print("None or multiple candidates detected @ string:0x%X" % (string_ea))
        return
    # Rename the candidate function
    func_ea = next(iter(functions))
    print("Renaming function 0x%X to %s" % (func_ea, name))
    idc.MakeNameEx(func_ea, name, idc.SN_NOWARN)
|
|
|
|
def rename_function_inside(name, string_ea):
    """
    Rename the function that contains the reference(s) to `string_ea`.

    The start addresses of all functions holding a cross-reference to the
    string are collected. The rename is applied only when exactly one such
    function exists; otherwise a diagnostic is printed and nothing is
    renamed.
    """
    functions = set()
    # Get set of functions that contain xrefs to the string
    for instr_xref in XrefsTo(string_ea):
        func = idaapi.get_func(instr_xref.frm)
        # References from outside any function have no owner to rename
        # (same guard as in rename_function_outside).
        if not func:
            continue
        functions.add(func.startEA)

    # Ensure we only have exactly one candidate function
    if len(functions) != 1:
        print("None or multiple candidates detected @ string:0x%X" % (string_ea))
        return
    # Rename the candidate function
    func_ea = next(iter(functions))
    print("Renaming function 0x%X to %s" % (func_ea, name))
    idc.MakeNameEx(func_ea, name, idc.SN_NOWARN)
|
|
|
|
|
|
### Analysis ###
|
|
|
|
def analyze_functions():
    """
    Reconstruct function names from strings.

    Every string known to the database is matched against the patterns in
    `patterns_function_outside` and then `patterns_function_inside`; group 1
    of a match is the function name, which is passed together with the
    string address to the corresponding rename helper.
    """
    # Read the string list once instead of re-enumerating it per pattern.
    strings = [(s.ea, str(s)) for s in idautils.Strings() if s is not None]

    # Order matters: all "outside" patterns run before the "inside" ones,
    # and within a stage patterns are applied one after another.
    stages = (
        (patterns_function_outside, rename_function_outside),
        (patterns_function_inside, rename_function_inside),
    )
    for patterns, rename in stages:
        for pattern in patterns:
            regex = re.compile(pattern)
            for string_ea, text in strings:
                match = regex.match(text)
                if match:
                    rename(match.group(1), string_ea)
|
|
|
|
def analyze_syscalls():
    """
    Detect and rename syscall wrappers.

    Loads `db_syscalls.json` (located via get_file_path) and searches the
    database for every byte pattern in `patterns_syscall`. A match is only
    considered when it sits exactly at the start of a function. The dword
    at offset 3 of the match (the immediate of the first instruction in the
    pattern) is the syscall id; its decimal string is looked up in the JSON
    mapping and, when a non-empty name is found, the address is renamed.
    """
    with open(get_file_path('db_syscalls.json'), 'r') as db_file:
        syscall_names = json.load(db_file)

    for pattern in patterns_syscall:
        # The first search starts at address 1, later ones right after the
        # previous hit.
        ea = idc.FindBinary(0x0 + 1, idc.SEARCH_DOWN, pattern)
        while ea != BADADDR:
            func = idaapi.get_func(ea)
            if func and func.startEA == ea:
                syscall_id = Dword(ea + 0x3)
                syscall_name = syscall_names.get(str(syscall_id), None)
                if syscall_name:
                    idc.MakeNameEx(ea, str(syscall_name), idc.SN_NOWARN)
            ea = idc.FindBinary(ea + 1, idc.SEARCH_DOWN, pattern)
|
|
|
|
def analyze_nids():
    """
    Placeholder for NID-based renaming; does nothing and returns None.

    TODO: Not yet implemented, boot userland executables don't use them.
    """
    return None
|
|
|
|
def analyze_qwords():
    """
    Turn every potential user pointer into a qword data item.

    First the lowest segment start and highest segment end are computed to
    delimit the address range covered by the database. Then each segment is
    scanned in 8-byte steps from its start: wherever the existing item is
    smaller than 8 bytes and the 64-bit value stored there falls inside
    that address range, a qword is created.
    """
    # Get user boundaries
    user_start = BADADDR
    user_stop = 0x0
    seg_count = 0
    for seg_ea in Segments():
        seg_count += 1
        user_start = min(user_start, SegStart(seg_ea))
        user_stop = max(user_stop, SegEnd(seg_ea))

    # Transform every potential user pointer to a qword
    for i in range(seg_count):
        seg = idaapi.getnseg(i)
        # Step manually instead of range(start, end, 8): under Python 2 that
        # call builds a list with one entry per slot of the whole segment.
        ea = seg.startEA
        while ea < seg.endEA:
            if get_item_size(ea) < 8:
                value = get_qword(ea)
                if user_start <= value < user_stop:
                    create_qword(ea)
            ea += 8
|
|
|
|
def analyze_prologues():
    """
    Create functions at prologue-looking byte sequences not yet in one.

    Every code segment is searched for the byte pattern below; each hit
    that does not already belong to a function is turned into one.
    """
    # Target prologue: push rbp; mov rbp, rsp
    # Only `55` (push rbp) and `48 89` (the start of a 64-bit register mov)
    # are matched; the operand byte is left unconstrained, so other
    # `push rbp; mov <r64>, <r64>` sequences match as well.
    pattern = "55 48 89"

    # For each user code segment
    for seg_ea in Segments():
        if ida_segment.segtype(seg_ea) != SEG_CODE:
            continue
        user_start = SegStart(seg_ea)
        user_stop = SegEnd(seg_ea)

        # Find solitary prologues. The first search starts at the segment
        # start itself so that a prologue on the very first byte is seen;
        # later searches resume one byte past the previous hit.
        ea = idc.FindBinary(user_start, idc.SEARCH_DOWN, pattern)
        # The segment end is exclusive: a hit at `user_stop` is outside this
        # segment. A failed search returns BADADDR, which also ends the loop.
        while ea < user_stop:
            if idaapi.get_func(ea) is None:
                idc.MakeFunction(ea)
            ea = idc.FindBinary(ea + 1, idc.SEARCH_DOWN, pattern)
|
|
|
|
def analyze_types():
    """
    Apply known prototypes to named functions.

    Loads `db_types.json` (located via get_file_path), a mapping from
    function name to declaration string. Every function in a code segment
    whose name does not start with `sub_` and has a non-empty entry in the
    mapping gets its declaration parsed and applied; declarations that fail
    to parse are reported and skipped.
    """
    with open(get_file_path('db_types.json'), 'r') as db_file:
        declarations = json.load(db_file)

    parse_flags = 1 | 2 | 4 # PT_SIL | PT_NDC | PT_TYP

    for seg_ea in Segments():
        if ida_segment.segtype(seg_ea) == SEG_CODE:
            for func_addr in Functions(SegStart(seg_ea), SegEnd(seg_ea)):
                func_name = GetFunctionName(func_addr)
                # Auto-generated names carry no information to look up
                if func_name.startswith('sub_'):
                    continue
                declaration = declarations.get(func_name, None)
                if not declaration:
                    continue
                declaration = str(declaration)
                parsed = parse_decl(declaration, parse_flags)
                if parsed:
                    # Elements 1 and 2 of the parse result are passed on as
                    # the type and its fields.
                    ida_typeinf.apply_type(None, parsed[1], parsed[2], func_addr, TINFO_DEFINITE)
                else:
                    print("Failed to apply type: %s" % declaration)
|
|
|
|
### Main ###
|
|
|
|
def main():
    """
    Run all analysis stages in sequence.

    The order of the stages is not arbitrary; it follows two rules:
    1. Analysis dependencies must be considered,
       e.g. first detect functions, then process functions.
    2. Analysis with higher success rate go last,
       e.g. first do pattern based search, then rename syscalls.
    """
    stages = (
        # Detection stage
        analyze_qwords,
        analyze_prologues,
        # Assigning names
        analyze_functions,
        analyze_syscalls,
        analyze_nids,
        # Assigning types
        analyze_types,
    )
    for stage in stages:
        stage()
|
|
|
|
# Run every analysis stage when the file is executed as a script.
if __name__ == '__main__':
    main()
|