From 17e57e2772022c9e27792a5fc567fc26d62489df Mon Sep 17 00:00:00 2001 From: sschriner Date: Fri, 25 Sep 2020 11:47:25 -0400 Subject: [PATCH 01/17] Start code move --- .../heap_tracking/hook_malloc_library.py | 236 ++++++++++++++++++ 1 file changed, 236 insertions(+) create mode 100644 manticore/native/heap_tracking/hook_malloc_library.py diff --git a/manticore/native/heap_tracking/hook_malloc_library.py b/manticore/native/heap_tracking/hook_malloc_library.py new file mode 100644 index 000000000..4df2db921 --- /dev/null +++ b/manticore/native/heap_tracking/hook_malloc_library.py @@ -0,0 +1,236 @@ +from manticore.native.state import State +from manticore.native import Manticore +from malloc_lib_data import MallocLibData + +import logging + +logger = logging.getLogger(__name__) +logger.setLevel(2) + + +# Globals that will become class members to control amount of information being retrieved +# TODO(Sonya): fine tune these a little bit - need to be automated +HOOK_SBRK_INFO = True +HOOK_MMAP_INFO = True +HOOK_MALLOC_RETURN = True +HOOK_FREE_RETURN = True +""" +TODO(Sonya): class conversion +class hook_malloc_library: + def __init__(self, m: Manticore, sbrk:int, mmap:int, munmap:int, malloc:int, free:int, calloc=None, realloc=None, + HOOK_SBRK_INFO: bool = True, HOOK_MMAP_INFO: bool = True, HOOK_MALLOC_RETURN: bool = True, HOOK_FREE_RETURN: bool = True): + self.sbrk = sbrk + self.mmap = mmap + self.munmap = munmap + self.malloc = malloc + self.free = free + +""" + + +def load_ret_addr(func: str, state: State): + """ Loads the return address of a function from the stack + (Assuming the next instruction to be executed is the start of a function call) + """ + stack_location = state.cpu.read_register("RSP") + ret_addr = state.cpu.read_int(stack_location, state.cpu.address_bit_size) + logger.debug(f"Adding a hook for {func} callsite in state: {state.id}") + return ret_addr + + +def hook_malloc_lib(initial_state: State, sbrk: int, mmap: int, munmap: int, malloc: int, free: int): + """ Function to add malloc hooks and do prep work + - TODO(Sonya): would like this to eventially be __init__() method for a class + once manticore hook callbacks have been debugged. + (from Eric) See: https://github.com/trailofbits/manticore/blob/master/tests/native/test_state.py#L163-L218 + & https://github.com/trailofbits/manticore/blob/master/tests/native/test_state.py#L274-L278 to work on debugging this + """ + initial_state.context["malloc_lib"] = MallocLibData() + + # Hook malloc and free + initial_state.add_hook(malloc, hook_malloc, after=False) + initial_state.add_hook(free, hook_free, after=False) + + initial_state.context['sbrk'] = sbrk + initial_state.context['mmap'] = mmap + initial_state.context['munmap'] = munmap + + +def hook_mmap_return(state: State): + """ Hook to process munmap information and add a function hook to the callsite of munmap (which should + be inside malloc or another function inside of malloc which calls munmap), post execution of the + munmap call. + """ + ret_val = state.cpu.read_register("RAX") + logger.info(f"mmap ret val: {hex(ret_val)}") + + state.context["malloc_lib"].process_mmap(ret_val, state.context["mmap_args"]) + del state.context["mmap_args"] + + logger.debug(f"Unhooking mmap return in malloc in state: {state.id}") + state.remove_hook(state.cpu.read_register("RIP"), hook_mmap_return) + + +def hook_mmap(state: State): + """ Hook to process mmap information and add a function hook to the callsite of mmap (which should + be inside the free or another function inside of free which calls mmap), post execution of the + mmap call. + """ + # TODO(Sonya): per Eric's suggestion - + # check out manticore invoke model code to find function that will extract all these args + args = [] + args.append(state.cpu.read_register("RDI")) # void *addr + args.append(state.cpu.read_register("RSI")) # size_t length + args.append(state.cpu.read_register("RDX")) # int prot + args.append(state.cpu.read_register("RCX")) # int flags + args.append(state.cpu.read_register("R8")) # int fd + args.append(state.cpu.read_register("R9")) # off_t offset + logger.info(f"Invoking mmap in malloc. Args {args}") + state.context["mmap_args"] = args + + ret_addr = load_ret_addr("mmap", state) + state.add_hook(ret_addr, hook_mmap_return, after=False) + +# NOTE(Sonya): If I can't find the internal sbrk address I can get to manticore brk. +# .....so I can calculate: sbrk_chunk size = curr_brk - new_brk, sbrk_ret_val = new_brk +# where new_brk is the argument passed into brk - see brk and sbrk man pages +# https://github.com/trailofbits/manticore/blob/f46f78b69bd440af144f19ec97695ec7e911a374/manticore/platforms/linux.py#L1864 +# state.platform.brk gives current brk +def hook_sbrk_return(state: State): + """ Hook to process sbrk return information and remove the hook to itself at the callsite to sbrk, + post execution of the sbrk function. + """ + ret_val = state.cpu.read_register("RAX") + logger.info(f"sbrk ret val: {hex(ret_val)}") + + state.context["malloc_lib"].process_sbrk(ret_val, state.context["sbrk_size"]) + del state.context["sbrk_size"] + + logger.debug(f"Unhooking sbrk return in malloc in state: {state.id}") + state.remove_hook(state.cpu.read_register("RIP"), hook_sbrk_return) + + +def hook_sbrk(state: State): + """ Hook to process sbrk information and add a function hook to the callsite of sbrk (which should + be inside malloc or another function inside of malloc which calls sbrk), post execution of the + sbrk call. + """ + # Get %rdi is first arg reg get request size from it + request_size = state.cpu.read_register("RDI") + logger.info(f"Invoking sbrk in malloc. Request Size {request_size}") + state.context["sbrk_size"] = request_size + + # Pull return address off the stack and add a hook for it + ret_addr = load_ret_addr("sbrk", state) + state.add_hook(ret_addr, hook_sbrk_return, after=False) + + +def hook_malloc_return(state: State): + """ Hook to process malloc information and remove function hooks at the return address, + post execution of the malloc function. + """ + ret_val = state.cpu.read_register("RAX") + logger.info(f"malloc ret val: {hex(ret_val)}") + state.context["malloc_lib"].process_malloc(ret_val, state.context["malloc_size"]) + del state.context["malloc_size"] + + if HOOK_SBRK_INFO: + logger.debug((f"Unhooking sbrk in state: {state.id}")) + state.remove_hook(state.context["sbrk"], hook_sbrk) + + if HOOK_MMAP_INFO: + logger.debug(f"Unhooking mmap in state: {state.id}") + state.remove_hook(state.context["mmap"], hook_mmap) + + logger.debug(f"Unhooking malloc return in state: {state.id}") + state.remove_hook(state.cpu.read_register("RIP"), hook_malloc_return) + + logger.debug(f"Remaining hooks in state {state.id}: {state._hooks}") + + +def hook_malloc(state: State): + """ Hook to process malloc information and add function hooks at malloc function start, + pre-execution of the malloc function. + """ + # Get request size + malloc_size = state.cpu.read_register("RDI") + logger.info(f"Invoking malloc for size: {malloc_size}") + state.context["malloc_size"] = malloc_size + + # Hook sbrk + if HOOK_SBRK_INFO: + logger.debug(f"Adding Hook for sbrk in state: {state.id}") + state.add_hook(state.context['sbrk'], hook_sbrk, after=False) + + # Hook mmap + if HOOK_MMAP_INFO: + logger.debug(f"Adding Hook for mmap in state: {state.id}") + state.add_hook(state.context["mmap"], hook_mmap, after=False) + + # Hook Return Address + if HOOK_MALLOC_RETURN: + ret_addr = load_ret_addr("malloc", state) + state.add_hook(ret_addr, hook_malloc_return, after=False) + + +def hook_munmap_return(state: State): + """ Hook to process munmap information and add a function hook to the callsite of munmap (which should + be inside malloc or another function inside of malloc which calls munmap), post execution of the + munmap call. + """ + ret_val = state.cpu.read_register("RAX") + logger.info(f"munmap ret val: {hex(ret_val)}") + + logger.debug(f"Unhooking munmap return in malloc in state: {state.id}") + state.remove_hook(state.cpu.read_register("RIP"), hook_munmap_return) + + +def hook_munmap(state: State): + """ Hook to process munmap information and add a function hook to the callsite of munmap (which should + be inside the free or another function inside of free which calls munmap), post execution of the + munmap call. + """ + addr = state.cpu.read_register("RDI") # void *addr + length = state.cpu.read_register("RSI") # size_t length + logger.info(f"Invoking munmap in malloc. Args {addr}, {length}") + + state.context["malloc_lib"].process_munmap(addr, length) + + ret_addr = load_ret_addr("munmap", state) + state.add_hook(ret_addr, hook_munmap_return, after=False) + + +def hook_free_return(state: State): + """ Hook to process free information and remove function hooks at the callsite, + post execution of the free function. + """ + logger.info(f"Free has no return value") + + if HOOK_MMAP_INFO: + logger.debug(f"Unhooking munmap in state: {state.id}") + state.remove_hook(state.context['munmap'], hook_munmap) + + logger.debug(f"Unhooking free return in state: {state.id}") + state.remove_hook(state.cpu.read_register("RIP"), hook_free_return) + + logger.debug(f"Remaining hooks in state {state.id}: {state._hooks}") + + +def hook_free(state: State): + """ Hook to process free information and add function hooks at free function start, + pre-execution of the free function. + """ + # Get free address + free_address = state.cpu.read_register("RDI") + logger.info(f"Attempting to free: {hex(free_address)}") + state.context["malloc_lib"].process_free(free_address) + + # Hook munmap + if HOOK_MMAP_INFO: + logger.debug(f"Adding Hook for munmap in state: {state.id}") + state.add_hook(state.context['munmap'], hook_munmap, after=False) + + # Hook free return address + if HOOK_FREE_RETURN: + ret_addr = load_ret_addr("free", state) + state.add_hook(ret_addr, hook_free_return, after=False) From 05581e78e6e60570848fd2a6365afff418bb6010 Mon Sep 17 00:00:00 2001 From: sschriner Date: Fri, 25 Sep 2020 13:07:50 -0400 Subject: [PATCH 02/17] Added information class --- .../heap_tracking/hook_malloc_library.py | 2 +- .../native/heap_tracking/malloc_lib_data.py | 51 +++++++++++++++++++ 2 files changed, 52 insertions(+), 1 deletion(-) create mode 100644 manticore/native/heap_tracking/malloc_lib_data.py diff --git a/manticore/native/heap_tracking/hook_malloc_library.py b/manticore/native/heap_tracking/hook_malloc_library.py index 4df2db921..9cddaecae 100644 --- a/manticore/native/heap_tracking/hook_malloc_library.py +++ b/manticore/native/heap_tracking/hook_malloc_library.py @@ -1,6 +1,6 @@ from manticore.native.state import State from manticore.native import Manticore -from malloc_lib_data import MallocLibData +from manticore.native.heap_tracking.malloc_lib_data import MallocLibData import logging diff --git a/manticore/native/heap_tracking/malloc_lib_data.py b/manticore/native/heap_tracking/malloc_lib_data.py new file mode 100644 index 000000000..bf8d69274 --- /dev/null +++ b/manticore/native/heap_tracking/malloc_lib_data.py @@ -0,0 +1,51 @@ +from typing import List, Dict, Tuple +from dataclasses import dataclass, field +import json + +# Data Class to hold malloc_lib information +# - This is added to state 0 pre-manticore execution and will be saving state specific information as manticore +# forks and different program paths are found +@dataclass +class MallocLibData: + """This class holds the malloc library data in a specific state (or on specific program path).""" + + malloc_calls: List[Tuple[int, int]] = field(default_factory=list) + free_calls: List[int] = field(default_factory=list) + sbrk_chunks: List[Tuple[int, int]] = field(default_factory=list) + mmap_chunks: Dict[int, int] = field(default_factory=dict) + munmap_chunks: Dict[int, int] = field(default_factory=dict) + + def __str__(self): + # TODO(Sonya): This does not print address information in hexadecimal + return f"malloc calls: {self.malloc_calls}\nfree calls: {self.free_calls}\nsbrk chunks: {self.sbrk_chunks}\nmmap chunks: {self.mmap_chunks}\n" + + def _save_to_file(self, state_id: int): + data = {'malloc_calls': self.malloc_calls, 'free_calls': self.free_calls, 'sbrk_chunks': self.sbrk_chunks, 'mmap_chunks': self.mmap_chunks} + with open(f"m_out/malloc_{state_id}.json", "w+") as write_file: + json.dump(data, write_file, indent=4) + + # TODO(Sonya): Add some more methods here for helpful semantics of recording/retrieving information + # Might want to annotate all this with instruction address information + def process_malloc(self, ret_addr: int, size: int): + # should add malloc call information to list + self.malloc_calls.append((ret_addr, size)) + + def process_free(self, free_addr: int): + # Maybe remove from malloc list and add to a used_and_free list + self.free_calls.append(free_addr) + + # TODO(Sonya): Add other malloc library functions here + + def process_sbrk(self, ret_addr: int, size: int): + # check last chunk added to list + # if size + address == new starting address of chunk -> add new chunk size to last allocated chunk + # else -> add a new chunk to the list + self.sbrk_chunks.append((ret_addr, size)) + + def process_mmap(self, ret_addr: int, args: List): + # add new chunk to the mmap_list + self.mmap_chunks[ret_addr] = args + + def process_munmap(self, addr: int, length: int): + # remove from mmap list and add to the munmaped list + self.munmap_chunks[addr] = length \ No newline at end of file From 429e949313323c6cc8c5628b22cec94dd234edef Mon Sep 17 00:00:00 2001 From: sschriner Date: Fri, 4 Dec 2020 12:38:04 -0500 Subject: [PATCH 03/17] Generalized some of the register names - still need to fix reg names in hook_mmap --- .../heap_tracking/hook_malloc_library.py | 53 ++++++++----------- 1 file changed, 22 insertions(+), 31 deletions(-) diff --git a/manticore/native/heap_tracking/hook_malloc_library.py b/manticore/native/heap_tracking/hook_malloc_library.py index 9cddaecae..ab93b8112 100644 --- a/manticore/native/heap_tracking/hook_malloc_library.py +++ b/manticore/native/heap_tracking/hook_malloc_library.py @@ -8,37 +8,25 @@ logger.setLevel(2) -# Globals that will become class members to control amount of information being retrieved +# Globals that will become class members to control amount of information being retrieved # TODO(Sonya): fine tune these a little bit - need to be automated HOOK_SBRK_INFO = True HOOK_MMAP_INFO = True HOOK_MALLOC_RETURN = True HOOK_FREE_RETURN = True -""" -TODO(Sonya): class conversion -class hook_malloc_library: - def __init__(self, m: Manticore, sbrk:int, mmap:int, munmap:int, malloc:int, free:int, calloc=None, realloc=None, - HOOK_SBRK_INFO: bool = True, HOOK_MMAP_INFO: bool = True, HOOK_MALLOC_RETURN: bool = True, HOOK_FREE_RETURN: bool = True): - self.sbrk = sbrk - self.mmap = mmap - self.munmap = munmap - self.malloc = malloc - self.free = free - -""" def load_ret_addr(func: str, state: State): """ Loads the return address of a function from the stack (Assuming the next instruction to be executed is the start of a function call) """ - stack_location = state.cpu.read_register("RSP") + stack_location = state.cpu.read_register("STACK") ret_addr = state.cpu.read_int(stack_location, state.cpu.address_bit_size) logger.debug(f"Adding a hook for {func} callsite in state: {state.id}") return ret_addr -def hook_malloc_lib(initial_state: State, sbrk: int, mmap: int, munmap: int, malloc: int, free: int): +def hook_malloc_lib(initial_state: State, malloc: int, free: int): """ Function to add malloc hooks and do prep work - TODO(Sonya): would like this to eventially be __init__() method for a class once manticore hook callbacks have been debugged. @@ -46,14 +34,15 @@ def hook_malloc_lib(initial_state: State, sbrk: int, mmap: int, munmap: int, mal & https://github.com/trailofbits/manticore/blob/master/tests/native/test_state.py#L274-L278 to work on debugging this """ initial_state.context["malloc_lib"] = MallocLibData() - + # Hook malloc and free initial_state.add_hook(malloc, hook_malloc, after=False) initial_state.add_hook(free, hook_free, after=False) - - initial_state.context['sbrk'] = sbrk - initial_state.context['mmap'] = mmap - initial_state.context['munmap'] = munmap + + # Fixme: with syscall specific hooks + initial_state.context["sbrk"] = 0x0 + initial_state.context["mmap"] = 0x0 + initial_state.context["munmap"] = 0x0 def hook_mmap_return(state: State): @@ -68,7 +57,7 @@ def hook_mmap_return(state: State): del state.context["mmap_args"] logger.debug(f"Unhooking mmap return in malloc in state: {state.id}") - state.remove_hook(state.cpu.read_register("RIP"), hook_mmap_return) + state.remove_hook(state.cpu.read_register("PC"), hook_mmap_return) def hook_mmap(state: State): @@ -91,6 +80,7 @@ def hook_mmap(state: State): ret_addr = load_ret_addr("mmap", state) state.add_hook(ret_addr, hook_mmap_return, after=False) + # NOTE(Sonya): If I can't find the internal sbrk address I can get to manticore brk. # .....so I can calculate: sbrk_chunk size = curr_brk - new_brk, sbrk_ret_val = new_brk # where new_brk is the argument passed into brk - see brk and sbrk man pages @@ -107,7 +97,7 @@ def hook_sbrk_return(state: State): del state.context["sbrk_size"] logger.debug(f"Unhooking sbrk return in malloc in state: {state.id}") - state.remove_hook(state.cpu.read_register("RIP"), hook_sbrk_return) + state.remove_hook(state.cpu.read_register("PC"), hook_sbrk_return) def hook_sbrk(state: State): @@ -136,14 +126,14 @@ def hook_malloc_return(state: State): if HOOK_SBRK_INFO: logger.debug((f"Unhooking sbrk in state: {state.id}")) - state.remove_hook(state.context["sbrk"], hook_sbrk) + #state.remove_hook(state.context["sbrk"], hook_sbrk) if HOOK_MMAP_INFO: logger.debug(f"Unhooking mmap in state: {state.id}") - state.remove_hook(state.context["mmap"], hook_mmap) + #state.remove_hook(state.context["mmap"], hook_mmap) logger.debug(f"Unhooking malloc return in state: {state.id}") - state.remove_hook(state.cpu.read_register("RIP"), hook_malloc_return) + state.remove_hook(state.cpu.read_register("PC"), hook_malloc_return) logger.debug(f"Remaining hooks in state {state.id}: {state._hooks}") @@ -160,12 +150,13 @@ def hook_malloc(state: State): # Hook sbrk if HOOK_SBRK_INFO: logger.debug(f"Adding Hook for sbrk in state: {state.id}") - state.add_hook(state.context['sbrk'], hook_sbrk, after=False) + # state.add_hook("sbrk", hook_sbrk, after=False) + #state.add_hook(state.context["sbrk"], hook_sbrk, after=False) # Hook mmap if HOOK_MMAP_INFO: logger.debug(f"Adding Hook for mmap in state: {state.id}") - state.add_hook(state.context["mmap"], hook_mmap, after=False) + #state.add_hook(state.context["mmap"], hook_mmap, after=False) # Hook Return Address if HOOK_MALLOC_RETURN: @@ -182,7 +173,7 @@ def hook_munmap_return(state: State): logger.info(f"munmap ret val: {hex(ret_val)}") logger.debug(f"Unhooking munmap return in malloc in state: {state.id}") - state.remove_hook(state.cpu.read_register("RIP"), hook_munmap_return) + state.remove_hook(state.cpu.read_register("PC"), hook_munmap_return) def hook_munmap(state: State): @@ -208,10 +199,10 @@ def hook_free_return(state: State): if HOOK_MMAP_INFO: logger.debug(f"Unhooking munmap in state: {state.id}") - state.remove_hook(state.context['munmap'], hook_munmap) + state.remove_hook(state.context["munmap"], hook_munmap) logger.debug(f"Unhooking free return in state: {state.id}") - state.remove_hook(state.cpu.read_register("RIP"), hook_free_return) + state.remove_hook(state.cpu.read_register("PC"), hook_free_return) logger.debug(f"Remaining hooks in state {state.id}: {state._hooks}") @@ -228,7 +219,7 @@ def hook_free(state: State): # Hook munmap if HOOK_MMAP_INFO: logger.debug(f"Adding Hook for munmap in state: {state.id}") - state.add_hook(state.context['munmap'], hook_munmap, after=False) + state.add_hook(state.context["munmap"], hook_munmap, after=False) # Hook free return address if HOOK_FREE_RETURN: From b326b0f19953a4e7b419204b7d25b610e68eda6e Mon Sep 17 00:00:00 2001 From: sschriner Date: Thu, 17 Dec 2020 11:48:11 -0500 Subject: [PATCH 04/17] Finished plumbing out arch specific register names --- manticore/native/cpu/aarch64.py | 3 ++ manticore/native/cpu/abstractcpu.py | 10 +++++ manticore/native/cpu/arm.py | 3 ++ manticore/native/cpu/x86.py | 6 +++ .../heap_tracking/hook_malloc_library.py | 44 +++++++++---------- .../native/heap_tracking/malloc_lib_data.py | 11 +++-- 6 files changed, 52 insertions(+), 25 deletions(-) diff --git a/manticore/native/cpu/aarch64.py b/manticore/native/cpu/aarch64.py index 2957c5fb3..3864cd791 100644 --- a/manticore/native/cpu/aarch64.py +++ b/manticore/native/cpu/aarch64.py @@ -5302,6 +5302,9 @@ def get_arguments(self): for address in self.values_from(self._cpu.STACK): yield address + def get_return_reg(self): + return "X0" + def write_result(self, result): self._cpu.X0 = result diff --git a/manticore/native/cpu/abstractcpu.py b/manticore/native/cpu/abstractcpu.py index 7056de023..12754c402 100644 --- a/manticore/native/cpu/abstractcpu.py +++ b/manticore/native/cpu/abstractcpu.py @@ -290,6 +290,16 @@ def get_arguments(self): """ raise NotImplementedError + def get_return_reg(self): + """ + Extract the location a return value will be written to. Produces + a string describing a register where the return value is written to. + + :return: return register name + :rtype: string + """ + raise NotImplementedError + def write_result(self, result): """ Write the result of a model back to the environment. diff --git a/manticore/native/cpu/arm.py b/manticore/native/cpu/arm.py index 6d87aea84..a8d638d92 100644 --- a/manticore/native/cpu/arm.py +++ b/manticore/native/cpu/arm.py @@ -570,6 +570,9 @@ def get_arguments(self): for address in self.values_from(self._cpu.STACK): yield address + def get_return_reg(self): + return "R0" + def write_result(self, result): self._cpu.R0 = result diff --git a/manticore/native/cpu/x86.py b/manticore/native/cpu/x86.py index e839d7268..5491d2816 100644 --- a/manticore/native/cpu/x86.py +++ b/manticore/native/cpu/x86.py @@ -6408,6 +6408,9 @@ def get_arguments(self): for address in self.values_from(base): yield address + def get_return_reg(self): + return "EAX" + def write_result(self, result): self._cpu.EAX = result @@ -6460,6 +6463,9 @@ def get_arguments(self): for address in self.values_from(self._cpu.RSP + word_bytes): yield address + def get_return_reg(self): + return "RAX" + def write_result(self, result): # XXX(yan): Can also return in rdx for wide values. self._cpu.RAX = result diff --git a/manticore/native/heap_tracking/hook_malloc_library.py b/manticore/native/heap_tracking/hook_malloc_library.py index ab93b8112..af0b05000 100644 --- a/manticore/native/heap_tracking/hook_malloc_library.py +++ b/manticore/native/heap_tracking/hook_malloc_library.py @@ -50,7 +50,7 @@ def hook_mmap_return(state: State): be inside malloc or another function inside of malloc which calls munmap), post execution of the munmap call. """ - ret_val = state.cpu.read_register("RAX") + ret_val = state.cpu.read_register(state._platform._function_abi.get_return_reg()) logger.info(f"mmap ret val: {hex(ret_val)}") state.context["malloc_lib"].process_mmap(ret_val, state.context["mmap_args"]) @@ -65,15 +65,14 @@ def hook_mmap(state: State): be inside the free or another function inside of free which calls mmap), post execution of the mmap call. """ - # TODO(Sonya): per Eric's suggestion - - # check out manticore invoke model code to find function that will extract all these args args = [] - args.append(state.cpu.read_register("RDI")) # void *addr - args.append(state.cpu.read_register("RSI")) # size_t length - args.append(state.cpu.read_register("RDX")) # int prot - args.append(state.cpu.read_register("RCX")) # int flags - args.append(state.cpu.read_register("R8")) # int fd - args.append(state.cpu.read_register("R9")) # off_t offset + args_gen = state._platform._function_abi.get_arguments() + args.append(state.cpu.read_register(next(args_gen))) # void *addr + args.append(state.cpu.read_register(next(args_gen))) # size_t length + args.append(state.cpu.read_register(next(args_gen))) # int prot + args.append(state.cpu.read_register(next(args_gen))) # int flags + args.append(state.cpu.read_register(next(args_gen))) # int fd + args.append(state.cpu.read_register(next(args_gen))) # off_t offset logger.info(f"Invoking mmap in malloc. Args {args}") state.context["mmap_args"] = args @@ -90,7 +89,7 @@ def hook_sbrk_return(state: State): """ Hook to process sbrk return information and remove the hook to itself at the callsite to sbrk, post execution of the sbrk function. """ - ret_val = state.cpu.read_register("RAX") + ret_val = state.cpu.read_register(state._platform._function_abi.get_return_reg()) logger.info(f"sbrk ret val: {hex(ret_val)}") state.context["malloc_lib"].process_sbrk(ret_val, state.context["sbrk_size"]) @@ -105,8 +104,8 @@ def hook_sbrk(state: State): be inside malloc or another function inside of malloc which calls sbrk), post execution of the sbrk call. """ - # Get %rdi is first arg reg get request size from it - request_size = state.cpu.read_register("RDI") + # Get request size from arg1 + request_size = state.cpu.read_register(next(state._platform._function_abi.get_arguments())) logger.info(f"Invoking sbrk in malloc. Request Size {request_size}") state.context["sbrk_size"] = request_size @@ -119,18 +118,18 @@ def hook_malloc_return(state: State): """ Hook to process malloc information and remove function hooks at the return address, post execution of the malloc function. """ - ret_val = state.cpu.read_register("RAX") + ret_val = state.cpu.read_register(state._platform._function_abi.get_return_reg()) logger.info(f"malloc ret val: {hex(ret_val)}") state.context["malloc_lib"].process_malloc(ret_val, state.context["malloc_size"]) del state.context["malloc_size"] if HOOK_SBRK_INFO: logger.debug((f"Unhooking sbrk in state: {state.id}")) - #state.remove_hook(state.context["sbrk"], hook_sbrk) + # state.remove_hook(state.context["sbrk"], hook_sbrk) if HOOK_MMAP_INFO: logger.debug(f"Unhooking mmap in state: {state.id}") - #state.remove_hook(state.context["mmap"], hook_mmap) + # state.remove_hook(state.context["mmap"], hook_mmap) logger.debug(f"Unhooking malloc return in state: {state.id}") state.remove_hook(state.cpu.read_register("PC"), hook_malloc_return) @@ -143,7 +142,7 @@ def hook_malloc(state: State): pre-execution of the malloc function. """ # Get request size - malloc_size = state.cpu.read_register("RDI") + malloc_size = state.cpu.read_register(next(state._platform._function_abi.get_arguments())) logger.info(f"Invoking malloc for size: {malloc_size}") state.context["malloc_size"] = malloc_size @@ -151,12 +150,12 @@ def hook_malloc(state: State): if HOOK_SBRK_INFO: logger.debug(f"Adding Hook for sbrk in state: {state.id}") # state.add_hook("sbrk", hook_sbrk, after=False) - #state.add_hook(state.context["sbrk"], hook_sbrk, after=False) + # state.add_hook(state.context["sbrk"], hook_sbrk, after=False) # Hook mmap if HOOK_MMAP_INFO: logger.debug(f"Adding Hook for mmap in state: {state.id}") - #state.add_hook(state.context["mmap"], hook_mmap, after=False) + # state.add_hook(state.context["mmap"], hook_mmap, after=False) # Hook Return Address if HOOK_MALLOC_RETURN: @@ -169,7 +168,7 @@ def hook_munmap_return(state: State): be inside malloc or another function inside of malloc which calls munmap), post execution of the munmap call. """ - ret_val = state.cpu.read_register("RAX") + ret_val = state.cpu.read_register(state._platform._function_abi.get_return_reg()) logger.info(f"munmap ret val: {hex(ret_val)}") logger.debug(f"Unhooking munmap return in malloc in state: {state.id}") @@ -181,8 +180,9 @@ def hook_munmap(state: State): be inside the free or another function inside of free which calls munmap), post execution of the munmap call. """ - addr = state.cpu.read_register("RDI") # void *addr - length = state.cpu.read_register("RSI") # size_t length + args_gen = state._platform._function_abi.get_arguments() + addr = state.cpu.read_register(next(args_gen)) # void *addr + length = state.cpu.read_register(next(args_gen)) # size_t length logger.info(f"Invoking munmap in malloc. Args {addr}, {length}") state.context["malloc_lib"].process_munmap(addr, length) @@ -212,7 +212,7 @@ def hook_free(state: State): pre-execution of the free function. """ # Get free address - free_address = state.cpu.read_register("RDI") + free_address = state.cpu.read_register(next(state._platform._function_abi.get_arguments())) logger.info(f"Attempting to free: {hex(free_address)}") state.context["malloc_lib"].process_free(free_address) diff --git a/manticore/native/heap_tracking/malloc_lib_data.py b/manticore/native/heap_tracking/malloc_lib_data.py index bf8d69274..ce55f4f8e 100644 --- a/manticore/native/heap_tracking/malloc_lib_data.py +++ b/manticore/native/heap_tracking/malloc_lib_data.py @@ -20,10 +20,15 @@ def __str__(self): return f"malloc calls: {self.malloc_calls}\nfree calls: {self.free_calls}\nsbrk chunks: {self.sbrk_chunks}\nmmap chunks: {self.mmap_chunks}\n" def _save_to_file(self, state_id: int): - data = {'malloc_calls': self.malloc_calls, 'free_calls': self.free_calls, 'sbrk_chunks': self.sbrk_chunks, 'mmap_chunks': self.mmap_chunks} + data = { + "malloc_calls": self.malloc_calls, + "free_calls": self.free_calls, + "sbrk_chunks": self.sbrk_chunks, + "mmap_chunks": self.mmap_chunks, + } with open(f"m_out/malloc_{state_id}.json", "w+") as write_file: json.dump(data, write_file, indent=4) - + # TODO(Sonya): Add some more methods here for helpful semantics of recording/retrieving information # Might want to annotate all this with instruction address information def process_malloc(self, ret_addr: int, size: int): @@ -48,4 +53,4 @@ def process_mmap(self, ret_addr: int, args: List): def process_munmap(self, addr: int, length: int): # remove from mmap list and add to the munmaped list - self.munmap_chunks[addr] = length \ No newline at end of file + self.munmap_chunks[addr] = length From 6c871e31d87805a7e8d795647ff1639bf8a61246 Mon Sep 17 00:00:00 2001 From: sschriner Date: Tue, 26 Jan 2021 10:17:56 -0500 Subject: [PATCH 05/17] Added hooks for calloc && realloc + cleaned up some functions --- .../heap_tracking/hook_malloc_library.py | 195 ++++++++++++++---- .../native/heap_tracking/malloc_lib_data.py | 15 +- 2 files changed, 169 insertions(+), 41 deletions(-) diff --git a/manticore/native/heap_tracking/hook_malloc_library.py b/manticore/native/heap_tracking/hook_malloc_library.py index af0b05000..f959995b2 100644 --- a/manticore/native/heap_tracking/hook_malloc_library.py +++ b/manticore/native/heap_tracking/hook_malloc_library.py @@ -3,6 +3,7 @@ from manticore.native.heap_tracking.malloc_lib_data import MallocLibData import logging +from typing import Callable logger = logging.getLogger(__name__) logger.setLevel(2) @@ -14,18 +15,58 @@ HOOK_MMAP_INFO = True HOOK_MALLOC_RETURN = True HOOK_FREE_RETURN = True +HOOK_CALLOC_RETURN = True +HOOK_REALLOC_RETURN = True -def load_ret_addr(func: str, state: State): +def load_ret_addr(state: State) -> int: """ Loads the return address of a function from the stack (Assuming the next instruction to be executed is the start of a function call) """ stack_location = state.cpu.read_register("STACK") ret_addr = state.cpu.read_int(stack_location, state.cpu.address_bit_size) - logger.debug(f"Adding a hook for {func} callsite in state: {state.id}") return ret_addr +def add_ret_hook(func: str, state: State, ret_hook: Callable[[State], None]) -> None: + ret_addr = load_ret_addr(state) + logger.debug(f"Adding a hook for {func} callsite in state: {state.id}") + state.add_hook(ret_addr, ret_hook, after=False) + + +def add_sys_freeing_hooks(state: State): + if HOOK_MMAP_INFO: + logger.debug(f"Adding Hook for munmap in state: {state.id}") + state.add_hook(state.context["munmap"], hook_munmap, after=False) + + +def remove_sys_freeing_hooks(state: State): + if HOOK_MMAP_INFO: + logger.debug(f"Unhooking munmap in state: {state.id}") + state.remove_hook(state.context["munmap"], hook_munmap) + + +def add_sys_allocing_hooks(state: State): + if HOOK_SBRK_INFO: + logger.debug(f"Adding Hook for sbrk in state: {state.id}") + state.add_hook("sbrk", hook_sbrk, after=False) + state.add_hook(state.context["sbrk"], hook_sbrk, after=False) + + if HOOK_MMAP_INFO: + logger.debug(f"Adding Hook for mmap in state: {state.id}") + state.add_hook(state.context["mmap"], hook_mmap, after=False) + + +def remove_sys_allocing_hooks(state: State): + if HOOK_SBRK_INFO: + logger.debug((f"Unhooking sbrk in state: {state.id}")) + state.remove_hook(state.context["sbrk"], hook_sbrk) + + if HOOK_MMAP_INFO: + logger.debug(f"Unhooking mmap in state: {state.id}") + state.remove_hook(state.context["mmap"], hook_mmap) + + def hook_malloc_lib(initial_state: State, malloc: int, free: int): """ Function to add malloc hooks and do prep work - TODO(Sonya): would like this to eventially be __init__() method for a class @@ -38,6 +79,8 @@ def hook_malloc_lib(initial_state: State, malloc: int, free: int): # Hook malloc and free initial_state.add_hook(malloc, hook_malloc, after=False) initial_state.add_hook(free, hook_free, after=False) + # initial_state.add_hook(calloc, hook_calloc, after=False) + # initial_state.add_hook(realloc, hook_realloc, after=False) # Fixme: with syscall specific hooks initial_state.context["sbrk"] = 0x0 @@ -49,6 +92,8 @@ def hook_mmap_return(state: State): """ Hook to process munmap information and add a function hook to the callsite of munmap (which should be inside malloc or another function inside of malloc which calls munmap), post execution of the munmap call. + + mmap() returns a pointer to the mapped area """ ret_val = state.cpu.read_register(state._platform._function_abi.get_return_reg()) logger.info(f"mmap ret val: {hex(ret_val)}") @@ -64,6 +109,8 @@ def hook_mmap(state: State): """ Hook to process mmap information and add a function hook to the callsite of mmap (which should be inside the free or another function inside of free which calls mmap), post execution of the mmap call. + + void *mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset); """ args = [] args_gen = state._platform._function_abi.get_arguments() @@ -76,8 +123,7 @@ def hook_mmap(state: State): logger.info(f"Invoking mmap in malloc. Args {args}") state.context["mmap_args"] = args - ret_addr = load_ret_addr("mmap", state) - state.add_hook(ret_addr, hook_mmap_return, after=False) + add_ret_hook("mmap", state, hook_mmap_return) # NOTE(Sonya): If I can't find the internal sbrk address I can get to manticore brk. @@ -88,6 +134,8 @@ def hook_mmap(state: State): def hook_sbrk_return(state: State): """ Hook to process sbrk return information and remove the hook to itself at the callsite to sbrk, post execution of the sbrk function. + + sbrk() returns the previous program break - on error, (void *) -1 is returned """ ret_val = state.cpu.read_register(state._platform._function_abi.get_return_reg()) logger.info(f"sbrk ret val: {hex(ret_val)}") @@ -103,6 +151,8 @@ def hook_sbrk(state: State): """ Hook to process sbrk information and add a function hook to the callsite of sbrk (which should be inside malloc or another function inside of malloc which calls sbrk), post execution of the sbrk call. + + void *sbrk(intptr_t increment); """ # Get request size from arg1 request_size = state.cpu.read_register(next(state._platform._function_abi.get_arguments())) @@ -110,63 +160,51 @@ def hook_sbrk(state: State): state.context["sbrk_size"] = request_size # Pull return address off the stack and add a hook for it - ret_addr = load_ret_addr("sbrk", state) - state.add_hook(ret_addr, hook_sbrk_return, after=False) + add_ret_hook("sbrk", state, hook_sbrk_return) def hook_malloc_return(state: State): """ Hook to process malloc information and remove function hooks at the return address, post execution of the malloc function. + + malloc() returns a pointer to the allocated memory """ ret_val = state.cpu.read_register(state._platform._function_abi.get_return_reg()) logger.info(f"malloc ret val: {hex(ret_val)}") state.context["malloc_lib"].process_malloc(ret_val, state.context["malloc_size"]) del state.context["malloc_size"] - if HOOK_SBRK_INFO: - logger.debug((f"Unhooking sbrk in state: {state.id}")) - # state.remove_hook(state.context["sbrk"], hook_sbrk) - - if HOOK_MMAP_INFO: - logger.debug(f"Unhooking mmap in state: {state.id}") - # state.remove_hook(state.context["mmap"], hook_mmap) + remove_sys_allocing_hooks(state) logger.debug(f"Unhooking malloc return in state: {state.id}") state.remove_hook(state.cpu.read_register("PC"), hook_malloc_return) - logger.debug(f"Remaining hooks in state {state.id}: {state._hooks}") def hook_malloc(state: State): """ Hook to process malloc information and add function hooks at malloc function start, pre-execution of the malloc function. + + void *malloc(size_t size); """ # Get request size malloc_size = state.cpu.read_register(next(state._platform._function_abi.get_arguments())) logger.info(f"Invoking malloc for size: {malloc_size}") state.context["malloc_size"] = malloc_size - # Hook sbrk - if HOOK_SBRK_INFO: - logger.debug(f"Adding Hook for sbrk in state: {state.id}") - # state.add_hook("sbrk", hook_sbrk, after=False) - # state.add_hook(state.context["sbrk"], hook_sbrk, after=False) - - # Hook mmap - if HOOK_MMAP_INFO: - logger.debug(f"Adding Hook for mmap in state: {state.id}") - # state.add_hook(state.context["mmap"], hook_mmap, after=False) + add_sys_allocing_hooks(state) # Hook Return Address if HOOK_MALLOC_RETURN: - ret_addr = load_ret_addr("malloc", state) - state.add_hook(ret_addr, hook_malloc_return, after=False) + add_ret_hook("malloc", state, hook_malloc_return) def hook_munmap_return(state: State): """ Hook to process munmap information and add a function hook to the callsite of munmap (which should be inside malloc or another function inside of malloc which calls munmap), post execution of the munmap call. + + munmap() returns 0, on failure -1 """ ret_val = state.cpu.read_register(state._platform._function_abi.get_return_reg()) logger.info(f"munmap ret val: {hex(ret_val)}") @@ -179,6 +217,8 @@ def hook_munmap(state: State): """ Hook to process munmap information and add a function hook to the callsite of munmap (which should be inside the free or another function inside of free which calls munmap), post execution of the munmap call. + + int munmap(void *addr, size_t length); """ args_gen = state._platform._function_abi.get_arguments() addr = state.cpu.read_register(next(args_gen)) # void *addr @@ -187,41 +227,118 @@ def hook_munmap(state: State): state.context["malloc_lib"].process_munmap(addr, length) - ret_addr = load_ret_addr("munmap", state) - state.add_hook(ret_addr, hook_munmap_return, after=False) + add_ret_hook("munmap", state, hook_munmap_return) def hook_free_return(state: State): """ Hook to process free information and remove function hooks at the callsite, post execution of the free function. + + free() has no return value """ logger.info(f"Free has no return value") - if HOOK_MMAP_INFO: - logger.debug(f"Unhooking munmap in state: {state.id}") - state.remove_hook(state.context["munmap"], hook_munmap) - + remove_sys_freeing_hooks(state) logger.debug(f"Unhooking free return in state: {state.id}") state.remove_hook(state.cpu.read_register("PC"), hook_free_return) - logger.debug(f"Remaining hooks in state {state.id}: {state._hooks}") def hook_free(state: State): """ Hook to process free information and add function hooks at free function start, pre-execution of the free function. + + void free(void *ptr); """ # Get free address free_address = state.cpu.read_register(next(state._platform._function_abi.get_arguments())) logger.info(f"Attempting to free: {hex(free_address)}") state.context["malloc_lib"].process_free(free_address) - # Hook munmap - if HOOK_MMAP_INFO: - logger.debug(f"Adding Hook for munmap in state: {state.id}") - state.add_hook(state.context["munmap"], hook_munmap, after=False) + add_sys_freeing_hooks(state) # Hook free return address if HOOK_FREE_RETURN: - ret_addr = load_ret_addr("free", state) - state.add_hook(ret_addr, hook_free_return, after=False) + add_ret_hook("free", state, hook_free_return) + + +def hook_calloc_return(state: State): + """ Hook to process calloc information and remove function hooks at the callsite, + post execution of the calloc function. + + calloc() returns a pointer to the allocated memory + """ + + ret_val = state.cpu.read_register(state._platform._function_abi.get_return_reg()) + logger.info(f"calloc ret val: {hex(ret_val)}") + state.context["malloc_lib"].process_calloc( + state.context["calloc_request"][0], state.context["calloc_request"][1], ret_val + ) + del state.context["calloc_request"] + + remove_sys_allocing_hooks(state) + + logger.debug(f"Unhooking calloc return in state: {state.id}") + state.remove_hook(state.cpu.read_register("PC"), calloc_free_return) + logger.debug(f"Remaining hooks in state {state.id}: {state._hooks}") + + +def hook_calloc(state: State): + """ Hook to process calloc information and add function hooks at calloc function start, + pre-execution of the calloc function. + + void *calloc(size_t nmemb, size_t size); + """ + + nmemb = state.cpu.read_register(next(state._platform._function_abi.get_arguments())) + elem_size = state.cpu.read_register(next(state._platform._function_abi.get_arguments())) + logger.info(f"Invoking calloc for {nmemb} element(s) of size: {elem_size}") + state.context["calloc_request"] = (nmemb, elem_size) + + add_sys_allocing_hooks(state) + + # Hook calloc return address + if HOOK_CALLOC_RETURN: + add_ret_hook("calloc", state, hook_calloc_return) + + +def hook_realloc_return(state: State): + """ Hook to process realloc information and remove function hooks at the callsite, + post execution of the realloc function. + + realloc() returns a pointer to the newly allocated memory + """ + + ret_val = state.cpu.read_register(state._platform._function_abi.get_return_reg()) + logger.info(f"realloc ret val: {hex(ret_val)}") + state.context["malloc_lib"].process_realloc( + state.context["realloc_request"][0], ret_val, state.context["realloc_request"][1] + ) + del state.context["realloc_request"] + + remove_sys_allocing_hooks(state) + remove_sys_freeing_hooks(state) + + logger.debug(f"Unhooking realloc return in state: {state.id}") + state.remove_hook(state.cpu.read_register("PC"), hook_realloc_return) + logger.debug(f"Remaining hooks in state {state.id}: {state._hooks}") + + +def hook_realloc(state: State): + """ Hook to process realloc information and add function hooks at realloc function start, + pre-execution of the realloc function. + + void *realloc(void *ptr, size_t size); + """ + + ptr = state.cpu.read_register(next(state._platform._function_abi.get_arguments())) + new_size = state.cpu.read_register(next(state._platform._function_abi.get_arguments())) + logger.info(f"Attempting to realloc: {hex(ptr)} to a requested size of {new_size}") + state.context["realloc_request"] = (ptr, new_size) + + add_sys_allocing_hooks(state) + add_sys_freeing_hooks(state) + + # Hook realloc return address + if HOOK_REALLOC_RETURN: + add_ret_hook("realloc", state, hook_realloc_return) diff --git a/manticore/native/heap_tracking/malloc_lib_data.py b/manticore/native/heap_tracking/malloc_lib_data.py index ce55f4f8e..8a539a737 100644 --- a/manticore/native/heap_tracking/malloc_lib_data.py +++ b/manticore/native/heap_tracking/malloc_lib_data.py @@ -17,7 +17,12 @@ class MallocLibData: def __str__(self): # TODO(Sonya): This does not print address information in hexadecimal - return f"malloc calls: {self.malloc_calls}\nfree calls: {self.free_calls}\nsbrk chunks: {self.sbrk_chunks}\nmmap chunks: {self.mmap_chunks}\n" + return ( + f"malloc calls: {self.malloc_calls}\n" + f"free calls: {self.free_calls}\n" + f"sbrk chunks: {self.sbrk_chunks}\n" + f"mmap chunks: {self.mmap_chunks}\n" + ) def _save_to_file(self, state_id: int): data = { @@ -39,7 +44,13 @@ def process_free(self, free_addr: int): # Maybe remove from malloc list and add to a used_and_free list self.free_calls.append(free_addr) - # TODO(Sonya): Add other malloc library functions here + def process_calloc(self, nmemb: int, elem_size: int, ret_addr: int): + # TODO(Sonya) + pass + + def process_realloc(self, old_addr: int, new_addr: int, size: int): + # TODO(Sonya) + pass def process_sbrk(self, ret_addr: int, size: int): # check last chunk added to list From 8468964838827da0b636c35321ef01dcc2ca5e2a Mon Sep 17 00:00:00 2001 From: sschriner Date: Tue, 26 Jan 2021 11:17:02 -0500 Subject: [PATCH 06/17] Fix Code Climate Issues --- .../native/heap_tracking/hook_malloc_library.py | 16 ++++++++-------- .../native/heap_tracking/malloc_lib_data.py | 2 ++ 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/manticore/native/heap_tracking/hook_malloc_library.py b/manticore/native/heap_tracking/hook_malloc_library.py index f959995b2..38bb5a602 100644 --- a/manticore/native/heap_tracking/hook_malloc_library.py +++ b/manticore/native/heap_tracking/hook_malloc_library.py @@ -89,7 +89,7 @@ def hook_malloc_lib(initial_state: State, malloc: int, free: int): def hook_mmap_return(state: State): - """ Hook to process munmap information and add a function hook to the callsite of munmap (which should + """ Hook to process munmap information and add a function hook to the callsite of munmap (which should be inside malloc or another function inside of malloc which calls munmap), post execution of the munmap call. @@ -106,7 +106,7 @@ def hook_mmap_return(state: State): def hook_mmap(state: State): - """ Hook to process mmap information and add a function hook to the callsite of mmap (which should + """ Hook to process mmap information and add a function hook to the callsite of mmap (which should be inside the free or another function inside of free which calls mmap), post execution of the mmap call. @@ -148,7 +148,7 @@ def hook_sbrk_return(state: State): def hook_sbrk(state: State): - """ Hook to process sbrk information and add a function hook to the callsite of sbrk (which should + """ Hook to process sbrk information and add a function hook to the callsite of sbrk (which should be inside malloc or another function inside of malloc which calls sbrk), post execution of the sbrk call. @@ -164,7 +164,7 @@ def hook_sbrk(state: State): def hook_malloc_return(state: State): - """ Hook to process malloc information and remove function hooks at the return address, + """ Hook to process malloc information and remove function hooks at the return address, post execution of the malloc function. malloc() returns a pointer to the allocated memory @@ -182,7 +182,7 @@ def hook_malloc_return(state: State): def hook_malloc(state: State): - """ Hook to process malloc information and add function hooks at malloc function start, + """ Hook to process malloc information and add function hooks at malloc function start, pre-execution of the malloc function. void *malloc(size_t size); @@ -200,7 +200,7 @@ def hook_malloc(state: State): def hook_munmap_return(state: State): - """ Hook to process munmap information and add a function hook to the callsite of munmap (which should + """ Hook to process munmap information and add a function hook to the callsite of munmap (which should be inside malloc or another function inside of malloc which calls munmap), post execution of the munmap call. @@ -214,7 +214,7 @@ def hook_munmap_return(state: State): def hook_munmap(state: State): - """ Hook to process munmap information and add a function hook to the callsite of munmap (which should + """ Hook to process munmap information and add a function hook to the callsite of munmap (which should be inside the free or another function inside of free which calls munmap), post execution of the munmap call. @@ -231,7 +231,7 @@ def hook_munmap(state: State): def hook_free_return(state: State): - """ Hook to process free information and remove function hooks at the callsite, + """ Hook to process free information and remove function hooks at the callsite, post execution of the free function. free() has no return value diff --git a/manticore/native/heap_tracking/malloc_lib_data.py b/manticore/native/heap_tracking/malloc_lib_data.py index 8a539a737..01393a403 100644 --- a/manticore/native/heap_tracking/malloc_lib_data.py +++ b/manticore/native/heap_tracking/malloc_lib_data.py @@ -5,6 +5,8 @@ # Data Class to hold malloc_lib information # - This is added to state 0 pre-manticore execution and will be saving state specific information as manticore # forks and different program paths are found + + @dataclass class MallocLibData: """This class holds the malloc library data in a specific state (or on specific program path).""" From 218b5f95f2cc4a87da5e6014e751f2a353eaedf7 Mon Sep 17 00:00:00 2001 From: sschriner Date: Tue, 26 Jan 2021 11:23:50 -0500 Subject: [PATCH 07/17] Fixed linting --- .../heap_tracking/hook_malloc_library.py | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/manticore/native/heap_tracking/hook_malloc_library.py b/manticore/native/heap_tracking/hook_malloc_library.py index 38bb5a602..d7936eca4 100644 --- a/manticore/native/heap_tracking/hook_malloc_library.py +++ b/manticore/native/heap_tracking/hook_malloc_library.py @@ -20,7 +20,7 @@ def load_ret_addr(state: State) -> int: - """ Loads the return address of a function from the stack + """Loads the return address of a function from the stack (Assuming the next instruction to be executed is the start of a function call) """ stack_location = state.cpu.read_register("STACK") @@ -68,7 +68,7 @@ def remove_sys_allocing_hooks(state: State): def hook_malloc_lib(initial_state: State, malloc: int, free: int): - """ Function to add malloc hooks and do prep work + """Function to add malloc hooks and do prep work - TODO(Sonya): would like this to eventially be __init__() method for a class once manticore hook callbacks have been debugged. (from Eric) See: https://github.com/trailofbits/manticore/blob/master/tests/native/test_state.py#L163-L218 @@ -89,7 +89,7 @@ def hook_malloc_lib(initial_state: State, malloc: int, free: int): def hook_mmap_return(state: State): - """ Hook to process munmap information and add a function hook to the callsite of munmap (which should + """Hook to process munmap information and add a function hook to the callsite of munmap (which should be inside malloc or another function inside of malloc which calls munmap), post execution of the munmap call. @@ -106,7 +106,7 @@ def hook_mmap_return(state: State): def hook_mmap(state: State): - """ Hook to process mmap information and add a function hook to the callsite of mmap (which should + """Hook to process mmap information and add a function hook to the callsite of mmap (which should be inside the free or another function inside of free which calls mmap), post execution of the mmap call. @@ -132,7 +132,7 @@ def hook_mmap(state: State): # https://github.com/trailofbits/manticore/blob/f46f78b69bd440af144f19ec97695ec7e911a374/manticore/platforms/linux.py#L1864 # state.platform.brk gives current brk def hook_sbrk_return(state: State): - """ Hook to process sbrk return information and remove the hook to itself at the callsite to sbrk, + """Hook to process sbrk return information and remove the hook to itself at the callsite to sbrk, post execution of the sbrk function. sbrk() returns the previous program break - on error, (void *) -1 is returned @@ -148,7 +148,7 @@ def hook_sbrk_return(state: State): def hook_sbrk(state: State): - """ Hook to process sbrk information and add a function hook to the callsite of sbrk (which should + """Hook to process sbrk information and add a function hook to the callsite of sbrk (which should be inside malloc or another function inside of malloc which calls sbrk), post execution of the sbrk call. @@ -164,7 +164,7 @@ def hook_sbrk(state: State): def hook_malloc_return(state: State): - """ Hook to process malloc information and remove function hooks at the return address, + """Hook to process malloc information and remove function hooks at the return address, post execution of the malloc function. malloc() returns a pointer to the allocated memory @@ -182,7 +182,7 @@ def hook_malloc_return(state: State): def hook_malloc(state: State): - """ Hook to process malloc information and add function hooks at malloc function start, + """Hook to process malloc information and add function hooks at malloc function start, pre-execution of the malloc function. void *malloc(size_t size); @@ -200,7 +200,7 @@ def hook_malloc(state: State): def hook_munmap_return(state: State): - """ Hook to process munmap information and add a function hook to the callsite of munmap (which should + """Hook to process munmap information and add a function hook to the callsite of munmap (which should be inside malloc or another function inside of malloc which calls munmap), post execution of the munmap call. @@ -214,7 +214,7 @@ def hook_munmap_return(state: State): def hook_munmap(state: State): - """ Hook to process munmap information and add a function hook to the callsite of munmap (which should + """Hook to process munmap information and add a function hook to the callsite of munmap (which should be inside the free or another function inside of free which calls munmap), post execution of the munmap call. @@ -231,7 +231,7 @@ def hook_munmap(state: State): def hook_free_return(state: State): - """ Hook to process free information and remove function hooks at the callsite, + """Hook to process free information and remove function hooks at the callsite, post execution of the free function. free() has no return value @@ -245,7 +245,7 @@ def hook_free_return(state: State): def hook_free(state: State): - """ Hook to process free information and add function hooks at free function start, + """Hook to process free information and add function hooks at free function start, pre-execution of the free function. void free(void *ptr); @@ -263,7 +263,7 @@ def hook_free(state: State): def hook_calloc_return(state: State): - """ Hook to process calloc information and remove function hooks at the callsite, + """Hook to process calloc information and remove function hooks at the callsite, post execution of the calloc function. calloc() returns a pointer to the allocated memory @@ -284,7 +284,7 @@ def hook_calloc_return(state: State): def hook_calloc(state: State): - """ Hook to process calloc information and add function hooks at calloc function start, + """Hook to process calloc information and add function hooks at calloc function start, pre-execution of the calloc function. void *calloc(size_t nmemb, size_t size); @@ -303,7 +303,7 @@ def hook_calloc(state: State): def hook_realloc_return(state: State): - """ Hook to process realloc information and remove function hooks at the callsite, + """Hook to process realloc information and remove function hooks at the callsite, post execution of the realloc function. realloc() returns a pointer to the newly allocated memory @@ -325,7 +325,7 @@ def hook_realloc_return(state: State): def hook_realloc(state: State): - """ Hook to process realloc information and add function hooks at realloc function start, + """Hook to process realloc information and add function hooks at realloc function start, pre-execution of the realloc function. void *realloc(void *ptr, size_t size); From fe3f71535bbd6e42131df8f396fbcac492e099b3 Mon Sep 17 00:00:00 2001 From: sschriner Date: Tue, 26 Jan 2021 15:03:00 -0500 Subject: [PATCH 08/17] Fixed a couple small errors --- .../heap_tracking/hook_malloc_library.py | 59 ++++++++++++------- 1 file changed, 38 insertions(+), 21 deletions(-) diff --git a/manticore/native/heap_tracking/hook_malloc_library.py b/manticore/native/heap_tracking/hook_malloc_library.py index d7936eca4..07185a870 100644 --- a/manticore/native/heap_tracking/hook_malloc_library.py +++ b/manticore/native/heap_tracking/hook_malloc_library.py @@ -9,14 +9,12 @@ logger.setLevel(2) -# Globals that will become class members to control amount of information being retrieved -# TODO(Sonya): fine tune these a little bit - need to be automated -HOOK_SBRK_INFO = True -HOOK_MMAP_INFO = True -HOOK_MALLOC_RETURN = True -HOOK_FREE_RETURN = True -HOOK_CALLOC_RETURN = True -HOOK_REALLOC_RETURN = True +HOOK_SBRK_INFO: bool +HOOK_MMAP_INFO: bool +HOOK_MALLOC_RETURN: bool +HOOK_FREE_RETURN: bool +HOOK_CALLOC_RETURN: bool +HOOK_REALLOC_RETURN: bool def load_ret_addr(state: State) -> int: @@ -36,7 +34,7 @@ def add_ret_hook(func: str, state: State, ret_hook: Callable[[State], None]) -> def add_sys_freeing_hooks(state: State): if HOOK_MMAP_INFO: - logger.debug(f"Adding Hook for munmap in state: {state.id}") + logger.debug(f"Adding hook for munmap in state: {state.id}") state.add_hook(state.context["munmap"], hook_munmap, after=False) @@ -48,12 +46,11 @@ def remove_sys_freeing_hooks(state: State): def add_sys_allocing_hooks(state: State): if HOOK_SBRK_INFO: - logger.debug(f"Adding Hook for sbrk in state: {state.id}") - state.add_hook("sbrk", hook_sbrk, after=False) + logger.debug(f"Adding hook for sbrk in state: {state.id}") state.add_hook(state.context["sbrk"], hook_sbrk, after=False) if HOOK_MMAP_INFO: - logger.debug(f"Adding Hook for mmap in state: {state.id}") + logger.debug(f"Adding hook for mmap in state: {state.id}") state.add_hook(state.context["mmap"], hook_mmap, after=False) @@ -67,7 +64,19 @@ def remove_sys_allocing_hooks(state: State): state.remove_hook(state.context["mmap"], hook_mmap) -def hook_malloc_lib(initial_state: State, malloc: int, free: int): +def hook_malloc_lib( + initial_state: State, + malloc: int, + free: int, + calloc: int, + realloc: int, + hook_sbrk: bool = True, + hook_mmap: bool = True, + hook_malloc_ret: bool = True, + hook_free_ret: bool = True, + hook_calloc_ret: bool = True, + hook_realloc_ret: bool = True, +): """Function to add malloc hooks and do prep work - TODO(Sonya): would like this to eventially be __init__() method for a class once manticore hook callbacks have been debugged. @@ -76,11 +85,19 @@ def hook_malloc_lib(initial_state: State, malloc: int, free: int): """ initial_state.context["malloc_lib"] = MallocLibData() + global HOOK_SBRK_INFO, HOOK_MMAP_INFO, HOOK_MALLOC_RETURN, HOOK_FREE_RETURN, HOOK_CALLOC_RETURN, HOOK_REALLOC_RETURN + HOOK_SBRK_INFO = hook_sbrk + HOOK_MMAP_INFO = hook_mmap + HOOK_MALLOC_RETURN = hook_malloc_ret + HOOK_FREE_RETURN = hook_free_ret + HOOK_CALLOC_RETURN = hook_calloc_ret + HOOK_REALLOC_RETURN = hook_realloc_ret + # Hook malloc and free initial_state.add_hook(malloc, hook_malloc, after=False) initial_state.add_hook(free, hook_free, after=False) - # initial_state.add_hook(calloc, hook_calloc, after=False) - # initial_state.add_hook(realloc, hook_realloc, after=False) + initial_state.add_hook(calloc, hook_calloc, after=False) + initial_state.add_hook(realloc, hook_realloc, after=False) # Fixme: with syscall specific hooks initial_state.context["sbrk"] = 0x0 @@ -289,9 +306,9 @@ def hook_calloc(state: State): void *calloc(size_t nmemb, size_t size); """ - - nmemb = state.cpu.read_register(next(state._platform._function_abi.get_arguments())) - elem_size = state.cpu.read_register(next(state._platform._function_abi.get_arguments())) + args_gen = state._platform._function_abi.get_arguments() + nmemb = state.cpu.read_register(next(args_gen)) + elem_size = state.cpu.read_register(next(args_gen)) logger.info(f"Invoking calloc for {nmemb} element(s) of size: {elem_size}") state.context["calloc_request"] = (nmemb, elem_size) @@ -330,9 +347,9 @@ def hook_realloc(state: State): void *realloc(void *ptr, size_t size); """ - - ptr = state.cpu.read_register(next(state._platform._function_abi.get_arguments())) - new_size = state.cpu.read_register(next(state._platform._function_abi.get_arguments())) + args_gen = state._platform._function_abi.get_arguments() + ptr = state.cpu.read_register(next(args_gen)) + new_size = state.cpu.read_register(next(args_gen)) logger.info(f"Attempting to realloc: {hex(ptr)} to a requested size of {new_size}") state.context["realloc_request"] = (ptr, new_size) From bc1259bd31e623034aa4cd994855a475923b78f9 Mon Sep 17 00:00:00 2001 From: sschriner Date: Wed, 27 Jan 2021 16:37:53 -0500 Subject: [PATCH 09/17] Small edits --- .../heap_tracking/hook_malloc_library.py | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/manticore/native/heap_tracking/hook_malloc_library.py b/manticore/native/heap_tracking/hook_malloc_library.py index 07185a870..06efb6733 100644 --- a/manticore/native/heap_tracking/hook_malloc_library.py +++ b/manticore/native/heap_tracking/hook_malloc_library.py @@ -56,7 +56,7 @@ def add_sys_allocing_hooks(state: State): def remove_sys_allocing_hooks(state: State): if HOOK_SBRK_INFO: - logger.debug((f"Unhooking sbrk in state: {state.id}")) + logger.debug(f"Unhooking sbrk in state: {state.id}") state.remove_hook(state.context["sbrk"], hook_sbrk) if HOOK_MMAP_INFO: @@ -70,12 +70,12 @@ def hook_malloc_lib( free: int, calloc: int, realloc: int, - hook_sbrk: bool = True, - hook_mmap: bool = True, - hook_malloc_ret: bool = True, - hook_free_ret: bool = True, - hook_calloc_ret: bool = True, - hook_realloc_ret: bool = True, + hook_sbrk_info: bool = True, + hook_mmap_info: bool = True, + hook_malloc_ret_info: bool = True, + hook_free_ret_info: bool = True, + hook_calloc_ret_info: bool = True, + hook_realloc_ret_info: bool = True, ): """Function to add malloc hooks and do prep work - TODO(Sonya): would like this to eventially be __init__() method for a class @@ -86,12 +86,12 @@ def hook_malloc_lib( initial_state.context["malloc_lib"] = MallocLibData() global HOOK_SBRK_INFO, HOOK_MMAP_INFO, HOOK_MALLOC_RETURN, HOOK_FREE_RETURN, HOOK_CALLOC_RETURN, HOOK_REALLOC_RETURN - HOOK_SBRK_INFO = hook_sbrk - HOOK_MMAP_INFO = hook_mmap - HOOK_MALLOC_RETURN = hook_malloc_ret - HOOK_FREE_RETURN = hook_free_ret - HOOK_CALLOC_RETURN = hook_calloc_ret - HOOK_REALLOC_RETURN = hook_realloc_ret + HOOK_SBRK_INFO = hook_sbrk_info + HOOK_MMAP_INFO = hook_mmap_info + HOOK_MALLOC_RETURN = hook_malloc_ret_info + HOOK_FREE_RETURN = hook_free_ret_info + HOOK_CALLOC_RETURN = hook_calloc_ret_info + HOOK_REALLOC_RETURN = hook_realloc_ret_info # Hook malloc and free initial_state.add_hook(malloc, hook_malloc, after=False) @@ -99,7 +99,7 @@ def hook_malloc_lib( initial_state.add_hook(calloc, hook_calloc, after=False) initial_state.add_hook(realloc, hook_realloc, after=False) - # Fixme: with syscall specific hooks + # TODO(Sonya) - Fixme: with syscall specific hooks initial_state.context["sbrk"] = 0x0 initial_state.context["mmap"] = 0x0 initial_state.context["munmap"] = 0x0 From 1f7327e4baf14f346ffc38e2225e5ed87c6f6a2f Mon Sep 17 00:00:00 2001 From: sschriner Date: Wed, 3 Feb 2021 12:38:40 -0500 Subject: [PATCH 10/17] Adding plumbing for sys hooks --- .../native/heap_tracking/heap_syscalls.py | 20 +++++++++++++ .../heap_tracking/hook_malloc_library.py | 30 ++++++++++++------- 2 files changed, 40 insertions(+), 10 deletions(-) create mode 100644 manticore/native/heap_tracking/heap_syscalls.py diff --git a/manticore/native/heap_tracking/heap_syscalls.py b/manticore/native/heap_tracking/heap_syscalls.py new file mode 100644 index 000000000..33baebe84 --- /dev/null +++ b/manticore/native/heap_tracking/heap_syscalls.py @@ -0,0 +1,20 @@ +i386 = { + "mmap": -1, # FIXME!!!! + "brk": 45, + "munmap": 91, +} +amd64 = { + "mmap": 9, + "brk": 12, + "munmap": 11, +} +armv7 = { + "mmap": -1, # FIXME!!!! + "brk": 45, + "munmap": 91, +} +aarch64 = { + "mmap": 222, + "brk": 214, + "munmap": 215, +} diff --git a/manticore/native/heap_tracking/hook_malloc_library.py b/manticore/native/heap_tracking/hook_malloc_library.py index 06efb6733..a8717999a 100644 --- a/manticore/native/heap_tracking/hook_malloc_library.py +++ b/manticore/native/heap_tracking/hook_malloc_library.py @@ -16,6 +16,10 @@ HOOK_CALLOC_RETURN: bool HOOK_REALLOC_RETURN: bool +BRK_SYS_NUM: int +MMAP_SYS_NUM: int +MUNMAP_SYS_NUM: int + def load_ret_addr(state: State) -> int: """Loads the return address of a function from the stack @@ -35,33 +39,33 @@ def add_ret_hook(func: str, state: State, ret_hook: Callable[[State], None]) -> def add_sys_freeing_hooks(state: State): if HOOK_MMAP_INFO: logger.debug(f"Adding hook for munmap in state: {state.id}") - state.add_hook(state.context["munmap"], hook_munmap, after=False) + state.add_hook(MUNMAP_SYS_NUM, hook_munmap, after=False, syscall=True) def remove_sys_freeing_hooks(state: State): if HOOK_MMAP_INFO: logger.debug(f"Unhooking munmap in state: {state.id}") - state.remove_hook(state.context["munmap"], hook_munmap) + state.remove_hook(MUNMAP_SYS_NUM, hook_munmap, syscall=True) def add_sys_allocing_hooks(state: State): if HOOK_SBRK_INFO: logger.debug(f"Adding hook for sbrk in state: {state.id}") - state.add_hook(state.context["sbrk"], hook_sbrk, after=False) + state.add_hook(BRK_SYS_NUM, hook_sbrk, after=False, syscall=True) if HOOK_MMAP_INFO: logger.debug(f"Adding hook for mmap in state: {state.id}") - state.add_hook(state.context["mmap"], hook_mmap, after=False) + state.add_hook(MMAP_SYS_NUM, hook_mmap, after=False, syscall=True) def remove_sys_allocing_hooks(state: State): if HOOK_SBRK_INFO: logger.debug(f"Unhooking sbrk in state: {state.id}") - state.remove_hook(state.context["sbrk"], hook_sbrk) + state.remove_hook(BRK_SYS_NUM, hook_sbrk, syscall=True) if HOOK_MMAP_INFO: logger.debug(f"Unhooking mmap in state: {state.id}") - state.remove_hook(state.context["mmap"], hook_mmap) + state.remove_hook(MMAP_SYS_NUM, hook_mmap, syscall=True) def hook_malloc_lib( @@ -98,11 +102,16 @@ def hook_malloc_lib( initial_state.add_hook(free, hook_free, after=False) initial_state.add_hook(calloc, hook_calloc, after=False) initial_state.add_hook(realloc, hook_realloc, after=False) + # print(initial_state._hooks) + + # Import syscall numbers for current architecture + global BRK_SYS_NUM, MMAP_SYS_NUM, MUNMAP_SYS_NUM + from . import heap_syscalls - # TODO(Sonya) - Fixme: with syscall specific hooks - initial_state.context["sbrk"] = 0x0 - initial_state.context["mmap"] = 0x0 - initial_state.context["munmap"] = 0x0 + table = getattr(heap_syscalls, initial_state.platform.current.machine) + BRK_SYS_NUM = table["brk"] + MMAP_SYS_NUM = table["mmap"] + MUNMAP_SYS_NUM = table["munmap"] def hook_mmap_return(state: State): @@ -154,6 +163,7 @@ def hook_sbrk_return(state: State): sbrk() returns the previous program break - on error, (void *) -1 is returned """ + # TODO: FIXME(Sonya) update this since we're hooking brk instead of sbrk now ret_val = state.cpu.read_register(state._platform._function_abi.get_return_reg()) logger.info(f"sbrk ret val: {hex(ret_val)}") From c42248f0d09d46bd12af97dd63cbd5344e8c289b Mon Sep 17 00:00:00 2001 From: sschriner Date: Wed, 3 Feb 2021 17:52:15 -0500 Subject: [PATCH 11/17] Updates for swapping sbrk and brk hooks --- .../heap_tracking/hook_malloc_library.py | 72 ++++++++++--------- 1 file changed, 37 insertions(+), 35 deletions(-) diff --git a/manticore/native/heap_tracking/hook_malloc_library.py b/manticore/native/heap_tracking/hook_malloc_library.py index a8717999a..96d5bae15 100644 --- a/manticore/native/heap_tracking/hook_malloc_library.py +++ b/manticore/native/heap_tracking/hook_malloc_library.py @@ -9,7 +9,7 @@ logger.setLevel(2) -HOOK_SBRK_INFO: bool +HOOK_BRK_INFO: bool HOOK_MMAP_INFO: bool HOOK_MALLOC_RETURN: bool HOOK_FREE_RETURN: bool @@ -49,9 +49,9 @@ def remove_sys_freeing_hooks(state: State): def add_sys_allocing_hooks(state: State): - if HOOK_SBRK_INFO: - logger.debug(f"Adding hook for sbrk in state: {state.id}") - state.add_hook(BRK_SYS_NUM, hook_sbrk, after=False, syscall=True) + if HOOK_BRK_INFO: + logger.debug(f"Adding hook for brk in state: {state.id}") + state.add_hook(BRK_SYS_NUM, hook_brk, after=False, syscall=True) if HOOK_MMAP_INFO: logger.debug(f"Adding hook for mmap in state: {state.id}") @@ -59,9 +59,9 @@ def add_sys_allocing_hooks(state: State): def remove_sys_allocing_hooks(state: State): - if HOOK_SBRK_INFO: - logger.debug(f"Unhooking sbrk in state: {state.id}") - state.remove_hook(BRK_SYS_NUM, hook_sbrk, syscall=True) + if HOOK_BRK_INFO: + logger.debug(f"Unhooking brk in state: {state.id}") + state.remove_hook(BRK_SYS_NUM, hook_brk, syscall=True) if HOOK_MMAP_INFO: logger.debug(f"Unhooking mmap in state: {state.id}") @@ -74,7 +74,7 @@ def hook_malloc_lib( free: int, calloc: int, realloc: int, - hook_sbrk_info: bool = True, + hook_brk_info: bool = True, hook_mmap_info: bool = True, hook_malloc_ret_info: bool = True, hook_free_ret_info: bool = True, @@ -89,8 +89,8 @@ def hook_malloc_lib( """ initial_state.context["malloc_lib"] = MallocLibData() - global HOOK_SBRK_INFO, HOOK_MMAP_INFO, HOOK_MALLOC_RETURN, HOOK_FREE_RETURN, HOOK_CALLOC_RETURN, HOOK_REALLOC_RETURN - HOOK_SBRK_INFO = hook_sbrk_info + global HOOK_BRK_INFO, HOOK_MMAP_INFO, HOOK_MALLOC_RETURN, HOOK_FREE_RETURN, HOOK_CALLOC_RETURN, HOOK_REALLOC_RETURN + HOOK_BRK_INFO = hook_brk_info HOOK_MMAP_INFO = hook_mmap_info HOOK_MALLOC_RETURN = hook_malloc_ret_info HOOK_FREE_RETURN = hook_free_ret_info @@ -127,7 +127,7 @@ def hook_mmap_return(state: State): state.context["malloc_lib"].process_mmap(ret_val, state.context["mmap_args"]) del state.context["mmap_args"] - logger.debug(f"Unhooking mmap return in malloc in state: {state.id}") + logger.debug(f"Unhooking mmap return in state: {state.id}") state.remove_hook(state.cpu.read_register("PC"), hook_mmap_return) @@ -152,42 +152,44 @@ def hook_mmap(state: State): add_ret_hook("mmap", state, hook_mmap_return) -# NOTE(Sonya): If I can't find the internal sbrk address I can get to manticore brk. -# .....so I can calculate: sbrk_chunk size = curr_brk - new_brk, sbrk_ret_val = new_brk -# where new_brk is the argument passed into brk - see brk and sbrk man pages -# https://github.com/trailofbits/manticore/blob/f46f78b69bd440af144f19ec97695ec7e911a374/manticore/platforms/linux.py#L1864 -# state.platform.brk gives current brk -def hook_sbrk_return(state: State): - """Hook to process sbrk return information and remove the hook to itself at the callsite to sbrk, - post execution of the sbrk function. +def hook_brk_return(state: State): + """Hook to process brk return information and remove the hook to itself at the callsite to brk, + post execution of the brk function. - sbrk() returns the previous program break - on error, (void *) -1 is returned + brk() returns 0 - on error, -1 is returned """ - # TODO: FIXME(Sonya) update this since we're hooking brk instead of sbrk now ret_val = state.cpu.read_register(state._platform._function_abi.get_return_reg()) - logger.info(f"sbrk ret val: {hex(ret_val)}") + logger.info(f"brk ret val: {hex(ret_val)}") - state.context["malloc_lib"].process_sbrk(ret_val, state.context["sbrk_size"]) - del state.context["sbrk_size"] + state.context["malloc_lib"].process_brk(ret_val, state.context["brk_increment"]) + del state.context["brk_increment"] - logger.debug(f"Unhooking sbrk return in malloc in state: {state.id}") - state.remove_hook(state.cpu.read_register("PC"), hook_sbrk_return) + logger.debug(f"Unhooking brk return in state: {state.id}") + state.remove_hook(state.cpu.read_register("PC"), hook_brk_return) -def hook_sbrk(state: State): - """Hook to process sbrk information and add a function hook to the callsite of sbrk (which should - be inside malloc or another function inside of malloc which calls sbrk), post execution of the - sbrk call. +def hook_brk(state: State): + """Hook to process brk information and add a function hook to the callsite of brk (which should + be inside malloc or another function inside of malloc which calls brk), post execution of the + brk call. + + Note (Sonya): Reminder that any call to sbrk with a val of 0 will never reach brk + Note (Sonya): See https://code.woboq.org/userspace/glibc/misc/sbrk.c.html for approximate + sbrk implementation void *sbrk(intptr_t increment); + int brk(void *addr); """ # Get request size from arg1 - request_size = state.cpu.read_register(next(state._platform._function_abi.get_arguments())) - logger.info(f"Invoking sbrk in malloc. Request Size {request_size}") - state.context["sbrk_size"] = request_size + addr = state.cpu.read_register(next(state._platform._function_abi.get_arguments())) + increment = addr - state.platform.brk + logger.info( + f"Invoking brk. Request address: {addr} for an increment of {increment}. Old brk: {state.platform.brk}" + ) + state.context["brk_increment"] = increment # Pull return address off the stack and add a hook for it - add_ret_hook("sbrk", state, hook_sbrk_return) + add_ret_hook("brk", state, hook_brk_return) def hook_malloc_return(state: State): @@ -236,7 +238,7 @@ def hook_munmap_return(state: State): ret_val = state.cpu.read_register(state._platform._function_abi.get_return_reg()) logger.info(f"munmap ret val: {hex(ret_val)}") - logger.debug(f"Unhooking munmap return in malloc in state: {state.id}") + logger.debug(f"Unhooking munmap return in state: {state.id}") state.remove_hook(state.cpu.read_register("PC"), hook_munmap_return) From 7d3cc68412f4e08e1306c41749bd46e1e81398c0 Mon Sep 17 00:00:00 2001 From: sschriner Date: Wed, 3 Feb 2021 18:03:32 -0500 Subject: [PATCH 12/17] More automation --- .../heap_tracking/hook_malloc_library.py | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/manticore/native/heap_tracking/hook_malloc_library.py b/manticore/native/heap_tracking/hook_malloc_library.py index 96d5bae15..ab9932de0 100644 --- a/manticore/native/heap_tracking/hook_malloc_library.py +++ b/manticore/native/heap_tracking/hook_malloc_library.py @@ -70,10 +70,10 @@ def remove_sys_allocing_hooks(state: State): def hook_malloc_lib( initial_state: State, - malloc: int, - free: int, - calloc: int, - realloc: int, + malloc: int = 0x0, + free: int = 0x0, + calloc: int = 0x0, + realloc: int = 0x0, hook_brk_info: bool = True, hook_mmap_info: bool = True, hook_malloc_ret_info: bool = True, @@ -98,11 +98,14 @@ def hook_malloc_lib( HOOK_REALLOC_RETURN = hook_realloc_ret_info # Hook malloc and free - initial_state.add_hook(malloc, hook_malloc, after=False) - initial_state.add_hook(free, hook_free, after=False) - initial_state.add_hook(calloc, hook_calloc, after=False) - initial_state.add_hook(realloc, hook_realloc, after=False) - # print(initial_state._hooks) + if malloc: + initial_state.add_hook(malloc, hook_malloc, after=False) + if free: + initial_state.add_hook(free, hook_free, after=False) + if calloc: + initial_state.add_hook(calloc, hook_calloc, after=False) + if realloc: + initial_state.add_hook(realloc, hook_realloc, after=False) # Import syscall numbers for current architecture global BRK_SYS_NUM, MMAP_SYS_NUM, MUNMAP_SYS_NUM From 5fbbcf1ce87f07b71de673df667acb299876fb6d Mon Sep 17 00:00:00 2001 From: sschriner Date: Wed, 3 Feb 2021 18:05:07 -0500 Subject: [PATCH 13/17] Comment change --- manticore/native/heap_tracking/hook_malloc_library.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/manticore/native/heap_tracking/hook_malloc_library.py b/manticore/native/heap_tracking/hook_malloc_library.py index ab9932de0..9c5d3d695 100644 --- a/manticore/native/heap_tracking/hook_malloc_library.py +++ b/manticore/native/heap_tracking/hook_malloc_library.py @@ -97,7 +97,7 @@ def hook_malloc_lib( HOOK_CALLOC_RETURN = hook_calloc_ret_info HOOK_REALLOC_RETURN = hook_realloc_ret_info - # Hook malloc and free + # Add requested malloc lib hooks if malloc: initial_state.add_hook(malloc, hook_malloc, after=False) if free: From bb1e1ecc8d390cacfdc2f3d32a27212f75a897fc Mon Sep 17 00:00:00 2001 From: sschriner Date: Wed, 3 Feb 2021 18:20:31 -0500 Subject: [PATCH 14/17] Tentatively chose syscall numbers for mmap in i386 and armv7 --- manticore/native/heap_tracking/heap_syscalls.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/manticore/native/heap_tracking/heap_syscalls.py b/manticore/native/heap_tracking/heap_syscalls.py index 33baebe84..1729cfbcb 100644 --- a/manticore/native/heap_tracking/heap_syscalls.py +++ b/manticore/native/heap_tracking/heap_syscalls.py @@ -1,20 +1,20 @@ i386 = { - "mmap": -1, # FIXME!!!! "brk": 45, + "mmap": 192, # sys_mmap_pgoff "munmap": 91, } amd64 = { - "mmap": 9, "brk": 12, + "mmap": 9, "munmap": 11, } armv7 = { - "mmap": -1, # FIXME!!!! "brk": 45, + "mmap": 192, # sys_mmap2 "munmap": 91, } aarch64 = { - "mmap": 222, "brk": 214, + "mmap": 222, "munmap": 215, } From d7cf7e27467902a2720a7110a80a8aef69c302fc Mon Sep 17 00:00:00 2001 From: sschriner Date: Mon, 8 Feb 2021 13:52:13 -0500 Subject: [PATCH 15/17] rename of sbrk to brk in malloc_lib_data --- manticore/native/heap_tracking/malloc_lib_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/manticore/native/heap_tracking/malloc_lib_data.py b/manticore/native/heap_tracking/malloc_lib_data.py index 01393a403..b10d0abca 100644 --- a/manticore/native/heap_tracking/malloc_lib_data.py +++ b/manticore/native/heap_tracking/malloc_lib_data.py @@ -54,7 +54,7 @@ def process_realloc(self, old_addr: int, new_addr: int, size: int): # TODO(Sonya) pass - def process_sbrk(self, ret_addr: int, size: int): + def process_brk(self, ret_addr: int, size: int): # check last chunk added to list # if size + address == new starting address of chunk -> add new chunk size to last allocated chunk # else -> add a new chunk to the list From 60e56dd8e49b4af2618fd5808378319f6a34c80d Mon Sep 17 00:00:00 2001 From: sschriner Date: Tue, 23 Feb 2021 13:57:20 -0500 Subject: [PATCH 16/17] Added assertion and updated folder to data path --- manticore/native/heap_tracking/hook_malloc_library.py | 10 ++++++++-- manticore/native/heap_tracking/malloc_lib_data.py | 11 ++++++++--- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/manticore/native/heap_tracking/hook_malloc_library.py b/manticore/native/heap_tracking/hook_malloc_library.py index 9c5d3d695..e6ff4f273 100644 --- a/manticore/native/heap_tracking/hook_malloc_library.py +++ b/manticore/native/heap_tracking/hook_malloc_library.py @@ -3,7 +3,7 @@ from manticore.native.heap_tracking.malloc_lib_data import MallocLibData import logging -from typing import Callable +from typing import Callable, Optional logger = logging.getLogger(__name__) logger.setLevel(2) @@ -80,6 +80,7 @@ def hook_malloc_lib( hook_free_ret_info: bool = True, hook_calloc_ret_info: bool = True, hook_realloc_ret_info: bool = True, + workspace: Optional[str] = None, ): """Function to add malloc hooks and do prep work - TODO(Sonya): would like this to eventially be __init__() method for a class @@ -87,7 +88,12 @@ def hook_malloc_lib( (from Eric) See: https://github.com/trailofbits/manticore/blob/master/tests/native/test_state.py#L163-L218 & https://github.com/trailofbits/manticore/blob/master/tests/native/test_state.py#L274-L278 to work on debugging this """ - initial_state.context["malloc_lib"] = MallocLibData() + # This features use on platforms besides amd64 is entirely untested + assert initial_state.platform.current.machine == "amd64", ( + "This feature's use on platforms besides amd64 is " "entirely untested." + ) + + initial_state.context["malloc_lib"] = MallocLibData(workspace) global HOOK_BRK_INFO, HOOK_MMAP_INFO, HOOK_MALLOC_RETURN, HOOK_FREE_RETURN, HOOK_CALLOC_RETURN, HOOK_REALLOC_RETURN HOOK_BRK_INFO = hook_brk_info diff --git a/manticore/native/heap_tracking/malloc_lib_data.py b/manticore/native/heap_tracking/malloc_lib_data.py index b10d0abca..60c5822c3 100644 --- a/manticore/native/heap_tracking/malloc_lib_data.py +++ b/manticore/native/heap_tracking/malloc_lib_data.py @@ -1,4 +1,4 @@ -from typing import List, Dict, Tuple +from typing import List, Dict, Tuple, Optional from dataclasses import dataclass, field import json @@ -11,6 +11,7 @@ class MallocLibData: """This class holds the malloc library data in a specific state (or on specific program path).""" + workspace: Optional[str] malloc_calls: List[Tuple[int, int]] = field(default_factory=list) free_calls: List[int] = field(default_factory=list) sbrk_chunks: List[Tuple[int, int]] = field(default_factory=list) @@ -33,8 +34,12 @@ def _save_to_file(self, state_id: int): "sbrk_chunks": self.sbrk_chunks, "mmap_chunks": self.mmap_chunks, } - with open(f"m_out/malloc_{state_id}.json", "w+") as write_file: - json.dump(data, write_file, indent=4) + if self.workspace: + with open(f"{self.workspace}/malloc_{state_id}.json", "w+") as write_file: + json.dump(data, write_file, indent=4) + else: + with open(f"m_out/malloc_{state_id}.json", "w+") as write_file: + json.dump(data, write_file, indent=4) # TODO(Sonya): Add some more methods here for helpful semantics of recording/retrieving information # Might want to annotate all this with instruction address information From 38bcb7eb07e7c13467d8a1d3e8dde31bf714fa7a Mon Sep 17 00:00:00 2001 From: sschriner Date: Thu, 6 May 2021 11:42:40 -0400 Subject: [PATCH 17/17] updates --- .../heap_tracking/hook_malloc_library.py | 2 +- .../native/heap_tracking/malloc_lib_data.py | 24 +++++++- manticore/native/plugins.py | 55 +++++++++++++++++++ setup.py | 1 + 4 files changed, 79 insertions(+), 3 deletions(-) diff --git a/manticore/native/heap_tracking/hook_malloc_library.py b/manticore/native/heap_tracking/hook_malloc_library.py index e6ff4f273..07f514dbc 100644 --- a/manticore/native/heap_tracking/hook_malloc_library.py +++ b/manticore/native/heap_tracking/hook_malloc_library.py @@ -317,7 +317,7 @@ def hook_calloc_return(state: State): remove_sys_allocing_hooks(state) logger.debug(f"Unhooking calloc return in state: {state.id}") - state.remove_hook(state.cpu.read_register("PC"), calloc_free_return) + state.remove_hook(state.cpu.read_register("PC"), hook_calloc_return) logger.debug(f"Remaining hooks in state {state.id}: {state._hooks}") diff --git a/manticore/native/heap_tracking/malloc_lib_data.py b/manticore/native/heap_tracking/malloc_lib_data.py index 60c5822c3..de0d0c5f3 100644 --- a/manticore/native/heap_tracking/malloc_lib_data.py +++ b/manticore/native/heap_tracking/malloc_lib_data.py @@ -1,12 +1,23 @@ -from typing import List, Dict, Tuple, Optional -from dataclasses import dataclass, field import json +from dataclasses import dataclass, field +from intervaltree import Interval, IntervalTree +from typing import List, Dict, Tuple, Optional + # Data Class to hold malloc_lib information # - This is added to state 0 pre-manticore execution and will be saving state specific information as manticore # forks and different program paths are found +@dataclass +class AllocationInformation: + """This class wraps information about an allocation""" + + addr: int + requested_size: int + is_freed: bool + + @dataclass class MallocLibData: """This class holds the malloc library data in a specific state (or on specific program path).""" @@ -17,6 +28,10 @@ class MallocLibData: sbrk_chunks: List[Tuple[int, int]] = field(default_factory=list) mmap_chunks: Dict[int, int] = field(default_factory=dict) munmap_chunks: Dict[int, int] = field(default_factory=dict) + malloc_lib_tree: IntervalTree = field(default_factory=IntervalTree) + system_heap_tree: IntervalTree = field( + default_factory=IntervalTree + ) # TODO(sonya): this needs support def __str__(self): # TODO(Sonya): This does not print address information in hexadecimal @@ -46,10 +61,15 @@ def _save_to_file(self, state_id: int): def process_malloc(self, ret_addr: int, size: int): # should add malloc call information to list self.malloc_calls.append((ret_addr, size)) + self.malloc_lib_tree[ret_addr : ret_addr + size] = AllocationInformation( + ret_addr, size, False + ) def process_free(self, free_addr: int): # Maybe remove from malloc list and add to a used_and_free list self.free_calls.append(free_addr) + for allocation in sorted(self.malloc_lib_tree[free_addr]): + allocation.data.is_freed = True def process_calloc(self, nmemb: int, elem_size: int, ret_addr: int): # TODO(Sonya) diff --git a/manticore/native/plugins.py b/manticore/native/plugins.py index 939f5cb97..5eb0aa6d4 100644 --- a/manticore/native/plugins.py +++ b/manticore/native/plugins.py @@ -1,5 +1,7 @@ from ..core.plugin import Plugin from .state_merging import merge_constraints, is_merge_possible, merge +from heap_tracking.hook_malloc_library import hook_malloc_lib +from manticore.native.state import State import logging logger = logging.getLogger(__name__) @@ -125,3 +127,56 @@ def will_load_state_callback(self, current_state_id): # UGLY we are replacing a state_id. This may be breaking caches in # the future self.replace_state(current_state_id, merged_state) + + +class TrackHeapInformation(Plugin): + """ + Enables tracking heap information given that a user has access to the malloc library addresses. + + This feature is only supported in X86 mode. + + TODO(sonya): make this queryable in some way + TODO(sonya): context for a given state + TODO(sonya): list (or dict) of context for all manticore states + TODO(sonya): plugin function to dump the results somewhere + + """ + + def __init__( + self, + m: Manticore, + malloc: int = 0x0, + free: int = 0x0, + calloc: int = 0x0, + realloc: int = 0x0, + hook_brk_info: bool = True, + hook_mmap_info: bool = True, + hook_malloc_ret_info: bool = True, + hook_free_ret_info: bool = True, + hook_calloc_ret_info: bool = True, + hook_realloc_ret_info: bool = True, + ): + super().__init__() + + assert malloc or free or calloc or realloc, ( + "No malloc library addresses provided. Please specify the address of at least one malloc library function" + " to track it's corresponding heap information" + ) + + def init_heap_tracking(initial_state: State): + hook_malloc_lib( + initial_state, + malloc=malloc, + free=free, + calloc=calloc, + realloc=realloc, + workspace=m._workspace._store.uri, + hook_brk_info=hook_brk_info, + hook_mmap_info=hook_mmap_info, + hook_malloc_ret_info=hook_malloc_ret_info, + hook_free_ret_info=hook_free_ret_info, + hook_calloc_ret_info=hook_calloc_ret_info, + hook_realloc_ret_info=hook_realloc_ret_info, + ) + + m.init(init_heap_tracking) diff --git a/setup.py b/setup.py index 350b66311..53c4d2ad4 100644 --- a/setup.py +++ b/setup.py @@ -72,6 +72,7 @@ def rtd_dependent_deps(): "wasm", "dataclasses; python_version < '3.7'", "pyevmasm>=0.2.3", + "intervaltree", ] + rtd_dependent_deps(), extras_require=extra_require,