-
Notifications
You must be signed in to change notification settings - Fork 1.8k
[experiment]: Use capstone to implement ELF.libc_start_main_ret
#2580
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: dev
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
|
|
@@ -54,6 +54,7 @@ | |||||||
| import re | ||||||||
| import subprocess | ||||||||
| import tempfile | ||||||||
| import capstone as cs | ||||||||
|
|
||||||||
| from io import BytesIO | ||||||||
|
|
||||||||
|
|
@@ -1143,6 +1144,12 @@ def _populate_kernel_version(self): | |||||||
|
|
||||||||
| self.config['version'] = self.version | ||||||||
|
|
||||||||
| def cs_disasm(self, md: cs.Cs, address, n_bytes): | ||||||||
| if self.arch == 'arm' and address & 1: | ||||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||
| address -= 1 | ||||||||
|
|
||||||||
| return md.disasm(self.read(address, n_bytes), address) | ||||||||
|
|
||||||||
| @property | ||||||||
| def libc_start_main_return(self): | ||||||||
| """:class:`int`: Address of the return address into __libc_start_main from main. | ||||||||
|
|
@@ -1157,62 +1164,88 @@ def libc_start_main_return(self): | |||||||
| to list all calls inside __libc_start_main, find the call to exit | ||||||||
| after the call to main and select the previous call. | ||||||||
| """ | ||||||||
| if '__libc_start_main' not in self.functions: | ||||||||
| func = self.functions.get('__libc_start_main') | ||||||||
| exit_addr = self.symbols.get('exit') | ||||||||
| if not (func and exit_addr): | ||||||||
| return 0 | ||||||||
|
|
||||||||
| if 'exit' not in self.symbols: | ||||||||
| return 0 | ||||||||
| # `__libc_start_call_main` is usually smaller than `__libc_start_main`, | ||||||||
| # (except for powerpc which uses a bigger `generic_start_main`), so | ||||||||
| # we might disassemble a bit too much, but it's a good dynamic estimate. | ||||||||
| callee_size = func.size | ||||||||
| # most architectures' call instructions take an immediate as the first operand, except s390 | ||||||||
| imm_index = 0 | ||||||||
| eabi = None | ||||||||
|
|
||||||||
| # If there's no delay slot, execution continues on the next instruction after a call. | ||||||||
| call_return_offset = 1 | ||||||||
| call_instructions = set([cs.CS_GRP_CALL]) | ||||||||
| if self.arch in ['arm', 'thumb']: | ||||||||
| call_instructions = set(['blx', 'bl']) | ||||||||
| # FIXME: I have no idea why setting self.arch = 'armhf' does not work | ||||||||
| if b'armhf' in self.linker: eabi = 'hf' | ||||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't like this. Maybe always use plain `arm` and always try eabihf first for disassembly, as it handles a strict superset of instructions? |
||||||||
| if exit_addr & 1: exit_addr -= 1 | ||||||||
| elif self.arch == 'aarch64': | ||||||||
| call_instructions = set(['blr', 'bl']) | ||||||||
| pass | ||||||||
| elif self.arch in ['mips', 'mips64']: | ||||||||
| call_instructions = set(['bal', 'jalr']) | ||||||||
| # Account for the delay slot. | ||||||||
| call_return_offset = 2 | ||||||||
| elif self.arch in ['i386', 'amd64', 'ia64']: | ||||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe move the empty suites to the end, or remove them altogether and also remove the error case? |
||||||||
| call_instructions = set(['call']) | ||||||||
| pass | ||||||||
| elif self.arch in ['ppc', 'powerpc', 'powerpc64']: | ||||||||
| callee_size *= 2 | ||||||||
| # powerpc often jumps to the local entry point after TOC setup | ||||||||
| if exit_addr & 1 == 0: exit_addr += 8 | ||||||||
| pass | ||||||||
| elif self.arch in ['em_s390', 's390']: | ||||||||
| imm_index = 1 | ||||||||
| pass | ||||||||
| else: | ||||||||
| log.error('Unsupported architecture %s in ELF.libc_start_main_return', self.arch) | ||||||||
| return 0 | ||||||||
|
|
||||||||
| lines = self.functions['__libc_start_main'].disasm().split('\n') | ||||||||
| exit_addr = hex(self.symbols['exit']) | ||||||||
| calls = [(index, line) for index, line in enumerate(lines) if set(line.split()) & call_instructions] | ||||||||
| from pwnlib.asm import get_cs_disassembler | ||||||||
| md = get_cs_disassembler(arch=self.arch, endian=self.endian, bits=self.bits, eabi=eabi) | ||||||||
| dis = list(self.cs_disasm(md, func.address, func.size)) | ||||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe this should be made an API of the Function object, like disasm? It would be more useful across pwntools. |
||||||||
|
|
||||||||
| def find_ret_main_addr(lines, calls): | ||||||||
| exit_calls = [index for index, line in enumerate(calls) if exit_addr in line[1]] | ||||||||
| if len(exit_calls) != 1: | ||||||||
| filter_calls = lambda dis: ((i, x) for i, x in enumerate(dis) if call_instructions & set(x.groups)) | ||||||||
|
|
||||||||
| if self.arch in ['ppc', 'powerpc', 'powerpc64']: | ||||||||
| filter_calls = lambda dis: ((i, x) for i, x in enumerate(dis) if x.mnemonic in ['bctrl', 'bl']) | ||||||||
| # FIXME: `bal` was not included in CS_GRP_CALL. This is fixed on capstone v6.alpha | ||||||||
| elif self.arch in ['mips', 'mips64']: | ||||||||
| filter_calls = lambda dis: ((i, x) for i, x in enumerate(dis) if x.mnemonic in ['bal', 'jalr']) | ||||||||
|
|
||||||||
| calls = list(filter_calls(dis)) | ||||||||
|
|
||||||||
| def find_ret_main_addr(caller_dis, calls): | ||||||||
| call_to_main = -1 | ||||||||
| for i, insn in calls: | ||||||||
| if cs.CS_GRP_CALL in insn.groups and insn.operands[imm_index].imm == exit_addr: | ||||||||
| break | ||||||||
| call_to_main = i | ||||||||
| else: | ||||||||
| return 0 | ||||||||
|
|
||||||||
| call_to_main = calls[exit_calls[0] - 1] | ||||||||
| return_from_main = lines[call_to_main[0] + call_return_offset].lstrip() | ||||||||
| return_from_main = int(return_from_main[ : return_from_main.index(':') ], 16) | ||||||||
| return return_from_main | ||||||||
| return_from_main = caller_dis[call_to_main + call_return_offset] | ||||||||
| return return_from_main.address | ||||||||
|
|
||||||||
| # Starting with glibc-2.34 calling `main` is split out into `__libc_start_call_main` | ||||||||
| ret_addr = find_ret_main_addr(lines, calls) | ||||||||
| ret_addr = find_ret_main_addr(dis, calls) | ||||||||
| # Pre glibc-2.34 case - `main` is called directly | ||||||||
| if ret_addr: | ||||||||
| return ret_addr | ||||||||
|
|
||||||||
| # `__libc_start_main` -> `__libc_start_call_main` -> `main` | ||||||||
| # Find a direct call which calls `exit` once. That's probably `__libc_start_call_main`. | ||||||||
| direct_call_pattern = re.compile(r'['+r'|'.join(call_instructions)+r']\s+(0x[0-9a-zA-Z]+)') | ||||||||
| for line in calls: | ||||||||
| match = direct_call_pattern.search(line[1]) | ||||||||
| if not match: | ||||||||
| continue | ||||||||
| for _, insn in calls: | ||||||||
| op = insn.operands[imm_index] | ||||||||
| if op.type != cs.CS_OP_IMM: continue | ||||||||
|
|
||||||||
| target_addr = op.imm | ||||||||
| callee_dis = list(self.cs_disasm(md, target_addr, callee_size)) | ||||||||
| callee_calls = filter_calls(callee_dis) | ||||||||
|
|
||||||||
| target_addr = int(match.group(1), 0) | ||||||||
| # `__libc_start_call_main` is usually smaller than `__libc_start_main`, so | ||||||||
| # we might disassemble a bit too much, but it's a good dynamic estimate. | ||||||||
| callee_lines = self.disasm(target_addr, self.functions['__libc_start_main'].size).split('\n') | ||||||||
| callee_calls = [(index, line) for index, line in enumerate(callee_lines) if set(line.split()) & call_instructions] | ||||||||
| ret_addr = find_ret_main_addr(callee_lines, callee_calls) | ||||||||
| ret_addr = find_ret_main_addr(callee_dis, callee_calls) | ||||||||
| if ret_addr: | ||||||||
| return ret_addr | ||||||||
| return 0 | ||||||||
|
|
||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I like it.