Skip to content

Commit df2cb97

Browse files
committed
fix: use detected ELF architecture instead of hardcoded ARM
process_elf_file() hardcoded ARM:LE:32:Cortex as the -processor flag for Ghidra, causing all x86-64 ELF files to be decompiled with the wrong architecture (producing garbage halt_baddata() output). It now calls detect_elf_architecture() to determine the correct processor ID and compiler spec from the ELF header, and omits both flags when detection fails so that Ghidra can auto-detect. Also fix format.sh to use python3 instead of python for Ubuntu 24.04 compatibility. test: add regression tests for ELF architecture detection in process_elf_file. Six new tests verify that the Ghidra command line receives the correct -processor and -cspec flags based on the actual ELF architecture (x86-64, ARM32, AARCH64, unknown).
1 parent 9d6f73f commit df2cb97

3 files changed

Lines changed: 249 additions & 12 deletions

File tree

format.sh

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -56,16 +56,16 @@ ALL_PASSED=true
5656
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
5757
echo -e "${BLUE} Import Sorting (isort)${NC}"
5858
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
59-
if command -v isort &> /dev/null || python -m isort --version &> /dev/null; then
59+
if command -v isort &> /dev/null || python3 -m isort --version &> /dev/null; then
6060
if $CHECK_ONLY; then
61-
if python -m isort --check-only --profile black $FILES; then
61+
if python3 -m isort --check-only --profile black $FILES; then
6262
echo -e "${GREEN}✓ isort: passed${NC}"
6363
else
6464
echo -e "${RED}✗ isort: issues found${NC}"
6565
ALL_PASSED=false
6666
fi
6767
else
68-
python -m isort --profile black $FILES
68+
python3 -m isort --profile black $FILES
6969
echo -e "${GREEN}✓ isort: completed${NC}"
7070
fi
7171
else
@@ -77,16 +77,16 @@ echo ""
7777
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
7878
echo -e "${BLUE} Code Formatting (black)${NC}"
7979
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
80-
if command -v black &> /dev/null || python -m black --version &> /dev/null; then
80+
if command -v black &> /dev/null || python3 -m black --version &> /dev/null; then
8181
if $CHECK_ONLY; then
82-
if python -m black --check $FILES; then
82+
if python3 -m black --check $FILES; then
8383
echo -e "${GREEN}✓ black: passed${NC}"
8484
else
8585
echo -e "${RED}✗ black: issues found${NC}"
8686
ALL_PASSED=false
8787
fi
8888
else
89-
python -m black $FILES
89+
python3 -m black $FILES
9090
echo -e "${GREEN}✓ black: completed${NC}"
9191
fi
9292
else
@@ -98,8 +98,8 @@ echo ""
9898
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
9999
echo -e "${BLUE} Linting (flake8)${NC}"
100100
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
101-
if command -v flake8 &> /dev/null || python -m flake8 --version &> /dev/null; then
102-
if python -m flake8 --max-line-length 88 --extend-ignore E203,E501,W503 $FILES; then
101+
if command -v flake8 &> /dev/null || python3 -m flake8 --version &> /dev/null; then
102+
if python3 -m flake8 --max-line-length 88 --extend-ignore E203,E501,W503 $FILES; then
103103
echo -e "${GREEN}✓ flake8: no issues${NC}"
104104
else
105105
echo -e "${RED}✗ flake8: issues found${NC}"

libsurgeon.py

100755100644
Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1164,17 +1164,31 @@ def process_elf_file(
11641164
log_step("Running Ghidra analysis & decompilation...")
11651165
print()
11661166

1167+
# Detect architecture
1168+
arch_info = detect_elf_architecture(elf_path)
1169+
if arch_info:
1170+
processor_id, cspec = arch_info
1171+
log_info(f"Detected architecture: {processor_id} (cspec: {cspec})")
1172+
else:
1173+
processor_id, cspec = None, None
1174+
log_warn("Could not detect architecture, letting Ghidra auto-detect")
1175+
11671176
# Build Ghidra command
11681177
cmd = [
11691178
ghidra_headless,
11701179
temp_project,
11711180
"elf_project",
11721181
"-import",
11731182
elf_path,
1174-
"-processor",
1175-
"ARM:LE:32:Cortex",
1176-
"-cspec",
1177-
"default",
1183+
]
1184+
1185+
# Add processor/cspec only if detected (otherwise let Ghidra auto-detect)
1186+
if processor_id:
1187+
cmd += ["-processor", processor_id]
1188+
if cspec:
1189+
cmd += ["-cspec", cspec]
1190+
1191+
cmd += [
11781192
"-postScript",
11791193
decompile_script,
11801194
output_dir,

tests/test_process_elf.py

Lines changed: 223 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,223 @@
1+
#!/usr/bin/env python3
2+
# -*- coding: utf-8 -*-
3+
"""
4+
LibSurgeon Test Suite - ELF Processing Pipeline Tests
5+
6+
Tests that process_elf_file correctly uses detected architecture
7+
when building the Ghidra command line, rather than hardcoding a
8+
specific processor.
9+
10+
This test was added after a bug where process_elf_file hardcoded
11+
ARM:LE:32:Cortex as the processor, causing all x86-64 ELF files
12+
to be decompiled with the wrong architecture (producing garbage
13+
halt_baddata() output).
14+
"""
15+
16+
import os
17+
from unittest.mock import patch
18+
19+
import pytest # noqa: F401 - used by fixtures
20+
21+
from libsurgeon import (
22+
detect_elf_architecture,
23+
process_elf_file,
24+
)
25+
26+
FIXTURES_DIR = os.path.join(os.path.dirname(__file__), "fixtures")
27+
28+
29+
class TestProcessElfArchitectureDetection:
    """
    Tests that process_elf_file passes the correct -processor flag
    to Ghidra based on the actual ELF architecture.

    This is a regression test for the bug where ARM:LE:32:Cortex was
    hardcoded, causing x86-64 binaries to be decompiled as ARM.

    Every test takes a ``temp_dir`` argument: a writable scratch directory
    supplied by a pytest fixture (presumably defined in conftest.py; it is
    not defined in this module).
    """

    @staticmethod
    def _write_synthetic_elf(path, ei_class, e_machine):
        """
        Write a minimal little-endian ET_DYN ELF header to ``path``.

        Only the fields needed to identify the architecture are meaningful;
        everything after e_machine is zero padding.

        Args:
            path: Destination file path (created or overwritten).
            ei_class: EI_CLASS byte (1 = 32-bit, 2 = 64-bit).
            e_machine: 16-bit ELF machine number (e.g. 0x28 for ARM,
                0xB7 for AARCH64).
        """
        header = b"".join(
            (
                b"\x7fELF",  # magic
                bytes([ei_class]),  # EI_CLASS
                b"\x01",  # EI_DATA: little endian
                b"\x01",  # EI_VERSION
                b"\x00" * 9,  # EI_OSABI, EI_ABIVERSION and padding
                (3).to_bytes(2, "little"),  # e_type = ET_DYN (shared object)
                e_machine.to_bytes(2, "little"),  # e_machine
                b"\x00" * 100,  # rest of header
            )
        )
        with open(path, "wb") as f:
            f.write(header)

    @staticmethod
    def _flag_value(cmd, flag):
        """
        Return the argument that immediately follows ``flag`` in ``cmd``.

        Callers are expected to have asserted ``flag in cmd`` already so
        that a missing flag is reported with a test-specific message.
        """
        idx = cmd.index(flag)
        # Fail with a readable message rather than an IndexError if the
        # flag happens to be the last element of the command line.
        assert idx + 1 < len(cmd), f"{flag} was passed without a value"
        return cmd[idx + 1]

    def _get_ghidra_cmd_from_process_elf(self, elf_path, temp_dir):
        """
        Helper: call process_elf_file with a mocked subprocess.Popen
        and capture the command line that would be passed to Ghidra.

        Returns the captured command list, or None if Popen was never
        invoked.
        """
        captured_cmd = {}

        class FakePopen:
            """Stand-in for subprocess.Popen that records its command."""

            def __init__(self, cmd, **kwargs):
                captured_cmd["cmd"] = cmd
                self.returncode = 0
                # Canned output lines that look like a successful run.
                self.stdout = iter(
                    [
                        "INFO ANALYZING\n",
                        "INFO REPORT: Analysis succeeded\n",
                    ]
                )

            def wait(self):
                pass

        # Build a fake Ghidra installation containing an executable
        # support/analyzeHeadless stub.
        fake_ghidra_dir = os.path.join(temp_dir, "fake_ghidra")
        os.makedirs(os.path.join(fake_ghidra_dir, "support"), exist_ok=True)
        headless_path = os.path.join(fake_ghidra_dir, "support", "analyzeHeadless")
        with open(headless_path, "w") as f:
            f.write("#!/bin/bash\nexit 0\n")
        os.chmod(headless_path, 0o755)

        # Also need the ghidra_decompile_elf.py script to exist
        script_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        assert os.path.isfile(
            os.path.join(script_dir, "ghidra_decompile_elf.py")
        ), "ghidra_decompile_elf.py not found"

        with patch("subprocess.Popen", FakePopen):
            process_elf_file(
                elf_path=elf_path,
                output_base=os.path.join(temp_dir, "output"),
                ghidra_path=fake_ghidra_dir,
                strategy="prefix",
                timeout=60,
                evaluate=False,
            )

        return captured_cmd.get("cmd")

    def test_x86_64_elf_uses_x86_64_processor(self, temp_dir):
        """
        Regression test: an x86-64 ELF must NOT be decompiled with ARM processor.
        process_elf_file should detect x86-64 and pass -processor x86:LE:64:default.
        """
        so_file = os.path.join(FIXTURES_DIR, "libtest.so")
        if not os.path.isfile(so_file):
            pytest.skip("Test fixture libtest.so not found (run build_fixtures.sh)")

        # Verify the fixture is actually x86-64
        arch = detect_elf_architecture(so_file)
        assert arch is not None, "Failed to detect architecture of libtest.so"
        processor, _ = arch
        assert (
            "x86" in processor and "64" in processor
        ), f"Expected x86-64 fixture, got {processor}"

        cmd = self._get_ghidra_cmd_from_process_elf(so_file, temp_dir)
        assert cmd is not None, "Ghidra command was not captured"

        # The command must contain -processor with x86:LE:64:default
        assert (
            "-processor" in cmd
        ), "process_elf_file did not pass -processor flag to Ghidra"
        actual_processor = self._flag_value(cmd, "-processor")
        assert "x86" in actual_processor and "64" in actual_processor, (
            "Expected x86:LE:64:default processor for x86-64 ELF, "
            f"got '{actual_processor}'. "
            "This is the ARM hardcoding bug!"
        )
        # Must NOT contain ARM
        assert "ARM" not in actual_processor, (
            f"x86-64 ELF is being decompiled as ARM: '{actual_processor}'. "
            "This is the ARM hardcoding bug!"
        )

    def test_x86_64_elf_executable_uses_x86_64_processor(self, temp_dir):
        """Same test but with an ELF executable instead of shared library."""
        elf_file = os.path.join(FIXTURES_DIR, "test_program.elf")
        if not os.path.isfile(elf_file):
            pytest.skip("Test fixture test_program.elf not found")

        cmd = self._get_ghidra_cmd_from_process_elf(elf_file, temp_dir)
        assert cmd is not None

        assert "-processor" in cmd
        actual_processor = self._flag_value(cmd, "-processor")
        assert "x86" in actual_processor and "64" in actual_processor
        assert "ARM" not in actual_processor

    def test_synthetic_arm32_elf_uses_arm_processor(self, temp_dir):
        """An ARM 32-bit ELF should use an ARM processor ID."""
        # Create a synthetic ARM32 LE ELF header (e_machine 0x28 = ARM)
        elf_file = os.path.join(temp_dir, "arm32_test.so")
        self._write_synthetic_elf(elf_file, ei_class=1, e_machine=0x28)

        cmd = self._get_ghidra_cmd_from_process_elf(elf_file, temp_dir)
        assert cmd is not None

        assert "-processor" in cmd
        actual_processor = self._flag_value(cmd, "-processor")
        assert (
            "ARM" in actual_processor
        ), f"Expected ARM processor for ARM ELF, got '{actual_processor}'"

    def test_synthetic_aarch64_elf_uses_aarch64_processor(self, temp_dir):
        """An AARCH64 ELF should use an AARCH64 processor ID."""
        # 64-bit LE header with e_machine 0xB7 = AARCH64
        elf_file = os.path.join(temp_dir, "aarch64_test.so")
        self._write_synthetic_elf(elf_file, ei_class=2, e_machine=0xB7)

        cmd = self._get_ghidra_cmd_from_process_elf(elf_file, temp_dir)
        assert cmd is not None

        assert "-processor" in cmd
        actual_processor = self._flag_value(cmd, "-processor")
        assert "AARCH64" in actual_processor

    def test_processor_and_cspec_both_passed(self, temp_dir):
        """Both -processor and -cspec should be passed when architecture is detected."""
        so_file = os.path.join(FIXTURES_DIR, "libtest.so")
        if not os.path.isfile(so_file):
            pytest.skip("Test fixture libtest.so not found")

        cmd = self._get_ghidra_cmd_from_process_elf(so_file, temp_dir)
        assert cmd is not None

        assert "-processor" in cmd, "Missing -processor flag"
        assert "-cspec" in cmd, "Missing -cspec flag"

        actual_cspec = self._flag_value(cmd, "-cspec")
        assert (
            actual_cspec == "gcc"
        ), f"Expected cspec 'gcc' for x86-64 ELF, got '{actual_cspec}'"

    def test_unknown_arch_omits_processor_flag(self, temp_dir):
        """
        When architecture cannot be detected, -processor should be omitted
        to let Ghidra auto-detect.
        """
        # Create a file with unknown machine type (e_machine 0xFFFF)
        elf_file = os.path.join(temp_dir, "unknown_arch.so")
        self._write_synthetic_elf(elf_file, ei_class=2, e_machine=0xFFFF)

        cmd = self._get_ghidra_cmd_from_process_elf(elf_file, temp_dir)
        assert cmd is not None

        # Should NOT have -processor since arch is unknown
        assert (
            "-processor" not in cmd
        ), "Should not pass -processor for unknown architecture"
        assert "-cspec" not in cmd, "Should not pass -cspec for unknown architecture"

0 commit comments

Comments
 (0)