Skip to content

Commit 17a16c1

Browse files
timsherwood and claude committed
Add Keystone-based toolchain for multi-ISA assembly
Adds a pure Python assembler using Keystone Engine that supports RISC-V 64-bit, ARM64, x86-64, and MIPS32. This eliminates the need for students to install external cross-compiler toolchains.

- New toolchain module with GNU-as compatible directive parser
- ELF builder for generating executable files
- CLI tool (mapachespim-as) for command-line assembly
- Fixed ARM64 immediate parsing (# not treated as comment)
- Added comprehensive tests for toolchain and MIPS backend

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 4cc6ea5 commit 17a16c1

12 files changed

Lines changed: 2575 additions & 3 deletions

File tree

docs/Mapache.png

678 KB
Loading

mapachespim/elf_loader.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ class ISA(IntEnum):
3030
RISCV = 0
3131
ARM = 1
3232
X86_64 = 2
33+
MIPS = 3
3334
UNKNOWN = -1
3435

3536

@@ -41,6 +42,8 @@ class Architecture(IntEnum):
4142
ARM32 = 2
4243
ARM64 = 3
4344
X86_64 = 4
45+
MIPS32 = 5
46+
MIPS64 = 6
4447
UNKNOWN = -1
4548

4649

@@ -86,6 +89,8 @@ def detect_isa_from_elf(elf: ELFFileType) -> ISA:
8689
return ISA.ARM # 32-bit ARM (we'll treat as ARM for now)
8790
elif machine == "EM_X86_64":
8891
return ISA.X86_64
92+
elif machine == "EM_MIPS":
93+
return ISA.MIPS
8994
else:
9095
return ISA.UNKNOWN
9196

@@ -116,6 +121,11 @@ def detect_architecture(elf: ELFFileType, isa: ISA) -> Architecture:
116121
elif isa == ISA.X86_64:
117122
# x86-64 is always 64-bit
118123
return Architecture.X86_64
124+
elif isa == ISA.MIPS:
125+
if elf_class == "ELFCLASS64":
126+
return Architecture.MIPS64
127+
elif elf_class == "ELFCLASS32":
128+
return Architecture.MIPS32
119129

120130
return Architecture.UNKNOWN
121131

mapachespim/toolchain/__init__.py

Lines changed: 205 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,205 @@
1+
"""
2+
MapacheSPIM Toolchain - Pure Python assembler and ELF generator
3+
4+
This module provides a complete toolchain for assembling source files
5+
to ELF executables, eliminating the need for external cross-compilers.
6+
7+
Supports: RISC-V 64-bit, ARM64, x86-64, MIPS32
8+
"""
9+
10+
from __future__ import annotations
11+
12+
from dataclasses import dataclass, field
13+
from pathlib import Path
14+
from typing import Dict, List, Optional, Union
15+
16+
from .assembler import Assembler
17+
from .elf_builder import ELFBuilder
18+
from .memory_map import MEMORY_LAYOUTS, MemoryLayout
19+
20+
__all__ = [
21+
"assemble",
22+
"assemble_file",
23+
"AssemblyResult",
24+
"Assembler",
25+
"ELFBuilder",
26+
"MemoryLayout",
27+
"MEMORY_LAYOUTS",
28+
]
29+
30+
31+
@dataclass
class AssemblyResult:
    """Outcome of a single assembly run.

    Attributes:
        elf_bytes: The assembled ELF file as bytes.
        symbols: Map of symbol names to addresses.
        errors: Error messages; empty when assembly succeeded.
        warnings: Warning messages.
        isa: The ISA used for assembly.
        entry_point: Entry point address.
    """

    elf_bytes: bytes
    symbols: Dict[str, int] = field(default_factory=dict)
    errors: List[str] = field(default_factory=list)
    warnings: List[str] = field(default_factory=list)
    isa: str = ""
    entry_point: int = 0

    @property
    def success(self) -> bool:
        """Return True when no errors were recorded and ELF bytes were produced."""
        # Any recorded error means failure, regardless of what bytes exist.
        if self.errors:
            return False
        # An error-free run must still have produced a non-empty image.
        return len(self.elf_bytes) > 0
57+
58+
59+
def assemble(
    source: str,
    isa: str,
    entry_symbol: str = "_start",
) -> AssemblyResult:
    """
    Turn assembly source text into an ELF executable.

    Any exception raised while creating the assembler or while assembling
    is caught and reported through the returned result's ``errors`` list
    instead of propagating to the caller.

    Args:
        source: Assembly source code as a string.
        isa: Target ISA - one of "riscv64", "arm64", "x86_64", "mips32".
        entry_symbol: Entry point symbol name (default: "_start").

    Returns:
        AssemblyResult containing the ELF bytes, symbols, and any errors.

    Example:
        >>> result = assemble('''
        ... .text
        ... .globl _start
        ... _start:
        ...     addi x5, x0, 42
        ...     ecall
        ... ''', isa="riscv64")
        >>> if result.success:
        ...     with open("program.elf", "wb") as f:
        ...         f.write(result.elf_bytes)
    """
    try:
        return Assembler(isa).assemble(source, entry_symbol=entry_symbol)
    except Exception as exc:
        # API boundary: failures are reported as data, not raised.
        failure_message = str(exc)

    return AssemblyResult(elf_bytes=b"", errors=[failure_message], isa=isa)
96+
97+
98+
def assemble_file(
    source_path: Union[str, Path],
    output_path: Optional[Union[str, Path]] = None,
    isa: Optional[str] = None,
    entry_symbol: str = "_start",
) -> AssemblyResult:
    """
    Assemble a source file into an ELF executable.

    Failures (missing file, unreadable file, undetectable ISA, assembly
    errors, unwritable output) are reported through the returned result's
    ``errors`` list rather than raised.

    Args:
        source_path: Path to the assembly source file. The file is read
            as UTF-8 text.
        output_path: Path for the output ELF file. If None, nothing is
            written to disk; the ELF is only returned in the result.
        isa: Target ISA. If None, attempts to auto-detect from source.
        entry_symbol: Entry point symbol name (default: "_start").

    Returns:
        AssemblyResult containing the ELF bytes, symbols, and any errors.
        If output_path is provided and assembly succeeded, the ELF is
        also written to disk.

    Example:
        >>> result = assemble_file("program.s", "program.elf", isa="riscv64")
        >>> if result.success:
        ...     print(f"Assembled to {result.entry_point:#x}")
    """
    source_path = Path(source_path)

    if not source_path.exists():
        return AssemblyResult(
            elf_bytes=b"",
            errors=[f"Source file not found: {source_path}"],
            isa=isa or "",
        )

    # Read source file. The encoding is pinned so the result does not depend
    # on the platform's locale; ValueError covers UnicodeDecodeError.
    try:
        source = source_path.read_text(encoding="utf-8")
    except (OSError, ValueError) as e:
        return AssemblyResult(
            elf_bytes=b"",
            errors=[f"Failed to read source file: {e}"],
            isa=isa or "",
        )

    # Auto-detect ISA if not specified
    if isa is None:
        isa = _detect_isa_from_source(source, source_path)
        if isa is None:
            return AssemblyResult(
                elf_bytes=b"",
                errors=["Could not auto-detect ISA. Please specify --isa."],
            )

    # Assemble
    result = assemble(source, isa=isa, entry_symbol=entry_symbol)

    # Write output if successful and path provided
    if result.success and output_path is not None:
        output_path = Path(output_path)
        try:
            output_path.write_bytes(result.elf_bytes)
        except (OSError, ValueError) as e:
            # ValueError covers malformed paths (e.g. embedded NUL bytes).
            # Recording the failure flips result.success to False.
            result.errors.append(f"Failed to write output file: {e}")

    return result
161+
162+
163+
def _detect_isa_from_source(source: str, path: Path) -> Optional[str]:
164+
"""
165+
Attempt to detect ISA from source code or file path.
166+
167+
Detection heuristics:
168+
1. Look for .arch or .isa directive
169+
2. Check for ISA-specific instructions
170+
3. Check directory name hints
171+
"""
172+
source_lower = source.lower()
173+
174+
# Check for explicit ISA directives
175+
if ".arch rv64" in source_lower or ".riscv" in source_lower:
176+
return "riscv64"
177+
if ".arch armv8" in source_lower or ".aarch64" in source_lower:
178+
return "arm64"
179+
if ".code64" in source_lower or ".intel_syntax" in source_lower:
180+
return "x86_64"
181+
if ".mips" in source_lower or ".set mips" in source_lower:
182+
return "mips32"
183+
184+
# Check for ISA-specific instructions
185+
if "ecall" in source_lower or "addi " in source_lower:
186+
return "riscv64"
187+
if "svc " in source_lower or "mov x" in source_lower:
188+
return "arm64"
189+
if "syscall" in source_lower or "mov %rax" in source_lower:
190+
return "x86_64"
191+
if "syscall" in source_lower and "$v0" in source_lower:
192+
return "mips32"
193+
194+
# Check directory path for hints
195+
path_str = str(path).lower()
196+
if "riscv" in path_str or "rv64" in path_str:
197+
return "riscv64"
198+
if "arm" in path_str or "aarch64" in path_str:
199+
return "arm64"
200+
if "x86" in path_str or "x64" in path_str:
201+
return "x86_64"
202+
if "mips" in path_str:
203+
return "mips32"
204+
205+
return None

0 commit comments

Comments
 (0)