Skip to content

Commit 728d364

Browse files
committed
mega65: more examples port
ref.: llvm-mos/llvm-mos-sdk#429 by @mlund
1 parent 771e016 commit 728d364

3 files changed

Lines changed: 778 additions & 1 deletion

File tree

build.zig

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -642,8 +642,27 @@ pub fn build(b: *std.Build) void {
642642
b.getInstallStep().dependOn(&install.step);
643643
run_bininfo.addFileArg(exe.getEmittedBin());
644644
}
645+
{
646+
const step = b.step("mega65-mandelbrot", "Build MEGA65 Mandelbrot FCM fractal");
647+
// Debug triggers MOS stack-protector (SSP) lowering failure on compute-heavy code.
648+
const exe = addMega65Exe(b, sdk_dep, sdk_src, sdk_libs.mega65 orelse @panic("mega65 libs not built"), m65_dep, if (optimize == .Debug) .ReleaseSmall else optimize, "mandelbrot", "mega65/mandelbrot/mandelbrot.zig");
649+
exe.root_module.addImport("mega65", mega65_mod);
650+
const install = b.addInstallArtifact(exe, .{ .dest_sub_path = "mega65-mandelbrot.prg" });
651+
step.dependOn(&install.step);
652+
b.getInstallStep().dependOn(&install.step);
653+
run_bininfo.addFileArg(exe.getEmittedBin());
654+
}
655+
{
656+
const step = b.step("mega65-vector-logo", "Build MEGA65 rotating LLVM-MOS wireframe logo");
657+
const exe = addMega65Exe(b, sdk_dep, sdk_src, sdk_libs.mega65 orelse @panic("mega65 libs not built"), m65_dep, optimize, "vector_logo", "mega65/vector_logo/vector_logo.zig");
658+
exe.root_module.addImport("mega65", mega65_mod);
659+
const install = b.addInstallArtifact(exe, .{ .dest_sub_path = "mega65-vector-logo.prg" });
660+
step.dependOn(&install.step);
661+
b.getInstallStep().dependOn(&install.step);
662+
run_bininfo.addFileArg(exe.getEmittedBin());
663+
}
645664
} else {
646-
inline for (.{ "mega65-hello", "mega65-plasma", "mega65-viciv" }) |name| {
665+
inline for (.{ "mega65-hello", "mega65-plasma", "mega65-viciv", "mega65-mandelbrot", "mega65-vector-logo" }) |name| {
647666
_ = b.step(name, "Build MEGA65 example (fetching mega65-libc, re-run to build)");
648667
}
649668
}

mega65/mandelbrot/mandelbrot.zig

Lines changed: 332 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,332 @@
1+
// Copyright (c) 2024 Matheus C. França
2+
// SPDX-License-Identifier: Apache-2.0
3+
//! MEGA65 Mandelbrot FCM: Full Color Mode 320×200 escape-time fractal.
4+
//! Per-pixel palette via VIC-IV CHR16+FCLRHI; 32×32 hardware math; Enhanced DMA; 40 MHz.
5+
pub const panic = @import("mos_panic");
6+
const mega65 = @import("mega65");
7+
8+
// ── Fixed-point type (8.8: high byte = integer, low byte = fraction) ──────────
9+
const Fix16 = i16;
10+
const FP_ONE: Fix16 = 256;
11+
12+
// ── Fractal and screen geometry ───────────────────────────────────────────────
13+
const MAX_ITER: u8 = 32;
14+
const CELL_COLS: u8 = 40;
15+
const CELL_ROWS: u8 = 25;
16+
const TILE_PIXELS: u8 = 8;
17+
const TILE_BYTES: u16 = @as(u16, TILE_PIXELS) * TILE_PIXELS; // 64
18+
const TILE_ROW_BYTES: u16 = @as(u16, CELL_COLS) * TILE_BYTES; // 2560
19+
const NUM_CELLS: u16 = @as(u16, CELL_COLS) * CELL_ROWS; // 1000
20+
21+
// FCM tile base: charptr=0, so tile N → address N×64. $40000/64 = 4096 = 0x1000.
22+
const GFX_ADDR: u32 = 0x40000;
23+
const TILE_BASE: u16 = 0x1000;
24+
25+
// ── VIC-IV register file at $D000 ─────────────────────────────────────────────
26+
const vic: *volatile mega65.__vic4 = @ptrFromInt(0xd000);
27+
28+
// ── VIC mask constants (from _vic4.h / _vic3.h) ───────────────────────────────
29+
const VIC3_FAST_MASK: u8 = 0x40;
30+
const VIC3_ATTR_MASK: u8 = 0x20;
31+
const VIC3_H640_MASK: u8 = 0x80;
32+
const VIC3_V400_MASK: u8 = 0x08;
33+
const VIC3_PAL_MASK: u8 = 0x04;
34+
const VIC4_CHR16_MASK: u8 = 0x01;
35+
const VIC4_FCLRLO_MASK: u8 = 0x02;
36+
const VIC4_FCLRHI_MASK: u8 = 0x04;
37+
const VIC4_HOTREG_MASK: u8 = 0x80;
38+
39+
// ── Hardware math accelerator ($D768–$D77F) ──────────────────────────────────
// Register map per the MEGA65 documentation:
//   $D768–$D76F  DIVOUT   (divider result; not an input)
//   $D770–$D773  MULTINA  (32-bit input A)
//   $D774–$D777  MULTINB  (32-bit input B)
//   $D778–$D77F  MULTOUT  (64-bit product of MULTINA × MULTINB)
// The previous addresses ($D768/$D76C/$D770) pointed at DIVOUT and MULTINA, so the
// multiplier inputs were never written and the "result" read back input A.
// The product can be read right after both inputs have been written.
const math_a: *volatile i32 = @ptrFromInt(0xd770); // MULTINA
const math_b: *volatile i32 = @ptrFromInt(0xd774); // MULTINB
const math_out: *volatile i32 = @ptrFromInt(0xd778); // MULTOUT (lower 32 of 64-bit)
44+
45+
// ── Palette RAM at $D100 ──────────────────────────────────────────────────────
46+
const pal_red: [*]volatile u8 = @ptrFromInt(0xd100);
47+
const pal_green: [*]volatile u8 = @ptrFromInt(0xd200);
48+
const pal_blue: [*]volatile u8 = @ptrFromInt(0xd300);
49+
50+
// ── Screen and color RAM ──────────────────────────────────────────────────────
51+
const screen16: [*]volatile u16 = @ptrFromInt(0x0800);
52+
const color_ram: [*]volatile u8 = @ptrFromInt(0xd800);
53+
54+
// ── CPU port DDR at $0000 (write 65 for 40 MHz full-speed mode) ───────────────
55+
const cpu_portddr: *allowzero volatile u8 = @ptrFromInt(0x0000);
56+
57+
// ── VIC-IV sub-registers (inside anonymous union sub-structs; accessed directly) ─
58+
const scrnptr_lsb: *volatile u8 = @ptrFromInt(0xd060);
59+
const scrnptr_msb: *volatile u8 = @ptrFromInt(0xd061);
60+
const scrnptr_bnk: *volatile u8 = @ptrFromInt(0xd062);
61+
const scrnptr_mb: *volatile u8 = @ptrFromInt(0xd063);
62+
const charptr_lsb: *volatile u8 = @ptrFromInt(0xd068);
63+
const charptr_msb: *volatile u8 = @ptrFromInt(0xd069);
64+
const charptr_bnk: *volatile u8 = @ptrFromInt(0xd06a);
65+
66+
// ── DMA option byte constants ─────────────────────────────────────────────────
67+
const ENABLE_F018B_OPT: u8 = 0x0b;
68+
const SRC_ADDR_BITS_OPT: u8 = 0x80;
69+
const DST_ADDR_BITS_OPT: u8 = 0x81;
70+
const DST_SKIP_RATE_OPT: u8 = 0x85;
71+
const DMA_COPY_CMD: u8 = 0x00;
72+
const DMA_FILL_CMD: u8 = 0x03;
73+
74+
// ── F018B DMA list (12 bytes, packed = no padding between fields) ─────────────
75+
/// One F018B-format DMA list entry.
/// The explicit `u96` backing integer makes the compiler verify that the fields
/// add up to exactly 12 bytes; adding or resizing a field becomes a compile error
/// instead of a silently shifted hardware layout.
const DMAList_F018B = packed struct(u96) {
    command: u8,
    count: u16,
    source_addr: u16,
    source_bank: u8,
    dest_addr: u16,
    dest_bank: u8,
    command_msb: u8,
    modulo: u16,
};

// ── Enhanced DMA job: 7 option bytes + end byte + F018B list ─────────────────
/// Complete enhanced-DMA job as laid out in memory: eight option bytes
/// (`opt0`..`opt6` plus the terminating `end_option`) followed directly by the
/// 12-byte F018B list. 8 + 12 bytes = 160 bits, enforced by the backing integer.
const DmaJob = packed struct(u160) {
    opt0: u8,
    opt1: u8,
    opt2: u8,
    opt3: u8,
    opt4: u8,
    opt5: u8,
    opt6: u8,
    end_option: u8,
    dmalist: DMAList_F018B,
};
98+
99+
// ── Tile row buffer: CPU-side staging area, one row at a time (BSS) ───────────
100+
var tile_row_buf: [TILE_ROW_BYTES]u8 = undefined;
101+
102+
// ── Comptime palette: blue→cyan→green→yellow→red over MAX_ITER steps ──────────
103+
// Entry 0 = black (interior / set member). Entries 1..MAX_ITER = escaped pixels.
104+
// Reversed-nybble encoding: 4-bit intensity n → (n<<4)|n for full brightness.
105+
const SEGMENT_LEN: u8 = 8;
106+
const MAX_INTENSITY: u8 = 15;
107+
108+
/// Returns `n` OR-ed with `n` shifted left by four bits.
/// For a 4-bit intensity (0..15) this duplicates the nibble: 0x0 → 0x00, 0xF → 0xFF.
fn nyb(n: u8) u8 {
    const high_nibble: u8 = n << 4;
    return high_nibble | n;
}
111+
112+
/// Three parallel channel tables, one byte per palette entry (indices 0..MAX_ITER).
const Palette = struct { r: [MAX_ITER + 1]u8, g: [MAX_ITER + 1]u8, b: [MAX_ITER + 1]u8 };

/// Colour ramp evaluated at compile time.
/// Entry 0 is left black; entry `step + 1` holds the colour for iteration `step`.
/// Each run of SEGMENT_LEN steps fades one channel up or down while the others
/// are held at zero or MAX_INTENSITY.
const palette: Palette = blk: {
    var table: Palette = .{ .r = @splat(0), .g = @splat(0), .b = @splat(0) };
    for (0..MAX_ITER) |step| {
        // Position inside the current segment, rescaled to 0..MAX_INTENSITY.
        const within: u8 = @intCast(step & (SEGMENT_LEN - 1));
        const rising: u8 = @intCast(@as(u16, within) * MAX_INTENSITY / (SEGMENT_LEN - 1));
        const falling: u8 = MAX_INTENSITY - rising;

        const rgb: [3]u8 = switch (step / SEGMENT_LEN) {
            0 => .{ 0, rising, MAX_INTENSITY }, // blue → cyan
            1 => .{ 0, MAX_INTENSITY, falling }, // cyan → green
            2 => .{ rising, MAX_INTENSITY, 0 }, // green → yellow
            else => .{ MAX_INTENSITY, falling, 0 }, // yellow → red
        };

        table.r[step + 1] = nyb(rgb[0]);
        table.g[step + 1] = nyb(rgb[1]);
        table.b[step + 1] = nyb(rgb[2]);
    }
    break :blk table;
};
146+
147+
// ── Hardware 8.8 fixed-point multiply ────────────────────────────────────────
148+
// Sign-extend 16-bit inputs to 32 bits; lower 32 bits of unsigned product
149+
// match the signed result for equal-width inputs. Shift right 8 → 8.8 result.
150+
/// Multiplies two 8.8 fixed-point values through the memory-mapped multiplier.
/// Both operands are sign-extended to 32 bits and stored to the two input
/// registers; the 32-bit output register is then read, shifted right by 8 and
/// truncated back to 16 bits.
inline fn fpMul(a: Fix16, b: Fix16) Fix16 {
    const lhs: i32 = a;
    const rhs: i32 = b;
    math_a.* = lhs;
    math_b.* = rhs;
    const product: i32 = math_out.*;
    return @truncate(product >> 8);
}
155+
156+
// ── Escape-time Mandelbrot iteration ─────────────────────────────────────────
157+
/// Escape-time iteration for the point c = cr + ci·i, all values in 8.8 fixed point.
///
/// Returns the index of the first iteration at which zr² + zi² exceeds 4.0, or
/// MAX_ITER if that never happens within MAX_ITER iterations.
fn mandelbrot(cr: Fix16, ci: Fix16) u8 {
    const escape_limit: i32 = 4 * @as(i32, FP_ONE);
    var zr: Fix16 = 0;
    var zi: Fix16 = 0;
    var i: u8 = 0;
    while (i < MAX_ITER) : (i += 1) {
        const zr2 = fpMul(zr, zr);
        const zi2 = fpMul(zi, zi);
        // Compare in 32 bits: the sum of two i16 squares can exceed the i16 range,
        // which would trap in safe builds and be undefined behaviour in release builds.
        if (@as(i32, zr2) + zi2 > escape_limit) return i;
        zi = fpMul(zr, zi);
        zi +%= zi; // 2·zr·zi (wrapping matches C int16_t)
        zi +%= ci;
        // Same wrapping rule as the zi update, so both components overflow consistently.
        zr = zr2 -% zi2 +% cr;
    }
    return MAX_ITER;
}
173+
174+
// ── Enhanced DMA helpers ──────────────────────────────────────────────────────
175+
/// Hands the DMA list at `job` to the DMA controller and starts it.
/// The pointer is narrowed to 16 bits and the list bank register is written as 0.
fn triggerDma(job: *const DmaJob) void {
    const list_addr: u16 = @intCast(@intFromPtr(job));
    const addr_high: u8 = @truncate(list_addr >> 8);
    const addr_low: u8 = @truncate(list_addr);

    // Store order is significant: the write to $D705 carries the low address byte
    // and is the one that starts the transfer, so everything else goes first.
    (@as(*volatile u8, @ptrFromInt(0xd703))).* = 1; // enable_f018b
    (@as(*volatile u8, @ptrFromInt(0xd702))).* = 0; // addr_bank
    (@as(*volatile u8, @ptrFromInt(0xd701))).* = addr_high; // addr_msb
    (@as(*volatile u8, @ptrFromInt(0xd705))).* = addr_low; // trigger_enhanced
}
187+
188+
/// Builds an enhanced-DMA job that fills `count` bytes starting at the flat
/// address `dst` with `value`.
///
/// `dst` is split into three parts: bits 0–15 go to `dest_addr`, bits 16–19 to
/// `dest_bank`, and bits 20–27 to the destination-megabyte option byte.
fn makeDmaFill(dst: u32, value: u8, count: u16) DmaJob {
    return .{
        .opt0 = ENABLE_F018B_OPT,
        .opt1 = SRC_ADDR_BITS_OPT,
        .opt2 = 0, // source megabyte: unused for a fill
        .opt3 = DST_ADDR_BITS_OPT,
        .opt4 = @truncate(dst >> 20), // destination megabyte
        .opt5 = DST_SKIP_RATE_OPT,
        .opt6 = 1,
        .end_option = 0,
        .dmalist = .{
            .command = DMA_FILL_CMD,
            .count = count,
            .source_addr = value, // for a fill the source field carries the fill byte
            .source_bank = 0,
            .dest_addr = @truncate(dst),
            // Only the low nibble is the bank number; the high nibble of this byte
            // holds F018 flag bits. Masking keeps the megabyte bits of a
            // destination >= 1 MB from leaking into those flags.
            .dest_bank = @truncate((dst >> 16) & 0x0f),
            .command_msb = 0,
            .modulo = 0,
        },
    };
}
210+
211+
/// Builds an enhanced-DMA job that copies `count` bytes from the flat address
/// `src` to the flat address `dst`.
///
/// Starts from the fill job for `dst` and then overrides the command and the
/// source fields; `src` is split into address, bank and megabyte the same way.
fn makeDmaCopy(src: u32, dst: u32, count: u16) DmaJob {
    var job = makeDmaFill(dst, 0, count);
    job.opt2 = @truncate(src >> 20); // source megabyte
    job.dmalist.command = DMA_COPY_CMD;
    job.dmalist.source_addr = @truncate(src);
    // Bank number is the low nibble only; the high nibble of the bank byte holds
    // F018 flag bits, so the megabyte bits of `src` must not spill into it.
    job.dmalist.source_bank = @truncate((src >> 16) & 0x0f);
    return job;
}
219+
220+
// ── VIC-IV setup: FCM 320×200 mode ───────────────────────────────────────────
221+
/// Programs the VIC-IV for 320×200 Full Colour Mode and switches the CPU to 40 MHz.
/// Interrupts are disabled with `sei` at the start and are not re-enabled here.
/// The register accesses are order-sensitive and are kept in their original sequence.
fn setupVic() void {
    asm volatile ("sei");

    // Knock sequence that unlocks the VIC-IV registers.
    vic.key = 0x47;
    vic.key = 0x53;

    // Hot registers off, so the VIC-IV can be programmed directly.
    const hotreg: *volatile u8 = @ptrFromInt(0xd05d);
    hotreg.* = hotreg.* & ~VIC4_HOTREG_MASK;

    // POKE 0,65 selects 40 MHz (VIC3_FAST alone gives only 3.5 MHz).
    cpu_portddr.* = 65;

    // FAST and ATTR on; H640 and V400 off.
    const ctrlb_set = VIC3_FAST_MASK | VIC3_ATTR_MASK;
    const ctrlb_clear = VIC3_H640_MASK | VIC3_V400_MASK;
    vic.ctrlb = (vic.ctrlb | ctrlb_set) & ~ctrlb_clear;

    // CHR16 + FCLRHI: tiles with index > $FF use the full-colour per-pixel palette.
    // Tile indices start at 0x1000, so every tile is drawn in FCM.
    const ctrlc_kept = vic.ctrlc & ~VIC4_FCLRLO_MASK;
    vic.ctrlc = ctrlc_kept | VIC4_CHR16_MASK | VIC4_FCLRHI_MASK;

    // Screen RAM pointer = $0800 (the KERNAL default; avoids relocating 2 KB).
    scrnptr_lsb.* = 0x00;
    scrnptr_msb.* = 0x08;
    scrnptr_bnk.* = 0x00;
    scrnptr_mb.* = 0x00;

    // Character/tile base = address 0, so tile N covers bytes [N×64 .. N×64+63].
    charptr_lsb.* = 0x00;
    charptr_msb.* = 0x00;
    charptr_bnk.* = 0x00;

    // Row geometry: 2 bytes per cell in CHR16 → 80 bytes per row; 40 columns; 25 rows.
    const bytes_per_row: u16 = @as(u16, CELL_COLS) * 2;
    vic.linestep = bytes_per_row;
    vic.chrcount = CELL_COLS;
    vic.disp_rows = CELL_ROWS;

    // FCM is a text-mode extension, so BMM must be off.
    // Bits 7–6 ($C0, which includes the raster MSB) are kept as read; the rest is
    // replaced by DEN|RSEL|YSCROLL=3 ($1B).
    const ctrl1_kept = vic.ctrl1 & 0xC0;
    vic.ctrl1 = ctrl1_kept | 0x1B;
    // Bits 7–5 ($E0) are kept as read; the rest is replaced by CSEL ($08).
    const ctrl2_kept = vic.ctrl2 & 0xE0;
    vic.ctrl2 = ctrl2_kept | 0x08;

    // Black border and background.
    vic.bordercol = 0;
    vic.screencol = 0;

    // Take colours 0-15 from palette RAM as well (16+ always use palette RAM).
    vic.ctrla |= VIC3_PAL_MASK;
}
268+
269+
// ── Screen and tile memory initialization ─────────────────────────────────────
270+
/// Loads the palette, points every screen cell at its own tile, clears colour RAM
/// and zero-fills the tile graphics area.
fn setupScreen() void {
    // Copy the compile-time palette (entries 0..MAX_ITER) into palette RAM.
    var entry: usize = 0;
    while (entry <= MAX_ITER) : (entry += 1) {
        pal_red[entry] = palette.r[entry];
        pal_green[entry] = palette.g[entry];
        pal_blue[entry] = palette.b[entry];
    }

    // Cell k shows tile TILE_BASE + k, i.e. a unique 64-byte tile inside the
    // $40000 graphics area.
    var cell: u16 = 0;
    while (cell < NUM_CELLS) : (cell += 1) {
        screen16[cell] = TILE_BASE + cell;
    }

    // Neutral colour RAM, so no FCM flip/alpha attributes are applied.
    // NOTE(review): only the first NUM_CELLS bytes at $D800 are cleared — confirm
    // that this covers every attribute byte used in CHR16 mode.
    var attr: u16 = 0;
    while (attr < NUM_CELLS) : (attr += 1) {
        color_ram[attr] = 0;
    }

    // Zero the whole graphics area so rows that are not rendered yet show as black.
    const gfx_bytes: u16 = NUM_CELLS * TILE_BYTES;
    const clear_job = makeDmaFill(GFX_ADDR, 0, gfx_bytes);
    triggerDma(&clear_job);
}
292+
293+
// ── Fractal rendering: one tile row at a time, DMA-copied to $40000+ ──────────
294+
/// Compile-time table of the 8.8 coordinate of every pixel along one axis:
/// entry k = lo + (hi − lo)·k / len, computed with 32-bit intermediates.
fn axisCoords(comptime len: u16, comptime lo: Fix16, comptime hi: Fix16) [len]Fix16 {
    @setEvalBranchQuota(4000);
    const span: i32 = @as(i32, hi) - lo;
    var coords: [len]Fix16 = undefined;
    for (&coords, 0..) |*coord, k| {
        const offset: i32 = @divTrunc(span * @as(i32, @intCast(k)), @as(i32, len));
        coord.* = @intCast(@as(i32, lo) + offset);
    }
    return coords;
}

/// Renders the fractal one tile row (40 tiles × 64 bytes) at a time into
/// `tile_row_buf`, then DMA-copies each finished row into graphics memory at
/// GFX_ADDR. Pixels that reach MAX_ITER get palette index 0; all others get
/// iteration count + 1.
fn renderFractal() void {
    // View window: real ∈ [−2.0, 0.6], imag ∈ [−1.0, 1.0]
    const RE_MIN: Fix16 = -2 * FP_ONE;
    const RE_MAX: Fix16 = @intFromFloat(0.6 * 256.0);
    const IM_MIN: Fix16 = -FP_ONE;
    const IM_MAX: Fix16 = FP_ONE;
    const PIXELS_X: u16 = @as(u16, CELL_COLS) * TILE_PIXELS; // 320
    const PIXELS_Y: u16 = @as(u16, CELL_ROWS) * TILE_PIXELS; // 200

    // A constant per-pixel step of (max − min) / pixels truncates to 2/256 on both
    // axes in 8.8, which covered only real ∈ [−2.0, 0.49] and imag ∈ [−1.0, 0.55].
    // Scaling each pixel index by the full span before dividing keeps the whole
    // window; the tables are built at compile time, so there is no runtime division.
    const re_coords = comptime axisCoords(PIXELS_X, RE_MIN, RE_MAX);
    const im_coords = comptime axisCoords(PIXELS_Y, IM_MIN, IM_MAX);

    for (0..CELL_ROWS) |cy| {
        for (0..CELL_COLS) |cx| {
            const tile_off: usize = cx * TILE_BYTES;
            for (0..TILE_PIXELS) |py| {
                // The imaginary part is constant along a pixel row of the tile.
                const ci: Fix16 = im_coords[cy * TILE_PIXELS + py];
                for (0..TILE_PIXELS) |px| {
                    const cr: Fix16 = re_coords[cx * TILE_PIXELS + px];
                    const iter = mandelbrot(cr, ci);
                    tile_row_buf[tile_off + py * TILE_PIXELS + px] =
                        if (iter >= MAX_ITER) 0 else iter + 1;
                }
            }
        }

        // DMA-copy completed tile row from CPU RAM to graphics memory
        const src: u32 = @intFromPtr(&tile_row_buf);
        const dst: u32 = GFX_ADDR + @as(u32, @intCast(cy)) * TILE_ROW_BYTES;
        const copy_job = makeDmaCopy(src, dst, TILE_ROW_BYTES);
        triggerDma(&copy_job);
    }
}
326+
327+
/// Program entry point: configures the video hardware, prepares screen and tile
/// memory, renders the fractal once, then spins forever so the image stays up.
export fn main() void {
    setupVic(); // video mode and CPU speed
    setupScreen(); // palette, screen cells, colour RAM, cleared tile memory
    renderFractal(); // fills the tile memory row by row

    // Never return.
    while (true) {}
}

0 commit comments

Comments
 (0)