|
| 1 | +// Copyright (c) 2024 Matheus C. França |
| 2 | +// SPDX-License-Identifier: Apache-2.0 |
| 3 | +//! MEGA65 Mandelbrot FCM: Full Color Mode 320×200 escape-time fractal. |
| 4 | +//! Per-pixel palette via VIC-IV CHR16+FCLRHI; 32×32 hardware math; Enhanced DMA; 40 MHz. |
| 5 | +pub const panic = @import("mos_panic"); |
| 6 | +const mega65 = @import("mega65"); |
| 7 | + |
// ── Fixed-point type (8.8: high byte = integer, low byte = fraction) ──────────
// All fractal coordinates and intermediate values are carried in this type.
const Fix16 = i16;
// The value 1.0 in 8.8 representation (1 << 8).
const FP_ONE: Fix16 = 256;

// ── Fractal and screen geometry ───────────────────────────────────────────────
// Iteration cap for the escape-time loop; also the number of "escaped" colours.
const MAX_ITER: u8 = 32;
// Character-cell grid: 40 × 25 cells of 8 × 8 pixels each (320 × 200 pixels).
const CELL_COLS: u8 = 40;
const CELL_ROWS: u8 = 25;
const TILE_PIXELS: u8 = 8;
// One byte per pixel, so a tile occupies TILE_PIXELS² bytes.
const TILE_BYTES: u16 = @as(u16, TILE_PIXELS) * TILE_PIXELS; // 64
// Bytes needed to hold one full row of tiles (the size of the staging buffer).
const TILE_ROW_BYTES: u16 = @as(u16, CELL_COLS) * TILE_BYTES; // 2560
// Total number of character cells on screen.
const NUM_CELLS: u16 = @as(u16, CELL_COLS) * CELL_ROWS; // 1000

// FCM tile base: charptr=0, so tile N → address N×64. $40000/64 = 4096 = 0x1000.
// GFX_ADDR is where tile pixel data is stored; TILE_BASE is the tile index of
// the first tile at that address and must stay equal to GFX_ADDR / TILE_BYTES.
const GFX_ADDR: u32 = 0x40000;
const TILE_BASE: u16 = 0x1000;

// ── VIC-IV register file at $D000 ─────────────────────────────────────────────
// Volatile: every access is a hardware register read or write.
const vic: *volatile mega65.__vic4 = @ptrFromInt(0xd000);

// ── VIC mask constants (from _vic4.h / _vic3.h) ───────────────────────────────
// In this file FAST/ATTR/H640/V400 are applied to `vic.ctrlb`, PAL to
// `vic.ctrla`, CHR16/FCLRLO/FCLRHI to `vic.ctrlc`, and HOTREG to $D05D.
const VIC3_FAST_MASK: u8 = 0x40;
const VIC3_ATTR_MASK: u8 = 0x20;
const VIC3_H640_MASK: u8 = 0x80;
const VIC3_V400_MASK: u8 = 0x08;
const VIC3_PAL_MASK: u8 = 0x04;
const VIC4_CHR16_MASK: u8 = 0x01;
const VIC4_FCLRLO_MASK: u8 = 0x02;
const VIC4_FCLRHI_MASK: u8 = 0x04;
const VIC4_HOTREG_MASK: u8 = 0x80;
| 38 | + |
// ── Hardware math accelerator: multiplier registers at $D770–$D77F ───────────
// 32×32→64-bit multiplier; result updates as soon as both inputs are written.
// MULTINA lives at $D770, MULTINB at $D774 and MULTOUT at $D778. The range
// $D768–$D76F is the divider output (DIVOUT), so operands written there never
// reach the multiplier.
const math_a: *volatile i32 = @ptrFromInt(0xd770); // multina32
const math_b: *volatile i32 = @ptrFromInt(0xd774); // multinb32
const math_out: *volatile i32 = @ptrFromInt(0xd778); // multout32 (lower 32 of 64-bit)
| 44 | + |
// ── Palette RAM at $D100 ──────────────────────────────────────────────────────
// Three parallel byte arrays (red, green, blue), indexed by palette entry.
const pal_red: [*]volatile u8 = @ptrFromInt(0xd100);
const pal_green: [*]volatile u8 = @ptrFromInt(0xd200);
const pal_blue: [*]volatile u8 = @ptrFromInt(0xd300);

// ── Screen and color RAM ──────────────────────────────────────────────────────
// screen16 is indexed per cell: each entry is one 16-bit tile index.
const screen16: [*]volatile u16 = @ptrFromInt(0x0800);
// Byte-wide view of colour RAM through the $D800 I/O window.
const color_ram: [*]volatile u8 = @ptrFromInt(0xd800);

// ── CPU port DDR at $0000 (write 65 for 40 MHz full-speed mode) ───────────────
// `allowzero` is required because the register really is at address 0.
const cpu_portddr: *allowzero volatile u8 = @ptrFromInt(0x0000);

// ── VIC-IV sub-registers (inside anonymous union sub-structs; accessed directly) ─
// Screen pointer, split into four bytes from least to most significant.
const scrnptr_lsb: *volatile u8 = @ptrFromInt(0xd060);
const scrnptr_msb: *volatile u8 = @ptrFromInt(0xd061);
const scrnptr_bnk: *volatile u8 = @ptrFromInt(0xd062);
const scrnptr_mb: *volatile u8 = @ptrFromInt(0xd063);
// Character (tile data) pointer, split into three bytes.
const charptr_lsb: *volatile u8 = @ptrFromInt(0xd068);
const charptr_msb: *volatile u8 = @ptrFromInt(0xd069);
const charptr_bnk: *volatile u8 = @ptrFromInt(0xd06a);

// ── DMA option byte constants ─────────────────────────────────────────────────
// Option tokens placed in the `optN` bytes of a DmaJob. The three *_OPT tokens
// that take an argument are each followed by one argument byte.
const ENABLE_F018B_OPT: u8 = 0x0b;
const SRC_ADDR_BITS_OPT: u8 = 0x80;
const DST_ADDR_BITS_OPT: u8 = 0x81;
const DST_SKIP_RATE_OPT: u8 = 0x85;
// Values for the `command` byte of the F018B list.
const DMA_COPY_CMD: u8 = 0x00;
const DMA_FILL_CMD: u8 = 0x03;
| 73 | + |
// ── F018B DMA list (12 bytes, packed = no padding between fields) ─────────────
// Field order is the in-memory order the DMA controller reads, so it must not
// be rearranged.
const DMAList_F018B = packed struct {
    // DMA_COPY_CMD or DMA_FILL_CMD.
    command: u8,
    // Number of bytes to transfer.
    count: u16,
    // Low 16 bits of the source address; for fills this carries the fill value.
    source_addr: u16,
    // Source address bits from bit 16 upward (as produced by `src >> 16`).
    source_bank: u8,
    // Low 16 bits of the destination address.
    dest_addr: u16,
    // Destination address bits from bit 16 upward (as produced by `dst >> 16`).
    dest_bank: u8,
    // Always written as 0 in this file.
    command_msb: u8,
    // Always written as 0 in this file.
    modulo: u16,
};
| 85 | + |
// ── Enhanced DMA job: 7 option bytes + end byte + F018B list ─────────────────
// The job builders in this file fill the option bytes as:
//   opt0       ENABLE_F018B_OPT
//   opt1/opt2  SRC_ADDR_BITS_OPT followed by source address bits 20 and up
//   opt3/opt4  DST_ADDR_BITS_OPT followed by destination address bits 20 and up
//   opt5/opt6  DST_SKIP_RATE_OPT followed by the skip rate (1)
//   end_option 0, terminating the option list
const DmaJob = packed struct {
    opt0: u8,
    opt1: u8,
    opt2: u8,
    opt3: u8,
    opt4: u8,
    opt5: u8,
    opt6: u8,
    end_option: u8,
    // The F018B transfer descriptor that follows the option bytes.
    dmalist: DMAList_F018B,
};
| 98 | + |
// ── Tile row buffer: CPU-side staging area, one row at a time (BSS) ───────────
// Holds CELL_COLS tiles of TILE_BYTES each. Left `undefined` because every
// byte is written by the renderer before the buffer is DMA-copied out.
var tile_row_buf: [TILE_ROW_BYTES]u8 = undefined;

// ── Comptime palette: blue→cyan→green→yellow→red over MAX_ITER steps ──────────
// Entry 0 = black (interior / set member). Entries 1..MAX_ITER = escaped pixels.
// Reversed-nybble encoding: 4-bit intensity n → (n<<4)|n for full brightness.
// Number of palette entries in each of the four colour ramps. The palette
// builder masks with (SEGMENT_LEN - 1), so this must remain a power of two.
const SEGMENT_LEN: u8 = 8;
// Largest 4-bit channel intensity.
const MAX_INTENSITY: u8 = 15;
| 107 | + |
/// Duplicates the low nybble of `n` into the high nybble, so a 4-bit
/// intensity reads the same regardless of nybble order.
/// Callers pass values in 0..=15.
fn nyb(n: u8) u8 {
    const high: u8 = n << 4;
    return high | n;
}
| 111 | + |
/// Per-channel colour tables with one slot per palette index 0..=MAX_ITER.
const Palette = struct {
    r: [MAX_ITER + 1]u8,
    g: [MAX_ITER + 1]u8,
    b: [MAX_ITER + 1]u8,
};
| 113 | + |
/// Colour table built at compile time. Slot 0 stays black; slot `i + 1` holds
/// the colour for escape count `i`, walking four ramps of SEGMENT_LEN entries:
/// blue→cyan, cyan→green, green→yellow, yellow→red.
const palette: Palette = blk: {
    var table: Palette = .{
        .r = @splat(0),
        .g = @splat(0),
        .b = @splat(0),
    };
    for (0..MAX_ITER) |step| {
        const idx: u8 = @intCast(step);
        // Position inside the current ramp, scaled to 0..MAX_INTENSITY.
        const ramp_pos: u8 = idx & (SEGMENT_LEN - 1);
        const ramp: u8 = @intCast(@as(u16, ramp_pos) * MAX_INTENSITY / (SEGMENT_LEN - 1));
        // Channel intensities as { red, green, blue } for this ramp.
        const rgb: [3]u8 = switch (idx / SEGMENT_LEN) {
            0 => [3]u8{ 0, ramp, MAX_INTENSITY },
            1 => [3]u8{ 0, MAX_INTENSITY, MAX_INTENSITY - ramp },
            2 => [3]u8{ ramp, MAX_INTENSITY, 0 },
            else => [3]u8{ MAX_INTENSITY, MAX_INTENSITY - ramp, 0 },
        };
        table.r[step + 1] = nyb(rgb[0]);
        table.g[step + 1] = nyb(rgb[1]);
        table.b[step + 1] = nyb(rgb[2]);
    }
    break :blk table;
};
| 146 | + |
// ── 8.8 fixed-point multiply on the hardware multiplier ──────────────────────
/// Returns `a * b` as an 8.8 value.
///
/// Both operands are sign-extended to 32 bits before being written to the
/// multiplier inputs. The low 32 bits of the product are the same whether the
/// hardware treats the inputs as signed or unsigned, so reading only the low
/// word is sufficient. Dropping the bottom 8 bits rescales 16.16 back to 8.8;
/// the result is truncated to 16 bits without an overflow check.
inline fn fpMul(a: Fix16, b: Fix16) Fix16 {
    // Implicit i16 → i32 widening performs the sign extension.
    math_a.* = a;
    math_b.* = b;
    const product: i32 = math_out.*;
    return @truncate(product >> 8);
}
| 155 | + |
| 156 | +// ── Escape-time Mandelbrot iteration ───────────────────────────────────────── |
/// Iterates z ← z² + c from z = 0 for the point c = (`cr`, `ci`), all in 8.8
/// fixed point.
///
/// Returns the index of the first iteration at which |z|² exceeded 4, or
/// MAX_ITER if the point never escaped within the iteration cap.
fn mandelbrot(cr: Fix16, ci: Fix16) u8 {
    const escape_radius_sq: Fix16 = 4 * FP_ONE;
    var re: Fix16 = 0;
    var im: Fix16 = 0;
    for (0..MAX_ITER) |n| {
        const re_sq = fpMul(re, re);
        const im_sq = fpMul(im, im);
        if (re_sq + im_sq > escape_radius_sq) return @intCast(n);
        // Imaginary part: 2·re·im + ci, with wrapping adds (matches C int16_t).
        const cross = fpMul(re, im);
        im = cross +% cross +% ci;
        // Real part: re² − im² + cr.
        re = re_sq - im_sq + cr;
    }
    return MAX_ITER;
}
| 173 | + |
| 174 | +// ── Enhanced DMA helpers ────────────────────────────────────────────────────── |
/// Hands the job at `job` to the DMA controller and starts it.
///
/// The job's address is truncated to 16 bits and the bank register is set to
/// 0, so the job structure must live in the first 64 KB.
fn triggerDma(job: *const DmaJob) void {
    const list_addr: u16 = @intCast(@intFromPtr(job));
    const list_hi: u8 = @truncate(list_addr >> 8);
    const list_lo: u8 = @truncate(list_addr);

    const reg_addr_msb: *volatile u8 = @ptrFromInt(0xd701); // addr_msb
    const reg_addr_bank: *volatile u8 = @ptrFromInt(0xd702); // addr_bank
    const reg_enable_f018b: *volatile u8 = @ptrFromInt(0xd703); // enable_f018b
    const reg_trigger: *volatile u8 = @ptrFromInt(0xd705); // trigger_enhanced

    // Write order matters: the final store to trigger_enhanced starts the
    // transfer, so every other register must already hold its value.
    reg_enable_f018b.* = 1;
    reg_addr_bank.* = 0;
    reg_addr_msb.* = list_hi;
    reg_trigger.* = list_lo; // triggers DMA
}
| 187 | + |
/// Builds an Enhanced DMA job that fills `count` bytes starting at the 28-bit
/// address `dst` with `value`.
///
/// The destination is split across three fields: bits 0–15 in `dest_addr`,
/// bits 16–19 in the low nybble of `dest_bank`, and bits 20–27 in the
/// DST_ADDR_BITS_OPT argument (`opt4`). The job is only described here;
/// nothing is transferred until it is handed to the DMA controller.
fn makeDmaFill(dst: u32, value: u8, count: u16) DmaJob {
    return .{
        .opt0 = ENABLE_F018B_OPT,
        .opt1 = SRC_ADDR_BITS_OPT,
        .opt2 = 0,
        .opt3 = DST_ADDR_BITS_OPT,
        .opt4 = @truncate(dst >> 20),
        .opt5 = DST_SKIP_RATE_OPT,
        .opt6 = 1,
        .end_option = 0,
        .dmalist = .{
            .command = DMA_FILL_CMD,
            // For a fill, the source address field carries the fill byte.
            .count = count,
            .source_addr = value,
            .source_bank = 0,
            .dest_addr = @truncate(dst),
            // Only the low nybble of the bank byte is address; the upper bits
            // are DMA control flags. Mask so that address bits 20–23 (already
            // carried by opt4) cannot leak into them for targets above 1 MB.
            .dest_bank = @truncate((dst >> 16) & 0x0f),
            .command_msb = 0,
            .modulo = 0,
        },
    };
}
| 210 | + |
/// Builds an Enhanced DMA job that copies `count` bytes from the 28-bit
/// address `src` to the 28-bit address `dst`.
///
/// Starts from a fill job for `dst` (which supplies the option bytes and the
/// destination fields) and then overrides the command and the source fields.
fn makeDmaCopy(src: u32, dst: u32, count: u16) DmaJob {
    var job = makeDmaFill(dst, 0, count);
    job.opt2 = @truncate(src >> 20);
    job.dmalist.command = DMA_COPY_CMD;
    job.dmalist.source_addr = @truncate(src);
    // Only the low nybble of the bank byte is address; the upper bits are DMA
    // control flags. Mask so that address bits 20–23 (already carried by opt2)
    // cannot leak into them for sources above 1 MB.
    job.dmalist.source_bank = @truncate((src >> 16) & 0x0f);
    return job;
}
| 219 | + |
| 220 | +// ── VIC-IV setup: FCM 320×200 mode ─────────────────────────────────────────── |
/// Programs the VIC-IV for the 320×200 full-colour character layout used by
/// the renderer and switches the CPU to full speed.
///
/// Interrupts are disabled on entry and are not re-enabled. The register
/// writes below are order-sensitive: the unlock sequence and the hot-register
/// disable must come before the mode and pointer registers are touched.
fn setupVic() void {
    asm volatile ("sei");

    vic.key = 0x47; // VIC-IV unlock knock sequence
    vic.key = 0x53;

    // Disable hot registers so we can program VIC-IV directly
    (@as(*volatile u8, @ptrFromInt(0xd05d))).* &= ~VIC4_HOTREG_MASK;

    // 40 MHz: POKE 0,65 via CPU port DDR (VIC3_FAST alone gives only 3.5 MHz)
    cpu_portddr.* = 65;

    // Set FAST and ATTR; clear H640 and V400 to stay at 320×200.
    vic.ctrlb = (vic.ctrlb | VIC3_FAST_MASK | VIC3_ATTR_MASK) &
        ~(VIC3_H640_MASK | VIC3_V400_MASK);

    // CHR16 + FCLRHI: tiles with index > $FF use full-color per-pixel palette.
    // Our indices start at 0x1000 (TILE_BASE), so every tile uses FCM.
    // FCLRLO is cleared explicitly.
    vic.ctrlc = (vic.ctrlc & ~VIC4_FCLRLO_MASK) | VIC4_CHR16_MASK | VIC4_FCLRHI_MASK;

    // Screen RAM at $0800 (reuse KERNAL default — avoids relocating 2 KB)
    scrnptr_lsb.* = 0x00;
    scrnptr_msb.* = 0x08;
    scrnptr_bnk.* = 0x00;
    scrnptr_mb.* = 0x00;

    // Tile data base at address 0; tile N maps to bytes [N×64 .. N×64+63]
    charptr_lsb.* = 0x00;
    charptr_msb.* = 0x00;
    charptr_bnk.* = 0x00;

    // 80 bytes per row (CHR16: 2 bytes/cell × 40 cols), 40 chars, 25 rows
    vic.linestep = @as(u16, CELL_COLS) * 2;
    vic.chrcount = CELL_COLS;
    vic.disp_rows = CELL_ROWS;

    // FCM is a text-mode extension — BMM must be off.
    // Keep the top two bits ($C0, which include the raster MSB), clear the
    // rest, then set DEN|RSEL|YSCROLL=3 ($1B).
    vic.ctrl1 = (vic.ctrl1 & 0xC0) | 0x1B;
    // Keep the top three bits ($E0), clear the rest, then set CSEL ($08).
    vic.ctrl2 = (vic.ctrl2 & 0xE0) | 0x08;

    // Black border and background.
    vic.bordercol = 0;
    vic.screencol = 0;

    // Use palette RAM for colors 0-15 (16+ always use palette RAM)
    vic.ctrla |= VIC3_PAL_MASK;
}
| 268 | + |
| 269 | +// ── Screen and tile memory initialization ───────────────────────────────────── |
/// Loads the palette, points every screen cell at its own tile, and clears
/// colour RAM and the tile graphics area.
///
/// After this returns the display shows solid black (palette entry 0) until
/// tile data is written into the graphics area.
fn setupScreen() void {
    // Upload the comptime palette into hardware palette RAM.
    for (0..MAX_ITER + 1) |i| {
        pal_red[i] = palette.r[i];
        pal_green[i] = palette.g[i];
        pal_blue[i] = palette.b[i];
    }

    // Each screen cell gets a unique tile index pointing into the $40000
    // graphics area.
    for (0..NUM_CELLS) |i| {
        screen16[i] = TILE_BASE + @as(u16, @intCast(i));
    }

    // Neutral colour RAM: prevent unwanted FCM flip/alpha attributes.
    // With CHR16 enabled, colour RAM holds two bytes per cell, so
    // NUM_CELLS × 2 bytes must be cleared. That is more than the 1 KB visible
    // through the $D800 window, so the fill goes to colour RAM's full address
    // by DMA instead.
    const COLOR_RAM_ADDR: u32 = 0xff80000;
    var cram_job = makeDmaFill(COLOR_RAM_ADDR, 0, NUM_CELLS * 2);
    // The bank byte may carry only address bits 16–19; its upper bits are DMA
    // control flags. Bits 20–27 are already supplied via the job's option bytes.
    cram_job.dmalist.dest_bank = @truncate((COLOR_RAM_ADDR >> 16) & 0x0f);
    triggerDma(&cram_job);

    // Zero the whole graphics area so unrendered rows appear black.
    const fill_job = makeDmaFill(GFX_ADDR, 0, NUM_CELLS * TILE_BYTES);
    triggerDma(&fill_job);
}
| 292 | + |
| 293 | +// ── Fractal rendering: one tile row at a time, DMA-copied to $40000+ ────────── |
/// Builds an `n`-entry table mapping pixel index i to the 8.8 coordinate
/// `lo + i·(hi − lo)/n`. Intended for comptime evaluation only.
fn axisTable(comptime n: u16, comptime lo: Fix16, comptime hi: Fix16) [n]Fix16 {
    var table: [n]Fix16 = undefined;
    const span: i32 = @as(i32, hi) - lo;
    for (&table, 0..) |*entry, i| {
        // 32-bit intermediate: i·span exceeds the i16 range.
        const offset: i32 = @divFloor(@as(i32, @intCast(i)) * span, n);
        entry.* = @intCast(lo + offset);
    }
    return table;
}

/// Renders the Mandelbrot set into the tile graphics area, one row of tiles
/// at a time.
///
/// Each tile row is computed into `tile_row_buf` (tile-major, then row-major
/// inside the tile, one palette index per pixel) and then DMA-copied to its
/// place at GFX_ADDR. Pixels that never escape get palette index 0; escaped
/// pixels get `iterations + 1`.
fn renderFractal() void {
    // View window: real ∈ [−2.0, 0.6], imag ∈ [−1.0, 1.0]
    const RE_MIN: Fix16 = -2 * FP_ONE;
    const RE_MAX: Fix16 = @intFromFloat(0.6 * 256.0);
    const IM_MIN: Fix16 = -FP_ONE;
    const IM_MAX: Fix16 = FP_ONE;
    const SCREEN_W: u16 = @as(u16, CELL_COLS) * TILE_PIXELS; // 320
    const SCREEN_H: u16 = @as(u16, CELL_ROWS) * TILE_PIXELS; // 200

    // Per-pixel coordinates are precomputed at compile time. A single integer
    // 8.8 step would truncate (665/320 and 512/200 both become 2), shrinking
    // the window to roughly [−2, 0.49] × [−1, 0.55] and pushing the set
    // off-centre vertically.
    const re_table = comptime axisTable(SCREEN_W, RE_MIN, RE_MAX);
    const im_table = comptime axisTable(SCREEN_H, IM_MIN, IM_MAX);

    for (0..CELL_ROWS) |cy| {
        for (0..CELL_COLS) |cx| {
            const tile_off: u16 = @as(u16, @intCast(cx)) * TILE_BYTES;
            for (0..TILE_PIXELS) |py| {
                // The imaginary coordinate is constant along a pixel row.
                const ci: Fix16 = im_table[@as(u16, @intCast(cy)) * TILE_PIXELS + py];
                for (0..TILE_PIXELS) |px| {
                    const cr: Fix16 = re_table[@as(u16, @intCast(cx)) * TILE_PIXELS + px];
                    const iter = mandelbrot(cr, ci);
                    tile_row_buf[tile_off + py * TILE_PIXELS + px] =
                        if (iter >= MAX_ITER) 0 else iter + 1;
                }
            }
        }

        // DMA-copy the completed tile row from CPU RAM to graphics memory.
        const src: u32 = @intFromPtr(&tile_row_buf);
        const dst: u32 = GFX_ADDR + @as(u32, @intCast(cy)) * TILE_ROW_BYTES;
        const copy_job = makeDmaCopy(src, dst, TILE_ROW_BYTES);
        triggerDma(&copy_job);
    }
}
| 326 | + |
/// Program entry point: configures the video hardware, prepares screen and
/// tile memory, renders the fractal once, then spins forever so the finished
/// image stays on screen. Never returns.
export fn main() void {
    // Order matters: video mode first, then screen/tile setup, then rendering.
    setupVic();
    setupScreen();
    renderFractal();
    while (true) {}
}
0 commit comments