Skip to content

Commit c5a7b16

Browse files
author
T. Andrew Davis
committed
Add Local APIC and Per-CPU data structures for SMP support
- Implemented LAPIC driver in `apic.rs` for initialization, EOI, IPI, and timer setup.
- Added functions for LAPIC base probing, BSP/AP initialization, and timer calibration.
- Introduced per-CPU data structure in `per_cpu.rs` to manage CPU-specific information.
- Established ABI for per-CPU fields to ensure compatibility with assembly code.
- Included functions for accessing and managing per-CPU data, including CPU count detection.
1 parent 47090f0 commit c5a7b16

31 files changed

Lines changed: 2587 additions & 452 deletions

bootloader/src/uefi_allocator.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -220,7 +220,7 @@ unsafe fn try_init_overflow() -> Option<OverflowState> {
220220
return None;
221221
}
222222

223-
let registry = morpheus_hwinit::global_registry_mut();
223+
let mut registry = morpheus_hwinit::global_registry_mut();
224224
let pages = (OVERFLOW_GROW_CHUNK as u64).div_ceil(morpheus_hwinit::PAGE_SIZE);
225225

226226
match registry.allocate_pages(
@@ -269,7 +269,7 @@ unsafe fn try_grow_overflow(state: &mut OverflowState, _needed: usize) -> bool {
269269
// linked_list_allocator::Heap::extend() can merge them into the free list.
270270
let extend_addr = state.base + state.size as u64;
271271

272-
let registry = morpheus_hwinit::global_registry_mut();
272+
let mut registry = morpheus_hwinit::global_registry_mut();
273273
match registry.allocate_pages(
274274
morpheus_hwinit::AllocateType::Address(extend_addr),
275275
morpheus_hwinit::MemoryType::AllocatedHeap,

hwinit/Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@ edition.workspace = true
55
description = "Hardware initialization layer for MorpheusX"
66

77
[features]
8-
default = []
8+
default = ["smp"]
9+
smp = []
910

1011
[dependencies]
1112
# Heap allocator backing

hwinit/asm/cpu/ap_trampoline.s

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
; ═══════════════════════════════════════════════════════════════════════════
2+
; ap_trampoline.s — AP bootstrap: real mode → protected → long mode
3+
;
4+
; Assembled as flat binary (nasm -f bin). The BSP copies this blob to
5+
; physical address 0x8000 and sends SIPI with vector = 0x08.
6+
;
7+
; At SIPI delivery the AP starts executing at CS:IP = 0x0800:0x0000
8+
; in 16-bit real mode. We need to get to 64-bit long mode and jump
9+
; into ap_rust_entry(core_idx, lapic_id).
10+
;
11+
; DATA AREA at offset 0xF00 within this page is filled by ap_boot.rs
12+
; before each AP is woken. Layout must match the TD_* constants there.
13+
;
14+
; ═══════════════════════════════════════════════════════════════════════════
15+
16+
bits 16
17+
org 0x8000 ; physical load address
18+
19+
; ───────────────────────────────────────────────────────────────────────────
20+
; 16-bit real mode entry
21+
; ───────────────────────────────────────────────────────────────────────────
22+
ap_start:
; Real-mode entry point reached via SIPI (CS:IP = 0x0800:0x0000).
; Masks interrupts, points DS/ES/SS at the trampoline page, loads the GDT
; pointer from the data area, sets CR0.PE and far-jumps to ap_pm32.
; Clobbers: ax, eax, sp, ds, es, ss, GDTR, CR0.
23+
cli
24+
cld
25+
26+
; cs = 0x0800 from SIPI, set ds/es/ss to match
27+
mov ax, cs
28+
mov ds, ax
29+
mov es, ax
30+
mov ss, ax
31+
xor sp, sp ; SP = 0: the first push wraps to 0xFFFE (stack grows down from the top of the 64 KiB segment)
32+
33+
; ── load the GDT pointer from the data area ──────────────────────────
34+
; data area is at this_page + 0xF00. Our segment base = 0x8000.
35+
; so offset within segment = 0xF00 + 0x20 (TD_GDT_PTR within data area)
; The o32 prefix is required: with a 16-bit operand size LGDT loads only a
; 24-bit base, which would truncate a GDT located at or above 16 MiB.
; With o32 the low 32 bits of the stored base are loaded.
; NOTE(review): the GDT itself must therefore live below 4 GiB — confirm in ap_boot.rs.
36+
o32 lgdt [0xF20] ; DS:0xF20 = linear 0x8F20 (TD_GDT_PTR), 16-bit limit + 32-bit base
37+
38+
; ── enter protected mode ──────────────────────────────────────────────
39+
mov eax, cr0
40+
or eax, 1 ; PE bit
41+
mov cr0, eax
42+
43+
; far jump to 32-bit protected mode code
44+
; selector 0x08 = index 1 = kernel code descriptor in BSP's GDT
; `dword` forces a 16:32 far pointer so the full `org 0x8000`-based offset is used.
45+
jmp dword 0x08:ap_pm32
46+
47+
; ───────────────────────────────────────────────────────────────────────────
48+
; 32-bit protected mode
49+
; ───────────────────────────────────────────────────────────────────────────
50+
bits 32
51+
ap_pm32:
; 32-bit protected-mode stage of the AP trampoline.
; Loads the data selectors, enables PAE, installs the kernel page tables
; from TD_CR3, sets EFER.LME, turns on paging and far-jumps to ap_lm64.
; The order PAE → CR3 → EFER.LME → CR0.PG must be kept as is.
; Clobbers: eax, ecx, edx (rdmsr), ds/es/fs/gs/ss, CR0, CR3, CR4, EFER.
;
; NOTE(review): this stage is entered through selector 0x08 and the jump
; into ap_lm64 below uses the same selector 0x08. A single descriptor cannot
; describe both a 32-bit and a 64-bit code segment — confirm that the GDT
; published at TD_GDT_PTR makes both far jumps valid.
52+
; load data segments with kernel data selector (0x10)
53+
mov ax, 0x10
54+
mov ds, ax
55+
mov es, ax
56+
mov fs, ax
57+
mov gs, ax
58+
mov ss, ax
59+
60+
; ── enable PAE (required for long mode) ───────────────────────────────
61+
mov eax, cr4
62+
or eax, (1 << 5) ; CR4.PAE
63+
mov cr4, eax
64+
65+
; ── load kernel CR3 from data area ────────────────────────────────────
; 0x8F00 is the flat linear address of the data area (page 0x8000 + 0xF00).
; Only the low dword of the 8-byte TD_CR3 field is read here.
66+
mov eax, dword [0x8F00] ; TD_CR3 low 32 bits (phys address < 4GB)
67+
mov cr3, eax
68+
69+
; ── enable long mode via IA32_EFER.LME ────────────────────────────────
; NOTE(review): only LME is set. If the kernel page tables use the NX bit,
; EFER.NXE (bit 11) presumably has to be set before paging is enabled — confirm.
70+
mov ecx, 0xC0000080 ; IA32_EFER
71+
rdmsr
72+
or eax, (1 << 8) ; LME = bit 8
73+
wrmsr
74+
75+
; ── enable paging → activates long mode ───────────────────────────────
76+
mov eax, cr0
77+
or eax, (1 << 31) ; CR0.PG
78+
mov cr0, eax
79+
80+
; far jump to 64-bit long mode code
; Reloads CS so execution continues in the `bits 64` code at ap_lm64.
81+
jmp dword 0x08:ap_lm64
82+
83+
; ───────────────────────────────────────────────────────────────────────────
84+
; 64-bit long mode
85+
; ───────────────────────────────────────────────────────────────────────────
86+
bits 64
87+
ap_lm64:
; 64-bit stage of the AP trampoline.
; Reloads the data selectors, switches to the per-AP stack from TD_STACK,
; loads the two arguments and tail-jumps into the Rust entry point:
;   ap_rust_entry(core_idx: u32 in RCX, lapic_id: u32 in RDX)   (MS x64)
; All [0x8Fxx] operands are flat linear addresses inside the trampoline page.
; Does not return.
88+
; reload data segments for 64-bit mode
89+
mov ax, 0x10
90+
mov ds, ax
91+
mov es, ax
92+
mov fs, ax
93+
mov ss, ax
94+
; intentionally skip gs — Rust will set GS base via MSR
95+
96+
; ── load the per-AP stack from data area ──────────────────────────────
97+
mov rsp, qword [0x8F10] ; TD_STACK
98+
99+
; ── read core_idx and lapic_id from data area ─────────────────────────
; 32-bit loads zero-extend into the full 64-bit registers.
100+
mov ecx, dword [0x8F18] ; TD_CORE_IDX → RCX (arg1, MS x64)
101+
mov edx, dword [0x8F1C] ; TD_LAPIC_ID → RDX (arg2, MS x64)
102+
103+
; ── jump to Rust entry point ──────────────────────────────────────────
104+
mov rax, qword [0x8F08] ; TD_ENTRY64

; ── build the frame an MS x64 callee expects on entry ─────────────────
; We enter with JMP, not CALL, so nothing pushes a return address. The
; callee still assumes [rsp] = return address, 32 bytes of shadow space
; above it, and RSP ≡ 8 (mod 16). Without this frame a callee that spills
; to its shadow space would write above the stack top, and aligned SSE
; stack accesses could fault. This costs at most 48 bytes and is harmless
; if TD_STACK was already pre-adjusted.
and rsp, -16 ; force 16-byte alignment
sub rsp, 32 ; shadow space for the callee
push qword 0 ; fake return address: a stray `ret` faults at 0 instead of running garbage
105+
jmp rax ; ap_rust_entry(core_idx, lapic_id) — never returns
106+
107+
; ───────────────────────────────────────────────────────────────────────────
108+
; Pad to keep total code well under 0xF00 (data area starts there)
109+
; ───────────────────────────────────────────────────────────────────────────
110+
times (0xF00 - ($ - $$)) db 0 ; zero-fill up to the data area; assembly fails if the code has grown past 0xF00
111+
112+
; ───────────────────────────────────────────────────────────────────────────
113+
; DATA AREA (offset 0xF00 within the 4K page)
114+
;
115+
; Written by ap_boot.rs before every SIPI. Field offsets and sizes must stay
; in sync with the TD_* constants there. Every field assembles to zero.
116+
; ───────────────────────────────────────────────────────────────────────────
117+
; 0xF00: CR3 (8 bytes)
118+
dq 0
119+
120+
; 0xF08: ENTRY64 — address of the 64-bit Rust entry point (8 bytes)
121+
dq 0
122+
123+
; 0xF10: STACK — top of the per-AP kernel stack (8 bytes)
124+
dq 0
125+
126+
; 0xF18: CORE_IDX (4 bytes)
127+
dd 0
128+
129+
; 0xF1C: LAPIC_ID (4 bytes)
130+
dd 0
131+
132+
; 0xF20: GDT_PTR — 10 bytes (limit:2 + base:8), copied from BSP's SGDT
133+
dw 0 ; limit
134+
dq 0 ; base (8 bytes; data directives do not depend on the current `bits` mode)
135+
136+
; 0xF2A: 6 bytes of padding up to 0xF30
137+
times 6 db 0
138+
139+
; 0xF30: READY flag (4 bytes) — AP sets to 1 when Rust entry is reached
140+
dd 0
141+
142+
; zero-fill the remainder so the blob is exactly one 4 KiB page
143+
times (0x1000 - ($ - $$)) db 0

hwinit/asm/cpu/context_switch.s

Lines changed: 56 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,31 @@
11
; ═══════════════════════════════════════════════════════════════════════════
2-
; context_switch.s — Timer ISR + preemptive context switch
2+
; context_switch.s — Timer ISR + preemptive context switch (SMP-safe)
33
;
44
; ABI: Microsoft x64 (RCX, RDX, R8, R9, xmm0-3, shadow space)
55
; Format: PE/COFF (win64)
66
;
77
; Exports:
8-
; irq_timer_isr — installed in IDT vector 0x20 (PIT IRQ 0).
8+
; irq_timer_isr — installed in IDT vector 0x20 (LAPIC timer).
99
; Saves the current CpuContext + FPU/SSE state,
1010
; calls scheduler_tick() (Rust, MS x64), restores
1111
; the next process's state, and resumes via iretq.
1212
;
13+
; Per-CPU data is accessed via GS segment register. The kernel sets
14+
; IA32_GS_BASE to point at the PerCpu struct for each core. SWAPGS
15+
; is used on transitions between ring 3 and ring 0.
16+
;
17+
; PerCpu field offsets (must match per_cpu.rs):
18+
; gs:[0x00] self_ptr
19+
; gs:[0x08] cpu_id (u32)
20+
; gs:[0x0C] current_pid (u32)
21+
; gs:[0x10] next_cr3 (u64)
22+
; gs:[0x18] current_fpu_ptr (u64)
23+
; gs:[0x20] kernel_syscall_rsp (u64)
24+
; gs:[0x28] user_rsp_scratch (u64)
25+
; gs:[0x30] tss_ptr (u64)
26+
; gs:[0x38] lapic_base (u64)
27+
; gs:[0x40] tick_count (u64)
28+
;
1329
; CpuContext field layout (must match hwinit/src/process/context.rs):
1430
; 0x00 rax
1531
; 0x08 rbx
@@ -33,58 +49,37 @@
3349
; 0x98 ss
3450
; Total: 0xA0 (160) bytes
3551
;
36-
; FPU/SSE state (FpuState):
37-
; Saved/restored via FXSAVE/FXRSTOR through `current_fpu_ptr`.
38-
; 512 bytes, 16-byte aligned, stored per-process in PROCESS_TABLE.
39-
; The pointer is updated by scheduler_tick() when it picks the next process.
40-
;
4152
; iretq frame layout pushed by CPU at ISR entry (all 8-byte slots):
42-
; [rsp+0x00] RIP (return address in interrupted code)
43-
; [rsp+0x08] CS (code segment selector, zero-extended)
53+
; [rsp+0x00] RIP
54+
; [rsp+0x08] CS
4455
; [rsp+0x10] RFLAGS
45-
; [rsp+0x18] RSP (stack pointer before interrupt)
46-
; [rsp+0x20] SS (stack segment selector, zero-extended)
56+
; [rsp+0x18] RSP
57+
; [rsp+0x20] SS
4758
; Size: 0x28 (40) bytes
4859
; ═══════════════════════════════════════════════════════════════════════════
4960

5061
bits 64
5162
default rel
5263

53-
; ── Data ──────────────────────────────────────────────────────────────────
54-
section .data
55-
56-
align 8
57-
global next_cr3
58-
next_cr3: dq 0
59-
60-
; Pointer to the FpuState of the currently-running process.
61-
; Written by scheduler_tick() on every switch; read by this ISR for
62-
; FXSAVE (outgoing) and FXRSTOR (incoming). NULL during early boot
63-
; before the scheduler is initialized — guarded by a null check.
64-
align 16
65-
global current_fpu_ptr
66-
current_fpu_ptr: dq 0
64+
; LAPIC EOI register (identity-mapped)
65+
%define LAPIC_EOI_ADDR 0xFEE000B0
6766

6867
section .text
6968

7069
global irq_timer_isr
7170
extern scheduler_tick ; Rust fn: unsafe extern "C" (MS x64 ABI)
7271

7372
; ───────────────────────────────────────────────────────────────────────────
74-
; irq_timer_isr — PIT timer interrupt handler (vector 0x20)
75-
; ───────────────────────────────────────────────────────────────────────────
76-
; Stack layout at ISR entry (before any pushes):
77-
; [rsp+0x28] SS
78-
; [rsp+0x20] RSP (before IRQ)
79-
; [rsp+0x18] RFLAGS
80-
; [rsp+0x10] CS
81-
; [rsp+0x00] RIP
82-
;
83-
; After `sub rsp, 0xA0` our CpuContext struct lives at [rsp]:
84-
; [rsp+0x00 .. 0x9F] CpuContext
85-
; [rsp+0xA0 .. 0xC7] CPU iretq frame (5 × 8 bytes)
73+
; irq_timer_isr — LAPIC timer interrupt handler (vector 0x20)
8674
; ───────────────────────────────────────────────────────────────────────────
8775
irq_timer_isr:
76+
; ── SWAPGS if coming from user mode (ring 3) ─────────────────────────
77+
; check CS RPL in the iretq frame pushed by CPU
78+
test qword [rsp + 0x08], 3 ; CS is at rsp+8 (second qword)
79+
jz .no_swapgs_entry
80+
swapgs
81+
.no_swapgs_entry:
82+
8883
; ── Allocate CpuContext on stack ──────────────────────────────────────
8984
sub rsp, 0xA0
9085

@@ -118,49 +113,42 @@ irq_timer_isr:
118113
mov [rsp + 0x98], rax
119114

120115
; ── Save outgoing process FPU/SSE state (FXSAVE) ─────────────────────
121-
; current_fpu_ptr → &proc.fpu_state of the process being preempted.
122-
; Must happen BEFORE calling Rust (scheduler_tick may use XMM regs).
123-
mov rbx, [rel current_fpu_ptr]
116+
; per-CPU FPU pointer: gs:[0x18]
117+
mov rbx, [gs:0x18] ; current_fpu_ptr
124118
test rbx, rbx
125119
jz .skip_fxsave
126120
fxsave [rbx]
127121
.skip_fxsave:
128122

129-
; ── ACK PIT (send EOI to master PIC before calling Rust) ─────────────
130-
mov al, 0x20
131-
out 0x20, al
123+
; ── ACK LAPIC (write 0 to EOI register) ──────────────────────────────
; The EOI address is loaded into a register first. As a bare memory operand,
; [0xFEE000B0] cannot be encoded correctly in 64-bit code: under `default rel`
; it becomes RIP-relative, and as an absolute disp32 it is sign-extended to
; 0xFFFFFFFFFEE000B0. `mov eax, imm32` zero-extends, so RAX holds the intended
; 0x00000000FEE000B0. RAX is scratch here (the original code also cleared it)
; and is overwritten by scheduler_tick's return value.
; NOTE(review): this uses the fixed LAPIC_EOI_ADDR; the per-CPU lapic_base at
; gs:[0x38] may be the better source if the LAPIC can be relocated — confirm.
124+
mov eax, LAPIC_EOI_ADDR ; rax = LAPIC EOI register address (zero-extended)
125+
mov dword [rax], 0 ; any write signals end-of-interrupt; 0 is the conventional value
132126

133127
; ── Call scheduler_tick(current_ctx: *const CpuContext) ──────────────
134128
; MS x64 ABI: first arg in RCX. Need 32-byte shadow space on stack.
135-
; Current RSP = 8 mod 16 (verified in file header comment).
136-
; After sub 32 still 8 mod 16; CALL pushes 8 → callee sees 0 mod 16. ✓
137129
; MS x64 requires RSP to be 16-byte aligned at the CALL instruction. After
; the CPU-pushed 40-byte iretq frame and the 0xA0-byte CpuContext, RSP is
; 8 (mod 16) here, so reserving only the 32-byte shadow space would leave
; the call site misaligned. Align explicitly and restore from RBX afterwards.
; RBX is callee-saved, so it survives the call; it is scratch at this point
; (used above for the FXSAVE pointer) and is reloaded after the call.
mov rbx, rsp ; rbx = &current_ctx (base of the CpuContext frame)
138130
mov rcx, rsp ; arg1 (RCX) = &current_ctx
139131
and rsp, -16 ; 16-byte align the call site
sub rsp, 32 ; shadow space
call scheduler_tick ; RAX = *const CpuContext (next proc)
140132
mov rsp, rbx ; drop shadow space + alignment padding; rsp = CpuContext frame again
141133

142-
; RAX = *const CpuContext of next process (points into PROCESS_TABLE).
143-
; scheduler_tick has updated current_fpu_ptr to point to the incoming
144-
; process's FpuState.
134+
; RAX = *const CpuContext of next process.
135+
; scheduler_tick has updated gs:[0x18] (current_fpu_ptr) and
136+
; gs:[0x10] (next_cr3) for the incoming process.
145137

146138
; ── Restore incoming process FPU/SSE state (FXRSTOR) ──────────────────
147-
; Must happen AFTER scheduler_tick (which updated the pointer) and
148-
; BEFORE restoring GPRs (FXRSTOR clobbers no GPRs, but we use RBX as
149-
; scratch — RBX will be properly restored from the next context below).
150-
mov rbx, [rel current_fpu_ptr]
139+
mov rbx, [gs:0x18] ; updated current_fpu_ptr
151140
test rbx, rbx
152141
jz .skip_fxrstor
153142
fxrstor [rbx]
154143
.skip_fxrstor:
155144

156145
; ── Switch CR3 if process address spaces differ ───────────────────────
157-
; next_cr3 is written by scheduler_tick() before returning.
158-
mov rbx, [rel next_cr3]
146+
mov rbx, [gs:0x10] ; next_cr3 from PerCpu
159147
test rbx, rbx
160-
jz .skip_cr3 ; zero = unset, don't switch
148+
jz .skip_cr3
161149
mov rcx, cr3
162150
cmp rbx, rcx
163-
je .skip_cr3 ; same address space — avoid TLB flush
151+
je .skip_cr3
164152
mov cr3, rbx
165153
.skip_cr3:
166154

@@ -177,7 +165,6 @@ irq_timer_isr:
177165
mov [rsp + 0xA0 + 0x20], rbx
178166

179167
; ── Restore GPRs from next-process context ────────────────────────────
180-
; rbx is restored last (used as scratch above) except rax (used as ptr).
181168
mov r15, [rax + 0x70]
182169
mov r14, [rax + 0x68]
183170
mov r13, [rax + 0x60]
@@ -191,9 +178,18 @@ irq_timer_isr:
191178
mov rsi, [rax + 0x20]
192179
mov rdx, [rax + 0x18]
193180
mov rcx, [rax + 0x10]
194-
mov rbx, [rax + 0x08] ; restore rbx (was used as scratch)
195-
mov rax, [rax + 0x00] ; restore rax last
181+
mov rbx, [rax + 0x08]
182+
mov rax, [rax + 0x00]
183+
184+
; ── Remove CpuContext frame ───────────────────────────────────────────
185+
add rsp, 0xA0
186+
187+
; ── SWAPGS if returning to user mode (ring 3) ────────────────────────
188+
; check next CS in the patched iretq frame
189+
test qword [rsp + 0x08], 3
190+
jz .no_swapgs_exit
191+
swapgs
192+
.no_swapgs_exit:
196193

197194
; ── Return to next process ────────────────────────────────────────────
198-
add rsp, 0xA0 ; remove CpuContext frame
199195
iretq

0 commit comments

Comments
 (0)