Skip to content

Commit 7e8cb5f

Browse files
committed
Terminate faulting tasks instead of panicking system
The previous panic behavior for unrecoverable faults was intentional during early development, to surface bugs immediately. With the core functionality now stable, proper task termination is implemented to uphold the isolation principle: a faulting task must not bring down the whole system. This change introduces a zombie state for deferred task cleanup. The fault handler marks the faulting task as terminated and signals the trap handler to initiate a context switch; the scheduler then cleans up terminated task resources before selecting the next runnable task. This design works around the constraint that a running task cannot be cancelled directly from interrupt context: by deferring cleanup to the scheduler, the system ensures proper resource reclamation without freeing task resources in fault-handler context. Memory regions are also evicted from hardware protection before being freed, preventing stale references after task termination.
1 parent 9021236 commit 7e8cb5f

6 files changed

Lines changed: 103 additions & 18 deletions

File tree

arch/riscv/hal.c

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -476,15 +476,40 @@ uint32_t do_trap(uint32_t cause, uint32_t epc, uint32_t isr_sp)
476476
goto trap_exit;
477477
}
478478

479-
/* Attempt to recover PMP access faults (code 5 = load fault, 7 = store
480-
* fault) */
479+
/* Attempt to recover load/store access faults.
480+
*
481+
* This assumes all U-mode access faults are PMP-related, which holds
482+
* for platforms without MMU where PMP is the sole memory protection.
483+
* On MCU hardware, bus faults or access to non-existent memory may
484+
* also trigger access exceptions, and terminating the task instead
485+
* of panicking could hide such hardware issues.
486+
*/
481487
if (code == 5 || code == 7) {
482488
uint32_t mtval = read_csr(mtval);
483-
if (pmp_handle_access_fault(mtval, code == 7) == 0) {
489+
int32_t pmp_result = pmp_handle_access_fault(mtval, code == 7);
490+
if (pmp_result == PMP_FAULT_RECOVERED) {
484491
/* PMP fault handled successfully, return current frame */
485492
ret_sp = isr_sp;
486493
goto trap_exit;
487494
}
495+
if (pmp_result == PMP_FAULT_TERMINATE) {
496+
/* Task terminated (marked as zombie), switch to next task.
497+
* Print diagnostic before switching. */
498+
trap_puts("[PMP] Task terminated: ");
499+
trap_puts(code == 7 ? "Store" : "Load");
500+
trap_puts(" access fault at 0x");
501+
for (int i = 28; i >= 0; i -= 4) {
502+
uint32_t nibble = (mtval >> i) & 0xF;
503+
_putchar(nibble < 10 ? '0' + nibble : 'A' + nibble - 10);
504+
}
505+
trap_puts("\r\n");
506+
507+
/* Force context switch to next task */
508+
dispatcher(0);
509+
ret_sp =
510+
pending_switch_sp ? (uint32_t) pending_switch_sp : isr_sp;
511+
goto trap_exit;
512+
}
488513
}
489514

490515
/* Print exception info via direct UART (safe in trap context) */

arch/riscv/pmp.c

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -605,15 +605,16 @@ int32_t pmp_evict_fpage(fpage_t *fpage)
605605
return 0;
606606
}
607607

608-
/* Handles PMP access faults */
608+
/* Handles PMP access faults by loading the required flexpage into hardware. */
609609
int32_t pmp_handle_access_fault(uint32_t fault_addr, uint8_t is_write)
610610
{
611611
if (!kcb || !kcb->task_current || !kcb->task_current->data)
612-
return -1;
612+
return PMP_FAULT_UNHANDLED;
613613

614-
memspace_t *mspace = ((tcb_t *) kcb->task_current->data)->mspace;
614+
tcb_t *current = (tcb_t *) kcb->task_current->data;
615+
memspace_t *mspace = current->mspace;
615616
if (!mspace)
616-
return -1;
617+
return PMP_FAULT_UNHANDLED;
617618

618619
/* Find flexpage containing faulting address */
619620
fpage_t *target_fpage = NULL;
@@ -624,20 +625,24 @@ int32_t pmp_handle_access_fault(uint32_t fault_addr, uint8_t is_write)
624625
}
625626
}
626627

627-
if (!target_fpage || target_fpage->pmp_id != PMP_INVALID_REGION)
628-
return -1;
628+
/* Cannot recover: address not in task's memory space or already loaded */
629+
if (!target_fpage || target_fpage->pmp_id != PMP_INVALID_REGION) {
630+
/* Mark task as zombie for deferred cleanup */
631+
current->state = TASK_ZOMBIE;
632+
return PMP_FAULT_TERMINATE;
633+
}
629634

630635
pmp_config_t *config = pmp_get_config();
631636
if (!config)
632-
return -1;
637+
return PMP_FAULT_UNHANDLED;
633638

634639
/* Load into available region or evict victim */
635640
if (config->next_region_idx < PMP_MAX_REGIONS)
636641
return pmp_load_fpage(target_fpage, config->next_region_idx);
637642

638643
fpage_t *victim = select_victim_fpage(mspace);
639644
if (!victim)
640-
return -1;
645+
return PMP_FAULT_UNHANDLED;
641646

642647
uint8_t victim_region = victim->pmp_id;
643648
int32_t ret = pmp_evict_fpage(victim);

arch/riscv/pmp.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,12 @@ int32_t pmp_load_fpage(fpage_t *fpage, uint8_t region_idx);
155155
*/
156156
int32_t pmp_evict_fpage(fpage_t *fpage);
157157

158+
/* PMP Fault Handler Return Codes */
159+
#define PMP_FAULT_RECOVERED 0 /* Fault recovered, resume execution */
160+
#define PMP_FAULT_UNHANDLED (-1) /* Cannot recover, fall through to default */
161+
#define PMP_FAULT_TERMINATE \
162+
(-2) /* Task terminated, caller invokes dispatcher */
163+
158164
/* Handles PMP access violations.
159165
*
160166
* Attempts to recover from PMP access faults by loading the required memory
@@ -163,7 +169,7 @@ int32_t pmp_evict_fpage(fpage_t *fpage);
163169
*
164170
* @fault_addr : The faulting memory address (from mtval CSR)
165171
* @is_write : 1 for store/AMO access, 0 for load
166-
* Returns 0 on successful recovery, negative error code on failure.
172+
* Returns PMP_FAULT_RECOVERED, PMP_FAULT_UNHANDLED, or PMP_FAULT_TERMINATE.
167173
*/
168174
int32_t pmp_handle_access_fault(uint32_t fault_addr, uint8_t is_write);
169175

include/sys/task.h

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -37,11 +37,12 @@ enum task_priorities {
3737

3838
/* Task Lifecycle States */
3939
enum task_states {
40-
TASK_STOPPED, /* Task created but not yet scheduled */
41-
TASK_READY, /* Task in ready state, waiting to be scheduled */
42-
TASK_RUNNING, /* Task currently executing on CPU */
43-
TASK_BLOCKED, /* Task waiting for delay timer to expire */
44-
TASK_SUSPENDED /* Task paused/excluded from scheduling until resumed */
40+
TASK_STOPPED, /* Task created but not yet scheduled */
41+
TASK_READY, /* Task in ready state, waiting to be scheduled */
42+
TASK_RUNNING, /* Task currently executing on CPU */
43+
TASK_BLOCKED, /* Task waiting for delay timer to expire */
44+
TASK_SUSPENDED, /* Task paused/excluded from scheduling until resumed */
45+
TASK_ZOMBIE /* Task terminated, awaiting resource cleanup */
4546
};
4647

4748
/* Priority Level Constants for Priority-Aware Time Slicing */

kernel/memprot.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,10 +84,13 @@ void mo_memspace_destroy(memspace_t *mspace)
8484
if (!mspace)
8585
return;
8686

87-
/* Free all flexpages in the list */
87+
/* Evict and free all flexpages in the list */
8888
fpage_t *fp = mspace->first;
8989
while (fp) {
9090
fpage_t *next = fp->as_next;
91+
/* Evict from PMP hardware before freeing to prevent stale references */
92+
if (fp->pmp_id != PMP_INVALID_REGION)
93+
pmp_evict_fpage(fp);
9194
mo_fpage_destroy(fp);
9295
fp = next;
9396
}

kernel/task.c

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -351,6 +351,46 @@ void yield(void);
351351
void _dispatch(void) __attribute__((weak, alias("dispatch")));
352352
void _yield(void) __attribute__((weak, alias("yield")));
353353

354+
/* Zombie Task Cleanup
355+
*
356+
* Scans the task list for terminated (zombie) tasks and frees their resources.
357+
* Called from dispatcher to ensure cleanup happens in a safe context.
358+
*/
359+
static void task_cleanup_zombies(void)
360+
{
361+
if (!kcb || !kcb->tasks)
362+
return;
363+
364+
list_node_t *node = list_next(kcb->tasks->head);
365+
while (node && node != kcb->tasks->tail) {
366+
list_node_t *next = list_next(node);
367+
tcb_t *tcb = node->data;
368+
369+
if (tcb && tcb->state == TASK_ZOMBIE) {
370+
/* Remove from task list */
371+
list_remove(kcb->tasks, node);
372+
kcb->task_count--;
373+
374+
/* Clear from lookup cache */
375+
for (int i = 0; i < TASK_CACHE_SIZE; i++) {
376+
if (task_cache[i].task == tcb) {
377+
task_cache[i].id = 0;
378+
task_cache[i].task = NULL;
379+
}
380+
}
381+
382+
/* Free all resources */
383+
if (tcb->mspace)
384+
mo_memspace_destroy(tcb->mspace);
385+
free(tcb->stack);
386+
if (tcb->kernel_stack)
387+
free(tcb->kernel_stack);
388+
free(tcb);
389+
}
390+
node = next;
391+
}
392+
}
393+
354394
/* Round-Robin Scheduler Implementation
355395
*
356396
* Implements an efficient round-robin scheduler tweaked for small systems.
@@ -531,6 +571,9 @@ void dispatch(void)
531571
if (unlikely(!kcb || !kcb->task_current || !kcb->task_current->data))
532572
panic(ERR_NO_TASKS);
533573

574+
/* Clean up any terminated (zombie) tasks */
575+
task_cleanup_zombies();
576+
534577
/* Save current context - only needed for cooperative mode.
535578
* In preemptive mode, ISR already saved context to stack,
536579
* so we skip this step to avoid interference.
@@ -915,6 +958,8 @@ int32_t mo_task_cancel(uint16_t id)
915958
CRITICAL_LEAVE();
916959

917960
/* Free memory outside critical section */
961+
if (tcb->mspace)
962+
mo_memspace_destroy(tcb->mspace);
918963
free(tcb->stack);
919964
if (tcb->kernel_stack)
920965
free(tcb->kernel_stack);

0 commit comments

Comments
 (0)