Skip to content

Commit dc67977

Browse files
committed
Fix automotive & snake demos halting after 512 compartment faults
See the comments (and provided links) for more info about this switcher limitation/interface. Signed-off-by: Alex Jones <alex.jones@lowrisc.org>
1 parent a02f04c commit dc67977

2 files changed

Lines changed: 60 additions & 3 deletions

File tree

examples/automotive/cheri/send.cc

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -151,9 +151,35 @@ void reset_error_seen_and_shown()
151151
* violation message or not, and display it if so. This will only be
152152
* drawn once, and then `errorMessageShown` will be set true.
153153
*/
154-
void lcd_display_cheri_message()
154+
void update_cheri_error_handling()
155155
{
156-
if (!errorSeen || errorMessageShown)
156+
if (!errorSeen)
157+
{
158+
return;
159+
}
160+
161+
/*
162+
* We expect that we might correct the compartment's fault many times,
163+
* especially since the error is being triggered every frame. As a
164+
* built-in protection, the CHERIoT RTOS switcher places a default
165+
* limit of 512 times a compartment invocation may fault, to help
166+
* stop compartments getting stuck during recovery.
167+
*
168+
* Since this is a legitimate use case, and we don't want our demo
169+
* to stop working after 512 errors, reset the counter each time.
170+
*
171+
* We have to reset the count here and not in the error handler as otherwise
172+
* the switcher will decrement its count and then underflow, which it
173+
* will see as a double fault and then unwind.
174+
*
175+
* See:
176+
* https://github.com/CHERIoT-Platform/cheriot-rtos/issues/299
177+
* https://github.com/CHERIoT-Platform/cheriot-rtos/blob/9f3731c0e3805ad56a642987be9bc859e2ee1b4e/sdk/include/switcher.h#L40-L41
178+
*/
179+
switcher_handler_invocation_count_reset();
180+
errorSeen = false;
181+
182+
if (errorMessageShown)
157183
{
158184
return;
159185
}
@@ -537,7 +563,7 @@ void __cheri_compartment("automotive_send") entry()
537563
.wait = wait,
538564
.waitTime = 120 * CyclesPerMillisecond,
539565
.time = rdcycle64,
540-
.loop = lcd_display_cheri_message,
566+
.loop = update_cheri_error_handling,
541567
.start = reset_error_seen_and_shown,
542568
.joystick_read = read_joystick,
543569
.digital_pedal_read = read_pedal_digital,

examples/snake/snake.cc

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,11 @@ static constexpr Color BackgroundColor = Color::Black,
3939
static constexpr Size TileSize = {10, 10}, TileSpacing = {2, 2},
4040
BorderSize = {4, 3};
4141

42+
// Global flag used by the CHERI compartment error handler to detect when a
43+
// capability violation has occurred, so that we can modify the switcher context
44+
// appropriately.
45+
static bool errorSeen = false;
46+
4247
typedef struct Position
4348
{
4449
int32_t x;
@@ -511,6 +516,26 @@ class SnakeGame
511516
return true;
512517
}
513518

519+
/* If any fault was seen in the compartment, handle the error.*/
520+
void handle_compartment_faults()
521+
{
522+
/*
523+
* Just in case somebody wants to play 512+ games of Snake without
524+
* resetting, let's be safe and reset the switcher handler invocation
525+
* count due to our PCC re-installation.
526+
*
527+
* We have to reset the count here and not in the error handler as otherwise
528+
* the switcher will decrement its count and then underflow, which it
529+
* will see as a double fault and then unwind.
530+
*
531+
* See:
532+
* https://github.com/CHERIoT-Platform/cheriot-rtos/issues/299
533+
* https://github.com/CHERIoT-Platform/cheriot-rtos/blob/9f3731c0e3805ad56a642987be9bc859e2ee1b4e/sdk/include/switcher.h#L40-L41
534+
* */
535+
switcher_handler_invocation_count_reset();
536+
errorSeen = false;
537+
}
538+
514539
/**
515540
* @brief Runs the main game loop, updating the snake's movement and drawing
516541
* new information to the display, and regulates update/frame timing.
@@ -551,6 +576,11 @@ class SnakeGame
551576
currentTime = rdcycle64();
552577

553578
gameStillActive = update_game_state(gpio, lcd);
579+
580+
if (errorSeen)
581+
{
582+
handle_compartment_faults();
583+
}
554584
}
555585
};
556586

@@ -627,6 +657,7 @@ compartment_error_handler(ErrorState *frame, size_t mcause, size_t mtval)
627657
if (exceptionCode == CauseCode::BoundsViolation ||
628658
exceptionCode == CauseCode::TagViolation)
629659
{
660+
errorSeen = true;
630661
// If an explicit out of bounds access occurs, or bounds are made
631662
// invalid by some negative array access, we **assume** that this was
632663
// caused by the SnakeGame::check_if_colliding function and that the

0 commit comments

Comments
 (0)