Skip to content

Commit 0c81ed9

Browse files
committed
Linear execution test of HyperRAM interface
Add a new test that both checks the correctness and measures the performance of executing a linear sequence of repeated instructions from the HyperRAM. This is perhaps more relevant for most current uses of the HyperRAM because it is largely intended to be used for additional code rather than for loading/storing data.
1 parent 6696631 commit 0c81ed9

1 file changed

Lines changed: 80 additions & 6 deletions

File tree

sw/cheri/checks/hyperram_test.cc

Lines changed: 80 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,21 @@ using namespace CHERI;
2828
const int RandTestBlockSize = 256;
2929
const int HyperramSize = (1024 * 1024) / 4;
3030

31+
// Make sure code that has just been written to memory is fully committed before
// that code is executed.
//
// The code occupies the inclusive range [start, end]. Only `start` is accessed
// by this function; `end` is accepted but not currently referenced.
static inline void instr_fence(volatile uint32_t *start, volatile uint32_t *end) {
  // Instruction fence on the CPU side. On its own this does not guarantee the
  // ordering of transactions across the two TL-UL crossbars, because the memory
  // (SRAM or HyperRAM) presents a separate port onto each of those crossbars.
  asm volatile("fence.i" : : : "memory");

  // Read the first code word and store it straight back. Repeating that write
  // ensures the code is flushed out to the HyperRAM, so it is coherent with
  // instruction fetching when the code is executed.
  const uint32_t first_word = *start;
  *start = first_word;
}
45+
3146
// Write random values to a block of memory (size given by 'RandTestBlockSize'
3247
// global constant). Reads them all back and checks read values matched written
3348
// values.
@@ -199,12 +214,7 @@ void write_prog(Capability<volatile uint32_t> &hyperram_area, uint32_t addr) {
199214
hyperram_area[addr + 3] = 0x00000517;
200215
hyperram_area[addr + 4] = 0x8082;
201216

202-
asm volatile("fence.i" : : : "memory");
203-
204-
// By writing the first word of the code again we can ensure that the code is
205-
// flushed out to the HyperRAM and will thus be coherent with instruction
206-
// fetching when the code is executed.
207-
hyperram_area[addr] = hyperram_area[addr];
217+
instr_fence(&hyperram_area[addr], &hyperram_area[addr + 4]);
208218
}
209219

210220
// Writes a short function to a random area of hyperram and executes it checking
@@ -615,6 +625,51 @@ int write_tests(Capability<volatile uint8_t> hyperram_b_area, Capability<volatil
615625
return failures;
616626
}
617627

628+
// Simple performance test of linear code execution from the HyperRAM.
629+
// - build a sequence of repeated instructions at the given address
630+
int linear_execution_test(Capability<volatile uint32_t> hyperram_w_area, ds::xoroshiro::P64R32 &prng, Log &log,
631+
bool report_times = true, uint32_t prog_addr = UINT32_MAX, uint32_t prog_len = 0x2000u,
632+
int iterations = 1) {
633+
int failures = 0;
634+
635+
// Choose a target address if not specified.
636+
if (prog_addr == UINT32_MAX) {
637+
prog_addr = prng() & 0x3fcu; // This is sufficient to achieve all valid alignments.
638+
}
639+
640+
// Emit code; 8KiB (2048 instructions) of repeated 'cincoffset ca0, ca0, 0x4' instructions.
641+
// 0045155b cincoffset ca0, ca0, 0x4
642+
// 00008067 cret
643+
const uint32_t cret_instr = 0x00008067u;
644+
const uint32_t inc_instr = 0x0045155bu;
645+
uint32_t cret_idx = (prog_addr + prog_len) >> 2;
646+
uint32_t prog_idx = prog_addr >> 2;
647+
for (uint32_t idx = prog_idx; idx < cret_idx; ++idx) {
648+
hyperram_w_area[idx] = inc_instr;
649+
}
650+
// Complete the code.
651+
hyperram_w_area[cret_idx] = cret_instr;
652+
test_fn_t test_fn = get_hyperram_fn_ptr(HYPERRAM_ADDRESS + prog_addr);
653+
654+
instr_fence(&hyperram_w_area[prog_idx], &hyperram_w_area[cret_idx]);
655+
656+
// Start timing the execution.
657+
uint32_t start_time = get_mcycle();
658+
for (int iter = 0; iter < iterations; ++iter) {
659+
// Invoke the function with a pointer to itself; each instruction advances the pointer
660+
// by one instruction.
661+
void *ret_ptr = test_fn((uint32_t *)&hyperram_w_area[prog_idx]);
662+
// Check the returned pointer indicates the `cret` instruction.
663+
failures += (ret_ptr != &hyperram_w_area[cret_idx]);
664+
}
665+
666+
if (report_times) {
667+
log.println(" {} iteration(s) took {} cycles", iterations, get_mcycle() - start_time);
668+
}
669+
670+
return failures;
671+
}
672+
618673
/**
619674
* C++ entry point for the loader. This is called from assembly, with the
620675
* read-write root in the first argument.
@@ -739,6 +794,25 @@ extern "C" [[noreturn]] void entry_point(void *rwRoot) {
739794
failures += buffering_test(hyperram_area, prng, 0x1000u);
740795
write_test_result(log, failures);
741796

797+
// Linear code sequence executing from HyperRAM.
798+
//
799+
// Executing with the icache disabled places more strain on the HyperRAM controller because
800+
// it will receive many more instruction fetches.
801+
const uint32_t lin_exec_len = 0x2000u; // 8KiB of code is larger than the icache.
802+
const int lin_exec_iters = 25;
803+
bool cache_enabled = false;
804+
do {
805+
cache_enabled = !cache_enabled;
806+
icache_enabled_set(cache_enabled);
807+
icache_invalidate();
808+
log.println("Running linear execution test with icache {:s}...", cache_enabled ? "enabled" : "disabled");
809+
failures += linear_execution_test(hyperram_area, prng, log, true, 0u, lin_exec_len, lin_exec_iters);
810+
log.print(" result...");
811+
write_test_result(log, failures);
812+
} while (cache_enabled);
813+
// Reinstate the normal icache operation.
814+
icache_enabled_set(true);
815+
742816
// Write tests exercise the write coalescing logic of the HyperRAM controller interface.
743817
log.println("Running write tests...");
744818
for (int test_type = WriteTestType_B; test_type <= WriteTestType_CD; ++test_type) {

0 commit comments

Comments
 (0)