@@ -28,6 +28,21 @@ using namespace CHERI;
2828const int RandTestBlockSize = 256 ;
2929const int HyperramSize = (1024 * 1024 ) / 4 ;
3030
// Ensure that all writing of code to memory has completed before commencing execution
// of that code. Code has been written to [start, end] with both addresses being
// inclusive.
//
// - start: first word of the freshly written code; it is read and written back
//          (see below).
// - end:   last word of the freshly written code. It is not used at present
//          because only the first word is rewritten; it is kept so that callers
//          describe the full extent of the code.
static inline void instr_fence(volatile uint32_t *start, [[maybe_unused]] volatile uint32_t *end) {
  // CPU fence instruction, but this does not guarantee the ordering of transactions
  // with the two TL-UL crossbars and a memory (SRAM or HyperRAM) presenting two
  // separated ports onto those two crossbars.
  //
  // The "memory" clobber stops the compiler from moving memory accesses across
  // the fence.
  asm volatile(" fence.i" : : : "memory");

  // By writing the first word of the code again we can ensure that the code is
  // flushed out to the HyperRAM and will thus be coherent with instruction
  // fetching when the code is executed.
  *start = *start;
}
45+
3146// Write random values to a block of memory (size given by 'RandTestBlockSize'
3247// global constant). Reads them all back and checks read values matched written
3348// values.
@@ -199,12 +214,7 @@ void write_prog(Capability<volatile uint32_t> &hyperram_area, uint32_t addr) {
199214 hyperram_area[addr + 3 ] = 0x00000517 ;
200215 hyperram_area[addr + 4 ] = 0x8082 ;
201216
202- asm volatile (" fence.i" : : : " memory" );
203-
204- // By writing the first word of the code again we can ensure that the code is
205- // flushed out to the HyperRAM and will thus be coherent with instruction
206- // fetching when the code is executed.
207- hyperram_area[addr] = hyperram_area[addr];
217+ instr_fence (&hyperram_area[addr], &hyperram_area[addr + 4 ]);
208218}
209219
210220// Writes a short function to a random area of hyperram and executes it checking
@@ -615,6 +625,51 @@ int write_tests(Capability<volatile uint8_t> hyperram_b_area, Capability<volatil
615625 return failures;
616626}
617627
628+ // Simple performance test of linear code execution from the HyperRAM.
629+ // - build a sequence of repeated instructions at the given address
630+ int linear_execution_test (Capability<volatile uint32_t > hyperram_w_area, ds::xoroshiro::P64R32 &prng, Log &log,
631+ bool report_times = true , uint32_t prog_addr = UINT32_MAX, uint32_t prog_len = 0x2000u ,
632+ int iterations = 1 ) {
633+ int failures = 0 ;
634+
635+ // Choose a target address if not specified.
636+ if (prog_addr == UINT32_MAX) {
637+ prog_addr = prng () & 0x3fcu ; // This is sufficient to achieve all valid alignments.
638+ }
639+
640+ // Emit code; 8KiB (2048 instructions) of repeated 'cincoffset ca0, ca0, 0x4' instructions.
641+ // 0045155b cincoffset ca0, ca0, 0x4
642+ // 00008067 cret
643+ const uint32_t cret_instr = 0x00008067u ;
644+ const uint32_t inc_instr = 0x0045155bu ;
645+ uint32_t cret_idx = (prog_addr + prog_len) >> 2 ;
646+ uint32_t prog_idx = prog_addr >> 2 ;
647+ for (uint32_t idx = prog_idx; idx < cret_idx; ++idx) {
648+ hyperram_w_area[idx] = inc_instr;
649+ }
650+ // Complete the code.
651+ hyperram_w_area[cret_idx] = cret_instr;
652+ test_fn_t test_fn = get_hyperram_fn_ptr (HYPERRAM_ADDRESS + prog_addr);
653+
654+ instr_fence (&hyperram_w_area[prog_idx], &hyperram_w_area[cret_idx]);
655+
656+ // Start timing the execution.
657+ uint32_t start_time = get_mcycle ();
658+ for (int iter = 0 ; iter < iterations; ++iter) {
659+ // Invoke the function with a pointer to itself; each instruction advances the pointer
660+ // by one instruction.
661+ void *ret_ptr = test_fn ((uint32_t *)&hyperram_w_area[prog_idx]);
662+ // Check the returned pointer indicates the `cret` instruction.
663+ failures += (ret_ptr != &hyperram_w_area[cret_idx]);
664+ }
665+
666+ if (report_times) {
667+ log.println (" {} iteration(s) took {} cycles" , iterations, get_mcycle () - start_time);
668+ }
669+
670+ return failures;
671+ }
672+
618673/* *
619674 * C++ entry point for the loader. This is called from assembly, with the
620675 * read-write root in the first argument.
@@ -739,6 +794,25 @@ extern "C" [[noreturn]] void entry_point(void *rwRoot) {
739794 failures += buffering_test (hyperram_area, prng, 0x1000u );
740795 write_test_result (log, failures);
741796
797+ // Linear code sequence executing from HyperRAM.
798+ //
799+ // Executing with the icache disabled places more strain on the HyperRAM controller because
800+ // it will receive many more instruction fetches.
801+ const uint32_t lin_exec_len = 0x2000u ; // 8KiB of code is larger than the icache.
802+ const int lin_exec_iters = 25 ;
803+ bool cache_enabled = false ;
804+ do {
805+ cache_enabled = !cache_enabled;
806+ icache_enabled_set (cache_enabled);
807+ icache_invalidate ();
808+ log.println (" Running linear execution test with icache {:s}..." , cache_enabled ? " enabled" : " disabled" );
809+ failures += linear_execution_test (hyperram_area, prng, log, true , 0u , lin_exec_len, lin_exec_iters);
810+ log.print (" result..." );
811+ write_test_result (log, failures);
812+ } while (cache_enabled);
813+ // Reinstate the normal icache operation.
814+ icache_enabled_set (true );
815+
742816 // Write tests exercise the write coalescing logic of the HyperRAM controller interface.
743817 log.println (" Running write tests..." );
744818 for (int test_type = WriteTestType_B; test_type <= WriteTestType_CD; ++test_type) {
0 commit comments