Skip to content

Commit 02a88e5

Browse files
committed
Fixes for Xilinx ZynqMP ZCU102 SD card boot with Linux
Four targeted fixes required to cleanly boot a PetaLinux 2025.2 fitImage from SD card on the Xilinx ZCU102 (Zynq UltraScale+ MPSoC): 1. hal/zynq: implement hal_dts_fixup() to patch /chosen/bootargs in the DTB at runtime. Previously a stub; now mirrors the Versal implementation. Uses LINUX_BOOTARGS / LINUX_BOOTARGS_ROOT to override the PetaLinux-baked root= value, allowing wolfBoot's A/B partition layout (where rootfs is on mmcblk0p4, not p2). 2. hal/zynq: add hal_get_timer_us() using the ARMv8 generic timer (CNTPCT_EL0 / CNTFRQ_EL0). Required so sdhci.c udelay() works. 3. src/sdhci: fix CMD0 cold-boot timeout on Arasan SDHCI v3.0. Add 1ms udelay after sdhci_platform_init(), 1ms after sdhci_set_clock(400KHz), and a 10-retry loop with 10ms udelay between retries around the initial CMD0. Without these, CMD0 races the SD card power-up when DEBUG_SDHCI printf delays are not present. 4. src/boot_aarch64: clean D-cache to PoC and disable MMU + I/D-cache at EL2 before jumping to Linux. ARM64 Linux boot protocol requires MMU off and image cleaned to PoC; otherwise arm64_panic_block_init() panics with 'Non-EFI boot detected with MMU and caches enabled'. Adds el2_cleanup_and_jump_to_linux() asm helper (dc cisw loop + ic iallu + SCTLR_EL2.{M,C,I} clear + br) called from do_boot() when current_el() == 2.
1 parent 58c2e04 commit 02a88e5

File tree

4 files changed

+225
-5
lines changed

4 files changed

+225
-5
lines changed

hal/zynq.c

Lines changed: 56 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,18 @@
5757
/* QSPI bare-metal */
5858
#endif
5959

60+
/* DTB fixup for kernel command line. Override LINUX_BOOTARGS or
61+
* LINUX_BOOTARGS_ROOT in your config to customize.
*
* LINUX_BOOTARGS      - the complete kernel command line. If the build
*                       configuration already defines it, nothing below is
*                       evaluated and LINUX_BOOTARGS_ROOT is not consulted.
* LINUX_BOOTARGS_ROOT - the device spliced in after "root=" when the default
*                       command line is used; defaults to "/dev/mmcblk0p2".
*
* Both defaults are only provided when __WOLFBOOT is defined. */
62+
#ifdef __WOLFBOOT
63+
#ifndef LINUX_BOOTARGS
64+
#ifndef LINUX_BOOTARGS_ROOT
65+
#define LINUX_BOOTARGS_ROOT "/dev/mmcblk0p2"
66+
#endif /* !LINUX_BOOTARGS_ROOT */
67+
#define LINUX_BOOTARGS \
68+
"earlycon console=ttyPS0,115200 root=" LINUX_BOOTARGS_ROOT " rootwait"
69+
#endif /* !LINUX_BOOTARGS */
70+
#endif /* __WOLFBOOT */
71+
6072
/* QSPI Slave Device Information */
6173
typedef struct QspiDev {
6274
uint32_t mode; /* GQSPI_GEN_FIFO_MODE_SPI, GQSPI_GEN_FIFO_MODE_DSPI or GQSPI_GEN_FIFO_MODE_QSPI */
@@ -1796,6 +1808,17 @@ void RAMFUNCTION ext_flash_unlock(void)
17961808
}
17971809

17981810
#ifdef MMU
1811+
/**
 * Get the current time in microseconds from the ARMv8 generic timer.
 *
 * Samples the physical counter (CNTPCT_EL0) and its frequency register
 * (CNTFRQ_EL0) and converts the tick count to microseconds.
 *
 * @return Microseconds represented by the current counter value, or 0 when
 *         CNTFRQ_EL0 reads as zero (no frequency available to scale with).
 */
uint64_t hal_get_timer_us(void)
{
    uint64_t count, freq;

    /* Counter reads may be performed speculatively / out of program order;
     * the ISB makes sure the sample is taken after the preceding
     * instructions have completed. */
    __asm__ volatile("isb" : : : "memory");
    __asm__ volatile("mrs %0, CNTPCT_EL0" : "=r"(count));
    __asm__ volatile("mrs %0, CNTFRQ_EL0" : "=r"(freq));
    if (freq == 0)
        return 0;

    /* Convert whole seconds and the sub-second remainder separately.
     * A direct (count * 1000000) wraps 64 bits once count exceeds
     * 2^64 / 10^6 ticks, which would make the returned time jump backwards.
     * The remainder is always < freq, so its product cannot overflow for
     * any realistic counter frequency. */
    return (count / freq) * 1000000ULL +
           ((count % freq) * 1000000ULL) / freq;
}
1821+
17991822
void* hal_get_dts_address(void)
18001823
{
18011824
#ifdef WOLFBOOT_DTS_BOOT_ADDRESS
@@ -1809,8 +1832,39 @@ void* hal_get_dts_address(void)
18091832

18101833
int hal_dts_fixup(void* dts_addr)
18111834
{
1812-
/* place FDT fixup specific to ZynqMP here */
1813-
//fdt_set_boot_cpuid_phys(buf, fdt_boot_cpuid_phys(fdt));
1835+
int off, ret;
1836+
struct fdt_header *fdt = (struct fdt_header *)dts_addr;
1837+
1838+
/* Verify FDT header */
1839+
ret = fdt_check_header(dts_addr);
1840+
if (ret != 0) {
1841+
wolfBoot_printf("FDT: Invalid header! %d\n", ret);
1842+
return ret;
1843+
}
1844+
1845+
wolfBoot_printf("FDT: Version %d, Size %d\n",
1846+
fdt_version(fdt), fdt_totalsize(fdt));
1847+
1848+
/* Expand totalsize to allow modifying properties */
1849+
fdt_set_totalsize(fdt, fdt_totalsize(fdt) + 512);
1850+
1851+
/* Find /chosen node */
1852+
off = fdt_find_node_offset(fdt, -1, "chosen");
1853+
if (off < 0) {
1854+
/* Create /chosen node if it doesn't exist */
1855+
off = fdt_add_subnode(fdt, 0, "chosen");
1856+
}
1857+
1858+
if (off >= 0) {
1859+
/* Set bootargs property - overrides PetaLinux default root= with
1860+
* the wolfBoot partition layout (root on P4, not P2) */
1861+
wolfBoot_printf("FDT: Setting bootargs: %s\n", LINUX_BOOTARGS);
1862+
fdt_fixup_str(fdt, off, "chosen", "bootargs", LINUX_BOOTARGS);
1863+
} else {
1864+
wolfBoot_printf("FDT: Failed to find/create chosen node (%d)\n", off);
1865+
return off;
1866+
}
1867+
18141868
return 0;
18151869
}
18161870
#endif

src/boot_aarch64.c

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,15 @@ extern unsigned int _end_data;
4343
extern void main(void);
4444
extern void gicv2_init_secure(void);
4545

46+
/* Asm helper in boot_aarch64_start.S: cleans the entire D-cache to PoC,
47+
* disables MMU + I-cache + D-cache via SCTLR_EL2, and branches to a Linux
48+
* kernel entry point with the DTB pointer in x0. Required because wolfBoot's
49+
* EL2 startup enables MMU/caches for its own use, and the ARM64 Linux boot
50+
* protocol requires them OFF at entry. Only built when EL2_HYPERVISOR == 1
51+
* is visible to boot_aarch64_start.S (e.g. via hal/zynq.h on ZynqMP). */
52+
extern void el2_cleanup_and_jump_to_linux(uintptr_t entry, uintptr_t dtb)
53+
__attribute__((noreturn));
54+
4655
/* SKIP_GIC_INIT - Skip GIC initialization before booting app
4756
* This is needed for:
4857
* - Versal: Uses GICv3, not GICv2. BL31 handles GIC setup.
@@ -163,7 +172,25 @@ void RAMFUNCTION do_boot(const uint32_t *app_offset)
163172
el2_to_el1_boot((uintptr_t)app_offset, dts);
164173
}
165174
#else
166-
/* Stay at current EL (EL2 or EL3) and jump directly to application */
175+
/* Stay at current EL (EL2 or EL3) and jump directly to application.
176+
*
177+
* If we're at EL2 we MUST clean dcache to PoC and disable MMU/I+D-cache
178+
* before jumping to a Linux kernel, or Linux's arm64_panic_block_init()
179+
* will panic with "Non-EFI boot detected with MMU and caches enabled".
180+
* The asm helper does the dcache clean, SCTLR_EL2 fixup, and the branch
181+
* to entry with x0=DTB. */
182+
if (current_el() == 2) {
183+
#ifdef MMU
184+
uintptr_t dts = (uintptr_t)dts_offset;
185+
#else
186+
uintptr_t dts = 0;
187+
#endif
188+
wolfBoot_printf("do_boot: cleaning caches, disabling MMU, jumping to Linux\n");
189+
el2_cleanup_and_jump_to_linux((uintptr_t)app_offset, dts);
190+
/* unreachable */
191+
}
192+
193+
/* Non-EL2 path (EL3, etc.): legacy direct br x4 */
167194

168195
/* Set application address via x4 */
169196
asm volatile("mov x4, %0" : : "r"(app_offset));

src/boot_aarch64_start.S

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1334,4 +1334,125 @@ el2_to_el1_boot:
13341334
b .
13351335
#endif /* BOOT_EL1 && EL2_HYPERVISOR */
13361336

1337+
1338+
/*
1339+
* Clean entire D-cache to the Point of Coherency (PoC), invalidate the
1340+
* I-cache to the Point of Unification (PoU), disable MMU + I/D-cache at
1341+
* EL2, then branch to a Linux kernel entry point with the DTB pointer
1342+
* in x0.
1343+
*
1344+
* Terminology (ARM ARM B2.8):
1345+
* PoC - Point of Coherency: the point at which all observers (CPUs,
1346+
* DMA masters, etc.) see the same memory. Cleaning to PoC
1347+
* guarantees the image bytes we memcpy'd are visible to the
1348+
* kernel's first uncached instruction fetches.
1349+
* PoU - Point of Unification: the point at which instruction and data
1350+
* caches converge. Invalidating I-cache to PoU ensures stale
1351+
* fetches are discarded before we hand off.
1352+
*
1353+
* The ARM64 Linux boot protocol (Documentation/arch/arm64/booting.rst)
1354+
* REQUIRES the kernel to be entered with MMU off, D-cache off, and the
1355+
* loaded image cleaned to PoC. Linux's arm64_panic_block_init() panics
1356+
* with "Non-EFI boot detected with MMU and caches enabled" if it detects
1357+
* MMU/caches were on at entry.
1358+
*
1359+
* wolfBoot's startup (line ~347 above) enables MMU+I+D cache at EL2 for
1360+
* its own use. This helper undoes that just before handing off to Linux.
1361+
*
1362+
* Parameters:
1363+
* x0 = kernel entry point
1364+
* x1 = device tree blob address
1365+
*
1366+
* Clobbers x0-x11, x29, x30 and the condition flags; never returns.
1367+
*/
1368+
/* el2_cleanup_and_jump_to_linux(x0 = entry, x1 = dtb)
 * Cleans+invalidates the data cache by set/way, invalidates the I-cache,
 * clears SCTLR_EL2.{M,C,I} and branches to `entry` with x0 = dtb and
 * x1-x3 = 0. There is no `ret`: control leaves through the final `br`. */
.global el2_cleanup_and_jump_to_linux
1369+
el2_cleanup_and_jump_to_linux:
1370+
/* Save entry/dtb out of the clobber range used by the dcache loop.
 * x29/x30 (frame pointer / link register) are overwritten here, which is
 * acceptable only because this routine never returns to its caller. */
1371+
mov x29, x0 /* x29 = entry */
1372+
mov x30, x1 /* x30 = dtb */
1373+
1374+
/* ---- 1. Clean & invalidate entire data cache to PoC by set/way ----
1375+
* Standard ARMv8 routine, adapted from arm-trusted-firmware /
1376+
* U-Boot / Linux. Iterates every (level, set, way) triple and
1377+
* issues `dc cisw` on it. Terminates at the Level of Coherency
1378+
* (LoC) read from CLIDR_EL1.
* NOTE(review): SCTLR_EL2.C is still set while this loop runs (it is only
* cleared in step 3). This routine issues no loads/stores of its own
* between here and step 3 - confirm that is sufficient on the target. */
1379+
mrs x0, clidr_el1
1380+
and x3, x0, #0x07000000 /* x3 = LoC (level of coherency) */
1381+
lsr x3, x3, #23 /* x3 = LoC * 2 */
1382+
cbz x3, dcache_done /* LoC == 0: nothing to clean */
1383+
mov x10, #0 /* x10 = current cache level << 1 */
1384+
1385+
dcache_level_loop:
1386+
add x2, x10, x10, lsr #1 /* x2 = level * 3 */
1387+
lsr x1, x0, x2 /* x1 = ctype field for this level */
1388+
and x1, x1, #7 /* keep only the 3-bit type field */
1389+
cmp x1, #2
1390+
b.lt dcache_skip_level /* No data cache at this level */
1391+
msr csselr_el1, x10 /* Select cache level (instruction = 0) */
1392+
isb /* make the selection visible before reading CCSIDR_EL1 */
1393+
mrs x1, ccsidr_el1
1394+
and x2, x1, #7 /* x2 = log2(line length) - 4 */
1395+
add x2, x2, #4 /* x2 = log2(line length) */
1396+
mov x4, #0x3ff /* 10-bit mask for the way field */
1397+
and x4, x4, x1, lsr #3 /* x4 = max way number */
1398+
clz w5, w4 /* x5 = bit position of way size */
1399+
mov x7, #0x7fff /* 15-bit mask for the set field */
1400+
and x7, x7, x1, lsr #13 /* x7 = max set number */
1401+
1402+
/* Outer loop counts x7 (set) down to 0; inner loop counts x9 (way) down
 * to 0. Both exit when `subs` takes the counter below zero. */
dcache_set_loop:
1403+
mov x9, x4 /* x9 = current way */
1404+
dcache_way_loop:
1405+
lsl x6, x9, x5 /* way number moved into the top bits */
1406+
orr x11, x10, x6 /* level | way */
1407+
lsl x6, x7, x2 /* set number moved above the line-offset bits */
1408+
orr x11, x11, x6 /* level | way | set */
1409+
dc cisw, x11 /* clean & invalidate by set/way */
1410+
subs x9, x9, #1 /* next way */
1411+
b.ge dcache_way_loop
1412+
subs x7, x7, #1 /* next set */
1413+
b.ge dcache_set_loop
1414+
1415+
dcache_skip_level:
1416+
add x10, x10, #2 /* advance to the next level (x10 holds level << 1) */
1417+
cmp x3, x10
1418+
b.gt dcache_level_loop /* continue while below LoC * 2 */
1419+
1420+
dcache_done:
1421+
mov x10, #0
1422+
msr csselr_el1, x10 /* leave cache selection at level 0 */
1423+
dsb sy /* wait for the cache maintenance above to complete */
1424+
isb
1425+
1426+
/* ---- 2. Invalidate entire I-cache to PoU ----
1427+
* `ic iallu` invalidates all instruction cache to the Point of
1428+
* Unification for the local PE. */
1429+
ic iallu
1430+
dsb ish
1431+
isb
1432+
1433+
/* ---- 3. Disable MMU + I-cache + D-cache at EL2 ----
1434+
* SCTLR_EL2.M (bit 0) = MMU enable
1435+
* SCTLR_EL2.C (bit 2) = D-cache enable
1436+
* SCTLR_EL2.I (bit 12) = I-cache enable
1437+
*/
1438+
mrs x0, SCTLR_EL2
1439+
bic x0, x0, #(1 << 0) /* M */
1440+
bic x0, x0, #(1 << 2) /* C */
1441+
bic x0, x0, #(1 << 12) /* I */
1442+
msr SCTLR_EL2, x0
1443+
isb /* ensure the new SCTLR_EL2 value is in effect before the jump */
1444+
1445+
/* ---- 4. Set up Linux ARM64 boot protocol registers and jump ----
1446+
* x0 = DTB address
1447+
* x1 = 0 (reserved)
1448+
* x2 = 0 (reserved)
1449+
* x3 = 0 (reserved)
1450+
* PC = entry
1451+
*/
1452+
mov x0, x30 /* x0 = DTB */
1453+
mov x1, xzr
1454+
mov x2, xzr
1455+
mov x3, xzr
1456+
br x29 /* jump to kernel entry; never returns */
1457+
13371458
.end

src/sdhci.c

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -581,6 +581,7 @@ static uint32_t sdhci_get_response_bits(int from, int count)
581581
/* voltage: 0=off or SDHCI_SRS10_BVS_[X_X]V */
582582
static int sdcard_power_init_seq(uint32_t voltage)
583583
{
584+
int retries;
584585
/* Set power to specified voltage */
585586
int status = sdhci_set_power(voltage);
586587
#ifdef DEBUG_SDHCI
@@ -590,9 +591,18 @@ static int sdcard_power_init_seq(uint32_t voltage)
590591
SDHCI_REG(SDHCI_SRS09), SDHCI_REG(SDHCI_SRS10),
591592
SDHCI_REG(SDHCI_SRS11), SDHCI_REG(SDHCI_SRS12));
592593
#endif
593-
if (status == 0) {
594-
/* send CMD0 (go idle) to reset card */
594+
/* SD spec requires >= 1ms after power stabilizes before CMD0.
595+
* Some cards and the ZynqMP Arasan controller need more settling
596+
* time after the slot-type change + soft reset in sdhci_platform_init().
597+
* Use a retry loop: if CMD0 fails, wait and retry (self-calibrating). */
598+
for (retries = 0; retries < 10; retries++) {
595599
status = sdhci_cmd(MMC_CMD0_GO_IDLE, 0, SDHCI_RESP_NONE);
600+
if (status == 0)
601+
break;
602+
udelay(10000); /* 10ms between retries */
603+
}
604+
if (retries > 0 && status == 0) {
605+
wolfBoot_printf("SD: CMD0 succeeded after %d retries\n", retries);
596606
}
597607
if (status == 0) {
598608
/* send the operating conditions command */
@@ -1387,6 +1397,11 @@ int sdhci_init(void)
13871397
/* Call platform-specific initialization (clocks, resets, pin mux) */
13881398
sdhci_platform_init();
13891399

1400+
/* Allow controller to settle after platform init (slot type change,
1401+
* soft reset, clock configuration). Without this, the controller may
1402+
* not be ready to accept register writes on some platforms. */
1403+
udelay(1000); /* 1ms */
1404+
13901405
/* Reset the host controller */
13911406
sdhci_reg_or(SDHCI_HRS00, SDHCI_HRS00_SWR);
13921407
/* Bit will clear when reset is done */
@@ -1482,6 +1497,9 @@ int sdhci_init(void)
14821497
/* Setup 400khz starting clock */
14831498
sdhci_set_clock(SDHCI_CLK_400KHZ);
14841499

1500+
/* Allow clock to stabilize before issuing first command */
1501+
udelay(1000); /* 1ms */
1502+
14851503
#ifdef DISK_EMMC
14861504
/* Run full eMMC card initialization */
14871505
status = emmc_card_full_init();

0 commit comments

Comments
 (0)