diff --git a/.github/workflows/test-configs.yml b/.github/workflows/test-configs.yml index 11159bf3c2..91192abb07 100644 --- a/.github/workflows/test-configs.yml +++ b/.github/workflows/test-configs.yml @@ -211,6 +211,24 @@ jobs: arch: ppc config-file: ./config/examples/nxp-t2080.config + # Additional T2080 board-specific compile tests for alternate board macros. + nxp_t2080_test_naii_68ppc2: + uses: ./.github/workflows/test-build-powerpc.yml + with: + arch: ppc + config-file: ./config/examples/nxp-t2080.config + make-args: CFLAGS_EXTRA=-DBOARD_NAII_68PPC2 + + # VPX3-152 compile test: validates board-specific code paths build cleanly. + # Uses default config addresses (128MB layout); real hardware needs the + # VPX3-152 address overrides uncommented in .config. + nxp_t2080_vpx3152_test: + uses: ./.github/workflows/test-build-powerpc.yml + with: + arch: ppc + config-file: ./config/examples/nxp-t2080.config + make-args: CFLAGS_EXTRA=-DBOARD_CW_VPX3152 + nxp_ls1028a_test: uses: ./.github/workflows/test-build.yml with: diff --git a/arch.mk b/arch.mk index 74663afa57..967eb1d906 100644 --- a/arch.mk +++ b/arch.mk @@ -1063,6 +1063,9 @@ ifeq ($(TARGET),nxp_t2080) CFLAGS+=$(ARCH_FLAGS) BIG_ENDIAN=1 CFLAGS+=-DMMU -DWOLFBOOT_FDT -DWOLFBOOT_DUALBOOT + # Support U-Boot legacy uImage header: strip 64-byte header before jumping + # to the OS image (e.g. uVxWorks, uImage Linux kernel). + CFLAGS+=-DWOLFBOOT_UBOOT_LEGACY CFLAGS+=-pipe # use pipes instead of temp files CFLAGS+=-feliminate-unused-debug-types LDFLAGS+=$(ARCH_FLAGS) diff --git a/config/examples/nxp-t2080.config b/config/examples/nxp-t2080.config index 6627f5b7bc..fc91d0347c 100644 --- a/config/examples/nxp-t2080.config +++ b/config/examples/nxp-t2080.config @@ -1,13 +1,17 @@ # NXP T2080 wolfBoot Configuration # Default board: T2080 RDB (66.66 MHz oscillator, DDR3L SODIMM) # -# Board selection: uncomment exactly one line to override the default. 
-# Default (no define): T2080 RDB (66.66 MHz oscillator, DDR3L SODIMM) -# BOARD_CW_VPX3152: CW VPX3-152 (66.667 MHz oscillator, DDR3L) -# BOARD_NAII_68PPC2: NAII 68PPC2 (100 MHz oscillator, 8GB DDR3) +# Board selection: +# Default (no define): T2080 RDB (66.66 MHz oscillator, DDR3L SODIMM) +# BOARD_NAII_68PPC2: NAII 68PPC2 (100 MHz oscillator, 8 GB DDR3) +# BOARD_CW_VPX3152: CW VPX3-152 (66.667 MHz oscillator, 4 GB DDR3L) # -#CFLAGS_EXTRA+=-DBOARD_CW_VPX3152 +# For NAII 68PPC2, uncomment the line below (addresses are the same as RDB): #CFLAGS_EXTRA+=-DBOARD_NAII_68PPC2 +# +# For CW VPX3-152 (256 MB NOR flash at 0xF0000000), uncomment the BOARD +# define AND the address override block at the bottom of this file. +#CFLAGS_EXTRA+=-DBOARD_CW_VPX3152 ARCH=PPC TARGET=nxp_t2080 @@ -35,23 +39,21 @@ DUALBANK_SWAP?=0 WOLFTPM?=0 OPTIMIZATION_LEVEL?=1 -# NOR Base Address -# T2080 RDB: 128MB flash at 0xE8000000, wolfBoot at top (0xEFFE0000) -# CW VPX3-152: 256MB flash at 0xF0000000, wolfBoot at top (0xFFFE0000) +# ----------------------------------------------------------------------------- +# Default addresses: T2080 RDB / NAII 68PPC2 (128 MB NOR flash @ 0xE8000000) +# ----------------------------------------------------------------------------- + +# NOR Base Address: wolfBoot at top of flash ARCH_FLASH_OFFSET?=0xEFFE0000 -#ARCH_FLASH_OFFSET?=0xFFFE0000 # CW VPX3-152 # CPC SRAM address (must match L2SRAM_ADDR in nxp_ppc.h) -# CW VPX3-152: relocated to 0xEE900000 to avoid 256MB flash TLB overlap L2SRAM_ADDR?=0xF8F00000 -#L2SRAM_ADDR?=0xEE900000 # CW VPX3-152 # Flash Sector Size WOLFBOOT_SECTOR_SIZE?=0x10000 # wolfBoot start address WOLFBOOT_ORIGIN?=0xEFFE0000 -#WOLFBOOT_ORIGIN?=0xFFFE0000 # CW VPX3-152 # wolfBoot partition size (custom) BOOTLOADER_PARTITION_SIZE=0x20000 @@ -59,26 +61,36 @@ BOOTLOADER_PARTITION_SIZE=0x20000 WOLFBOOT_PARTITION_SIZE?=0x100000 # Location in Flash for Application Partition WOLFBOOT_PARTITION_BOOT_ADDRESS?=0xEFEE0000 
-#WOLFBOOT_PARTITION_BOOT_ADDRESS?=0xFFEE0000 # CW VPX3-152 # Load Partition to RAM Address WOLFBOOT_LOAD_ADDRESS?=0x19000 # Location in Flash for Update Partition WOLFBOOT_PARTITION_UPDATE_ADDRESS?=0xEFDE0000 -#WOLFBOOT_PARTITION_UPDATE_ADDRESS?=0xFFDE0000 # CW VPX3-152 # Location of temporary sector used during updates WOLFBOOT_PARTITION_SWAP_ADDRESS?=0xEFDD0000 -#WOLFBOOT_PARTITION_SWAP_ADDRESS?=0xFFDD0000 # CW VPX3-152 # DTS (Device Tree) WOLFBOOT_DTS_BOOT_ADDRESS?=0xE8040000 -#WOLFBOOT_DTS_BOOT_ADDRESS?=0xF0040000 # CW VPX3-152 WOLFBOOT_DTS_UPDATE_ADDRESS?=0xE8050000 -#WOLFBOOT_DTS_UPDATE_ADDRESS?=0xF0050000 # CW VPX3-152 # DTS Load to RAM Address WOLFBOOT_LOAD_DTS_ADDRESS?=0x200000 +# ----------------------------------------------------------------------------- +# CW VPX3-152 address overrides (256 MB NOR flash @ 0xF0000000) +# Uncomment ALL lines below when building for VPX3-152. +# Also uncomment CFLAGS_EXTRA+=-DBOARD_CW_VPX3152 at the top of this file. +# ----------------------------------------------------------------------------- +#ARCH_FLASH_OFFSET=0xFFFE0000 +#L2SRAM_ADDR=0xEE900000 +#WOLFBOOT_ORIGIN=0xFFFE0000 +#WOLFBOOT_PARTITION_BOOT_ADDRESS=0xFFEE0000 +#WOLFBOOT_PARTITION_UPDATE_ADDRESS=0xFFDE0000 +#WOLFBOOT_PARTITION_SWAP_ADDRESS=0xFFDD0000 +#WOLFBOOT_DTS_BOOT_ADDRESS=0xF0040000 +#WOLFBOOT_DTS_UPDATE_ADDRESS=0xF0050000 +#WOLFBOOT_LOAD_DTS_ADDRESS=0xF000000 + # Flash erase/write/read test at update partition address #TEST_FLASH?=1 diff --git a/docs/Targets.md b/docs/Targets.md index fbc02e6181..edf48a1c10 100644 --- a/docs/Targets.md +++ b/docs/Targets.md @@ -4009,16 +4009,28 @@ Flash factory_custom.bin to NOR base 0xE800_0000 ## NXP QorIQ T2080 PPC -The NXP QorIQ T2080 is a PPC e6500 based processor (four cores). Support has been tested with the NAII 68PPC2. +The NXP QorIQ T2080 is a PPC e6500 based processor (four cores). 
Three board +variants are supported: + +| Board | Config Define | Oscillator | DDR | NOR Flash | +|-------|---------------|-----------|-----|-----------| +| T2080 RDB (default) | _(none)_ | 66.66 MHz | DDR3L SODIMM | 128 MB @ `0xE8000000` | +| Curtiss-Wright VPX3-152 | `BOARD_CW_VPX3152` | 66.667 MHz | 4 GB DDR3L | 256 MB @ `0xF0000000` | +| NAII 68PPC2 | `BOARD_NAII_68PPC2` | 100 MHz | 8 GB DDR3 | 128 MB @ `0xE8000000` | + +> **Note:** The T2080 RDB DDR register values in `hal/nxp_t2080.h` are +> populated from a U-Boot register dump but have not been validated on +> hardware. The NAII 68PPC2 and CW VPX3-152 DDR configs are populated +> and tested. Example configuration: [/config/examples/nxp-t2080.config](/config/examples/nxp-t2080.config). -Stock layout is default; for NAII 68PPC2, uncomment the "# NAII 68PPC2:" lines and comment the stock lines. +See [Board Selection](#board-selection) below for per-board setup. ### Design NXP T2080 PPC -The QorIQ requires a Reset Configuration Word (RCW) to define the boot parameters, which resides at the start of the flash (0xE8000000). +The QorIQ requires a Reset Configuration Word (RCW) to define the boot parameters, which resides at the start of the flash (`0xE8000000` for 128 MB boards, `0xF0000000` for the 256 MB CW VPX3-152). -The flash boot entry point is `0xEFFFFFFC`, which is an offset jump to wolfBoot initialization boot code. Initially the PowerPC core enables only a 4KB region to execute from. The initialization code (`src/boot_ppc_start.S`) sets the required CCSR and TLB for memory addressing and jumps to wolfBoot `main()`. +The flash boot entry point is the last 4 bytes of the NOR flash region (`0xEFFFFFFC` for 128 MB flash, `0xFFFFFFFC` for 256 MB flash), which is an offset jump to wolfBoot initialization boot code. Initially the PowerPC core enables only a 4KB region to execute from. 
The initialization code (`src/boot_ppc_start.S`) sets the required CCSR and TLB for memory addressing and jumps to wolfBoot `main()`. #### Boot Sequence and Hardware Constraints @@ -4041,7 +4053,7 @@ CPC SRAM is unreliable for stores on cold power-on — L1 dirty-line evictions through CoreNet to CPC cause bus errors (silent CPU checkstop with `MSR[ME]=0`). The fix (matching U-Boot) uses L1 locked D-cache as the initial 16KB stack: `dcbz` allocates cache lines without bus reads, `dcbtls` locks them so they -are never evicted. The locked lines at `L1_CACHE_ADDR` (0xF8E00000) are +are never evicted. The locked lines at `L1_CACHE_ADDR` (`0xF8E00000`; `0xEE800000` on VPX3-152) are entirely core-local. After DDR init in `hal_init()`, the stack relocates to DDR and the CPC switches from SRAM to L3 cache mode. @@ -4052,9 +4064,27 @@ boot, allowing L1 I-cache to cache instruction fetches while preventing speculative prefetch to the IFC. C code switches to `MAS2_I | MAS2_G` during flash write/erase (command mode), then `MAS2_M` for full caching afterward. +**CCSRBAR Relocation (CW VPX3-152 only)** + +The default CCSRBAR at `0xFE000000` (16 MB) falls within the VPX3-152's 256 MB +flash VA range (`0xF0000000`–`0xFFFFFFFF`). The startup assembly relocates +CCSRBAR to `0xEF000000` (just below flash). The CPC SRAM and L1 cache addresses +are also relocated to `0xEE900000`/`0xEE800000` to avoid overlap. + +**Boot ROM TLB invalidation (CW VPX3-152 only)** + +For VPX3-152, TLB1 Entry 2 maps the full 256 MB flash at `0xF0000000-0xFFFFFFFF` +with IPROT. This range overlaps with the boot ROM TLB (default 4 KB at +`0xFFFFF000`, resized to 256 KB at `0xFFFC0000` by `shrink_default_tlb1`). +Overlapping TLB1 entries cause an e6500 multi-hit machine check. After Entry 2 +is created, the boot ROM TLB is cleared via `tlbwe` with `V=0` and `IPROT=0`; +Entry 2 then serves all instruction fetches for the flash region including the +boot ROM range. 
For NAII 68PPC2 and T2080 RDB (128 MB flash at `0xE8000000`), +there is no overlap and the boot ROM TLB remains valid alongside Entry 2. + **RAMFUNCTION Constraints** -The NAII 68PPC2 NOR flash (two S29GL01GS x8 in parallel, 16-bit bus) enters +The NOR flash (two S29GL01GS x8 in parallel, 16-bit bus) enters command mode bank-wide — instruction fetches during program/erase return status data instead of code. All flash write/erase functions are marked `RAMFUNCTION`, placed in `.ramcode`, copied to DDR, and remapped via TLB9. Key rules: @@ -4094,30 +4124,38 @@ machine check (exceptions instead of checkstop), debug, and recoverable interrupt enable. Branch prediction (BUCSR) is deferred to `hal_init()` after DDR stack relocation. -**UART Debug Checkpoints (`DEBUG_UART=1`)** +### Building wolfBoot for NXP T2080 PPC + +By default wolfBoot will use `powerpc-linux-gnu-` cross-compiler prefix. These tools can be installed with the Debian package `gcc-powerpc-linux-gnu` (`sudo apt install gcc-powerpc-linux-gnu`). + +#### Board Selection -Assembly startup emits characters to UART0 (0xFE11C500, 115200 baud): +Copy the example config and select your board: +**T2080 RDB (default):** ``` -1 - CPC invalidate start A - L2 cluster enable start -2 - CPC invalidate done B - L2 cluster enabled -3 - CPC SRAM configured E - L1 cache setup -4 - SRAM LAW configured F - L1 I-cache enabled -5 - Flash TLB configured G - L1 D-cache enabled -6 - CCSRBAR TLB configured D - Stack ready (L1 locked cache) -7 - SRAM TLB configured Z - About to jump to C code -8 - CPC enabled +cp ./config/examples/nxp-t2080.config .config ``` -### Building wolfBoot for NXP T2080 PPC +**Curtiss-Wright VPX3-152:** +``` +cp ./config/examples/nxp-t2080.config .config +``` +Then in `.config`, uncomment `CFLAGS_EXTRA+=-DBOARD_CW_VPX3152` and every line in the +"CW VPX3-152 address overrides" block at the bottom of the file (flash offset, SRAM +address, origin, partition addresses, DTS addresses). 
-By default wolfBoot will use `powerpc-linux-gnu-` cross-compiler prefix. These tools can be installed with the Debian package `gcc-powerpc-linux-gnu` (`sudo apt install gcc-powerpc-linux-gnu`). +**NAII 68PPC2:** +``` +cp ./config/examples/nxp-t2080.config .config +``` +Then in `.config`, uncomment `CFLAGS_EXTRA+=-DBOARD_NAII_68PPC2`. + +#### Build -The `make` creates a `factory.bin` image that can be programmed at `0xE8080000` -(For NAII 68PPC2, first edit `nxp-t2080.config` to uncomment the NAII 68PPC2 lines.) +The `make` creates a `factory.bin` image that can be programmed to the application partition address. ``` -cp ./config/examples/nxp-t2080.config .config make clean make keytools make @@ -4146,19 +4184,31 @@ CROSS_COMPILE_PATH=/opt/fsl-qoriq/2.0/sysroots/ppce6500-fsl-linux/usr ### Programming NXP T2080 PPC -NOR Flash Region: `0xE8000000 - 0xEFFFFFFF` (128 MB) +NOR Flash Regions: +- **T2080 RDB / NAII 68PPC2**: `0xE8000000 - 0xEFFFFFFF` (128 MB) +- **CW VPX3-152**: `0xF0000000 - 0xFFFFFFFF` (256 MB) -Flash Layout (with files): +Flash Layout (T2080 RDB / NAII 68PPC2, 128 MB flash): | Description | File | Address | | ----------- | ---- | ------- | -| Reset Configuration Word (RCW) | `68PPC2_RCW_v0p7.bin` | `0xE8000000` | +| Reset Configuration Word (RCW) | _(board-specific)_ | `0xE8000000` | | Frame Manager Microcode | `fsl_fman_ucode_t2080_r1.0.bin` | `0xE8020000` | | Signed Application | `test-app/image_v1_signed.bin` | `0xE8080000` | -| wolfBoot | `wolfboot.bin` | `0xEFF40000` | -| Boot Entry Point (with offset jump to init code) | | `0xEFFFFFFC` | +| wolfBoot | `wolfboot.bin` | `0xEFFE0000` | +| Boot Entry Point (offset jump to init code) | | `0xEFFFFFFC` | -Or program the `factory.bin` to `0xE8080000` +Flash Layout (CW VPX3-152, 256 MB flash): + +| Description | File | Address | +| ----------- | ---- | ------- | +| Reset Configuration Word (RCW) | _(board-specific)_ | `0xF0000000` | +| Frame Manager Microcode | `fsl_fman_ucode_t2080_r1.0.bin` | 
`0xF0020000` | +| Signed Application | `test-app/image_v1_signed.bin` | `0xFFEE0000` | +| wolfBoot | `wolfboot.bin` | `0xFFFE0000` | +| Boot Entry Point (offset jump to init code) | | `0xFFFFFFFC` | + +Or program the `factory.bin` to the application partition address. Example Boot Debug Output (with `DEBUG_UART=1`): @@ -4197,11 +4247,11 @@ See these TRACE32 demo script files: ``` DO flash_cfi.cmm -FLASH.ReProgram 0xEFF40000--0xEFFFFFFF /Erase -Data.LOAD.binary wolfboot.bin 0xEFF40000 +FLASH.ReProgram 0xEFFE0000--0xEFFFFFFF /Erase +Data.LOAD.binary wolfboot.bin 0xEFFE0000 FLASH.ReProgram.off -Data.LOAD.binary wolfboot.bin 0xEFF40000 /Verify +Data.LOAD.binary wolfboot.bin 0xEFFE0000 /Verify ``` Note: To disable the flash protection bits use: @@ -4219,7 +4269,11 @@ Data.Set 0xE8000000 %W 0x9090 Data.Set 0xE8000000 %W 0x0000 ``` -#### Flash Programming with CodeWarrior TAP +#### Flash Programming with CodeWarrior TAP (Experimental) + +> **Note:** CodeWarrior TAP debugging has not been validated for this target. +> Lauterbach TRACE32 is the recommended debug probe. The following steps are +> provided for reference only. In CodeWarrior use the `Flash Programmer` tool (see under Commander View -> Miscellaneous) * Connection: "CodeWarrior TAP Connection" @@ -4231,13 +4285,113 @@ In CodeWarrior use the `Flash Programmer` tool (see under Commander View -> Misc ``` tftp 1000000 wolfboot.bin -protect off eff40000 +C0000 -erase eff40000 +C0000 -cp.b 1000000 eff40000 C0000 -protect on eff40000 +C0000 -cmp.b 1000000 eff40000 C0000 +protect off effe0000 +20000 +erase effe0000 +20000 +cp.b 1000000 effe0000 20000 +protect on effe0000 +20000 +cmp.b 1000000 effe0000 20000 +``` + +#### CW VPX3-152 PABS Recovery and Testing + +The CW VPX3-152 has a Permanent Alternate Boot Site (PABS) — a second U-Boot on a +separate flash device. 
When jumper JB1 (ALT-BOOT) is installed and the board is reset, +it boots from PABS U-Boot (prompt: `VPX3-152 PABS=>`), which can reprogram the main +NOR flash via TFTP. This is used for wolfBoot development and testing. + +Reference: CW VPX3-152 Firmware User's Manual (838400 rev 6), Section 6. + +**Prerequisites:** +- JB1: Controlled by Pi4 GPIO 16 relay (or physical jumper) +- JB5: Must be removed (NOR write protect disabled) +- NVMRO: Must be grounded +- Serial: COM1 at 115200 N81 (P2 connector) +- Ethernet: GE02 (FM1@DTSEC1) on P1 connector + +**Entering PABS mode:** +1. Install JB1 jumper (or assert GPIO 16 high) +2. Reset the board +3. Board boots to `VPX3-152 PABS=>` prompt + +**Network setup in PABS U-Boot:** +``` +setenv serverip 10.0.4.24 +setenv ipaddr 10.0.4.152 +setenv gatewayip 10.0.4.1 +setenv netmask 255.255.255.0 +``` + +**Flash wolfBoot from PABS:** +``` +tftp 0x1000000 wolfboot.bin +protect off 0xFFFE0000 0xFFFFFFFF +erase 0xFFFE0000 0xFFFFFFFF +cp.b 0x1000000 0xFFFE0000 $filesize +cmp.b 0x1000000 0xFFFE0000 $filesize +``` + +**Flash signed application from PABS:** +``` +tftp 0x1000000 image_v1_signed.bin +protect off 0xFFEE0000 0xFFFDFFFF +erase 0xFFEE0000 0xFFFDFFFF +cp.b 0x1000000 0xFFEE0000 $filesize +cmp.b 0x1000000 0xFFEE0000 $filesize +``` + +**Boot wolfBoot:** Remove JB1 jumper (or deassert GPIO 16), reset the board. + +**Restore original CW U-Boot (from PABS):** +``` +fwupd 608603-100_rev- +``` + +**DDR Register Verification:** + +The CW VPX3-152 DDR register values in `hal/nxp_t2080.h` were obtained from a +U-Boot register dump. To verify or update these values, boot into PABS or main +U-Boot and run the following `md.l` commands. 
Use CCSRBAR `0xEF000000` (CW U-Boot +relocates CCSRBAR) or `0xFE000000` (default, check with `bdinfo`): + +``` +# CS Bounds and Config (DDR_BASE + 0x000, 0x080, 0x0C0) +md.l 0xef008000 4; md.l 0xef008080 4; md.l 0xef0080c0 4 +# Timing (DDR_BASE + 0x100, 0x160) +md.l 0xef008100 4; md.l 0xef008160 3 +# Config/Mode/Clock (DDR_BASE + 0x110, 0x130) +md.l 0xef008110 8; md.l 0xef008130 1 +# ZQ/Write Leveling (DDR_BASE + 0x170, 0x190) +md.l 0xef008170 3; md.l 0xef008190 2 +# RCW/Mode3-8 (DDR_BASE + 0x180, 0x200) +md.l 0xef008180 2; md.l 0xef008200 6 +# Control Driver (DDR_BASE + 0xB28) +md.l 0xef008b28 2 +# Error registers (DDR_BASE + 0xE40, 0xE58) +md.l 0xef008e40 3; md.l 0xef008e58 1 ``` +**Flashing wolfBoot via PABS U-Boot:** + +The PABS U-Boot maps main NOR flash starting at `0x80000000`. To convert wolfBoot +flash addresses to PABS addresses, replace the `0xF` prefix with `0x8` (e.g. +`0xFFFE0000` becomes `0x8FFE0000`). After configuring the network, use: + +``` +# Flash wolfBoot (128 KB at top of flash) +tftp 0x1000000 wolfboot.bin +erase 0x8FFE0000 +0x20000 +cp.b 0x1000000 0x8FFE0000 $filesize +cmp.b 0x1000000 0x8FFE0000 $filesize + +# Flash signed application (1 MB boot partition) +tftp 0x1000000 image_v1_signed.bin +erase 0x8FEE0000 +0x100000 +cp.b 0x1000000 0x8FEE0000 $filesize +cmp.b 0x1000000 0x8FEE0000 $filesize +``` + +Remove the JB1 jumper and power cycle to boot from main flash with wolfBoot. + ### Debugging NXP T2080 PPC #### Lauterbach @@ -4265,9 +4419,11 @@ sYmbol.SourcePATH.SetBaseDir ~/wolfBoot Data.LOAD.Elf wolfboot.elf /NoCODE /StripPART "/home/username/wolfBoot/" ``` -#### CodeWarrior TAP +#### CodeWarrior TAP (Experimental) -This is an example for debugging the T2080 with CodeWarrior TAP, however we were not successful using it. The Lauterbach is what we ended up using to debug. +> **Note:** CodeWarrior TAP debugging has not been validated for this target. +> Lauterbach TRACE32 is the recommended debug probe. 
The following steps are +> provided for reference only. Start GDB Proxy: diff --git a/hal/nxp_ppc.h b/hal/nxp_ppc.h index 0025e1b49b..5c521d2556 100644 --- a/hal/nxp_ppc.h +++ b/hal/nxp_ppc.h @@ -217,11 +217,13 @@ #endif #endif - /* DDR stack configuration - relocate from CPC SRAM after DDR init - * Stack is at top of first 32MB of DDR, with 64KB reserved for stack - * Stack grows downward from DDR_STACK_TOP */ + /* DDR stack configuration - relocate from CPC SRAM after DDR init. + * Stack must be ABOVE the image load area to avoid being overwritten + * when the OS image is copied to WOLFBOOT_LOAD_ADDRESS. For example, with + * WOLFBOOT_LOAD_ADDRESS=0x100000 and WOLFBOOT_PARTITION_SIZE=0x800000 the image + * area ends at 0x900000. Place stack at 16MB to be safely above the image + DTS regions. */ #define DDR_STACK_SIZE (64 * 1024) /* 64KB stack in DDR */ - #define DDR_STACK_TOP 0x02000000UL /* Top of first 32MB */ + #define DDR_STACK_TOP 0x01000000UL /* 16MB - above image area */ #define DDR_STACK_BASE (DDR_STACK_TOP - DDR_STACK_SIZE) /* DDR address where .ramcode is copied before CPC SRAM is released. diff --git a/hal/nxp_t2080.c b/hal/nxp_t2080.c index 27980b240e..c756401fd9 100644 --- a/hal/nxp_t2080.c +++ b/hal/nxp_t2080.c @@ -72,6 +72,12 @@ static void hal_mp_init(void); #define FLASH_UNLOCK_ADDR2 0x555 #endif +/* FLASH_CMD_SECTOR: sector used for flash command sequences that don't target + * a specific sector (reset, unlock, PPB entry/exit). AMD flash command decode + * only looks at the low address bits, so sector 0 works for all boards with + * a properly mapped full-flash TLB entry. 
*/ +#define FLASH_CMD_SECTOR 0 + /* Flash IO Helpers */ #if FLASH_CFI_WIDTH == 16 #define FLASH_IO8_WRITE(sec, n, val) *((volatile uint16_t*)(FLASH_BASE_ADDR + (FLASH_SECTOR_SIZE * (sec)) + ((n) * 2))) = (((val) << 8) | (val)) @@ -164,12 +170,12 @@ void hal_ddr_init(void) /* DDR SDRAM mode configuration */ set32(DDR_SDRAM_MODE, DDR_SDRAM_MODE_VAL); set32(DDR_SDRAM_MODE_2, DDR_SDRAM_MODE_2_VAL); - set32(DDR_SDRAM_MODE_3, DDR_SDRAM_MODE_3_8_VAL); - set32(DDR_SDRAM_MODE_4, DDR_SDRAM_MODE_3_8_VAL); - set32(DDR_SDRAM_MODE_5, DDR_SDRAM_MODE_3_8_VAL); - set32(DDR_SDRAM_MODE_6, DDR_SDRAM_MODE_3_8_VAL); - set32(DDR_SDRAM_MODE_7, DDR_SDRAM_MODE_3_8_VAL); - set32(DDR_SDRAM_MODE_8, DDR_SDRAM_MODE_3_8_VAL); + set32(DDR_SDRAM_MODE_3, DDR_SDRAM_MODE_3_VAL); + set32(DDR_SDRAM_MODE_4, DDR_SDRAM_MODE_4_VAL); + set32(DDR_SDRAM_MODE_5, DDR_SDRAM_MODE_5_VAL); + set32(DDR_SDRAM_MODE_6, DDR_SDRAM_MODE_6_VAL); + set32(DDR_SDRAM_MODE_7, DDR_SDRAM_MODE_7_VAL); + set32(DDR_SDRAM_MODE_8, DDR_SDRAM_MODE_8_VAL); set32(DDR_SDRAM_MD_CNTL, DDR_SDRAM_MD_CNTL_VAL); /* DDR Configuration */ @@ -300,6 +306,9 @@ static void hal_reconfigure_cpc_as_cache(void) *dst++ = *src++; } + /* Ensure all stores have drained before flushing cache lines */ + __asm__ __volatile__("sync" ::: "memory"); + /* Flush D-cache and invalidate I-cache for the DDR copy */ flush_cache(DDR_RAMCODE_ADDR, ramcode_size); @@ -517,9 +526,9 @@ static int RAMFUNCTION hal_flash_ppb_unlock(uint32_t sector) uint32_t timeout; /* Enter PPB ASO (Address Space Overlay) */ - FLASH_IO8_WRITE(0, FLASH_UNLOCK_ADDR1, AMD_CMD_UNLOCK_START); - FLASH_IO8_WRITE(0, FLASH_UNLOCK_ADDR2, AMD_CMD_UNLOCK_ACK); - FLASH_IO8_WRITE(0, FLASH_UNLOCK_ADDR1, AMD_CMD_SET_PPB_ENTRY); + FLASH_IO8_WRITE(FLASH_CMD_SECTOR, FLASH_UNLOCK_ADDR1, AMD_CMD_UNLOCK_START); + FLASH_IO8_WRITE(FLASH_CMD_SECTOR, FLASH_UNLOCK_ADDR2, AMD_CMD_UNLOCK_ACK); + FLASH_IO8_WRITE(FLASH_CMD_SECTOR, FLASH_UNLOCK_ADDR1, AMD_CMD_SET_PPB_ENTRY); /* Read PPB status for target sector: DQ0=0 means 
protected. * On 16-bit bus, must read both chip lanes to check both devices. */ @@ -531,16 +540,16 @@ static int RAMFUNCTION hal_flash_ppb_unlock(uint32_t sector) if ((ppb_status & 0x01) == 0x01) { #endif /* Both chips report unprotected — exit PPB mode and return */ - FLASH_IO8_WRITE(0, 0, AMD_CMD_SET_PPB_EXIT_BC1); - FLASH_IO8_WRITE(0, 0, AMD_CMD_SET_PPB_EXIT_BC2); + FLASH_IO8_WRITE(FLASH_CMD_SECTOR, 0, AMD_CMD_SET_PPB_EXIT_BC1); + FLASH_IO8_WRITE(FLASH_CMD_SECTOR, 0, AMD_CMD_SET_PPB_EXIT_BC2); return 0; } /* Exit PPB ASO before calling printf (flash must be in read-array * mode for I-cache misses to fetch valid instructions) */ - FLASH_IO8_WRITE(0, 0, AMD_CMD_SET_PPB_EXIT_BC1); - FLASH_IO8_WRITE(0, 0, AMD_CMD_SET_PPB_EXIT_BC2); - FLASH_IO8_WRITE(0, 0, AMD_CMD_RESET); + FLASH_IO8_WRITE(FLASH_CMD_SECTOR, 0, AMD_CMD_SET_PPB_EXIT_BC1); + FLASH_IO8_WRITE(FLASH_CMD_SECTOR, 0, AMD_CMD_SET_PPB_EXIT_BC2); + FLASH_IO8_WRITE(FLASH_CMD_SECTOR, 0, AMD_CMD_RESET); udelay(50); #ifdef DEBUG_FLASH @@ -549,24 +558,24 @@ static int RAMFUNCTION hal_flash_ppb_unlock(uint32_t sector) #endif /* Re-enter PPB ASO for erase */ - FLASH_IO8_WRITE(0, FLASH_UNLOCK_ADDR1, AMD_CMD_UNLOCK_START); - FLASH_IO8_WRITE(0, FLASH_UNLOCK_ADDR2, AMD_CMD_UNLOCK_ACK); - FLASH_IO8_WRITE(0, FLASH_UNLOCK_ADDR1, AMD_CMD_SET_PPB_ENTRY); + FLASH_IO8_WRITE(FLASH_CMD_SECTOR, FLASH_UNLOCK_ADDR1, AMD_CMD_UNLOCK_START); + FLASH_IO8_WRITE(FLASH_CMD_SECTOR, FLASH_UNLOCK_ADDR2, AMD_CMD_UNLOCK_ACK); + FLASH_IO8_WRITE(FLASH_CMD_SECTOR, FLASH_UNLOCK_ADDR1, AMD_CMD_SET_PPB_ENTRY); /* PPB Erase All (clears all sectors' PPBs) */ - FLASH_IO8_WRITE(0, 0, AMD_CMD_PPB_UNLOCK_BC1); /* 0x80 */ - FLASH_IO8_WRITE(0, 0, AMD_CMD_PPB_UNLOCK_BC2); /* 0x30 */ + FLASH_IO8_WRITE(FLASH_CMD_SECTOR, 0, AMD_CMD_PPB_UNLOCK_BC1); /* 0x80 */ + FLASH_IO8_WRITE(FLASH_CMD_SECTOR, 0, AMD_CMD_PPB_UNLOCK_BC2); /* 0x30 */ /* Wait for PPB erase completion — poll for toggle stop. * On 16-bit bus, read both chip lanes to ensure both complete. 
*/ timeout = 0; do { #if FLASH_CFI_WIDTH == 16 - read1 = FLASH_IO16_READ(0, 0); - read2 = FLASH_IO16_READ(0, 0); + read1 = FLASH_IO16_READ(FLASH_CMD_SECTOR, 0); + read2 = FLASH_IO16_READ(FLASH_CMD_SECTOR, 0); #else - read1 = FLASH_IO8_READ(0, 0); - read2 = FLASH_IO8_READ(0, 0); + read1 = FLASH_IO8_READ(FLASH_CMD_SECTOR, 0); + read2 = FLASH_IO8_READ(FLASH_CMD_SECTOR, 0); #endif if (read1 == read2) break; @@ -574,11 +583,11 @@ static int RAMFUNCTION hal_flash_ppb_unlock(uint32_t sector) } while (timeout++ < 100000); /* 1 second */ /* Exit PPB ASO */ - FLASH_IO8_WRITE(0, 0, AMD_CMD_SET_PPB_EXIT_BC1); - FLASH_IO8_WRITE(0, 0, AMD_CMD_SET_PPB_EXIT_BC2); + FLASH_IO8_WRITE(FLASH_CMD_SECTOR, 0, AMD_CMD_SET_PPB_EXIT_BC1); + FLASH_IO8_WRITE(FLASH_CMD_SECTOR, 0, AMD_CMD_SET_PPB_EXIT_BC2); /* Reset to read-array mode */ - FLASH_IO8_WRITE(0, 0, AMD_CMD_RESET); + FLASH_IO8_WRITE(FLASH_CMD_SECTOR, 0, AMD_CMD_RESET); udelay(50); if (timeout >= 100000) { @@ -663,6 +672,13 @@ int RAMFUNCTION hal_flash_write(uint32_t address, const uint8_t *data, int len) int ret = 0; uint32_t i, sector, offset, nwords; const uint32_t width_bytes = FLASH_CFI_WIDTH / 8; + uint32_t addr_off = address; + + /* Bounds check */ + if (addr_off >= FLASH_BASE_ADDR) + addr_off -= FLASH_BASE_ADDR; + if (addr_off + (uint32_t)len > FLASH_BANK_SIZE) + return -1; /* Enforce alignment to flash bus width */ if ((address % width_bytes) != 0 || (len % width_bytes) != 0) { @@ -688,7 +704,7 @@ int RAMFUNCTION hal_flash_write(uint32_t address, const uint8_t *data, int len) /* Reset flash to read-array mode in case previous operation left it * in command mode (e.g. after a timeout or incomplete operation) */ - FLASH_IO8_WRITE(0, 0, AMD_CMD_RESET); + FLASH_IO8_WRITE(FLASH_CMD_SECTOR, 0, AMD_CMD_RESET); udelay(50); /* Program one word at a time using AMD single-word program (0xA0). 
@@ -741,6 +757,13 @@ int RAMFUNCTION hal_flash_erase(uint32_t address, int len) { int ret = 0; uint32_t sector; + uint32_t addr_off = address; + + /* Bounds check */ + if (addr_off >= FLASH_BASE_ADDR) + addr_off -= FLASH_BASE_ADDR; + if (addr_off + (uint32_t)len > FLASH_BANK_SIZE) + return -1; /* adjust for flash base */ if (address >= FLASH_BASE_ADDR) @@ -752,7 +775,7 @@ int RAMFUNCTION hal_flash_erase(uint32_t address, int len) /* Reset flash to read-array mode in case previous operation left it * in command mode (e.g. after a timeout or incomplete operation) */ - FLASH_IO8_WRITE(0, 0, AMD_CMD_RESET); + FLASH_IO8_WRITE(FLASH_CMD_SECTOR, 0, AMD_CMD_RESET); udelay(50); while (len > 0) { @@ -848,7 +871,7 @@ extern uint32_t _bootpg_addr; static void hal_mp_up(uint32_t bootpg, uint32_t spin_table_ddr) { uint32_t all_cores, active_cores, whoami; - int timeout = 50, i; + int timeout = 10000, i; /* 10000 * 100us = 1s, matches U-Boot convention */ whoami = get32(PIC_WHOAMI); /* Get current running core number */ all_cores = ((1 << CPU_NUMCORES) - 1); /* mask of all cores */ @@ -990,7 +1013,8 @@ static void hal_mp_init(void) void hal_prepare_boot(void) { - + /* Intentionally empty: pre-boot cleanup (cache flush, interrupt disable) + * is handled by boot_ppc.c:do_boot(). 
*/ } #ifdef MMU diff --git a/hal/nxp_t2080.h b/hal/nxp_t2080.h index 5428b7c60b..79c8431aae 100644 --- a/hal/nxp_t2080.h +++ b/hal/nxp_t2080.h @@ -277,31 +277,30 @@ enum ifc_amask_sizes { #define DDR_REF_RATE_PS 7800000 #else /* T2080 RDB: DDR3L SODIMM */ -/* TODO: Fill SPD parameters from DDR3L SODIMM datasheet */ -#define DDR_N_RANKS 2 /* TODO: confirm from CS_CONFIG dump */ -#define DDR_RANK_DENS 0x100000000 /* TODO: confirm */ +#define DDR_N_RANKS 2 +#define DDR_RANK_DENS 0x80000000 #define DDR_SDRAM_WIDTH 64 #define DDR_EC_SDRAM_W 8 -#define DDR_N_ROW_ADDR 16 /* TODO: confirm */ -#define DDR_N_COL_ADDR 10 /* TODO: confirm */ +#define DDR_N_ROW_ADDR 15 +#define DDR_N_COL_ADDR 10 #define DDR_N_BANKS 8 #define DDR_EDC_CONFIG 2 #define DDR_BURSTL_MASK 0x0c -#define DDR_TCKMIN_X_PS 1500 /* TODO: from DDR3L datasheet */ -#define DDR_TCMMAX_PS 3000 /* TODO: from DDR3L datasheet */ -#define DDR_CASLAT_X 0x000007E0 /* TODO */ -#define DDR_TAA_PS 13500 /* TODO */ -#define DDR_TRCD_PS 13500 /* TODO */ -#define DDR_TRP_PS 13500 /* TODO */ -#define DDR_TRAS_PS 36000 /* TODO */ -#define DDR_TRC_PS 49500 /* TODO */ -#define DDR_TFAW_PS 30000 /* TODO */ -#define DDR_TWR_PS 15000 /* TODO */ -#define DDR_TRFC_PS 260000 /* TODO */ -#define DDR_TRRD_PS 6000 /* TODO */ -#define DDR_TWTR_PS 7500 /* TODO */ -#define DDR_TRTP_PS 7500 /* TODO */ -#define DDR_REF_RATE_PS 7800000 /* TODO */ +#define DDR_TCKMIN_X_PS 1125 +#define DDR_TCMMAX_PS 3000 +#define DDR_CASLAT_X 0x000002FC +#define DDR_TAA_PS 13125 +#define DDR_TRCD_PS 13125 +#define DDR_TRP_PS 13125 +#define DDR_TRAS_PS 34000 +#define DDR_TRC_PS 47125 +#define DDR_TFAW_PS 27000 +#define DDR_TWR_PS 15000 +#define DDR_TRFC_PS 160000 +#define DDR_TRRD_PS 5000 +#define DDR_TWTR_PS 7500 +#define DDR_TRTP_PS 7500 +#define DDR_REF_RATE_PS 7800000 #endif #ifdef BOARD_NAII_68PPC2 @@ -326,6 +325,12 @@ enum ifc_amask_sizes { #define DDR_SDRAM_MODE_VAL 0x00441C70 #define DDR_SDRAM_MODE_2_VAL 0x00980000 #define DDR_SDRAM_MODE_3_8_VAL 
0x00000000 +#define DDR_SDRAM_MODE_3_VAL DDR_SDRAM_MODE_3_8_VAL +#define DDR_SDRAM_MODE_4_VAL DDR_SDRAM_MODE_3_8_VAL +#define DDR_SDRAM_MODE_5_VAL DDR_SDRAM_MODE_3_8_VAL +#define DDR_SDRAM_MODE_6_VAL DDR_SDRAM_MODE_3_8_VAL +#define DDR_SDRAM_MODE_7_VAL DDR_SDRAM_MODE_3_8_VAL +#define DDR_SDRAM_MODE_8_VAL DDR_SDRAM_MODE_3_8_VAL #define DDR_SDRAM_MD_CNTL_VAL 0x00000000 #define DDR_SDRAM_CFG_VAL 0xE7040000 @@ -371,6 +376,12 @@ enum ifc_amask_sizes { #define DDR_SDRAM_MODE_VAL 0x00461014 #define DDR_SDRAM_MODE_2_VAL 0x00A00000 #define DDR_SDRAM_MODE_3_8_VAL 0x00000000 +#define DDR_SDRAM_MODE_3_VAL DDR_SDRAM_MODE_3_8_VAL +#define DDR_SDRAM_MODE_4_VAL DDR_SDRAM_MODE_3_8_VAL +#define DDR_SDRAM_MODE_5_VAL DDR_SDRAM_MODE_3_8_VAL +#define DDR_SDRAM_MODE_6_VAL DDR_SDRAM_MODE_3_8_VAL +#define DDR_SDRAM_MODE_7_VAL DDR_SDRAM_MODE_3_8_VAL +#define DDR_SDRAM_MODE_8_VAL DDR_SDRAM_MODE_3_8_VAL #define DDR_SDRAM_MD_CNTL_VAL 0x00000000 #define DDR_SDRAM_CFG_VAL 0xE7240000 /* MEM_EN|SREN|ECC_EN, DDR3 */ @@ -395,55 +406,59 @@ enum ifc_amask_sizes { #define DDR_ERR_INT_EN_VAL 0x0000001D #define DDR_ERR_SBE_VAL 0x00010000 #else -/* T2080 RDB: DDR register values */ -/* TODO: Fill ALL values from Phase 1 U-Boot register dump: +/* T2080 RDB DDR register values from U-Boot register dump. 
+ * T2080 RDB (default CCSRBAR = 0xFE000000, DDR_BASE = 0xFE008000): * md.l 0xfe008000 4; md.l 0xfe008010 4 (CS BNDS) * md.l 0xfe008080 4; md.l 0xfe0080c0 4 (CS CONFIG) * md.l 0xfe008100 4; md.l 0xfe008160 3 (TIMING) * md.l 0xfe008110 8; md.l 0xfe008130 1 (CONFIG/MODE/CLK) * md.l 0xfe008170 3; md.l 0xfe008190 2 (WRLVL) * md.l 0xfe008200 6; md.l 0xfe008b28 2 (MODE3-8/CDR) */ -#define DDR_CS0_BNDS_VAL 0x00000000 /* TODO: from dump */ -#define DDR_CS1_BNDS_VAL 0x00000000 /* TODO: from dump */ -#define DDR_CS2_BNDS_VAL 0x00000000 /* TODO: from dump */ -#define DDR_CS3_BNDS_VAL 0x00000000 /* TODO: from dump */ -#define DDR_CS0_CONFIG_VAL 0x00000000 /* TODO: from dump */ -#define DDR_CS1_CONFIG_VAL 0x00000000 /* TODO: from dump */ -#define DDR_CS2_CONFIG_VAL 0x00000000 /* TODO: from dump */ -#define DDR_CS3_CONFIG_VAL 0x00000000 /* TODO: from dump */ -#define DDR_CS_CONFIG_2_VAL 0x00000000 /* TODO: from dump */ - -#define DDR_TIMING_CFG_3_VAL 0x00000000 /* TODO: from dump */ -#define DDR_TIMING_CFG_0_VAL 0x00000000 /* TODO: from dump */ -#define DDR_TIMING_CFG_1_VAL 0x00000000 /* TODO: from dump */ -#define DDR_TIMING_CFG_2_VAL 0x00000000 /* TODO: from dump */ -#define DDR_TIMING_CFG_4_VAL 0x00000000 /* TODO: from dump */ -#define DDR_TIMING_CFG_5_VAL 0x00000000 /* TODO: from dump */ - -#define DDR_SDRAM_MODE_VAL 0x00000000 /* TODO: from dump */ -#define DDR_SDRAM_MODE_2_VAL 0x00000000 /* TODO: from dump */ -#define DDR_SDRAM_MODE_3_8_VAL 0x00000000 /* TODO: from dump */ -#define DDR_SDRAM_MD_CNTL_VAL 0x00000000 /* TODO: from dump */ - -#define DDR_SDRAM_CFG_VAL 0x00000000 /* TODO: from dump */ -#define DDR_SDRAM_CFG_2_VAL 0x00000000 /* TODO: from dump */ - -#define DDR_SDRAM_INTERVAL_VAL 0x00000000 /* TODO: from dump */ +#define DDR_CS0_BNDS_VAL 0x000000FF +#define DDR_CS1_BNDS_VAL 0x000000FF +#define DDR_CS2_BNDS_VAL 0x00000000 +#define DDR_CS3_BNDS_VAL 0x00000000 +#define DDR_CS0_CONFIG_VAL 0x80044302 +#define DDR_CS1_CONFIG_VAL 0x80004302 +#define DDR_CS2_CONFIG_VAL 
0x00000000 +#define DDR_CS3_CONFIG_VAL 0x00000000 +#define DDR_CS_CONFIG_2_VAL 0x00000000 + +#define DDR_TIMING_CFG_3_VAL 0x02081000 +#define DDR_TIMING_CFG_0_VAL 0x9011000E +#define DDR_TIMING_CFG_1_VAL 0xD0D8EE57 +#define DDR_TIMING_CFG_2_VAL 0x0048E15A +#define DDR_TIMING_CFG_4_VAL 0x00000001 +#define DDR_TIMING_CFG_5_VAL 0x05401400 + +#define DDR_SDRAM_MODE_VAL 0x00441E14 +#define DDR_SDRAM_MODE_2_VAL 0x00A00000 +#define DDR_SDRAM_MODE_3_VAL 0x00001E14 +#define DDR_SDRAM_MODE_4_VAL 0x00A00000 +#define DDR_SDRAM_MODE_5_VAL 0x00001E14 +#define DDR_SDRAM_MODE_6_VAL 0x00A00000 +#define DDR_SDRAM_MODE_7_VAL 0x00001E14 +#define DDR_SDRAM_MODE_8_VAL 0x00A00000 +#define DDR_SDRAM_MD_CNTL_VAL 0x00000000 + +#define DDR_SDRAM_CFG_VAL 0xE7044000 +#define DDR_SDRAM_CFG_2_VAL 0x00401100 + +#define DDR_SDRAM_INTERVAL_VAL 0x0E38038E #define DDR_DATA_INIT_VAL 0xDEADBEEF -#define DDR_SDRAM_CLK_CNTL_VAL 0x00000000 /* TODO: from dump */ -#define DDR_ZQ_CNTL_VAL 0x00000000 /* TODO: from dump */ +#define DDR_SDRAM_CLK_CNTL_VAL 0x02800000 +#define DDR_ZQ_CNTL_VAL 0x89080600 -/* Write leveling - CRITICAL: board-specific values from U-Boot. - * These depend on PCB trace lengths and MUST come from the register dump. 
*/ -#define DDR_WRLVL_CNTL_VAL 0x00000000 /* TODO: from dump */ -#define DDR_WRLVL_CNTL_2_VAL 0x00000000 /* TODO: from dump */ -#define DDR_WRLVL_CNTL_3_VAL 0x00000000 /* TODO: from dump */ +/* Write leveling - board-specific values from U-Boot register dump */ +#define DDR_WRLVL_CNTL_VAL 0x8675F607 +#define DDR_WRLVL_CNTL_2_VAL 0x0808080C +#define DDR_WRLVL_CNTL_3_VAL 0x0B0C0C09 -#define DDR_SDRAM_RCW_1_VAL 0x00000000 /* TODO: from dump */ -#define DDR_SDRAM_RCW_2_VAL 0x00000000 /* TODO: from dump */ +#define DDR_SDRAM_RCW_1_VAL 0x00000000 +#define DDR_SDRAM_RCW_2_VAL 0x00000000 -#define DDR_DDRCDR_1_VAL 0x00000000 /* TODO: from dump */ -#define DDR_DDRCDR_2_VAL 0x00000000 /* TODO: from dump */ +#define DDR_DDRCDR_1_VAL 0x80040000 +#define DDR_DDRCDR_2_VAL 0x00000001 #define DDR_ERR_INT_EN_VAL 0x0000001D #define DDR_ERR_SBE_VAL 0x00010000 diff --git a/include/image.h b/include/image.h index c9e7fcba10..0207ecfdcc 100644 --- a/include/image.h +++ b/include/image.h @@ -1380,7 +1380,11 @@ static inline int wb_flash_write_verify_word(struct wolfBoot_image *img, /* -- Image Formats -- */ /* Legacy U-Boot Image */ +#ifdef BIG_ENDIAN_ORDER +#define UBOOT_IMG_HDR_MAGIC 0x27051956UL +#else #define UBOOT_IMG_HDR_MAGIC 0x56190527UL +#endif #define UBOOT_IMG_HDR_SZ 64 /* --- Flattened Device Tree Blob */ diff --git a/src/boot_ppc_mp.S b/src/boot_ppc_mp.S index 7dc74698a1..e22de320b7 100644 --- a/src/boot_ppc_mp.S +++ b/src/boot_ppc_mp.S @@ -27,6 +27,21 @@ #define TORESET(x) (x - _secondary_start_page + BOOT_ROM_ADDR) +/* e6500 has 64-bit GPRs. When loading 32-bit addresses with bit 31 set + * (addresses >= 0x80000000), the lis instruction sign-extends, putting + * 0xFFFFFFFF in the upper 32 bits. This causes memory access failures. + * Use LOAD_ADDR32 macro to properly load 32-bit addresses on e6500. 
*/ +#ifdef CORE_E6500 +#define LOAD_ADDR32(reg, addr) \ + li reg, 0; \ + oris reg, reg, (addr)@h; \ + ori reg, reg, (addr)@l +#else +#define LOAD_ADDR32(reg, addr) \ + lis reg, (addr)@h; \ + ori reg, reg, (addr)@l +#endif + /* Additional cores (mp) assembly code for core minimum startup and spin table. * All code must fit in 4KB, which gets virtually mapped via the TLB1 (MMU) and * loaded by core 0. Spin table entry TLB1(0) mapped for work is 64MB. @@ -34,9 +49,9 @@ .section .bootmp, "ax" .globl _secondary_start_page _secondary_start_page: - /* Time base, MAS7 and machine check pin enable */ - lis r0, (HID0_EMCP | HID0_TBEN | HID0_ENMAS7)@h - ori r0, r0, (HID0_EMCP | HID0_TBEN | HID0_ENMAS7)@l + /* Time base, MAS7 and machine check pin enable. + * HID0_EMCP=0x80000000 has bit 31 set; use LOAD_ADDR32 for e6500. */ + LOAD_ADDR32(r0, (HID0_EMCP | HID0_TBEN | HID0_ENMAS7)) mtspr SPRN_HID0, r0 #ifdef CORE_E500 @@ -98,9 +113,10 @@ branch_prediction: andi. r1, r3, L1CSR_CE@l beq 2b - /* Get our PIR to figure out our table entry */ - lis r3, TORESET(_spin_table_addr)@h - ori r3, r3, TORESET(_spin_table_addr)@l + /* Get our PIR to figure out our table entry. + * TORESET(...) resolves to address near BOOT_ROM_ADDR (0xFFFFF000), + * bit 31 set; use LOAD_ADDR32 for e6500. */ + LOAD_ADDR32(r3, TORESET(_spin_table_addr)) lwz r3, 0(r3) /* Use PIR to determine cluster/core for spin table base at r10 */ @@ -119,7 +135,7 @@ branch_prediction: mr r4, r0 mr r5, r4 #endif - slwi r8, r5, 6 /* spin table is padded to 64 bytes */ + slwi r8, r5, 6 /* multiply by ENTRY_SIZE (64 bytes) */ /* use r10 for the spin table base address */ add r10, r3, r8 @@ -160,8 +176,9 @@ l2_poll_invclear: addi r3, r8, 1 mtspr L2CSR1, r3 - /* enable L2 with no parity */ - lis r3, (L2CSR0_L2E)@h + /* enable L2 with no parity. + * L2CSR0_L2E=0x80000000 has bit 31 set; use LOAD_ADDR32 for e6500. 
*/ + LOAD_ADDR32(r3, L2CSR0_L2E) mtspr L2CSR0, r3 isync 2: @@ -171,33 +188,37 @@ l2_poll_invclear: #endif #endif /* CORE_E5500 || CORE_E6500 */ 3: - /* setup mapping for the spin table, WIMGE=0b00100 */ - lis r13, TORESET(_spin_table_addr)@h - ori r13, r13, TORESET(_spin_table_addr)@l + /* setup mapping for the spin table, WIMGE=0b00100. + * TORESET(...) has bit 31 set; use LOAD_ADDR32 for e6500. */ + LOAD_ADDR32(r13, TORESET(_spin_table_addr)) lwz r13, 0(r13) /* mask by 4K */ rlwinm r13, r13, 0, 0, 19 lis r11, (MAS0_TLBSEL(1) | MAS0_ESEL(1))@h mtspr MAS0, r11 - lis r11, (MAS1_VALID | MAS1_IPROT)@h - ori r11, r11, (MAS1_TS | MAS1_TSIZE(BOOKE_PAGESZ_4K))@l + /* MAS1_VALID=0x80000000 has bit 31 set; use LOAD_ADDR32 for e6500. */ + LOAD_ADDR32(r11, (MAS1_VALID | MAS1_IPROT | MAS1_TS | + MAS1_TSIZE(BOOKE_PAGESZ_4K))) mtspr MAS1, r11 + /* Build MAS2 = r13 (spin table base, 4K aligned) | MAS2_M | MAS2_G. + * Note: the ori must take r11 (the oris result) as its source, not + * r13 (the original code erroneously used r13 on the second op, which + * discarded the upper 16 bits set by the first oris). */ oris r11, r13, (MAS2_M | MAS2_G)@h - ori r11, r13, (MAS2_M | MAS2_G)@l + ori r11, r11, (MAS2_M | MAS2_G)@l mtspr MAS2, r11 oris r11, r13, (MAS3_SX | MAS3_SW | MAS3_SR)@h - ori r11, r13, (MAS3_SX | MAS3_SW | MAS3_SR)@l + ori r11, r11, (MAS3_SX | MAS3_SW | MAS3_SR)@l mtspr MAS3, r11 li r11, 0 mtspr MAS7, r11 tlbwe /* _bootpg_addr has the address of _second_half_boot_page - * jump there in AS=1 space with cache enabled - */ - lis r13, TORESET(_bootpg_addr)@h - ori r13, r13, TORESET(_bootpg_addr)@l + * jump there in AS=1 space with cache enabled. + * TORESET(...) has bit 31 set; use LOAD_ADDR32 for e6500. 
*/ + LOAD_ADDR32(r13, TORESET(_bootpg_addr)) lwz r11, 0(r13) mtspr SRR0, r11 mfmsr r13 @@ -268,7 +289,7 @@ _second_half_boot_page: li r8, 3 stw r8, ENTRY_ADDR_LOWER(r10) - /* mask branch address (64MB) to setup tlb */ + /* Align branch address to 64MB boundary for TLB mapping below */ rlwinm r12, r4, 0, 0, 5 /* setup registers before jump */ @@ -280,7 +301,7 @@ _second_half_boot_page: li r4, 0 li r5, 0 li r6, 0 - lis r7, (64 * 1024 * 1024)@h + lis r7, (64 * 1024 * 1024)@h /* r7 = IMA size (64MB per ePAPR) */ li r8, 0 li r9, 0 @@ -295,8 +316,8 @@ _second_half_boot_page: /* Add tlb 1 entry 0 64MB for new entry */ lis r10, (MAS0_TLBSEL(1) | MAS0_ESEL(0))@h mtspr MAS0, r10 - lis r10, (MAS1_VALID | MAS1_IPROT)@h - ori r10, r10, (MAS1_TSIZE(BOOKE_PAGESZ_64M))@l + /* MAS1_VALID=0x80000000 has bit 31 set; use LOAD_ADDR32 for e6500. */ + LOAD_ADDR32(r10, (MAS1_VALID | MAS1_IPROT | MAS1_TSIZE(BOOKE_PAGESZ_64M))) mtspr MAS1, r10 mtspr MAS2, r12 /* WIMGE = 0 */ ori r12, r12, (MAS3_SX | MAS3_SW | MAS3_SR) @@ -309,7 +330,7 @@ _second_half_boot_page: rfi /* Reserve space for spin table entries */ - .align 6 /* 64-bytes */ + .align 6 /* 64-byte alignment for spin table entries (ENTRY_SIZE) */ .globl _spin_table _spin_table: .space CPU_NUMCORES * ENTRY_SIZE diff --git a/src/boot_ppc_start.S b/src/boot_ppc_start.S index 9b0609fc66..d7155c869d 100644 --- a/src/boot_ppc_start.S +++ b/src/boot_ppc_start.S @@ -87,6 +87,87 @@ All TLBs for boot will be in TLB1 and supervisor mode (not user) ori reg, reg, (addr)@l #endif +/* Early UART debug output for boot diagnostics. + * Uses DUART0 at CCSRBAR + 0x11C500 (THR=+0, LSR=+5, LCR=+3, DLB=+0, DMB=+1). + * Before CCSRBAR relocation: 0xFE11C500 (default). + * After relocation on VPX3-152: 0xEF11C500. 
+ * Baud divisor for 115200 with a 533.333MHz platform clock (UART clock = platform clock / 2): + * divisor = uart_clock / (16 * baud) = 266666667 / (16 * 115200) = 145 = 0x0091 + * However platform clock varies; the default divisor matches the CW PABS + * U-Boot which sets 115200 baud. We re-init to be safe. + * Clobbers r10, r11. */ +#if defined(DEBUG_UART) && defined(CORE_E6500) +#define EARLY_UART_BASE (CCSRBAR_DEF + 0x11C500) +/* UART baud divisor: uart_clk / (16 * 115200) + * The DUART is clocked at half the platform (CCB) clock, not the full CCB clock. + * Example: CCB=533.333 MHz, UART clock = CCB/2 = 266.667 MHz + * Divisor = 266666667 / (16 * 115200) = 144.68 ~ 145 = 0x0091 + * 0x0091 is therefore the default divisor; per-board values are selected below. */ +/* T2080 DUART baud divisor - computed from platform clock at runtime + * in C code (hal_init). For early assembly debug, uart_init_at programs the + * fixed UART_DIVISOR_HI/LO below; if that does not match the real clock, output + * is garbled but still marks progress (each unique byte = different boot stage). 
*/ +/* CW VPX3-152 (608605-100 RCW) - 1.8 GHz / 600 MHz platform clock: + * bus_clk = SYS_CLK * plat_ratio / 2 = 66.667 MHz * 9 / 2 = 300 MHz + * divisor = bus_clk / (16 * 115200) = 300000000 / 1843200 = 163 = 0xA3 + * For CW PABS (608609-100 RCW) - 1.2 GHz / 533 MHz platform clock: + * bus_clk = 66.667 * 8 / 2 = 266.667 MHz, divisor = 145 = 0x91 */ +#ifdef BOARD_CW_VPX3152 +#define UART_DIVISOR_HI 0x00 +#define UART_DIVISOR_LO 0xA3 +#else +#define UART_DIVISOR_HI 0x00 +#define UART_DIVISOR_LO 0x91 +#endif +.macro uart_init_at base + li r10, 0 + oris r10, r10, (\base)@h + ori r10, r10, (\base)@l + /* Set DLAB to access divisor registers */ + li r11, 0x83 /* LCR: DLAB=1, 8N1 */ + stb r11, 3(r10) /* LCR */ + li r11, UART_DIVISOR_LO + stb r11, 0(r10) /* DLB (divisor low) */ + li r11, UART_DIVISOR_HI + stb r11, 1(r10) /* DMB (divisor high) */ + /* Clear DLAB, set 8N1 */ + li r11, 0x03 /* LCR: DLAB=0, 8N1 */ + stb r11, 3(r10) /* LCR */ + /* Enable and reset FIFOs */ + li r11, 0x07 /* FCR: FIFO enable, reset TX+RX */ + stb r11, 2(r10) /* FCR */ + /* MCR: DTR + RTS */ + li r11, 0x03 + stb r11, 4(r10) /* MCR */ +.endm +.macro debug_char_imm char + li r10, 0 + oris r10, r10, (EARLY_UART_BASE)@h + ori r10, r10, (EARLY_UART_BASE)@l +1: lbz r11, 5(r10) /* LSR */ + andi. r11, r11, 0x20 /* THRE */ + beq 1b + li r11, \char + stb r11, 0(r10) /* THR */ +.endm +/* After CCSRBAR relocation, use new base address */ +.macro debug_char_new char + li r10, 0 + oris r10, r10, (CCSRBAR + 0x11C500)@h + ori r10, r10, (CCSRBAR + 0x11C500)@l +1: lbz r11, 5(r10) /* LSR */ + andi. 
r11, r11, 0x20 /* THRE */ + beq 1b + li r11, \char + stb r11, 0(r10) /* THR */ +.endm +#else +.macro debug_char_imm char +.endm +.macro debug_char_new char +.endm +#endif + /* variables from linker script */ .global _start_vector .global isr_empty @@ -335,28 +416,37 @@ setup_interrupts: #if CCSRBAR_DEF != CCSRBAR_PHYS /* Use R8 = new, R9 = old virtual */ - lis r8, CCSRBAR@h - ori r8, r8, CCSRBAR@l - lis r9, (CCSRBAR + 0x1000)@h - ori r9, r9, (CCSRBAR + 0x1000)@l + LOAD_ADDR32(r8, CCSRBAR) + LOAD_ADDR32(r9, (CCSRBAR + 0x1000)) create_temp_ccsr: - /* Create a temporary TLB entry for new and old location */ - /* CCSRBAR: TLB 0, Entry 0, Supervisor R/W, IG, TS=0, 4KB */ + /* Create temporary TLB0 entries for CCSRBAR relocation. + * + * TLB0 is set-associative (NOTE(review): e6500 docs list 1024 entries, 8-way; confirm). + * The "esel" parameter selects the WAY within a set; the SET is + * determined by the virtual address (EPN). These two entries map + * different EPNs (CCSRBAR vs CCSRBAR+0x1000), so they fall in + * different TLB0 sets and do not overwrite each other. + * + * We use different ways (0 and 1) for visual clarity. Both entries + * are cleaned up by the TLB0 flash-invalidate (MMUCSR0) after + * relocation completes. 
*/ + + /* CCSRBAR new location: TLB0 Way 0, Supervisor R/W, IG, TS=0, 4KB */ set_tlb(0, 0, CCSRBAR, CCSRBAR, CCSRBAR_PHYS_HIGH, MAS3_SR | MAS3_SW, MAS2_I | MAS2_G, 0, BOOKE_PAGESZ_4K, 0, r3); - set_tlb(0, 0, + /* CCSRBAR old location: TLB0 Way 1, Supervisor R/W, IG, TS=0, 4KB */ + set_tlb(0, 1, CCSRBAR + 0x1000, CCSRBAR_DEF, 0, MAS3_SR | MAS3_SW, MAS2_I | MAS2_G, 0, BOOKE_PAGESZ_4K, 0, r3); verify_old_ccsr: /* verify the TLB is for old one */ - lis r0, CCSRBAR_DEF@h - ori r0, r0, CCSRBAR_DEF@l + LOAD_ADDR32(r0, CCSRBAR_DEF) #ifdef USE_CORENET_INTERFACE lwz r1, 4(r9) /* CCSRBARL */ #else @@ -373,12 +463,9 @@ ccsr_temp_law: #define CCSR_TEMP_LAW (LAWAR_ENABLE | \ LAWAR_TRGT_ID(LAW_TRGT_CORENET) | \ LAW_SIZE_4KB) - lis r0, CCSRBAR_PHYS_HIGH@h - ori r0, r0, CCSRBAR_PHYS_HIGH@l - lis r1, CCSRBAR_DEF@h - ori r1, r1, CCSRBAR_DEF@l - lis r2, CCSR_TEMP_LAW@h - ori r2, r2, CCSR_TEMP_LAW@l + LOAD_ADDR32(r0, CCSRBAR_PHYS_HIGH) + LOAD_ADDR32(r1, CCSRBAR_DEF) + LOAD_ADDR32(r2, CCSR_TEMP_LAW) stw r0, LAWBAR_BASE(0)(r9) /* LAWBARH */ stw r1, LAWBAR_BASE(0)+4(r9) /* LAWBARL */ sync @@ -393,13 +480,10 @@ read_old_ccsr: isync write_new_ccsrbar: - lis r0, CCSRBAR_PHYS_HIGH@h - ori r0, r0, CCSRBAR_PHYS_HIGH@l - lis r1, CCSRBAR@h - ori r1, r1, CCSRBAR@l + LOAD_ADDR32(r0, CCSRBAR_PHYS_HIGH) + LOAD_ADDR32(r1, CCSRBAR) #define CCSRAR_C 0x80000000 /* Commit */ - lis r2, CCSRAR_C@h - ori r2, r2, CCSRAR_C@l + LOAD_ADDR32(r2, CCSRAR_C) stw r0, 0(r9) /* CCSRBARH */ sync stw r1, 4(r9) /* CCSRBARL */ @@ -415,8 +499,7 @@ write_new_ccsrbar: lwz r0, 0(r9) isync /* write new CCSBAR */ - lis r0, (CCSRBAR_PHYS_HIGH << 20) | (CCSRBAR >> 12)@h - ori r0, r0, (CCSRBAR_PHYS_HIGH << 20) | (CCSRBAR >> 12)@l + LOAD_ADDR32(r0, (CCSRBAR_PHYS_HIGH << 20) | (CCSRBAR >> 12)) stw r0, 0(r9) sync isync @@ -431,6 +514,12 @@ invalidate_temp_tlb: /* L2TLB0_FI: TLB0 flash invalidate (write 1 to invalidate) */ li r3, 0x04 mtspr MMUCSR0, r3 + + /* Re-create TLB1 Entry 1 for the new (relocated) CCSRBAR address */ + 
set_tlb(1, 1, + CCSRBAR, CCSRBAR, CCSRBAR_PHYS_HIGH, + MAS3_SX | MAS3_SR | MAS3_SW, MAS2_I | MAS2_G, 0, + CCSRBAR_SIZE, 1, r3); #endif /* CCSRBAR_DEF != CCSRBAR_PHYS */ @@ -452,12 +541,16 @@ boot_page: 1: #endif +#if CCSRBAR_DEF == CCSRBAR ccsr_tlb: - /* CCSRBAR: TLB 1, Entry 1, Supervisor R/W, IG, TS=0, 1M/16M, IPROT */ + /* No relocation -- map CCSRBAR directly in TLB1 Entry 1 */ set_tlb(1, 1, CCSRBAR, CCSRBAR, CCSRBAR_PHYS_HIGH, MAS3_SX | MAS3_SR | MAS3_SW, MAS2_I | MAS2_G, 0, CCSRBAR_SIZE, 1, r3); +#endif + /* When CCSRBAR was relocated, TLB1 Entry 1 was already re-created + * for the new address after relocation (above). Do NOT overwrite it. */ #if defined(CORE_E5500) || defined(CORE_E6500) ccsr_law: @@ -466,12 +559,9 @@ ccsr_law: LAWAR_TRGT_ID(LAW_TRGT_CORENET) | \ LAW_SIZE_16MB) LOAD_ADDR32(r9, CCSRBAR + LAWBAR_BASE(0)) - lis r0, CCSRBAR_PHYS_HIGH@h - ori r0, r0, CCSRBAR_PHYS_HIGH@l - lis r1, CCSRBAR@h - ori r1, r1, CCSRBAR@l - lis r2, CCSR_LAW@h - ori r2, r2, CCSR_LAW@l + LOAD_ADDR32(r0, CCSRBAR_PHYS_HIGH) + LOAD_ADDR32(r1, CCSRBAR) + LOAD_ADDR32(r2, CCSR_LAW) stw r0, 0(r9) /* LAWBARH */ stw r1, 4(r9) /* LAWBARL */ sync @@ -483,19 +573,20 @@ ccsr_law: #ifdef FLASH_BASE_ADDR #if defined(CORE_E5500) || defined(CORE_E6500) - /* Memory Mapped NOR Flash (64/128MB) at 0xEC000000/0xE8000000 */ + /* Memory Mapped NOR Flash. For CW VPX3-152 (256 MB at 0xF0000000), + * this entry overlaps with the boot ROM TLB at top of flash. We + * handle that below by invalidating the boot ROM TLB after this + * entry is created (the IPROT-protected 256 MB Entry 2 will serve + * instruction fetches for the entire flash region, including boot ROM). 
*/ flash_law: /* FLASH - LAW1 (IFC 64/128MB) */ #define FLASH_LAW (LAWAR_ENABLE | \ LAWAR_TRGT_ID(LAW_TRGT_IFC) | \ FLASH_LAW_SIZE) LOAD_ADDR32(r9, CCSRBAR + LAWBAR_BASE(1)) - lis r0, FLASH_BASE_PHYS_HIGH@h - ori r0, r0, FLASH_BASE_PHYS_HIGH@l - lis r1, FLASH_BASE_ADDR@h - ori r1, r1, FLASH_BASE_ADDR@l - lis r2, FLASH_LAW@h - ori r2, r2, FLASH_LAW@l + LOAD_ADDR32(r0, FLASH_BASE_PHYS_HIGH) + LOAD_ADDR32(r1, FLASH_BASE_ADDR) + LOAD_ADDR32(r2, FLASH_LAW) stw r0, 0(r9) /* LAWBARH */ stw r1, 4(r9) /* LAWBARL */ sync @@ -516,6 +607,33 @@ flash_tlb: FLASH_BASE_ADDR, FLASH_BASE_ADDR, FLASH_BASE_PHYS_HIGH, MAS3_SX | MAS3_SW | MAS3_SR, FLASH_TLB_WING, 0, FLASH_TLB_PAGESZ, 1, r3); + +#ifdef BOARD_CW_VPX3152 + /* CW VPX3-152: the 256 MB flash TLB Entry 2 created above overlaps + * with the boot ROM TLB (Entry 0, at 0xFFFC0000-0xFFFFFFFF after + * shrink_default_tlb1) because flash covers 0xF0000000-0xFFFFFFFF. + * e6500 TLB1 multi-hit is a machine check. Invalidate the boot ROM + * TLB now -- Entry 2 (IPROT) will serve instruction fetches for the + * boot ROM region. The entry is rewritten via tlbwe with V=0 and IPROT=0 + * rather than tlbivax, since IPROT entries are protected from invalidation. + * + * Uses r14 (saved ESEL from shrink_default_tlb1) to target the + * correct entry. */ +vpx3_invalidate_boot_tlb: + /* Build MAS0 = TLBSEL=1, ESEL=r14 */ + rlwinm r3, r14, 16, MAS0_ESEL_MSK + oris r3, r3, MAS0_TLBSEL(1)@h + mtspr MAS0, r3 + tlbre /* read Entry r14 into MAS1-3/7 */ + mfspr r3, MAS1 + LOAD_ADDR32(r4, MAS1_IPROT) + andc r3, r3, r4 /* clear IPROT */ + rlwinm r3, r3, 0, 1, 31 /* clear V (bit 0) */ + mtspr MAS1, r3 + tlbwe /* write back with V=0, IPROT=0 */ + isync + msync +#endif #else flash_tlb: /* For TS/AS=1 map boot ROM */ @@ -1192,8 +1310,9 @@ isr_empty: * initialized on cold boot) -> nested machine check -> checkstop. * Use r3 as base, r4 as scratch. */ #if defined(DEBUG_UART) && defined(TARGET_nxp_t2080) - /* Print '!' 
to UART to signal exception occurred */ - LOAD_ADDR32(r3, 0xFE11C500) + /* Print '!' to UART to signal exception occurred. + * Use CCSRBAR (which is the relocated address on VPX3-152). */ + LOAD_ADDR32(r3, CCSRBAR + 0x11C500) .L_isr_wait: lbz r4, 5(r3) andi. r4, r4, 0x20 diff --git a/src/update_ram.c b/src/update_ram.c index 7f2beb5a73..b2cd32c98a 100644 --- a/src/update_ram.c +++ b/src/update_ram.c @@ -277,10 +277,10 @@ void RAMFUNCTION wolfBoot_start(void) image_ptr = wolfBoot_peek_image(&os_image, 0, NULL); if (image_ptr) { if (*((uint32_t*)image_ptr) == UBOOT_IMG_HDR_MAGIC) { - /* Note: Could parse header and get load address at 0x10 */ - - /* Skip 64 bytes (size of Legacy format image header) */ - load_address += UBOOT_IMG_HDR_SZ; + /* Skip 64-byte legacy header in source; load address unchanged + * so kernel is placed at WOLFBOOT_LOAD_ADDRESS */ + wolfBoot_printf("U-Boot Legacy header detected, skipping %d bytes\n", + UBOOT_IMG_HDR_SZ); os_image.fw_base += UBOOT_IMG_HDR_SZ; os_image.fw_size -= UBOOT_IMG_HDR_SZ; }