@@ -279,14 +279,29 @@ static inline void mb(void)
279279 __asm__ volatile ("fence iorw, iorw" ::: "memory" );
280280}
281281
282- /* DDR-init delay. Forwards to rdcycle-based udelay() so the effective
283- * delay tracks the current CPU frequency. The previous implementation
284- * was a hardcoded ~40 MHz busy loop; after mss_pll_init() switches the
285- * CPU clock to ~600 MHz it ran ~15x too short, silently violating
286- * LPDDR4 reset / MR-write timing windows. */
282+ /* DDR-init busy-loop delay. The argument is NOT a real microsecond --
283+ * it is whatever the legacy busy-loop produces at the current CPU
284+ * clock. Empirically reaches train_stat=0x1D on the first attempt with
285+ * the same per-attempt success rate as forwarding to udelay(), and is much
286+ * faster (~4 s vs ~50 s) for the TIP-wait timeout, which dominates
287+ * retry-loop time when training fails.
288+ *
289+ * Do NOT replace with udelay(us) without re-timing every call site
290+ * below: at 600 MHz the busy-loop delivers roughly us/20 of a real us,
291+ * so udelay(us) makes every post-PLL delay ~20x longer. In addition
292+ * to slowing retries, this can shift LPDDR4 / PHY timing windows --
293+ * earlier empirical runs showed an isolated additional regression
294+ * beyond the pre-existing ~30% per-attempt failure rate.
295+ *
296+ * The "5us" / "250us" / "2ms" comments at the call sites are LEGACY
297+ * and do not reflect the actual delay; preserved for git blame, not
298+ * as timing references. */
287299static void ddr_delay (uint32_t us )
288300{
289- udelay (us );
301+ volatile uint32_t i ;
302+ for (i = 0 ; i < us * 10 ; i ++ ) {
303+ __asm__ volatile ("nop" );
304+ }
290305}
291306
292307/* IOSCB Bank Controllers and DLL bases */
@@ -2983,11 +2998,22 @@ int mpfs_ddr_init(void)
29832998 wolfBoot_printf ("MT53D512M32D2DS-053 x32 @ 1600 Mbps\n" );
29842999 wolfBoot_printf ("========================================\n" );
29853000
2986- /* Step 1: NWC/PLL initialization */
2987- ret = nwc_init ();
2988- if (ret != 0 ) {
2989- wolfBoot_printf ("DDR: NWC init FAILED\n" );
2990- return -1 ;
3001+ /* Step 1: NWC/PLL initialization. Run only once per boot -- the
3002+ * MSS / DDR PLLs lock on first call and re-running mss_pll_init()
3003+ * hangs on the lock wait when called against an already-locked
3004+ * PLL. The outer retry loop in hal_init() re-enters this function
3005+ * for full controller/PHY re-init, but the PLLs only need to be
3006+ * brought up once. */
3007+ {
3008+ static int nwc_initialized = 0 ;
3009+ if (!nwc_initialized ) {
3010+ ret = nwc_init ();
3011+ if (ret != 0 ) {
3012+ wolfBoot_printf ("DDR: NWC init FAILED\n" );
3013+ return -1 ;
3014+ }
3015+ nwc_initialized = 1 ;
3016+ }
29913017 }
29923018
29933019 /* Step 2: Enable DDR controller clock */
@@ -3029,141 +3055,142 @@ int mpfs_ddr_init(void)
30293055 DDRPHY_REG (PHY_TIP_CFG_PARAMS ) = LIBERO_SETTING_TIP_CFG_PARAMS ;
30303056 mb ();
30313057
3032- /* Step 9: Run training, with retry-on-failure mirroring HSS's
3033- * DDR_TRAINING_FAIL state machine (mss_ddr.c:512). HSS retries
3034- * up to MAX_RETRY_COUNT times: on each retry it resets CKE,
3035- * forces controller reset, clears DFI/CTRLR_INIT, then re-runs
3036- * training (which selects a different refclk_offset internally).
3058+ /* Step 9: Run training + post-training + MTC sanity, with retry on
3059+ * MTC failure.
30373060 *
3038- * Phase 3.10.3 (2): we don't yet have the refclk_offset sweep
3039- * inside our run_training, but trying just the basic retry
3040- * (controller reset + re-init) might surface whether
3041- * train_stat advances on a fresh attempt.
3061+ * Why MTC is the retry trigger (not PHY_TRAINING_STATUS): when the
3062+ * manual ADDCMD training picks a marginal phase/dly that doesn't
3063+ * resolve into a usable DRAM alignment, train_stat sticks at 0x1
3064+ * (BCLK_SCLK only). But TIP keeps spinning in the background and
3065+ * eventually flips the WRLVL/RDGATE/DQ_DQS bits to read 0x1D, even
3066+ * though the alignment is bogus. An outer retry keyed on
3067+ * PHY_TRAINING_STATUS sees that bogus 0x1D and stops. MTC actually
3068+ * exercises the DDR controller -- it times out unambiguously when
3069+ * training was bad, and is the reliable signal.
3070+ *
3071+ * Empirical baseline: ~30% per-attempt training failure rate; the 3
3072+ * total attempts configured (1 inner x 3 outer in hal_init) give
3072+ * ~97% cumulative success (5 attempts would be needed for ~99.7%).
30423073 */
30433074 {
3044- uint32_t retry_count = 0 ;
3045- const uint32_t MAX_RETRY = 3 ;
3046- uint32_t train_stat_now ;
3047-
3048- ret = run_training ();
3049- train_stat_now = DDRPHY_REG (PHY_TRAINING_STATUS );
3050- while ((train_stat_now & TRAINING_MASK ) != (BCLK_SCLK_BIT | WRLVL_BIT |
3051- RDGATE_BIT | DQ_DQS_BIT ) && retry_count < MAX_RETRY ) {
3052- wolfBoot_printf ("DDR: Training retry %lu (train_stat=0x%x)\n" ,
3053- (unsigned long )retry_count , train_stat_now );
3054- /* HSS DDR_TRAINING_FAIL reset sequence (mss_ddr.c:519-538) */
3055- DDRCFG_REG (MC_INIT_CS ) = 0x1 ;
3056- DDRCFG_REG (MC_INIT_DISABLE_CKE ) = 0x1 ;
3057- ddr_delay (500 ); /* DELAY_CYCLES_5_MICRO */
3058- DDRCFG_REG (MC_INIT_FORCE_RESET ) = 0x1 ;
3059- ddr_delay (200000 ); /* DELAY_CYCLES_2MS */
3060- retry_count ++ ;
3061- DDRCFG_REG (MC_DFI_INIT_START ) = 0x0 ;
3062- DDRCFG_REG (MC_CTRLR_INIT ) = 0x0 ;
3063- DDRPHY_REG (PHY_TRAINING_START ) = 0x0 ;
3064- mb ();
3065- /* Re-run training */
3075+ uint32_t train_retry = 0 ;
3076+ /* 1 inner attempt only. When manual training picks a marginal
3077+ * alignment, TIP cannot recover and the MTC engine wedges
3078+ * (DONE_ACK stuck at 0 even with train_stat=0x1D). Re-running
3079+ * run_training() inside the same controller-state cannot
3080+ * unwedge MTC -- the outer loop in hal_init re-enters
3081+ * mpfs_ddr_init from scratch, which clears MTC via the SYSREG
3082+ * DDRC soft-reset pulse, and that's the only path that
3083+ * actually recovers. */
3084+ const uint32_t MAX_TRAIN_RETRY = 1 ;
3085+ uint32_t lane ;
3086+ uint32_t mtc_to ;
3087+ int mtc_pass = 0 ;
3088+
3089+ while (train_retry < MAX_TRAIN_RETRY ) {
3090+ if (train_retry > 0 ) {
3091+ wolfBoot_printf (
3092+ "DDR: Retry %u/%u after MTC sanity FAIL\n" ,
3093+ (unsigned )train_retry , (unsigned )MAX_TRAIN_RETRY );
3094+ /* HSS DDR_TRAINING_FAIL reset sequence (mss_ddr.c:519-538) */
3095+ DDRCFG_REG (MC_INIT_CS ) = 0x1 ;
3096+ DDRCFG_REG (MC_INIT_DISABLE_CKE ) = 0x1 ;
3097+ ddr_delay (500 );
3098+ DDRCFG_REG (MC_INIT_FORCE_RESET ) = 0x1 ;
3099+ ddr_delay (200000 );
3100+ DDRCFG_REG (MC_DFI_INIT_START ) = 0x0 ;
3101+ DDRCFG_REG (MC_CTRLR_INIT ) = 0x0 ;
3102+ DDRPHY_REG (PHY_TRAINING_START ) = 0x0 ;
3103+ mb ();
3104+ }
3105+ train_retry ++ ;
3106+
30663107 ret = run_training ();
3067- train_stat_now = DDRPHY_REG (PHY_TRAINING_STATUS );
3068- }
3069- wolfBoot_printf ("DDR: Final train_stat=0x%x after %lu retries\n" ,
3070- train_stat_now , (unsigned long )retry_count );
3071- }
3072- if (ret != 0 ) {
3073- wolfBoot_printf ("DDR: Training FAILED\n" );
3074- return -2 ;
3075- }
3108+ if (ret != 0 ) {
3109+ continue ;
3110+ }
30763111
3077- /* Phase 3.10.3 (D-3 v2): HSS post-training sequence.
3078- *
3079- * After train_stat=0x1D, HSS does these critical steps before any
3080- * CPU access (mss_ddr.c DDR_TRAINING_WRITE_CALIBRATION + after):
3081- *
3082- * (a) Set rpc220 = 0xC (LPDDR4 default DQ delay center)
3083- * (b) load_dq(lane) for each of 4 lanes -- per-lane DQ delay load
3084- * (c) write_calibration_using_mtc() -- HSS's MTC sweep (validates
3085- * DDR via the on-chip MTC engine, no CPU bus involved)
3086- * (d) MTC_test counting + pseudo-random patterns (DDR_FULL_MTC_CHECK)
3087- * (e) Then the CPU access at 0xC0000000 / 0x80000000 succeeds.
3088- *
3089- * Without (a)-(d), the first CPU write to DDR hangs (we observe this
3090- * for both cached 0x80000000 and non-cached 0xC0000000). The MTC
3091- * activity exercises the DDR controller and seems to "wake up" the
3092- * data path / drain any lingering training state.
3093- */
3094- wolfBoot_printf ("DDR: Post-training sequence...\n" );
3112+ /* HSS DDR_TRAINING_SET_FINAL_MODE: rewrite DDRPHY_MODE with
3113+ * LIBERO setting to transition PHY from training to
3114+ * operational mode (mss_ddr.c:1619). */
3115+ wolfBoot_printf ("DDR: Post-training sequence...\n" );
3116+ DDRPHY_REG (PHY_MODE ) = LIBERO_SETTING_DDRPHY_MODE ;
3117+ mb ();
3118+ wolfBoot_printf (" DDRPHY_MODE -> 0x%x (final)\n" ,
3119+ DDRPHY_REG (PHY_MODE ));
30953120
3096- /* DDR_TRAINING_SET_FINAL_MODE (HSS mss_ddr.c:1619): rewrite
3097- * DDRPHY_MODE with LIBERO setting after training success. This
3098- * transitions the PHY from training mode to operational mode. */
3099- DDRPHY_REG (PHY_MODE ) = LIBERO_SETTING_DDRPHY_MODE ;
3100- mb ();
3101- wolfBoot_printf (" DDRPHY_MODE -> 0x%x (final)\n" ,
3102- DDRPHY_REG (PHY_MODE ));
3121+ /* rpc220 = 0xC for LPDDR4 -- centers DQ/DQS sampling. */
3122+ DDRPHY_REG (PHY_RPC220 ) = 0x0CUL ;
3123+ mb ();
31033124
3104- /* (a) rpc220 = 0xC for LPDDR4 -- centers DQ/DQS sampling */
3105- DDRPHY_REG (PHY_RPC220 ) = 0x0CUL ;
3106- mb ();
3125+ /* load_dq(lane) for each of 4 lanes (HSS mss_ddr.c:2916).
3126+ * Per-lane: clear move, set DFI override + expert mode,
3127+ * pulse load, restore expert mode. */
3128+ for (lane = 0 ; lane < 4 ; lane ++ ) {
3129+ DDRPHY_REG (PHY_EXPERT_DLYCNT_MOVE0 ) = 0x00UL ;
3130+ DDRPHY_REG (PHY_EXPERT_DFI_STATUS_TO_SHIM ) = 0x07UL ;
3131+ DDRPHY_REG (PHY_EXPERT_MODE_EN ) = 0x21UL ;
3132+ DDRPHY_REG (PHY_EXPERT_DLYCNT_LOAD0 ) =
3133+ (0xFFUL << (lane * 8UL ));
3134+ DDRPHY_REG (PHY_EXPERT_DLYCNT_LOAD0 ) = 0x00UL ;
3135+ DDRPHY_REG (PHY_EXPERT_MODE_EN ) = 0x08UL ;
3136+ }
3137+ mb ();
3138+ wolfBoot_printf (" load_dq done for 4 lanes\n" );
3139+ wolfBoot_printf (
3140+ " CTRLR_INIT_DONE=0x%x AUTOINIT_DIS=0x%x train_stat=0x%x\n" ,
3141+ DDRCFG_REG (MC_CTRLR_INIT_DONE ),
3142+ DDRCFG_REG (MC_INIT_AUTOINIT_DISABLE ),
3143+ DDRPHY_REG (PHY_TRAINING_STATUS ));
3144+
3145+ /* MTC sanity: smallest region (size=8 -> 2^8 = 256 B),
3146+ * counting pattern, sequential addressing, RW. */
3147+ DDRCFG_REG (MT_EN ) = 0 ;
3148+ DDRCFG_REG (MT_EN_SINGLE ) = 0 ;
3149+ DDRCFG_REG (MT_STOP_ON_ERROR ) = 0 ;
3150+ DDRCFG_REG (0x440C ) = 0 ; /* MT_RD_ONLY */
3151+ DDRCFG_REG (0x4410 ) = 0 ; /* MT_WR_ONLY */
3152+ DDRCFG_REG (MT_DATA_PATTERN ) = 0 ;
3153+ DDRCFG_REG (MT_ADDR_PATTERN ) = 0 ;
3154+ DDRCFG_REG (MT_START_ADDR_0 ) = 0 ;
3155+ DDRCFG_REG (MT_START_ADDR_1 ) = 0 ;
3156+ DDRCFG_REG (MT_ADDR_BITS ) = 8 ;
3157+ DDRCFG_REG (MT_ERROR_MASK_0 ) = 0xFFFFFFFFUL ;
3158+ DDRCFG_REG (MT_ERROR_MASK_1 ) = 0xFFFFFFFFUL ;
3159+ DDRCFG_REG (MT_ERROR_MASK_2 ) = 0xFFFFFFFFUL ;
3160+ DDRCFG_REG (MT_ERROR_MASK_3 ) = 0xFFFFFFFFUL ;
3161+ DDRCFG_REG (MT_ERROR_MASK_4 ) = 0xFFFFFFFFUL ;
3162+ DDRCFG_REG (MT_EN_SINGLE ) = 0 ;
3163+ DDRCFG_REG (MT_EN_SINGLE ) = 1 ;
3164+ mtc_to = 0xFFFFFFUL ;
3165+ while ((DDRCFG_REG (MT_DONE_ACK ) & 0x1UL ) == 0 && mtc_to > 0 ) {
3166+ mtc_to -- ;
3167+ }
3168+ if (mtc_to == 0 ) {
3169+ wolfBoot_printf (
3170+ " MTC 256B TIMEOUT (DONE_ACK=0x%x ERR_STS=0x%x)\n" ,
3171+ DDRCFG_REG (MT_DONE_ACK ), DDRCFG_REG (MT_ERROR_STS ));
3172+ continue ;
3173+ }
3174+ if ((DDRCFG_REG (MT_ERROR_STS ) & 0x1UL ) != 0 ) {
3175+ wolfBoot_printf (" MTC 256B FAIL (err_sts=0x%x)\n" ,
3176+ DDRCFG_REG (MT_ERROR_STS ));
3177+ continue ;
3178+ }
3179+ wolfBoot_printf (" MTC 256B PASS (err_sts=0x%x to_used=0x%x)\n" ,
3180+ DDRCFG_REG (MT_ERROR_STS ),
3181+ (unsigned int )(0xFFFFFFUL - mtc_to ));
31073182
3108- /* (b) load_dq(lane) for each of 4 lanes (HSS mss_ddr.c:2916).
3109- * Per-lane sequence: clear move, set DFI override + expert mode,
3110- * pulse load, restore expert mode. */
3111- {
3112- uint32_t lane ;
3113- for (lane = 0 ; lane < 4 ; lane ++ ) {
3114- DDRPHY_REG (PHY_EXPERT_DLYCNT_MOVE0 ) = 0x00UL ;
3115- DDRPHY_REG (PHY_EXPERT_DFI_STATUS_TO_SHIM ) = 0x07UL ;
3116- DDRPHY_REG (PHY_EXPERT_MODE_EN ) = 0x21UL ;
3117- DDRPHY_REG (PHY_EXPERT_DLYCNT_LOAD0 ) = (0xFFUL << (lane * 8UL ));
3118- DDRPHY_REG (PHY_EXPERT_DLYCNT_LOAD0 ) = 0x00UL ;
3119- DDRPHY_REG (PHY_EXPERT_MODE_EN ) = 0x08UL ;
3183+ mtc_pass = 1 ;
3184+ break ;
31203185 }
3121- mb ();
3122- wolfBoot_printf (" load_dq done for 4 lanes\n" );
3123- }
3124-
3125- /* Pre-MTC diagnostic snapshot */
3126- wolfBoot_printf (" CTRLR_INIT_DONE=0x%x AUTOINIT_DIS=0x%x train_stat=0x%x\n" ,
3127- DDRCFG_REG (MC_CTRLR_INIT_DONE ),
3128- DDRCFG_REG (MC_INIT_AUTOINIT_DISABLE ),
3129- DDRPHY_REG (PHY_TRAINING_STATUS ));
31303186
3131- /* (c)+(d) MTC sanity check. Smallest region (size=8 -> 2^8 = 256 B).
3132- * Per HSS MTC_test sequence: MT_EN=0, MT_RD_ONLY=0, MT_WR_ONLY=0, ... */
3133- {
3134- uint32_t mtc_to ;
3135- uint32_t mtc_err ;
3136- DDRCFG_REG (MT_EN ) = 0 ;
3137- DDRCFG_REG (MT_EN_SINGLE ) = 0 ;
3138- DDRCFG_REG (MT_STOP_ON_ERROR ) = 0 ;
3139- DDRCFG_REG (0x440C ) = 0 ; /* MT_RD_ONLY = 0 (normal RW) */
3140- DDRCFG_REG (0x4410 ) = 0 ; /* MT_WR_ONLY = 0 */
3141- DDRCFG_REG (MT_DATA_PATTERN ) = 0 ; /* counting pattern */
3142- DDRCFG_REG (MT_ADDR_PATTERN ) = 0 ; /* sequential */
3143- DDRCFG_REG (MT_START_ADDR_0 ) = 0 ;
3144- DDRCFG_REG (MT_START_ADDR_1 ) = 0 ;
3145- DDRCFG_REG (MT_ADDR_BITS ) = 8 ; /* 2^8 = 256 bytes */
3146- DDRCFG_REG (MT_ERROR_MASK_0 ) = 0xFFFFFFFFUL ;
3147- DDRCFG_REG (MT_ERROR_MASK_1 ) = 0xFFFFFFFFUL ;
3148- DDRCFG_REG (MT_ERROR_MASK_2 ) = 0xFFFFFFFFUL ;
3149- DDRCFG_REG (MT_ERROR_MASK_3 ) = 0xFFFFFFFFUL ;
3150- DDRCFG_REG (MT_ERROR_MASK_4 ) = 0xFFFFFFFFUL ;
3151- DDRCFG_REG (MT_EN_SINGLE ) = 0 ;
3152- DDRCFG_REG (MT_EN_SINGLE ) = 1 ; /* Run */
3153- mtc_to = 0xFFFFFFUL ;
3154- while ((DDRCFG_REG (MT_DONE_ACK ) & 0x1UL ) == 0 && mtc_to > 0 ) {
3155- mtc_to -- ;
3156- }
3157- if (mtc_to == 0 ) {
3158- wolfBoot_printf (" MTC 256B TIMEOUT (DONE_ACK=0x%x ERR_STS=0x%x)\n" ,
3159- DDRCFG_REG (MT_DONE_ACK ), DDRCFG_REG (MT_ERROR_STS ));
3160- } else {
3161- mtc_err = DDRCFG_REG (MT_ERROR_STS ) & 0x1UL ;
3162- wolfBoot_printf (" MTC 256B %s (err_sts=0x%x to_used=0x%x)\n" ,
3163- mtc_err == 0 ? "PASS" : "FAIL" ,
3164- DDRCFG_REG (MT_ERROR_STS ),
3165- (unsigned int )(0xFFFFFFUL - mtc_to ));
3187+ if (!mtc_pass ) {
3188+ wolfBoot_printf ("DDR: Training/MTC failed after %u retries\n" ,
3189+ (unsigned )MAX_TRAIN_RETRY );
3190+ return -2 ;
31663191 }
3192+ wolfBoot_printf ("DDR: Training+MTC PASS after %u retries\n" ,
3193+ (unsigned )(train_retry - 1 ));
31673194 }
31683195
31693196 /* DDR pre-fill is currently disabled because both PDMA-based and
@@ -3326,9 +3353,32 @@ void hal_init(void)
33263353 wolfBoot_printf ("Running on E51 (hart 0) in M-mode\n" );
33273354
33283355#ifdef MPFS_DDR_INIT
3329- /* Bring up LPDDR4 before any DDR-resident operations */
3330- if (mpfs_ddr_init () != 0 ) {
3331- wolfBoot_printf ("DDR: Init FAILED - continuing with L2 only\n" );
3356+ /* Bring up LPDDR4 before any DDR-resident operations.
3357+ *
3358+ * Outer retry loop: each call to mpfs_ddr_init() does a SYSREG DDRC
3359+ * soft-reset pulse, which clears the MTC engine state. If the
3360+ * inner retry inside mpfs_ddr_init() exhausts (typically because
3361+ * MTC wedged after the first failure), come back here for a full
3362+ * controller re-init. Empirical: per-attempt failure rate ~30%, so
3363+ * 3 outer attempts cover ~97% of boots (0.3^3 ~= 2.7% residual). */
3364+ {
3365+ unsigned int outer_retry ;
3366+ const unsigned int MAX_OUTER_RETRY = 3 ;
3367+ int ddr_ok = 0 ;
3368+ for (outer_retry = 0 ; outer_retry < MAX_OUTER_RETRY ; outer_retry ++ ) {
3369+ if (outer_retry > 0 ) {
3370+ wolfBoot_printf (
3371+ "DDR: Outer retry %u/%u (full DDRC re-init)\n" ,
3372+ outer_retry , MAX_OUTER_RETRY );
3373+ }
3374+ if (mpfs_ddr_init () == 0 ) {
3375+ ddr_ok = 1 ;
3376+ break ;
3377+ }
3378+ }
3379+ if (!ddr_ok ) {
3380+ wolfBoot_printf ("DDR: Init FAILED - continuing with L2 only\n" );
3381+ }
33323382 }
33333383#endif
33343384#endif
0 commit comments