Skip to content

Commit 9a5486f

Browse files
committed
SFP boot scripts: wait for carrier before loading SGMII+ module (#602)
* Sync SFP SGMII+ boot scripts: carrier wait before module load

  Boot scripts now re-launch themselves in the background (via nohup) and wait up to 90s for the SFP to establish a 1G link before loading the kernel module. Fixes a boot-order race with SFPs like the Zyxel PMG3000 that need ~15s to configure their SerDes. If no carrier appears within the timeout, the module loads anyway to handle SFPs hard-locked at 2.5G.

* Show carrier wait note when SFP boot script deployed but module not loaded

  Explains the up-to-90s link check period that allows the ONT to complete its boot sequence before the SGMII+ module loads.

* Suppress Issue badge for 90s after SFP deploy, fix alert margin and wording

  After deploying an SFP tweak, the boot script waits up to 90s for carrier before loading the module. During this grace period, show Active instead of Issue, since the module not being loaded yet is expected. Also add a bottom margin to the carrier wait info alert and clarify "kernel module" vs "module" to avoid confusion with SFP modules.

* Carrier wait note: mention deploy in addition to reboot
1 parent bacf991 commit 9a5486f

3 files changed

Lines changed: 99 additions & 19 deletions

File tree

src/NetworkOptimizer.Web/Components/Pages/PerformanceTweaks.razor

Lines changed: 25 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -220,7 +220,7 @@
220220
@foreach (var def in _tweakDefs.Where(d => d.IsCompatibleWith(_status?.GatewayModel)))
221221
{
222222
var tweakStatus = _status?.Tweaks.GetValueOrDefault(def.Id);
223-
var effectiveStatus = GetEffectiveStatus(tweakStatus);
223+
var effectiveStatus = GetEffectiveStatus(tweakStatus, def.Id);
224224
var exclusiveOtherActive = def.MutuallyExclusiveWith != null
225225
&& _status?.Tweaks.TryGetValue(def.MutuallyExclusiveWith, out var otherTweak) == true
226226
&& (otherTweak.IsActive || otherTweak.IsManuallyDeployed);
@@ -258,6 +258,14 @@
258258
<p class="pt-tweak-description" style="font-style: italic;">@def.ExtraNote</p>
259259
}
260260

261+
<!-- SFP carrier wait note: boot script deployed but module not yet loaded -->
262+
@if ((def.Id == "sfp-sgmiiplus" || def.Id == "sfp-sgmiiplus-port6") && tweakStatus?.BootScriptDeployed == true && tweakStatus?.RuntimeDetected != true)
263+
{
264+
<div class="alert alert-info" style="margin-top: 0.75rem; margin-bottom: 0.75rem;">
265+
<strong>Waiting for link:</strong> After a reboot or deploy, the boot script waits up to 90 seconds for the SFP to establish a 1 G link before loading the SGMII+ kernel module. This allows time for the ONT's boot sequence and SerDes configuration. If the kernel module has not loaded yet, it may still be in this link check period.
266+
</div>
267+
}
268+
261269
<!-- Health Check Results -->
262270
@if (tweakStatus != null && tweakStatus.HealthChecks.Any())
263271
{
@@ -934,6 +942,7 @@ ls -la /var/config/run-syslog.sh</code></pre>
934942
private bool _showRemoveConfirm;
935943
private string? _pendingRemoveTweakId;
936944
private bool _showZyxelInstructions;
945+
private readonly Dictionary<string, DateTime> _sfpDeployTimes = new();
937946
private bool _allConfirmed => _confirmBackup && _confirmBackupDownloaded && _confirmWarranty && _confirmRisk;
938947
private bool _canDeploy => _status?.UdmBootInstalled == true && _status?.FirmwareSupported == true;
939948
private List<string> _deploySteps = new();
@@ -1122,7 +1131,11 @@ ls -la /var/config/run-syslog.sh</code></pre>
11221131
var result = await DeployService.DeployTweakAsync(tweakId, progress);
11231132

11241133
if (result.success)
1134+
{
1135+
if (tweakId is "sfp-sgmiiplus" or "sfp-sgmiiplus-port6")
1136+
_sfpDeployTimes[tweakId] = DateTime.UtcNow;
11251137
await LoadStatusAsync();
1138+
}
11261139
}
11271140
finally
11281141
{
@@ -1163,13 +1176,22 @@ ls -la /var/config/run-syslog.sh</code></pre>
11631176
await LoadStatusAsync();
11641177
}
11651178

1166-
private TweakDisplayStatus GetEffectiveStatus(TweakDeploymentStatus? status)
1179+
private bool IsSfpInCarrierWait(string tweakId) =>
1180+
_sfpDeployTimes.TryGetValue(tweakId, out var deployTime)
1181+
&& (DateTime.UtcNow - deployTime).TotalSeconds < 90;
1182+
1183+
private TweakDisplayStatus GetEffectiveStatus(TweakDeploymentStatus? status, string? tweakId = null)
11671184
{
11681185
if (status == null) return TweakDisplayStatus.NotDeployed;
11691186
if (status.IsManuallyDeployed) return TweakDisplayStatus.Manual;
11701187
if (status.IsActive && string.IsNullOrEmpty(status.IssueDescription)) return TweakDisplayStatus.Active;
11711188
if (status.IsActive && !string.IsNullOrEmpty(status.IssueDescription)) return TweakDisplayStatus.Issue;
1172-
if (status.BootScriptDeployed && !status.IsActive) return TweakDisplayStatus.Issue;
1189+
if (status.BootScriptDeployed && !status.IsActive)
1190+
{
1191+
if (tweakId != null && IsSfpInCarrierWait(tweakId))
1192+
return TweakDisplayStatus.Active;
1193+
return TweakDisplayStatus.Issue;
1194+
}
11731195
if (status.RuntimeDetected && !status.BootScriptDeployed) return TweakDisplayStatus.Detected;
11741196
return TweakDisplayStatus.NotDeployed;
11751197
}

src/NetworkOptimizer.Web/Resources/PerfTweaks/19-sfp-sgmiiplus-eth5.sh

Lines changed: 37 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,22 @@
11
#!/bin/sh
22
# 19-sfp-sgmiiplus-eth5.sh: Force 1st SFP+ port (eth5 / Port 6) to SGMII+ 2.5G
33
#
4-
# Loads a kernel module that switches uniphy2 from SGMII 1G to SGMII+ 2.5G
5-
# by calling the QCA-SSDK's internal uniphy mode set function directly,
6-
# bypassing SFP EEPROM validation that blocks the speed change.
4+
# Waits for the SFP to establish a 1G link, then loads a kernel module that
5+
# switches uniphy2 from SGMII 1G to SGMII+ 2.5G. The wait avoids a boot-order
6+
# race with SFPs that need time to configure their SerDes (e.g., Zyxel PMG3000
7+
# takes ~15s after boot to fire its 2.5G override). If no 1G link appears
8+
# within the timeout, the module loads anyway — this handles SFPs that are
9+
# hard-locked at 2.5G and can't establish a 1G link without the host matching.
10+
#
11+
# The module bypasses the SSDK's SFP EEPROM validation by calling the uniphy
12+
# mode set function directly. The SSDK's MAC sync polling loop re-reads the
13+
# SFP EEPROM every ~12s and would revert the 2.5G change. The module excludes
14+
# eth5 from the polling loop's port bitmap and restarts it — the loop continues
15+
# to run for all other ports, so eth6 link recovery is unaffected.
716
#
817
# WARNING: This targets eth5 / Port 6 (the 1st SFP+ port) ONLY.
918
# For eth6 / Port 7, use 20-sfp-sgmiiplus.sh instead.
1019
#
11-
# The SSDK's MAC sync polling loop re-reads the SFP EEPROM every ~12s and
12-
# would revert the 2.5G change. The module excludes eth5 from the
13-
# polling loop's port bitmap and restarts it — the loop continues to run
14-
# for all other ports, so eth6 link recovery is unaffected.
15-
#
1620
# Target: UCG-Fiber / UXG-Fiber (IPQ9574, kernel 5.4.213-ui-ipq9574)
1721
# Requires: qca-ssdk.ko loaded, module pre-deployed to /data/sfp-sgmiiplus/
1822

@@ -22,11 +26,19 @@ MODULE_DIR="/data/sfp-sgmiiplus"
2226
MODULE_NAME="force_uniphy2_sgmiiplus"
2327
MODULE_FILE="${MODULE_DIR}/${MODULE_NAME}.ko"
2428
CLOCK_PATH="/sys/kernel/debug/clk/uniphy2_gcc_tx_clk/clk_rate"
29+
IFACE="eth5"
30+
CARRIER_TIMEOUT=90
2531

2632
log() {
2733
echo "$(date '+%Y-%m-%d %H:%M:%S') - $1" >> "${LOG_FILE}"
2834
}
2935

36+
# Re-exec in background so on_boot.d doesn't block waiting for carrier
37+
if [ "$1" != "--bg" ]; then
38+
nohup "$0" --bg >/dev/null 2>&1 &
39+
exit 0
40+
fi
41+
3042
# ─── Sanity checks ───
3143

3244
if [ ! -f "${MODULE_FILE}" ]; then
@@ -44,6 +56,23 @@ if ! lsmod | grep -q "qca_ssdk"; then
4456
exit 1
4557
fi
4658

59+
# ─── Wait for 1G carrier or timeout ───
60+
61+
elapsed=0
62+
while [ $elapsed -lt $CARRIER_TIMEOUT ]; do
63+
carrier=$(cat /sys/class/net/${IFACE}/carrier 2>/dev/null)
64+
if [ "$carrier" = "1" ]; then
65+
log "${IFACE} has carrier after ${elapsed}s — loading module"
66+
break
67+
fi
68+
sleep 2
69+
elapsed=$((elapsed + 2))
70+
done
71+
72+
if [ "$carrier" != "1" ]; then
73+
log "${IFACE} no carrier after ${CARRIER_TIMEOUT}s — loading module anyway (SFP may be hard-locked at 2.5G)"
74+
fi
75+
4776
# ─── Load module ───
4877

4978
log "Loading ${MODULE_NAME}..."

src/NetworkOptimizer.Web/Resources/PerfTweaks/20-sfp-sgmiiplus.sh

Lines changed: 37 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,20 @@
11
#!/bin/sh
22
# 20-sfp-sgmiiplus.sh: Force 2nd SFP+ port (eth6 / Port 7) to SGMII+ 2.5G
33
#
4-
# Loads a kernel module that switches uniphy1 from SGMII 1G to SGMII+ 2.5G
5-
# by calling the QCA-SSDK's internal uniphy mode set function directly,
6-
# bypassing SFP EEPROM validation that blocks the speed change.
4+
# Waits for the SFP to establish a 1G link, then loads a kernel module that
5+
# switches uniphy1 from SGMII 1G to SGMII+ 2.5G. The wait avoids a boot-order
6+
# race with SFPs that need time to configure their SerDes (e.g., Zyxel PMG3000
7+
# takes ~15s after boot to fire its 2.5G override). If no 1G link appears
8+
# within the timeout, the module loads anyway — this handles SFPs that are
9+
# hard-locked at 2.5G and can't establish a 1G link without the host matching.
710
#
8-
# WARNING: This targets eth6 / Port 7 (the 2nd SFP+ port) ONLY.
11+
# The module bypasses the SSDK's SFP EEPROM validation by calling the uniphy
12+
# mode set function directly. The SSDK's MAC sync polling loop re-reads the
13+
# SFP EEPROM every ~12s and would revert the 2.5G change. The module (v3+)
14+
# excludes eth6 from the polling loop's port bitmap and restarts it — the loop
15+
# continues to run for all other ports, so eth5 link recovery is unaffected.
916
#
10-
# The SSDK's MAC sync polling loop re-reads the SFP EEPROM every ~12s and
11-
# would revert the 2.5G change. The module (v3+) excludes eth6 from the
12-
# polling loop's port bitmap and restarts it — the loop continues to run
13-
# for all other ports, so eth5 link recovery is unaffected.
17+
# WARNING: This targets eth6 / Port 7 (the 2nd SFP+ port) ONLY.
1418
#
1519
# Target: UCG-Fiber / UXG-Fiber (IPQ9574, kernel 5.4.213-ui-ipq9574)
1620
# Requires: qca-ssdk.ko loaded, module pre-deployed to /data/sfp-sgmiiplus/
@@ -21,11 +25,19 @@ MODULE_DIR="/data/sfp-sgmiiplus"
2125
MODULE_NAME="force_uniphy1_sgmiiplus"
2226
MODULE_FILE="${MODULE_DIR}/${MODULE_NAME}.ko"
2327
CLOCK_PATH="/sys/kernel/debug/clk/uniphy1_gcc_tx_clk/clk_rate"
28+
IFACE="eth6"
29+
CARRIER_TIMEOUT=90
2430

2531
log() {
2632
echo "$(date '+%Y-%m-%d %H:%M:%S') - $1" >> "${LOG_FILE}"
2733
}
2834

35+
# Re-exec in background so on_boot.d doesn't block waiting for carrier
36+
if [ "$1" != "--bg" ]; then
37+
nohup "$0" --bg >/dev/null 2>&1 &
38+
exit 0
39+
fi
40+
2941
# ─── Sanity checks ───
3042

3143
if [ ! -f "${MODULE_FILE}" ]; then
@@ -43,6 +55,23 @@ if ! lsmod | grep -q "qca_ssdk"; then
4355
exit 1
4456
fi
4557

58+
# ─── Wait for 1G carrier or timeout ───
59+
60+
elapsed=0
61+
while [ $elapsed -lt $CARRIER_TIMEOUT ]; do
62+
carrier=$(cat /sys/class/net/${IFACE}/carrier 2>/dev/null)
63+
if [ "$carrier" = "1" ]; then
64+
log "${IFACE} has carrier after ${elapsed}s — loading module"
65+
break
66+
fi
67+
sleep 2
68+
elapsed=$((elapsed + 2))
69+
done
70+
71+
if [ "$carrier" != "1" ]; then
72+
log "${IFACE} no carrier after ${CARRIER_TIMEOUT}s — loading module anyway (SFP may be hard-locked at 2.5G)"
73+
fi
74+
4675
# ─── Load module ───
4776

4877
log "Loading ${MODULE_NAME}..."

0 commit comments

Comments
 (0)