Skip to content

Commit 9809de3

Browse files
committed
WIP: adding features 1) distinct logging for RCP and Posix processes; 2) log style output of otbr.sh; 3) param if to set bb interface per OTBR; 4) fast fail handling when OTBR process exits - without 5 sec wait.
1 parent f3de162 commit 9809de3

14 files changed

Lines changed: 186 additions & 77 deletions

File tree

GUIDE.md

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -292,12 +292,15 @@ myusername ALL=(ALL) NOPASSWD: /usr/local/bin/ot-ctl
292292

293293
Check that the binaries are installed in the given locations: if not, adjust the paths accordingly. If both binaries are available in the user's PATH, OTNS will find them automatically.
294294

295-
Below is an example how OTNS should be run to add an OTBR to the simulation. Once OTNS is running, the CLI command to add an OTBR is `add otbr`.
295+
Below are examples how OTNS should be run to enable OTBRs in the simulation.
296296

297297
```bash
298-
otns -realtime -otbr-backbone-if eth0
298+
otns -realtime -otbr-if eth0
299+
otns -realtime
299300
```
300301

301302
The flag `-realtime` specifies that OTNS runs in real-time mode, required to support OTBR and other RCP/Posix based nodes.
302303

303-
The parameter `-otbr-backbone-if` specifies the network interface (AIL) that the OTBR should use to connect to the real network. It can be your local Ethernet or Wi-Fi interface, or a virtual network interface set up with Linux network namespaces. If the parameter is omitted, the OTBR will connect to the loopback interface (`lo`) by default. In this case, no external communication is possible.
304+
The parameter `-otbr-if` specifies the network interface (AIL) that the OTBR should use to connect to the real network. It can be your local Ethernet or Wi-Fi interface, or a virtual network interface set up with Linux network namespaces. If the parameter is omitted, the OTBR will connect to the loopback interface (`lo`) by default. In this case, no external IP communication is possible even though an OTBR can be started.
305+
306+
Once OTNS is running, the CLI command to add an OTBR is `add otbr` to use the default backbone interface as specified above. To use a different interface, use `add otbr if "wifi1"` for example to pick the 'wifi1' interface. This is useful when adding multiple OTBRs in a simulation.

cli/CmdRunner.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -455,6 +455,9 @@ func (rt *CmdRunner) executeAddNode(cc *CommandContext, cmd *AddCmd) {
455455
if cmd.Raw != nil {
456456
cfg.IsRaw = true
457457
}
458+
if cmd.BackboneIf != nil {
459+
cfg.NetIfName = cmd.BackboneIf.Name
460+
}
458461

459462
rt.postAsyncWait(cc, func(sim *simulation.Simulation) {
460463
sim.NodeConfigFinalize(&cfg)

cli/ast.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -307,7 +307,8 @@ type AddCmd struct {
307307
Restore *RestoreFlag `| @@` //nolint
308308
Version *ThreadVersion `| @@` //nolint
309309
Raw *RawFlag `| @@` //nolint
310-
Executable *ExecutableFlag `| @@ )*` //nolint
310+
Executable *ExecutableFlag `| @@` //nolint
311+
BackboneIf *BackboneIfFlag `| @@ )*` //nolint
311312
}
312313

313314
// noinspection GoVetStructTag
@@ -346,6 +347,12 @@ type RawFlag struct {
346347
Dummy struct{} `"raw"` //nolint
347348
}
348349

350+
// noinspection GoVetStructTag
351+
type BackboneIfFlag struct {
352+
Dummy struct{} `"if"` //nolint
353+
Name string `@String` //nolint
354+
}
355+
349356
// noinspection GoVetStructTag
350357
type MaxSpeedFlag struct {
351358
Dummy struct{} `( "max" | "inf")` //nolint

cli/cli_test.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,10 @@ func TestParseBytes(t *testing.T) {
6161
assert.True(t, cmd.Add.RadioRange.Val == 1234)
6262
assert.Nil(t, parseBytes([]byte("add router x 1 y 2 id 3 rr 1234"), &cmd))
6363
assert.Nil(t, parseBytes([]byte("add router rr 1234 id 3 y 2 x 1"), &cmd))
64+
assert.Nil(t, parseBytes([]byte(`add otbr if "eth0"`), &cmd))
65+
assert.True(t, cmd.Add.BackboneIf != nil && cmd.Add.BackboneIf.Name == "eth0")
66+
assert.Nil(t, parseBytes([]byte("add otbr"), &cmd))
67+
assert.True(t, cmd.Add.BackboneIf == nil)
6468

6569
assert.Nil(t, parseBytes([]byte("autogo"), &cmd))
6670
assert.NotNil(t, cmd.AutoGo)

dispatcher/FailureCtrl_test.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,10 @@ type mockDispatcherCallback struct {
4444
func (m mockDispatcherCallback) OnUartWrite(nodeid NodeId, data []byte) {
4545
}
4646

47-
func (m mockDispatcherCallback) OnLogWrite(nodeid NodeId, data []byte) {
47+
func (m mockDispatcherCallback) OnUartDisconnected(nodeid NodeId) {
48+
}
49+
50+
func (m mockDispatcherCallback) OnLogWrite(nodeid NodeId, data []byte, isFromHost bool) {
4851
}
4952

5053
func (m mockDispatcherCallback) OnNextEventTime(nextTimeUs uint64) {

dispatcher/dispatcher.go

Lines changed: 29 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ type CallbackHandler interface {
6161
OnUartWrite(nodeid NodeId, data []byte)
6262

6363
// OnLogWrite Notifies that a log item was written to the node's log.
64-
OnLogWrite(nodeid NodeId, data []byte)
64+
OnLogWrite(nodeid NodeId, data []byte, isFromHost bool)
6565

6666
// OnNextEventTime Notifies that the Dispatcher simulated-time will move to the next event time.
6767
OnNextEventTime(nextTimeUs uint64)
@@ -78,7 +78,11 @@ type CallbackHandler interface {
7878
// started nodes currently (and then the connection to it will be closed by Dispatcher).
7979
OnNewNodeDetected(nodeid NodeId) bool
8080

81-
// OnNodeDisconnected Notifies that the Dispatcher detected disconnection of a node process.
81+
// OnUartDisconnected Notifies that the node's UART stream has ended (node process exited).
82+
// Called after stdout/stderr pipes for the node have been closed.
83+
OnUartDisconnected(nodeid NodeId)
84+
85+
// OnNodeDisconnected Notifies that the Dispatcher detected disconnection of a node process from the Unix socket.
8286
OnNodeDisconnected(nodeid NodeId)
8387
}
8488

@@ -479,7 +483,9 @@ func (d *Dispatcher) HandleEvent(evt *Event) {
479483
if d.isDeleted(nodeid) {
480484
switch evt.Type {
481485
case EventTypeUartWrite,
482-
EventTypeLogWrite:
486+
EventTypeUartDisconnected,
487+
EventTypeLogWrite,
488+
EventTypeLogWriteHost:
483489
node = d.deletedNodes[nodeid]
484490
default:
485491
return
@@ -536,16 +542,20 @@ func (d *Dispatcher) HandleEvent(evt *Event) {
536542
d.Counters.UartWriteEvents += 1
537543
d.cbHandler.OnUartWrite(node.Id, evt.Data)
538544
case EventTypeUartDisconnected:
539-
d.Counters.OtherEvents += 1
540-
d.setSleeping(node.Id)
541-
d.alarmMgr.SetTimestamp(node.Id, Ever)
545+
d.Counters.UartWriteEvents += 1
546+
// if the process exited, we trigger the node's/socket's disconnection.
547+
d.setDisconnected(node.Id)
548+
d.cbHandler.OnUartDisconnected(node.Id)
542549
case EventTypeLogWrite:
543550
d.Counters.LogWriteEvents += 1
544-
d.cbHandler.OnLogWrite(node.Id, evt.Data)
551+
d.cbHandler.OnLogWrite(node.Id, evt.Data, false)
552+
case EventTypeLogWriteHost:
553+
d.Counters.LogWriteEvents += 1
554+
d.cbHandler.OnLogWrite(node.Id, evt.Data, true)
545555
case EventTypeExtAddr:
546556
d.Counters.OtherEvents += 1
547-
var extaddr = binary.BigEndian.Uint64(evt.Data[0:8])
548-
node.onStatusPushExtAddr(extaddr)
557+
extAddr := binary.BigEndian.Uint64(evt.Data[0:8])
558+
node.onStatusPushExtAddr(extAddr)
549559
case EventTypeNodeInfo:
550560
d.Counters.OtherEvents += 1
551561
// No further handling is needed; the node info was already used at this point.
@@ -1085,13 +1095,17 @@ func (d *Dispatcher) setSleeping(nodeid NodeId) {
10851095

10861096
func (d *Dispatcher) setDisconnected(nodeid NodeId) {
10871097
node := d.nodes[nodeid]
1088-
logger.AssertNotNil(node)
1089-
logger.AssertFalse(d.isDeleted(nodeid))
1090-
1098+
if node == nil {
1099+
node = d.deletedNodes[nodeid]
1100+
if node == nil {
1101+
return // if node was already deleted
1102+
}
1103+
} else {
1104+
d.alarmMgr.SetTimestamp(nodeid, Ever)
1105+
d.setSleeping(nodeid)
1106+
}
10911107
logger.Debugf("Node %d is set to disconnected and sleeping state.", nodeid)
1092-
d.alarmMgr.SetTimestamp(nodeid, Ever)
1093-
d.setSleeping(nodeid)
1094-
node.hasDisconnected.Store(true)
1108+
node.DisconnectSocket()
10951109
}
10961110

10971111
// syncAliveNodes advances the node's time of alive nodes only to current dispatcher time.

event/event.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright (c) 2020-2024, The OTNS Authors.
1+
// Copyright (c) 2020-2026, The OTNS Authors.
22
// All rights reserved.
33
//
44
// Redistribution and use in source and binary forms, with or without
@@ -65,6 +65,7 @@ const (
6565
EventTypeIp6ToHost EventType = 21
6666
EventTypeUdpFromHost EventType = 22
6767
EventTypeIp6FromHost EventType = 23
68+
EventTypeLogWriteHost EventType = 24
6869
)
6970

7071
const (
@@ -94,8 +95,7 @@ type Event struct {
9495
MsgToHostData MsgToHostEventData
9596
}
9697

97-
// All ...EventData formats below only used by OT nodes supporting advanced
98-
// RF simulation.
98+
// All ...EventData formats below define the type-specific event payloads.
9999
const radioCommEventDataHeaderLen = 11 // from OT-RFSIM platform, event-sim.h struct
100100
type RadioCommEventData struct {
101101
Channel uint8

logger/nodelogger.go

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -159,13 +159,14 @@ func (nl *NodeLogger) IsLevelVisible(level Level) bool {
159159

160160
// LogOt logs a complete OT format log string, which includes timestamp, log level character, and
161161
// the log message. The right level to log is automatically determined.
162-
func (nl *NodeLogger) LogOt(levelAndMsg string) {
163-
isOtLogLine, level := ParseOtLogLine(levelAndMsg)
162+
// If alternateMarker is non-empty, it will replace the standard ": " marker of the log message.
163+
func (nl *NodeLogger) LogOt(levelAndMsg string, alternateMarker string) {
164+
isOtLogLine, level, adaptedMsg := ParseOtLogLineAndAdaptMarker(levelAndMsg, alternateMarker)
164165
levelAndMsg = strings.TrimSpace(levelAndMsg)
165166
if isOtLogLine {
166-
NodeLogf(nl.Id, level, levelAndMsg)
167+
NodeLogf(nl.Id, level, adaptedMsg)
167168
} else {
168-
NodeLogf(nl.Id, InfoLevel, levelAndMsg)
169+
NodeLogf(nl.Id, InfoLevel, adaptedMsg)
169170
}
170171
}
171172

logger/parse.go

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,12 +29,14 @@ package logger
2929
import (
3030
"fmt"
3131
"regexp"
32+
"strings"
3233
)
3334

3435
const (
3536
OffLevelString = "off"
3637
NoneLevelString = "none"
3738
DefaultLevelString = "default"
39+
AltOtLogMarker = "| "
3840
)
3941

4042
// Example Posix ot-cli OTNS status push: 00:00:02.233 [-] Otns----------: transmit=11,d841,121,ffff
@@ -108,6 +110,33 @@ func ParseOtnsStatusPush(line string) (bool, string) {
108110
return true, match[1]
109111
}
110112

113+
// ParseOtLogLineAndAdaptMarker checks like ParseOtLogLine and if it is a log line, and if so, it replaces
114+
// the ': ' string that marks the start of the log message (after the log module name) with the provided
115+
// marker string, unless the marker is empty "".
116+
func ParseOtLogLineAndAdaptMarker(line string, marker string) (bool, Level, string) {
117+
logIdx := logPattern.FindStringSubmatchIndex(line)
118+
if logIdx == nil {
119+
return false, OffLevel, line
120+
}
121+
122+
// Find the module/message separator colon (first ': ' after the level marker).
123+
level := parseOtLevelChar(line[logIdx[2]])
124+
if len(marker) > 0 {
125+
n := logIdx[1] + 1
126+
linePart := line[n:]
127+
moduleColonOffset := strings.Index(linePart, ": ")
128+
if moduleColonOffset >= 8 && moduleColonOffset <= 28 {
129+
// verify that no space exists between module name and colon
130+
firstSpaceOffset := strings.Index(linePart, " ")
131+
if firstSpaceOffset > moduleColonOffset {
132+
msgStart := n + moduleColonOffset + 2
133+
line = line[:msgStart-2] + marker + line[msgStart:]
134+
}
135+
}
136+
}
137+
return true, level, line
138+
}
139+
111140
func GetLevelString(level Level) string {
112141
switch level {
113142
case MicroLevel:

ot-rfsim/ot-versions/ot-br.sh

Lines changed: 33 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -29,13 +29,17 @@
2929

3030
# ot-br.sh - script to start an OTBR node from an OTNS simulation.
3131

32+
DEBG="[DEBG]-OTBR.SH-:"
33+
CRIT="[CRIT]-OTBR.SH-:"
34+
INFO="[INFO]-OTBR.SH-:"
35+
3236
# When a running script receives SIGTERM
3337
cleanup()
3438
{
35-
echo "[DEBG] ot-br.sh: caught signal, cleaning up child processes"
39+
echo "${DEBG} ot-br.sh: caught signal, cleaning up child processes"
3640
jobs -p | xargs -r kill
3741
wait
38-
echo "[DEBG] ot-br.sh: script exit"
42+
echo "${DEBG} ot-br.sh: script exit"
3943
exit 0
4044
}
4145

@@ -45,16 +49,16 @@ sudo_command_failed()
4549
{
4650
local cmd="$1"
4751
local cmd_path
48-
echo "[CRIT] passwordless sudo failed for '${cmd}'"
49-
echo "[CRIT] add below lines to /etc/sudoers (run: sudo visudo):"
52+
echo "${CRIT} passwordless sudo failed for '${cmd}'"
53+
echo "${CRIT} add below lines to /etc/sudoers (run: sudo visudo):"
5054

5155
cmd="otbr-agent"
5256
cmd_path=$(command -v "$cmd" 2>/dev/null)
53-
echo "[CRIT] ${USER} ALL=(ALL) NOPASSWD: SETENV: ${cmd_path:-/usr/local/sbin/${cmd}}"
57+
echo "[CRIT] ${USER} ALL=(ALL) NOPASSWD: SETENV: ${cmd_path:-/usr/local/sbin/${cmd}}"
5458

5559
cmd="ot-ctl"
5660
cmd_path=$(command -v "$cmd" 2>/dev/null)
57-
echo "[CRIT] ${USER} ALL=(ALL) NOPASSWD: ${cmd_path:-/usr/local/bin/${cmd}}"
61+
echo "[CRIT] ${USER} ALL=(ALL) NOPASSWD: ${cmd_path:-/usr/local/bin/${cmd}}"
5862
exit 1
5963
}
6064

@@ -69,49 +73,50 @@ socket_in_use()
6973
fi
7074
}
7175

72-
echo "[DEBG] ot-br.sh started"
76+
echo "${DEBG} ot-br.sh started"
7377

7478
NODE_ID=$1
7579
BACKBONE_IF_NAME=$2
7680
AGENT_PARAM=$3
7781
RADIO_URL=$4
78-
# TODO: 'wpan1' for node 1 etc doesn't work - needs to be wpan0. Check later if there's a way to use others.
79-
THREAD_IF_NAME="wpan0"
82+
THREAD_IF_NAME="wpan${PORT_OFFSET}_${NODE_ID}"
83+
REST_PORT=$((8080 + NODE_ID))
8084
EXTRA_DELAY=0
8185
MAX_WAIT_SEC=5
8286
SOCKET_PATH="/run/openthread-${THREAD_IF_NAME}.sock"
8387
DATA_PATH="/var/lib/thread"
8488

85-
echo "[DEBG] PORT_OFFSET =${PORT_OFFSET}"
86-
echo "[DEBG] NODE_ID =${NODE_ID}"
87-
echo "[DEBG] BACKBONE_IF_NAME=${BACKBONE_IF_NAME}"
88-
echo "[DEBG] THREAD_IF_NAME =${THREAD_IF_NAME}"
89-
echo "[DEBG] RADIO_URL =${RADIO_URL}"
90-
echo "[DEBG] SOCKET_PATH =${SOCKET_PATH}"
91-
echo "[DEBG] DATA_PATH =${DATA_PATH}"
92-
echo "[DEBG] AGENT_PARAM =${AGENT_PARAM}"
89+
echo "${DEBG} PORT_OFFSET =${PORT_OFFSET}"
90+
echo "${DEBG} NODE_ID =${NODE_ID}"
91+
echo "${DEBG} BACKBONE_IF_NAME=${BACKBONE_IF_NAME}"
92+
echo "${DEBG} THREAD_IF_NAME =${THREAD_IF_NAME}"
93+
echo "${DEBG} REST_PORT =${REST_PORT}"
94+
echo "${DEBG} RADIO_URL =${RADIO_URL}"
95+
echo "${DEBG} SOCKET_PATH =${SOCKET_PATH}"
96+
echo "${DEBG} DATA_PATH =${DATA_PATH}"
97+
echo "${DEBG} AGENT_PARAM =${AGENT_PARAM}"
9398

9499
# check for passwordless sudo access to commands
95100
sudo -n otbr-agent -V >/dev/null 2>&1 || sudo_command_failed otbr-agent
96101
sudo -n ot-ctl -h >/dev/null 2>&1 || sudo_command_failed ot-ctl
97102

98-
# check for existing socket usage
103+
# check for existing socket usage`
99104
if [ -S "${SOCKET_PATH}" ]; then
100105
if socket_in_use "${SOCKET_PATH}"; then
101-
echo "[CRIT] socket ${SOCKET_PATH} is already in use - is otbr-agent already running on ${THREAD_IF_NAME}?"
106+
echo "${CRIT} socket ${SOCKET_PATH} is already in use - is otbr-agent already running on ${THREAD_IF_NAME}?"
102107
exit 1
103108
fi
104-
echo "[DEBG] existing (unused?) file ${SOCKET_PATH} detected - start with extra delay"
109+
echo "${DEBG} existing (unused?) file ${SOCKET_PATH} detected - start with extra delay"
105110
EXTRA_DELAY=3
106111
fi
107112

108-
echo "[DEBG] starting otbr-agent"
113+
echo "${INFO} starting otbr-agent"
109114
# All otbr-agent output redirected to stderr, so that ot-ctl CLI interactions are not garbled.
110115
sudo -n PORT_OFFSET="${PORT_OFFSET}" otbr-agent --data-path "${DATA_PATH}" -s -d 7 -I ${THREAD_IF_NAME} -B "${BACKBONE_IF_NAME}" \
111-
"${AGENT_PARAM}" "${RADIO_URL}" 1>&2 &
116+
--rest-listen-port "${REST_PORT}" "${AGENT_PARAM}" "${RADIO_URL}" 1>&2 &
112117
SUDO_OTBR_PID=$!
113118

114-
echo "[DEBG] otbr-agent started in background (parent PID=${SUDO_OTBR_PID}) - waiting until ready"
119+
echo "${DEBG} otbr-agent started in background (parent PID=${SUDO_OTBR_PID}) - waiting until ready"
115120

116121
# wait for otbr-agent to create its Unix socket
117122
elapsed=0
@@ -127,11 +132,12 @@ while [ ! -S "${SOCKET_PATH}" ]; do
127132
sleep 0.1
128133
elapsed=$((elapsed + 1))
129134
done
130-
echo "[DEBG] otbr-agent socket ready after $((elapsed / 10)).$((elapsed % 10))s, starting ot-ctl CLI"
135+
echo "${DEBG} otbr-agent socket ready after $((elapsed / 10)).$((elapsed % 10))s, adding ${EXTRA_DELAY} s delay"
131136
sleep ${EXTRA_DELAY}
132-
sudo -n ot-ctl
137+
echo "${INFO} starting ot-ctl CLI"
138+
sudo -n ot-ctl -I "${THREAD_IF_NAME}"
133139

134-
echo "[DEBG] ot-br.sh: ot-ctl CLI exited, cleaning up child processes"
140+
echo "${DEBG} ot-br.sh: ot-ctl CLI exited, cleaning up child processes"
135141
jobs -p | xargs -r kill
136142
wait
137-
echo "[DEBG] ot-br.sh: script exit"
143+
echo "${DEBG} ot-br.sh: script exit"

0 commit comments

Comments
 (0)