Skip to content

Commit 8ced444

Browse files
fix: guard interfaceIds with linkLock + cancel stale announcePoller
- interfaceIds had no synchronization while links/linkStateTasks did: statusSnapshot() read it (.first(where:)) concurrently with start()'s bring-up loop and addInterface/removeInterface writes from other tasks. Swift Dictionary read+write is UB (release-build crash). Extend linkLock to every interfaceIds access; statusSnapshot takes a COW snapshot under the lock so the map stays lock-free. - startAnnouncePolling() overwrote announcePoller without cancelling it. start() has no idempotency guard, so a second start() without stop() leaked the old poller (still polling its captured, now-orphaned PathTable) and double-yielded every announce. Cancel before reassigning. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
1 parent 4e56d93 commit 8ced444

1 file changed

Lines changed: 36 additions & 10 deletions

File tree

Sources/RNSBackendSwift/SwiftRNSBackend.swift

Lines changed: 36 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -43,13 +43,17 @@ public final class SwiftRNSBackend: RnsBackend, @unchecked Sendable {
4343
/// stale linkState events into the next session after a restart.
4444
private var linkStateTasks: [Int: Task<Void, Never>] = [:]
4545
private var nextLinkId: Int = 1
46-
/// Serializes the link-id allocation and the `links` / `linkStateTasks`
47-
/// maps. SwiftRNSBackend is a class (not an actor) so `linkSend` stays
48-
/// hop-free on the audio path; without this lock, concurrent `openLink`
49-
/// callers race the `nextLinkId` increment + the dictionary writes — two
50-
/// callers can grab the same id and the second write orphans the first
51-
/// link (never tracked, never cancelled by linkTeardown/stop). Mirrors the
52-
/// Python backend's `_link_id_lock`. Never held across an `await`.
46+
/// Serializes the mutable registries that async methods touch from
47+
/// different tasks: `nextLinkId`, `links`, `linkStateTasks`, and
48+
/// `interfaceIds`. SwiftRNSBackend is a class (not an actor) so `linkSend`
49+
/// stays hop-free on the audio path; without this lock, concurrent
50+
/// `openLink` callers race the `nextLinkId` increment + dictionary writes
51+
/// (two callers grab the same id, the second orphans the first link), and
52+
/// `statusSnapshot()` can read `interfaceIds` while `start()`/`addInterface`
53+
/// mutate it (Swift Dictionary read+write races are UB → release-build
54+
/// crashes). Mirrors the Python backend's `_link_id_lock`. A Dictionary is
55+
/// a value type, so snapshotting one under the lock (`let copy = dict`) is
56+
/// a cheap COW retain. Never held across an `await`.
5357
private let linkLock = NSLock()
5458

5559
/// Section-name → reticulum interface id, backing the Python-shaped
@@ -162,7 +166,9 @@ public final class SwiftRNSBackend: RnsBackend, @unchecked Sendable {
162166
let section = PythonConfigWriter.sectionName(for: entity)
163167
do {
164168
try await buildAndAdd(entity)
169+
linkLock.lock()
165170
interfaceIds[section] = entity.id
171+
linkLock.unlock()
166172
} catch {
167173
Self.log.error("start: interface \(section, privacy: .public) bring-up failed: \(String(describing: error), privacy: .public)")
168174
}
@@ -206,6 +212,11 @@ public final class SwiftRNSBackend: RnsBackend, @unchecked Sendable {
206212
/// lxmf.propagation / lxst.telephony / nomadnetwork.node). `lastSeen` is
207213
/// task-local, so no shared mutable state escapes the poller.
208214
private func startAnnouncePolling() {
215+
// Cancel any prior poller first: start() has no idempotency guard, so a
216+
// second start() without an intervening stop() would otherwise leak the
217+
// old task (still polling the orphaned PathTable it captured) and yield
218+
// every announce twice.
219+
announcePoller?.cancel()
209220
guard let pathTable else { return }
210221
let cont = eventContinuation
211222
announcePoller = Task {
@@ -530,6 +541,11 @@ public final class SwiftRNSBackend: RnsBackend, @unchecked Sendable {
530541
public func statusSnapshot() async -> StatusSnapshot? {
531542
guard let transport else { return nil }
532543
let snaps = await transport.getInterfaceSnapshots()
544+
// Snapshot interfaceIds under the lock (cheap COW copy) so the map below
545+
// can't race a concurrent start()/addInterface()/removeInterface() write.
546+
linkLock.lock()
547+
let interfaceIdsSnapshot = interfaceIds
548+
linkLock.unlock()
533549
let ifaces = snaps.map { s in
534550
// Report the config *section name* (what AppServices matches against
535551
// via PythonConfigWriter.sectionName), NOT the raw reticulum interface
@@ -538,7 +554,7 @@ public final class SwiftRNSBackend: RnsBackend, @unchecked Sendable {
538554
// (section -> entity.id). Without this, AppServices.applyPythonInterfaceStatus
539555
// never matches a Swift-backend interface to its entity, so the UI's
540556
// connection badge stays "disconnected" even when the interface is up.
541-
let section = interfaceIds.first(where: { $0.value == s.id })?.key ?? s.id
557+
let section = interfaceIdsSnapshot.first(where: { $0.value == s.id })?.key ?? s.id
542558
return StatusSnapshot.InterfaceStatus(
543559
sectionName: section,
544560
name: s.name,
@@ -690,13 +706,18 @@ public final class SwiftRNSBackend: RnsBackend, @unchecked Sendable {
690706
guard transport != nil, identity != nil else { return (false, "not started") }
691707
// Idempotent: start() already brings up enabled interfaces, and the
692708
// apply/hot-reload path may re-request one — don't add it twice.
693-
if interfaceIds[name] != nil { return (true, "already added") }
709+
linkLock.lock()
710+
let alreadyAdded = interfaceIds[name] != nil
711+
linkLock.unlock()
712+
if alreadyAdded { return (true, "already added") }
694713
guard let entity = Self.entity(forSection: name) else {
695714
return (false, "no configured interface named \(name)")
696715
}
697716
do {
698717
try await buildAndAdd(entity)
718+
linkLock.lock()
699719
interfaceIds[name] = entity.id
720+
linkLock.unlock()
700721
return (true, "")
701722
} catch {
702723
return (false, "\(error)")
@@ -708,11 +729,16 @@ public final class SwiftRNSBackend: RnsBackend, @unchecked Sendable {
708729
guard let transport else { return (false, "not started") }
709730
// Prefer the tracked id; fall back to the entity's id if we never tracked
710731
// it (e.g. added before this process started). removeInterface disconnects.
711-
guard let rid = interfaceIds[name] ?? Self.entity(forSection: name)?.id else {
732+
linkLock.lock()
733+
let tracked = interfaceIds[name]
734+
linkLock.unlock()
735+
guard let rid = tracked ?? Self.entity(forSection: name)?.id else {
712736
return (false, "no interface named \(name)")
713737
}
714738
await transport.removeInterface(id: rid)
739+
linkLock.lock()
715740
interfaceIds.removeValue(forKey: name)
741+
linkLock.unlock()
716742
return (true, "")
717743
}
718744

0 commit comments

Comments
 (0)