Skip to content

Commit 3fb29af

Browse files
committed
Fix lookup scale validation gates
1 parent ed0befa commit 3fb29af

24 files changed

Lines changed: 927 additions & 389 deletions

.agents/sow/current/SOW-0021-20260613-netipc-at-scale.md

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1057,7 +1057,19 @@ Tests or equivalent validation:
10571057
- Validation passed on POSIX: `cd src/go && go test -count=1 -timeout=300s ./pkg/netipc/protocol`; `cd src/go && go test -count=1 -timeout=300s ./pkg/netipc/service/raw`; `cargo test --manifest-path src/crates/netipc/Cargo.toml protocol::lookup -- --nocapture`; `cargo test --manifest-path src/crates/netipc/Cargo.toml lookup_dispatch_tests -- --nocapture`; `cargo test --manifest-path src/crates/netipc/Cargo.toml service::raw:: -- --nocapture`.
10581058
- Validation passed on Windows `win11`: `cd src/go && "/c/Program Files/Go/bin/go.exe" test -count=1 -timeout=300s ./pkg/netipc/protocol`; `cd src/go && "/c/Program Files/Go/bin/go.exe" test -count=1 -timeout=300s ./pkg/netipc/service/raw`; `PATH=/c/Users/costa/.cargo/bin:$PATH cargo test --manifest-path src/crates/netipc/Cargo.toml protocol::lookup -- --nocapture`; `PATH=/c/Users/costa/.cargo/bin:$PATH cargo test --manifest-path src/crates/netipc/Cargo.toml service::raw:: -- --nocapture`.
10591059
- Reviewer concern about suffix-reservation still losing partial responses was not reproduced: C POSIX `timeout 900 build-coverage/bin/test_service` passed with `642 passed, 0 failed`; focused POSIX Go `TestLookupLargeResponseSplit` passed; focused POSIX Rust `test_lookup_large_response_split_calls` passed; Windows Rust `test_lookup_large_response_split_calls_windows` passed. The concern was treated as rejected unless a failing reproducer is produced.
1060-
- Note: `ctest` on `$PATH` resolved to a broken local Python wrapper missing the `cmake` module; validation used `/usr/bin/ctest`.
1060+
- Post-push CI failure fixes after pushed commit `ed0befa`:
1061+
- GitHub `Runtime Safety` run `27506742409` failed in `bash tests/run-go-race.sh`.
1062+
- Root cause 1: Go protocol tests fabricated impossible multi-GB slices with `unsafe.Slice`, and Go race/checkptr rejected them with `fatal error: checkptr: unsafe.Slice result straddles multiple allocations` in `src/go/pkg/netipc/protocol/frame_test.go`.
1063+
- Root cause 2: Go POSIX raw server stop/close had a real race between `src/go/pkg/netipc/service/raw/server_unix.go` `Run()` defer and `Stop()`, plus unsynchronized `src/go/pkg/netipc/transport/posix/uds_listener.go` listener `fd`/`path` mutation.
1064+
- GitHub `Static Analysis` run `27506742397` failed in the Go `src/go` matrix because `gosec` reported `G103` unsafe-pointer findings in `src/go/pkg/netipc/protocol/lookup_guard_test.go`, `src/go/pkg/netipc/service/raw/cache_test.go`, and `src/go/pkg/netipc/service/raw/lookup_common_test.go`.
1065+
- Fixed Go tests by replacing invalid synthetic `unsafe` slices with unexported arithmetic/count helper checks used by the production encoders/builders. This preserves overflow coverage without invalid memory.
1066+
- Fixed Go POSIX listener/server close races by making listener `fd`, `path`, and config access mutex-protected and making raw server listener ownership/close synchronized and idempotent. Mirrored the raw server listener ownership fix on Windows.
1067+
- Fixed newly exposed local `gosec` `G115` findings by replacing unchecked count and size casts with checked conversions and helper comparisons against `uint32` payload ceilings.
1068+
- Local validation passed after fixes: `bash tests/run-go-race.sh`; `cd src/go && go test ./...`; `cd src/go && go vet ./...`; `cd src/go && staticcheck ./...`; `cd src/go && govulncheck ./...`; `cd src/go && gosec -quiet -fmt sarif -out /tmp/plugin-ipc-gosec.sarif -exclude=G404 ./...`.
1069+
- Local Go static matrix validation also passed for `tests/fixtures/go` and `bench/drivers/go`: `go test ./...`, `go vet ./...`, `staticcheck ./...`, `govulncheck ./...`, and `gosec -quiet -fmt sarif ...`.
1070+
- Local Rust static validation passed after replacing two raw lookup `drop(view)` borrow-shortening calls with lexical scopes: `cargo fmt --manifest-path src/crates/netipc/Cargo.toml --all --check`; `cargo test --manifest-path src/crates/netipc/Cargo.toml --all-targets --all-features --no-run`; `cargo clippy --manifest-path src/crates/netipc/Cargo.toml --all-targets --all-features -- -D clippy::correctness -D clippy::suspicious`; `cargo audit`; `cargo deny check advisories bans sources`.
1071+
- Follow-up local validation on 2026-06-14 passed: `git diff --check`; `bash .agents/sow/audit.sh`; `bash tests/run-go-race.sh`; `bash tests/run-coverage-go.sh` with `90.4%` total coverage (`3375/3735`) against the configured `90%` threshold; `cargo fmt --manifest-path src/crates/netipc/Cargo.toml --all --check`; `cargo test --manifest-path src/crates/netipc/Cargo.toml --all-targets --all-features --no-run`; `cargo clippy --manifest-path src/crates/netipc/Cargo.toml --all-targets --all-features -- -D clippy::correctness -D clippy::suspicious`.
1072+
- Note: `ctest` on `$PATH` resolved to a broken local Python wrapper missing the `cmake` module; validation used `/usr/bin/ctest`.
10611073

10621074
Real-use evidence:
10631075

src/crates/netipc/src/service/raw/apps_lookup.rs

Lines changed: 48 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -118,61 +118,66 @@ impl RawClient {
118118
RawCallKind::single(),
119119
remaining_timeout,
120120
)?;
121-
let view = AppsLookupResponseView::decode(self.response_payload(response)?)?;
122-
if let Some(expected_generation) = generation {
123-
if view.generation != expected_generation {
124-
return Err(NipcError::BadLayout);
125-
}
126-
} else {
127-
generation = Some(view.generation);
128-
}
129-
if view.item_count != req_pids.len() as u32 {
130-
return Err(NipcError::BadItemCount);
131-
}
132-
let mut payload_exceeded_at: Option<usize> = None;
133-
for (i, expected) in req_pids.iter().enumerate() {
134-
let item = view.item(i as u32)?;
135-
if item.pid != *expected {
136-
return Err(NipcError::BadLayout);
121+
let final_size = {
122+
let view = AppsLookupResponseView::decode(self.response_payload(response)?)?;
123+
if let Some(expected_generation) = generation {
124+
if view.generation != expected_generation {
125+
return Err(NipcError::BadLayout);
126+
}
127+
} else {
128+
generation = Some(view.generation);
137129
}
138-
if item.status == PID_LOOKUP_PAYLOAD_EXCEEDED {
139-
payload_exceeded_at = Some(i);
140-
break;
130+
if view.item_count != req_pids.len() as u32 {
131+
return Err(NipcError::BadItemCount);
141132
}
142-
raw_items.push(view.raw_item(i as u32)?.to_vec());
143-
}
144-
if let Some(first) = payload_exceeded_at {
145-
for (i, expected) in req_pids.iter().enumerate().skip(first) {
133+
let mut payload_exceeded_at: Option<usize> = None;
134+
for (i, expected) in req_pids.iter().enumerate() {
146135
let item = view.item(i as u32)?;
147-
if item.pid != *expected || item.status != PID_LOOKUP_PAYLOAD_EXCEEDED {
136+
if item.pid != *expected {
148137
return Err(NipcError::BadLayout);
149138
}
139+
if item.status == PID_LOOKUP_PAYLOAD_EXCEEDED {
140+
payload_exceeded_at = Some(i);
141+
break;
142+
}
143+
raw_items.push(view.raw_item(i as u32)?.to_vec());
144+
}
145+
if let Some(first) = payload_exceeded_at {
146+
for (i, expected) in req_pids.iter().enumerate().skip(first) {
147+
let item = view.item(i as u32)?;
148+
if item.pid != *expected || item.status != PID_LOOKUP_PAYLOAD_EXCEEDED {
149+
return Err(NipcError::BadLayout);
150+
}
151+
}
152+
if first == 0 {
153+
return Err(NipcError::Overflow);
154+
}
155+
start += first;
156+
continue;
157+
}
158+
start += req_count;
159+
if start < pids.len() {
160+
continue;
161+
}
162+
if raw_items.len() != pids.len() {
163+
return Err(NipcError::BadItemCount);
150164
}
151-
if first == 0 {
165+
let size = lookup_raw_response_size(
166+
APPS_LOOKUP_RESP_HDR_SIZE,
167+
raw_items.len(),
168+
&raw_items,
169+
)?;
170+
if size > self.max_logical_lookup_response_bytes as usize {
152171
return Err(NipcError::Overflow);
153172
}
154-
start += first;
155-
continue;
156-
}
157-
start += req_count;
158-
if start < pids.len() {
159-
continue;
160-
}
161-
if raw_items.len() != pids.len() {
162-
return Err(NipcError::BadItemCount);
163-
}
164-
let size =
165-
lookup_raw_response_size(APPS_LOOKUP_RESP_HDR_SIZE, raw_items.len(), &raw_items)?;
166-
if size > self.max_logical_lookup_response_bytes as usize {
167-
return Err(NipcError::Overflow);
168-
}
169-
drop(view);
170-
self.transport_buf.resize(size, 0);
173+
size
174+
};
175+
self.transport_buf.resize(final_size, 0);
171176
let refs: Vec<&[u8]> = raw_items.iter().map(Vec::as_slice).collect();
172177
let n = protocol::encode_apps_lookup_raw_response(
173178
&refs,
174179
generation.unwrap_or(0),
175-
&mut self.transport_buf[..size],
180+
&mut self.transport_buf[..final_size],
176181
)?;
177182
return AppsLookupResponseView::decode(&self.transport_buf[..n]);
178183
}

src/crates/netipc/src/service/raw/cgroups_lookup.rs

Lines changed: 50 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -165,66 +165,68 @@ impl RawClient {
165165
RawCallKind::single(),
166166
remaining_timeout,
167167
)?;
168-
let view = CgroupsLookupResponseView::decode(self.response_payload(response)?)?;
169-
if let Some(expected_generation) = generation {
170-
if view.generation != expected_generation {
171-
return Err(NipcError::BadLayout);
172-
}
173-
} else {
174-
generation = Some(view.generation);
175-
}
176-
if view.item_count != req_paths.len() as u32 {
177-
return Err(NipcError::BadItemCount);
178-
}
179-
let mut payload_exceeded_at: Option<usize> = None;
180-
for (i, expected) in req_paths.iter().enumerate() {
181-
let item = view.item(i as u32)?;
182-
if item.path.as_bytes() != *expected {
183-
return Err(NipcError::BadLayout);
168+
let final_size = {
169+
let view = CgroupsLookupResponseView::decode(self.response_payload(response)?)?;
170+
if let Some(expected_generation) = generation {
171+
if view.generation != expected_generation {
172+
return Err(NipcError::BadLayout);
173+
}
174+
} else {
175+
generation = Some(view.generation);
184176
}
185-
if item.status == CGROUP_LOOKUP_PAYLOAD_EXCEEDED {
186-
payload_exceeded_at = Some(i);
187-
break;
177+
if view.item_count != req_paths.len() as u32 {
178+
return Err(NipcError::BadItemCount);
188179
}
189-
raw_items.push(view.raw_item(i as u32)?.to_vec());
190-
}
191-
if let Some(first) = payload_exceeded_at {
192-
for (i, expected) in req_paths.iter().enumerate().skip(first) {
180+
let mut payload_exceeded_at: Option<usize> = None;
181+
for (i, expected) in req_paths.iter().enumerate() {
193182
let item = view.item(i as u32)?;
194-
if item.path.as_bytes() != *expected
195-
|| item.status != CGROUP_LOOKUP_PAYLOAD_EXCEEDED
196-
{
183+
if item.path.as_bytes() != *expected {
197184
return Err(NipcError::BadLayout);
198185
}
186+
if item.status == CGROUP_LOOKUP_PAYLOAD_EXCEEDED {
187+
payload_exceeded_at = Some(i);
188+
break;
189+
}
190+
raw_items.push(view.raw_item(i as u32)?.to_vec());
191+
}
192+
if let Some(first) = payload_exceeded_at {
193+
for (i, expected) in req_paths.iter().enumerate().skip(first) {
194+
let item = view.item(i as u32)?;
195+
if item.path.as_bytes() != *expected
196+
|| item.status != CGROUP_LOOKUP_PAYLOAD_EXCEEDED
197+
{
198+
return Err(NipcError::BadLayout);
199+
}
200+
}
201+
if first == 0 {
202+
return Err(NipcError::Overflow);
203+
}
204+
start += first;
205+
continue;
199206
}
200-
if first == 0 {
207+
start += req_count;
208+
if start < paths.len() {
209+
continue;
210+
}
211+
if raw_items.len() != paths.len() {
212+
return Err(NipcError::BadItemCount);
213+
}
214+
let size = lookup_raw_response_size(
215+
CGROUPS_LOOKUP_RESP_HDR_SIZE,
216+
raw_items.len(),
217+
&raw_items,
218+
)?;
219+
if size > self.max_logical_lookup_response_bytes as usize {
201220
return Err(NipcError::Overflow);
202221
}
203-
start += first;
204-
continue;
205-
}
206-
start += req_count;
207-
if start < paths.len() {
208-
continue;
209-
}
210-
if raw_items.len() != paths.len() {
211-
return Err(NipcError::BadItemCount);
212-
}
213-
let size = lookup_raw_response_size(
214-
CGROUPS_LOOKUP_RESP_HDR_SIZE,
215-
raw_items.len(),
216-
&raw_items,
217-
)?;
218-
if size > self.max_logical_lookup_response_bytes as usize {
219-
return Err(NipcError::Overflow);
220-
}
221-
drop(view);
222-
self.transport_buf.resize(size, 0);
222+
size
223+
};
224+
self.transport_buf.resize(final_size, 0);
223225
let refs: Vec<&[u8]> = raw_items.iter().map(Vec::as_slice).collect();
224226
let n = protocol::encode_cgroups_lookup_raw_response(
225227
&refs,
226228
generation.unwrap_or(0),
227-
&mut self.transport_buf[..size],
229+
&mut self.transport_buf[..final_size],
228230
)?;
229231
return CgroupsLookupResponseView::decode(&self.transport_buf[..n]);
230232
}

src/go/pkg/netipc/protocol/apps_lookup.go

Lines changed: 26 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -123,22 +123,11 @@ func validateAppsLookupKnown(v appsLookupSemantics) error {
123123

124124
func EncodeAppsLookupRequest(pids []uint32, buf []byte) (int, error) {
125125
count := len(pids)
126-
if uint64(count) > uint64(^uint32(0)) {
127-
return 0, ErrOverflow
128-
}
129-
dirSize, ok := checkedMulInt(count, LookupDirEntrySize)
130-
if !ok {
131-
return 0, ErrOverflow
132-
}
133-
keySize, ok := checkedMulInt(count, AppsLookupKeySize)
134-
if !ok {
135-
return 0, ErrOverflow
136-
}
137-
packedStart, ok := checkedAddInt(AppsLookupReqHdr, dirSize)
126+
count32, ok := checkedU32Int(count)
138127
if !ok {
139128
return 0, ErrOverflow
140129
}
141-
total, ok := checkedAddInt(packedStart, keySize)
130+
packedStart, total, ok := appsLookupRequestLayoutForCount(count)
142131
if !ok {
143132
return 0, ErrOverflow
144133
}
@@ -159,12 +148,35 @@ func EncodeAppsLookupRequest(pids []uint32, buf []byte) (int, error) {
159148
}
160149
ne.PutUint16(buf[0:2], 1)
161150
ne.PutUint16(buf[2:4], 0)
162-
ne.PutUint32(buf[4:8], uint32(count))
151+
ne.PutUint32(buf[4:8], count32)
163152
ne.PutUint32(buf[8:12], 0)
164153
ne.PutUint32(buf[12:16], 0)
165154
return total, nil
166155
}
167156

157+
func appsLookupRequestLayoutForCount(count int) (packedStart, total int, ok bool) {
158+
if count < 0 || uint64(count) > uint64(^uint32(0)) {
159+
return 0, 0, false
160+
}
161+
dirSize, ok := checkedMulInt(count, LookupDirEntrySize)
162+
if !ok {
163+
return 0, 0, false
164+
}
165+
keySize, ok := checkedMulInt(count, AppsLookupKeySize)
166+
if !ok {
167+
return 0, 0, false
168+
}
169+
packedStart, ok = checkedAddInt(AppsLookupReqHdr, dirSize)
170+
if !ok {
171+
return 0, 0, false
172+
}
173+
total, ok = checkedAddInt(packedStart, keySize)
174+
if !ok {
175+
return 0, 0, false
176+
}
177+
return packedStart, total, true
178+
}
179+
168180
func DecodeAppsLookupRequest(buf []byte) (*AppsLookupRequestView, error) {
169181
if len(buf) < AppsLookupReqHdr {
170182
return nil, ErrTruncated

src/go/pkg/netipc/protocol/cgroups_lookup.go

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -56,14 +56,11 @@ func validateCgroupsLookupSemantics(status, orchestrator uint16, pathLen, nameLe
5656

5757
func EncodeCgroupsLookupRequest(paths [][]byte, buf []byte) (int, error) {
5858
count := len(paths)
59-
if uint64(count) > uint64(^uint32(0)) {
60-
return 0, ErrOverflow
61-
}
62-
dirSize, ok := checkedMulInt(count, LookupDirEntrySize)
59+
count32, ok := checkedU32Int(count)
6360
if !ok {
6461
return 0, ErrOverflow
6562
}
66-
packedStart, ok := checkedAddInt(CgroupsLookupReqHdr, dirSize)
63+
packedStart, ok := cgroupsLookupRequestPackedStartForCount(count)
6764
if !ok {
6865
return 0, ErrOverflow
6966
}
@@ -108,12 +105,23 @@ func EncodeCgroupsLookupRequest(paths [][]byte, buf []byte) (int, error) {
108105
}
109106
ne.PutUint16(buf[0:2], 1)
110107
ne.PutUint16(buf[2:4], 0)
111-
ne.PutUint32(buf[4:8], uint32(count))
108+
ne.PutUint32(buf[4:8], count32)
112109
ne.PutUint32(buf[8:12], 0)
113110
ne.PutUint32(buf[12:16], 0)
114111
return data, nil
115112
}
116113

114+
func cgroupsLookupRequestPackedStartForCount(count int) (int, bool) {
115+
if count < 0 || uint64(count) > uint64(^uint32(0)) {
116+
return 0, false
117+
}
118+
dirSize, ok := checkedMulInt(count, LookupDirEntrySize)
119+
if !ok {
120+
return 0, false
121+
}
122+
return checkedAddInt(CgroupsLookupReqHdr, dirSize)
123+
}
124+
117125
func DecodeCgroupsLookupRequest(buf []byte) (*CgroupsLookupRequestView, error) {
118126
if len(buf) < CgroupsLookupReqHdr {
119127
return nil, ErrTruncated

0 commit comments

Comments
 (0)