Skip to content

Commit b07e621

Browse files
committed
chore: drop per-batch size chunking to match upstream uploader
The 256 KB per-batch ceiling, which I added speculatively when the server cap was 256 KB, no longer matches the reference implementation we're mirroring: that implementation sends each flush as a single POST regardless of size. With the server cap now well above any plausible single-flush volume, chunking is both unnecessary and a divergence from upstream, so drop it. This removes _chunk_by_size, _MAX_BATCH_BYTES, and the four chunking tests; _flush now POSTs the entire buffered batch as one request.
1 parent 681f078 commit b07e621

2 files changed

Lines changed: 8 additions & 81 deletions

File tree

socketsecurity/core/log_uploader.py

Lines changed: 8 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,6 @@
2626

2727
_FLUSH_GUARD = threading.local()
2828

29-
_MAX_BATCH_BYTES = 256 * 1024 - 1024 # depscan body cap is 256KB; reserve headroom for envelope/headers
30-
3129
_LEVEL_MAP = {
3230
logging.DEBUG: "DEBUG",
3331
logging.INFO: "INFO",
@@ -88,48 +86,22 @@ def _flush(self) -> None:
8886
with self._lock:
8987
if not self._buf:
9088
return
91-
entries = self._buf
89+
batch = self._buf
9290
self._buf = []
9391

9492
_FLUSH_GUARD.active = True
9593
try:
96-
for chunk in _chunk_by_size(entries):
97-
try:
98-
self._client.request(
99-
path=f"python-cli-runs/{self._run_id}/logs",
100-
method="POST",
101-
payload=json.dumps({"logs": chunk}),
102-
)
103-
except Exception as e:
104-
log.debug(f"log upload failed (swallowed, {len(chunk)} entries dropped): {e}")
94+
self._client.request(
95+
path=f"python-cli-runs/{self._run_id}/logs",
96+
method="POST",
97+
payload=json.dumps({"logs": batch}),
98+
)
99+
except Exception as e:
100+
log.debug(f"log upload failed (swallowed, {len(batch)} entries dropped): {e}")
105101
finally:
106102
_FLUSH_GUARD.active = False
107103

108104

109-
def _chunk_by_size(entries: list) -> list:
110-
"""Split entries into chunks that each serialize to <= _MAX_BATCH_BYTES.
111-
Single entries that exceed the cap are dropped with a debug log."""
112-
chunks: list = []
113-
current: list = []
114-
envelope = len('{"logs":[]}')
115-
current_size = envelope
116-
for entry in entries:
117-
entry_size = len(json.dumps(entry)) + 1 # +1 for inter-entry comma
118-
if entry_size + envelope > _MAX_BATCH_BYTES:
119-
log.debug(f"log entry too large ({entry_size}B), dropped")
120-
continue
121-
if current and current_size + entry_size > _MAX_BATCH_BYTES:
122-
chunks.append(current)
123-
current = [entry]
124-
current_size = envelope + entry_size
125-
else:
126-
current.append(entry)
127-
current_size += entry_size
128-
if current:
129-
chunks.append(current)
130-
return chunks
131-
132-
133105
class UploadingLogHandler(logging.Handler):
134106
def __init__(self, uploader: BatchedLogUploader, context: str = "socket-python-cli"):
135107
super().__init__()

tests/unit/test_log_uploader.py

Lines changed: 0 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,8 @@
88
from socketsecurity.core.cli_client import CliClient
99
from socketsecurity.core.exceptions import APIFailure
1010
from socketsecurity.core.log_uploader import (
11-
_MAX_BATCH_BYTES,
1211
BatchedLogUploader,
1312
UploadingLogHandler,
14-
_chunk_by_size,
1513
)
1614
from socketsecurity.core.socket_config import SocketConfig
1715

@@ -140,49 +138,6 @@ def test_levels_map_correctly():
140138
assert levels == ["DEBUG", "INFO", "WARN", "ERROR", "ERROR"]
141139

142140

143-
def test_chunk_by_size_keeps_small_batches_intact():
144-
entries = [{"timestamp": "t", "level": "INFO", "message": "x", "context": "c"}] * 5
145-
chunks = _chunk_by_size(entries)
146-
assert len(chunks) == 1
147-
assert chunks[0] == entries
148-
149-
150-
def test_chunk_by_size_splits_when_exceeding_cap():
151-
big_msg = "y" * 1000
152-
entries = [
153-
{"timestamp": "2026-05-07 22:30:00.000", "level": "INFO",
154-
"message": big_msg, "context": "c"}
155-
for _ in range(500)
156-
]
157-
chunks = _chunk_by_size(entries)
158-
assert len(chunks) >= 2
159-
for chunk in chunks:
160-
size = len(json.dumps({"logs": chunk}))
161-
assert size <= _MAX_BATCH_BYTES
162-
assert sum(len(c) for c in chunks) == len(entries)
163-
164-
165-
def test_chunk_by_size_drops_single_oversize_entry():
166-
too_big = {"timestamp": "t", "level": "INFO",
167-
"message": "z" * (_MAX_BATCH_BYTES + 100), "context": "c"}
168-
ok = {"timestamp": "t", "level": "INFO", "message": "ok", "context": "c"}
169-
chunks = _chunk_by_size([ok, too_big, ok])
170-
flat = [e for c in chunks for e in c]
171-
assert flat == [ok, ok] # too_big dropped, smalls preserved
172-
173-
174-
def test_flush_chunks_oversize_buffer_into_multiple_posts():
175-
client = Mock(spec=CliClient)
176-
u = BatchedLogUploader(client, "run-c", flush_interval=10)
177-
big_msg = "y" * 1000
178-
for _ in range(500):
179-
u.add({"timestamp": "2026-05-07 22:30:00.000", "level": "INFO",
180-
"message": big_msg, "context": "c"})
181-
182-
u._flush()
183-
assert client.request.call_count >= 2 # split into multiple POSTs
184-
185-
186141
def test_run_thread_flushes_periodically_then_exits():
187142
client = Mock(spec=CliClient)
188143
u = BatchedLogUploader(client, "run-t", flush_interval=0.05)

0 commit comments

Comments
 (0)