Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion buckaroo/server/xorq_loading.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,21 @@
"""
from __future__ import annotations

import logging
import os
import traceback

from buckaroo.xorq_buckaroo import (
NoCleaningConfXorq, XorqAutocleaning, XorqDataflow, XorqDfStatsV2,
XorqInfiniteSampling, _XORQ_ANALYSIS_KLASSES, _expr_count,
window_to_parquet)

# Module logger; the failure path of the infinite-request handler in this
# file writes the full traceback through it.
log = logging.getLogger("buckaroo.server.xorq_loading")

# Debug switch kept in step with ``websocket_handler._BUCKAROO_DEBUG``.
# Only when BUCKAROO_DEBUG is "1" or "true" (case-insensitive) does
# ``error_info`` carry the full traceback; otherwise clients get a generic
# message so source paths and stack frames are not exposed.
_BUCKAROO_DEBUG = os.getenv("BUCKAROO_DEBUG", "").lower() in ("1", "true")


class XorqServerDataflow(XorqDataflow):
"""Headless XorqDataflow with infinite sampling.
Expand Down Expand Up @@ -87,5 +95,11 @@ def handle_infinite_request_xorq(xorq_dataflow: XorqServerDataflow,
return ({"type": "infinite_resp", "key": payload_args, "data": [],
"length": total_length}, parquet_bytes)
except Exception:
tb = traceback.format_exc()
log.error("xorq infinite_request error: %s", tb)
# Mirrors the pandas-path gate in websocket_handler.py — clients
# in production runs see a generic message; only ``BUCKAROO_DEBUG``
# opens the source-leak channel.
return ({"type": "infinite_resp", "key": payload_args, "data": [],
"length": 0, "error_info": traceback.format_exc()}, b"")
"length": 0,
"error_info": tb if _BUCKAROO_DEBUG else "Request failed"}, b"")
40 changes: 40 additions & 0 deletions tests/unit/server/test_load_expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,46 @@ async def test_ws_search_pushdown(self):
finally:
shutil.rmtree(builds_root, ignore_errors=True)

@tornado.testing.gen_test
async def test_xorq_infinite_request_error_info_no_traceback(self):
    """Regression for #798: the xorq path put ``traceback.format_exc()``
    into ``error_info`` unconditionally — leaking server-side source
    paths to WS clients. Pandas path gates this behind
    ``BUCKAROO_DEBUG``; xorq must too.

    Trigger the exception with a sort column that doesn't exist in
    ``merged_sd`` — raises ``KeyError`` inside
    ``handle_infinite_request_xorq``.

    The module-level debug flag is pinned to ``False`` for the duration
    of the test so the outcome does not depend on whether the developer
    running the suite has ``BUCKAROO_DEBUG`` exported.
    """
    # Local import: only this test needs mock, and the rest of the file's
    # import block is not visible from here.
    from unittest import mock

    builds_root = tempfile.mkdtemp()
    try:
        build_path = _build_expr_dir(builds_root)
        await _post(self.get_http_port(), "/load_expr",
                    {"session": "lx-leak", "build_dir": build_path})

        # The flag is read as a module global at call time, so patching
        # the attribute forces the production (non-debug) branch.
        with mock.patch(
                "buckaroo.server.xorq_loading._BUCKAROO_DEBUG", False):
            ws = await tornado.websocket.websocket_connect(
                f"ws://localhost:{self.get_http_port()}/ws/lx-leak")
            try:
                await ws.read_message()  # discard initial_state

                ws.write_message(json.dumps({
                    "type": "infinite_request",
                    "payload_args": {"start": 0, "end": 5,
                                     "sort": "nonexistent_col",
                                     "sort_direction": "asc",
                                     "sourceName": "default",
                                     "origEnd": 5}}))

                r = json.loads(await ws.read_message())
                self.assertEqual(r["type"], "infinite_resp")
                self.assertIn("error_info", r)
                # The bug: pre-fix this carries a full traceback starting
                # with "Traceback (most recent call last):\n  File ...".
                # Production runs shouldn't leak source paths to clients.
                # Check containment rather than the prefix so a traceback
                # embedded after any leading text is caught as well.
                self.assertNotIn(
                    "Traceback (most recent call last)", r["error_info"],
                    f"error_info leaked traceback to client (first 200 chars): "
                    f"{r['error_info'][:200]!r}")
            finally:
                # Close even when an assertion above fails, so a failing
                # run does not leave the connection open.
                ws.close()
    finally:
        shutil.rmtree(builds_root, ignore_errors=True)

@tornado.testing.gen_test
async def test_session_reuse_xorq_then_pandas(self):
"""A client that POSTs /load_expr and then POSTs /load with the
Expand Down
Loading