From 1da4ee36393fcdf028dd718b7e23d5ff5b92a786 Mon Sep 17 00:00:00 2001 From: Paddy Mullen Date: Wed, 20 May 2026 23:16:34 -0400 Subject: [PATCH 1/2] test(server): failing test for xorq infinite_request traceback leak MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit #798: ``handle_infinite_request_xorq`` returns ``traceback.format_exc()`` in the ``error_info`` field unconditionally — leaking server source paths and stack frames to WS clients. The pandas-path handler in ``websocket_handler.py`` correctly gates this on ``BUCKAROO_DEBUG``. Test triggers the exception path with a sort key that doesn't exist in ``merged_sd`` (raises ``KeyError``), then asserts ``error_info`` does not start with ``"Traceback"``. Fails today; will pass once the xorq handler gates the traceback behind ``BUCKAROO_DEBUG``. Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/unit/server/test_load_expr.py | 40 +++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/tests/unit/server/test_load_expr.py b/tests/unit/server/test_load_expr.py index e4d6cc9db..366310dd7 100644 --- a/tests/unit/server/test_load_expr.py +++ b/tests/unit/server/test_load_expr.py @@ -141,6 +141,46 @@ async def test_ws_search_pushdown(self): finally: shutil.rmtree(builds_root, ignore_errors=True) + @tornado.testing.gen_test + async def test_xorq_infinite_request_error_info_no_traceback(self): + """Regression for #798: the xorq path put ``traceback.format_exc()`` + into ``error_info`` unconditionally — leaking server-side source + paths to WS clients. Pandas path gates this behind + ``BUCKAROO_DEBUG``; xorq must too. + + Trigger the exception with a sort column that doesn't exist in + ``merged_sd`` — raises ``KeyError`` inside + ``handle_infinite_request_xorq``. + """ + builds_root = tempfile.mkdtemp() + try: + build_path = _build_expr_dir(builds_root) + await _post(self.get_http_port(), "/load_expr", + {"session": "lx-leak", "build_dir": build_path}) + + ws = await tornado.websocket.websocket_connect( + f"ws://localhost:{self.get_http_port()}/ws/lx-leak") + await ws.read_message() # discard initial_state + + ws.write_message(json.dumps({ + "type": "infinite_request", + "payload_args": {"start": 0, "end": 5, + "sort": "nonexistent_col", "sort_direction": "asc", + "sourceName": "default", "origEnd": 5}})) + + r = json.loads(await ws.read_message()) + self.assertEqual(r["type"], "infinite_resp") + self.assertIn("error_info", r) + # The bug: pre-fix this carries a full traceback starting + # with "Traceback (most recent call last):\n File ...". + # Production runs shouldn't leak source paths to clients. + self.assertFalse(r["error_info"].startswith("Traceback"), + f"error_info leaked traceback to client (first 200 chars): " + f"{r['error_info'][:200]!r}") + ws.close() + finally: + shutil.rmtree(builds_root, ignore_errors=True) + @tornado.testing.gen_test async def test_session_reuse_xorq_then_pandas(self): """A client that POSTs /load_expr and then POSTs /load with the From e141d7a7ad01a4a51d66a83761c9fa69e6a82390 Mon Sep 17 00:00:00 2001 From: Paddy Mullen Date: Wed, 20 May 2026 23:17:55 -0400 Subject: [PATCH 2/2] fix(server): gate xorq infinite_request error_info on BUCKAROO_DEBUG MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes #798. ``handle_infinite_request_xorq`` was returning ``traceback.format_exc()`` in the response's ``error_info`` field unconditionally, leaking server-side source paths and stack frames to WS clients in any production deployment. This patch mirrors the pandas WS-handler pattern at ``websocket_handler.py:241-245``: log the full traceback server-side (at ERROR level), but ship a generic ``"Request failed"`` to the client unless ``BUCKAROO_DEBUG=1`` is set. Pre-fix, the operator running a non-debug server *also* had no record of the error (the xorq handler didn't log). This patch fixes that gap too — same ``log.error`` shape as the pandas path. Test from the previous commit now passes; full ``test_load_expr.py`` suite green. Co-Authored-By: Claude Opus 4.7 (1M context) --- buckaroo/server/xorq_loading.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/buckaroo/server/xorq_loading.py b/buckaroo/server/xorq_loading.py index a6d32dfa5..f90852a95 100644 --- a/buckaroo/server/xorq_loading.py +++ b/buckaroo/server/xorq_loading.py @@ -8,6 +8,8 @@ """ from __future__ import annotations +import logging +import os import traceback from buckaroo.xorq_buckaroo import ( @@ -15,6 +17,12 @@ XorqInfiniteSampling, _XORQ_ANALYSIS_KLASSES, _expr_count, window_to_parquet) +# Mirrors ``websocket_handler._BUCKAROO_DEBUG`` — when set, error_info +# carries the full traceback for local debugging. Without it, clients see +# a generic message so source paths and stack frames don't leak. +_BUCKAROO_DEBUG = os.environ.get("BUCKAROO_DEBUG", "").lower() in ("1", "true") +log = logging.getLogger("buckaroo.server.xorq_loading") + class XorqServerDataflow(XorqDataflow): """Headless XorqDataflow with infinite sampling. @@ -87,5 +95,11 @@ def handle_infinite_request_xorq(xorq_dataflow: XorqServerDataflow, return ({"type": "infinite_resp", "key": payload_args, "data": [], "length": total_length}, parquet_bytes) except Exception: + tb = traceback.format_exc() + log.error("xorq infinite_request error: %s", tb) + # Mirrors the pandas-path gate in websocket_handler.py — clients + # in production runs see a generic message; only ``BUCKAROO_DEBUG`` + # opens the source-leak channel. return ({"type": "infinite_resp", "key": payload_args, "data": [], - "length": 0, "error_info": traceback.format_exc()}, b"") + "length": 0, + "error_info": tb if _BUCKAROO_DEBUG else "Request failed"}, b"")