Skip to content

Commit a71cb31

Browse files
lmeyerov and claude committed
test(gfql): cover LP1 explain diagnostics fall-back branches
Raise changed-line coverage on the gfql_explain diagnostics (LP1) from 76.7% to 91.4% (CI gate 80%): add tests for the cost-gate and not-index-coverable scan-fallback decision reasons, and a direct robustness test for the _seed_id_array / _seed_deg_sum helpers (degrade to None, never crash, since they run under the explain trace). Also drop _bail's unused `extra` parameter (dead code).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent ad7673a commit a71cb31

2 files changed

Lines changed: 54 additions & 5 deletions

File tree

graphistry/compute/gfql/index/api.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -334,12 +334,9 @@ def maybe_index_hop(
334334
except Exception:
335335
pass
336336

337-
def _bail(reason: str, extra: Optional[dict] = None) -> Optional[Plottable]:
337+
def _bail(reason: str) -> Optional[Plottable]:
338338
if trace:
339-
rec = {**diag, "path": "scan", "decision_reason": reason}
340-
if extra:
341-
rec.update(extra)
342-
_record(rec)
339+
_record({**diag, "path": "scan", "decision_reason": reason})
343340
return None
344341

345342
if policy == "off":

graphistry/tests/compute/gfql/index/test_index.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,58 @@ def test_explain_exposes_planner_diagnostics(graph, engine):
161161
assert rep_off["decision_reason"] == "policy=off", rep_off
162162

163163

164+
def test_seed_diagnostic_helpers_are_robust():
165+
"""LP1 helpers degrade to None instead of crashing on odd inputs (they run under
166+
the explain trace and must never take down a real query)."""
167+
from graphistry.compute.gfql.index.api import _seed_id_array, _seed_deg_sum
168+
169+
class _Col: # a seed column with .values but no .to_numpy() (fallback branch)
170+
values = np.array([1, 2, 3])
171+
172+
class _Frame:
173+
def __getitem__(self, k):
174+
return _Col()
175+
176+
assert list(_seed_id_array(_Frame(), "id")) == [1, 2, 3]
177+
assert _seed_id_array(None, "id") is None # nodes[None] raises → None, not a crash
178+
179+
class _BadIdx: # missing keys_sorted/group_offsets → None, not AttributeError
180+
pass
181+
182+
assert _seed_deg_sum(_BadIdx(), np.array([0, 1])) is None
183+
184+
185+
@pytest.mark.parametrize("engine", ENGINES)
186+
def test_explain_decision_reasons_for_scan_fallbacks(engine):
187+
"""LP1: when the planner declines the index it records *why*, so a silent scan is
188+
diagnosable. Covers the two fall-back branches: (a) a frontier past the cost gate,
189+
(b) a query the index doesn't cover (include_zero_hop_seed=True; pandas engine only)."""
190+
from graphistry.compute.gfql.index import index_trace
191+
rng = np.random.default_rng(2)
192+
N, deg = 1000, 6
193+
edf = pd.DataFrame({"src": rng.integers(0, N, N * deg), "dst": rng.integers(0, N, N * deg)})
194+
ndf = pd.DataFrame({"id": np.arange(N)})
195+
g = graphistry.nodes(ndf, "id").edges(edf, "src", "dst")
196+
gi = g.gfql_index_all(engine=engine)
197+
198+
# (a) cost-gate fallback: frontier = all keys >> frac*n_keys → scan, with a reason
199+
allseeds = pd.DataFrame({"id": np.arange(N, dtype=np.int64)})
200+
with index_trace() as steps:
201+
gi.hop(nodes=allseeds, engine=engine, hops=1, direction="forward")
202+
assert any("scan cheaper" in (s.get("decision_reason") or "") for s in steps), (engine, steps)
203+
assert not any(s.get("path") == "index" for s in steps), (engine, steps)
204+
205+
# (b) not-coverable fallback: a feature outside the index fast path (zero-hop seed).
206+
# pandas-only: the Phase-1 polars hop rejects these features at its own engine layer
207+
# before the index planner is consulted, so the index "not-coverable" bail is
208+
# reachable via the pandas hop path here.
209+
if engine == "pandas":
210+
few = pd.DataFrame({"id": np.arange(4, dtype=np.int64)})
211+
with index_trace() as steps2:
212+
gi.hop(nodes=few, engine=engine, hops=1, direction="forward", include_zero_hop_seed=True)
213+
assert any(s.get("decision_reason") == "query not index-coverable" for s in steps2), (engine, steps2)
214+
215+
164216
@pytest.mark.parametrize("engine", ENGINES)
165217
def test_cost_gate_engine_aware_never_loses_to_scan(engine):
166218
"""F1: the index-vs-scan crossover depends on scan speed, so the cost gate

0 commit comments

Comments
 (0)