test(gfql): cover LP1 explain diagnostics fall-back branches

lmeyerov · claude · lmeyerov · commit a71cb31e3ba8 · 2026-07-02T16:38:26.000-07:00
Raise changed-line coverage on the gfql_explain diagnostics (LP1) from
76.7% to 91.4% (CI gate 80%): add tests for the cost-gate and
not-index-coverable scan-fallback decision reasons, and a direct
robustness test for the _seed_id_array / _seed_deg_sum helpers (degrade
to None, never crash, since they run under the explain trace). Also drop
_bail's unused `extra` parameter (dead code).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
diff --git a/graphistry/compute/gfql/index/api.py b/graphistry/compute/gfql/index/api.py
@@ -334,12 +334,9 @@ def maybe_index_hop(
         except Exception:
             pass
 
-    def _bail(reason: str, extra: Optional[dict] = None) -> Optional[Plottable]:
+    def _bail(reason: str) -> Optional[Plottable]:
         if trace:
-            rec = {**diag, "path": "scan", "decision_reason": reason}
-            if extra:
-                rec.update(extra)
-            _record(rec)
+            _record({**diag, "path": "scan", "decision_reason": reason})
         return None
 
     if policy == "off":
diff --git a/graphistry/tests/compute/gfql/index/test_index.py b/graphistry/tests/compute/gfql/index/test_index.py
@@ -161,6 +161,58 @@ def test_explain_exposes_planner_diagnostics(graph, engine):
     assert rep_off["decision_reason"] == "policy=off", rep_off
 
 
+def test_seed_diagnostic_helpers_are_robust():
+    """LP1 helpers degrade to None instead of crashing on odd inputs (they run under
+    the explain trace and must never take down a real query)."""
+    from graphistry.compute.gfql.index.api import _seed_id_array, _seed_deg_sum
+
+    class _Col:  # a seed column with .values but no .to_numpy() (fallback branch)
+        values = np.array([1, 2, 3])
+
+    class _Frame:
+        def __getitem__(self, k):
+            return _Col()
+
+    assert list(_seed_id_array(_Frame(), "id")) == [1, 2, 3]
+    assert _seed_id_array(None, "id") is None  # nodes[None] raises → None, not a crash
+
+    class _BadIdx:  # missing keys_sorted/group_offsets → None, not AttributeError
+        pass
+
+    assert _seed_deg_sum(_BadIdx(), np.array([0, 1])) is None
+
+
+@pytest.mark.parametrize("engine", ENGINES)
+def test_explain_decision_reasons_for_scan_fallbacks(engine):
+    """LP1: when the planner declines the index it records *why*, so a silent scan is
+    diagnosable. Covers the two fall-back branches: (a) a frontier past the cost gate,
+    (b) a query the index doesn't cover (include_zero_hop_seed=True; pandas only)."""
+    from graphistry.compute.gfql.index import index_trace
+    rng = np.random.default_rng(2)
+    N, deg = 1000, 6
+    edf = pd.DataFrame({"src": rng.integers(0, N, N * deg), "dst": rng.integers(0, N, N * deg)})
+    ndf = pd.DataFrame({"id": np.arange(N)})
+    g = graphistry.nodes(ndf, "id").edges(edf, "src", "dst")
+    gi = g.gfql_index_all(engine=engine)
+
+    # (a) cost-gate fallback: frontier = all keys >> frac*n_keys → scan, with a reason
+    allseeds = pd.DataFrame({"id": np.arange(N, dtype=np.int64)})
+    with index_trace() as steps:
+        gi.hop(nodes=allseeds, engine=engine, hops=1, direction="forward")
+    assert any("scan cheaper" in (s.get("decision_reason") or "") for s in steps), (engine, steps)
+    assert not any(s.get("path") == "index" for s in steps), (engine, steps)
+
+    # (b) not-coverable fallback: a feature outside the index fast path (zero-hop seed).
+    # pandas-only: the Phase-1 polars hop rejects these features at its own engine layer
+    # before the index planner is consulted, so the index "not-coverable" bail is
+    # only reachable via the pandas hop path here.
+    if engine == "pandas":
+        few = pd.DataFrame({"id": np.arange(4, dtype=np.int64)})
+        with index_trace() as steps2:
+            gi.hop(nodes=few, engine=engine, hops=1, direction="forward", include_zero_hop_seed=True)
+        assert any(s.get("decision_reason") == "query not index-coverable" for s in steps2), (engine, steps2)
+
+
 @pytest.mark.parametrize("engine", ENGINES)
 def test_cost_gate_engine_aware_never_loses_to_scan(engine):
     """F1: the index-vs-scan crossover depends on scan speed, so the cost gate