cdeust
diff --git a/‎mcp_server/core/workflow_graph_builder_relational.py‎
Lines changed: 13 additions & 0 deletions b/‎mcp_server/core/workflow_graph_builder_relational.py‎
Lines changed: 13 additions & 0 deletions
diff --git a/‎mcp_server/core/workflow_graph_entity.py‎
Lines changed: 5 additions & 0 deletions b/‎mcp_server/core/workflow_graph_entity.py‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎mcp_server/core/workflow_graph_schema.py‎
Lines changed: 63 additions & 1 deletion b/‎mcp_server/core/workflow_graph_schema.py‎
Lines changed: 63 additions & 1 deletion
diff --git a/‎mcp_server/infrastructure/workflow_graph_source_ast.py‎
Lines changed: 49 additions & 7 deletions b/‎mcp_server/infrastructure/workflow_graph_source_ast.py‎
Lines changed: 49 additions & 7 deletions
diff --git a/‎mcp_server/server/http_standalone_graph.py‎
Lines changed: 16 additions & 1 deletion b/‎mcp_server/server/http_standalone_graph.py‎
Lines changed: 16 additions & 1 deletion
@@ -29,6 +29,7 @@
     ToolKind,
     WorkflowEdge,
     WorkflowNode,
+    edge_provenance_defaults,
 )
 from mcp_server.core.workflow_graph_schema_enums import PrimaryToolCluster
 
@@ -330,11 +331,15 @@ def ingest_symbol(b, sym: dict) -> None:
         line=sym.get("line"),
         path=str(file_path),
     )
+    # Gap 6: defaults from the central provenance table.
+    conf, reason = edge_provenance_defaults(EdgeKind.DEFINED_IN.value)
     b._edges.append(
         WorkflowEdge(
             source=sid,
             target=fid,
             kind=EdgeKind.DEFINED_IN,
+            confidence=conf,
+            reason=reason,
         )
     )
 
@@ -370,11 +375,19 @@ def ingest_ast_edge(b, edge: dict) -> None:
             return
         edge_kind = EdgeKind.CALLS if kind == "calls" else EdgeKind.MEMBER_OF
 
+    # Gap 6: central provenance defaults — single source of truth.
+    conf, reason = edge_provenance_defaults(
+        edge_kind.value,
+        ap_confidence=edge.get("confidence"),
+        ap_reason=edge.get("reason"),
+    )
     b._edges.append(
         WorkflowEdge(
             source=src_id,
             target=dst_id,
             kind=edge_kind,
+            confidence=conf,
+            reason=reason,
         )
     )
 
 
@@ -26,6 +26,7 @@
     NodeIdFactory,
     NodeKind,
     WorkflowEdge,
+    edge_provenance_defaults,
 )
 
 
@@ -82,11 +83,15 @@ def ingest_about_entity(b, link: dict) -> None:
     ent_id = NodeIdFactory.entity_id(ent_pg)
     if mem_id not in b._nodes or ent_id not in b._nodes:
         return
+    # Gap 6: shared provenance defaults.
+    conf, reason = edge_provenance_defaults(EdgeKind.ABOUT_ENTITY.value)
     b._edges.append(
         WorkflowEdge(
             source=mem_id,
             target=ent_id,
             kind=EdgeKind.ABOUT_ENTITY,
+            confidence=conf,
+            reason=reason,
         )
     )
 
 
@@ -83,7 +83,20 @@ class WorkflowNode(BaseModel):
 
 
 class WorkflowEdge(BaseModel):
-    """A directed edge in the workflow graph."""
+    """A directed edge in the workflow graph.
+
+    ``confidence`` (0.0–1.0) signals how trustworthy the edge is: 1.0 for
+    edges derived from direct AST facts (``defined_in``, ``member_of``),
+    ≤0.9 for heuristic resolution (unqualified call target), and lower
+    for inferred or cross-file edges. Callers that don't compute a
+    confidence leave the field ``None``.
+
+    ``reason`` is a short free-form tag describing WHY the edge was
+    emitted — e.g. ``"direct-ast"``, ``"import-scope-lookup"``,
+    ``"same-file-fallback"``, ``"heat-link"``. The renderer surfaces it
+    in the detail panel so a reader can tell a structural fact from a
+    statistical hint without opening the source.
+    """
 
     model_config = ConfigDict(extra="ignore", use_enum_values=True)
 
@@ -92,6 +105,11 @@ class WorkflowEdge(BaseModel):
     kind: EdgeKind
     weight: float = 1.0
     label: str | None = None
+    # ``ge=0.0, le=1.0`` is a contract the renderer relies on — any
+    # producer emitting a value outside [0, 1] is a bug and pydantic
+    # raises at construction so the drift is caught immediately.
+    confidence: float | None = Field(default=None, ge=0.0, le=1.0)
+    reason: str | None = None
 
 
 # ── Deterministic ID factory ───────────────────────────────────────────
@@ -162,6 +180,50 @@ def entity_id(pg_id: str | int) -> str:
         return f"entity:{pg_id}"
 
 
+# ── Edge provenance defaults (Gap 6) ──────────────────────────────────
+
+
+# Convention — NOT measured constants. Structural AST facts (symbol →
+# file definition, method-of-class containment) are ground-truth by
+# definition: the parser either sees them or it doesn't. ``about_entity``
+# links are materialised from the persisted ``memory_entities`` join
+# table so they are equally definitive. Heuristic edges (``calls`` /
+# ``imports``) carry the resolver's actual confidence score or ``None``
+# when AP didn't emit one.
+#
+# Shape: edge-kind string (the same strings callers pass, e.g.
+# ``"defined_in"``) → ``(confidence, reason)``. Kinds absent from this
+# table have no default; ``edge_provenance_defaults`` falls back to
+# ``(None, None)`` for them.
+_STRUCTURAL_DEFAULTS: dict[str, tuple[float, str]] = {
+    "defined_in": (1.0, "direct-ast"),
+    "member_of": (1.0, "direct-ast"),
+    "about_entity": (1.0, "memory-entities-link"),
+}
+
+
+def edge_provenance_defaults(
+    edge_kind: str,
+    ap_confidence: float | None = None,
+    ap_reason: str | None = None,
+) -> tuple[float | None, str | None]:
+    """Return the (confidence, reason) pair for an edge of ``edge_kind``.
+
+    ``edge_kind`` is normally the string value of an edge kind (e.g.
+    ``"defined_in"``). An enum member is accepted too: its ``.value``
+    is used for the lookup, so passing the member instead of
+    ``member.value`` cannot silently miss the defaults table.
+
+    Producer-supplied AP values win: a non-``None`` ``ap_confidence``
+    (``0.0`` included) and a non-empty ``ap_reason`` are preserved
+    verbatim. Otherwise the structural defaults in
+    ``_STRUCTURAL_DEFAULTS`` apply. Edges whose kind isn't in that
+    table (currently ``calls`` / ``imports``) keep ``None`` when AP
+    didn't annotate — they are heuristic and their absence of
+    confidence is itself information.
+
+    An empty-string ``ap_reason`` counts as "not supplied": it falls
+    back to the table default, or to ``None`` when the kind has no
+    default. The returned reason is therefore never ``""``, so the
+    builder path and the parallel inline path in
+    ``http_standalone_graph`` never disagree on its shape.
+    """
+    # Unwrap enum members: ``str()`` on a ``(str, Enum)`` member yields
+    # ``"ClassName.MEMBER"``, which would never match a table key.
+    kind_str = str(getattr(edge_kind, "value", edge_kind))
+    default_conf, default_reason = _STRUCTURAL_DEFAULTS.get(kind_str, (None, None))
+    # ``is not None`` (not truthiness) so an explicit 0.0 from AP survives.
+    confidence = ap_confidence if ap_confidence is not None else default_conf
+    # Trailing ``or None`` collapses any empty-string fallback to None.
+    reason = ap_reason or default_reason or None
+    return confidence, reason
+
+
 # ── Validation (meta-rules that decide well-formedness) ────────────────
 
 
 
@@ -537,15 +537,38 @@ def _match(file_part: str) -> bool:
                 for p in path_tails
             )
 
-        async def _run_edge(kind: str, table: str, src_lbl: str, dst_lbl: str):
+        async def _run_edge(
+            kind: str,
+            table: str,
+            src_lbl: str,
+            dst_lbl: str,
+            has_provenance: bool,
+        ):
+            """Query AP for edges of ``kind`` in ``table``.
+
+            ``has_provenance`` gates whether to fetch ``r.confidence`` +
+            ``r.resolution_method``: Kuzu raises a Binder exception on
+            missing-property access, so we only request those columns
+            for rel tables the AP resolver actually annotates (Calls_*
+            / Imports_* / Implements_* / Extends_* / Uses_*). Structural
+            tables (HasMethod_* / Defines_*) have no such columns —
+            callers default confidence to 1.0 for those kinds instead.
+            """
             if src_lbl == "File":
                 select_src = "src.id AS src_name"
             else:
                 select_src = "src.qualified_name AS src_name"
+            if has_provenance:
+                return_tail = (
+                    "       dst.qualified_name AS dst_name, "
+                    "       r.confidence       AS confidence, "
+                    "       r.resolution_method AS reason"
+                )
+            else:
+                return_tail = "       dst.qualified_name AS dst_name"
             query = (
-                f"MATCH (src:{src_lbl})-[:{table}]->(dst:{dst_lbl}) "
-                f"RETURN {select_src}, "
-                "       dst.qualified_name AS dst_name"
+                f"MATCH (src:{src_lbl})-[r:{table}]->(dst:{dst_lbl}) "
+                f"RETURN {select_src}, {return_tail}"
             )
             rows = await self._bridge.call(
                 "query_graph",
@@ -570,22 +593,41 @@ async def _run_edge(kind: str, table: str, src_lbl: str, dst_lbl: str):
                 else:
                     if not (_match(src_file) and _match(dst_file)):
                         continue
+                # AP stores ``resolution_method`` wrapped in literal
+                # single quotes (see ``automatised-pipeline``
+                # resolver.rs:183 — ``format!("'{method}'")``), so the
+                # value comes back INCLUDING quotes. Strip them here at
+                # the infrastructure boundary. Remove this strip once
+                # AP fixes the upstream quoting.
+                conf_raw = r.get("confidence") if has_provenance else None
+                try:
+                    confidence = float(conf_raw) if conf_raw is not None else None
+                except (TypeError, ValueError):
+                    confidence = None
+                reason_raw = r.get("reason") if has_provenance else None
+                reason_str = (
+                    str(reason_raw).strip("'\"") or None if reason_raw else None
+                )
                 out.append(
                     {
                         "kind": kind,
                         "src_file": src_file,
                         "src_name": src_qn,
                         "dst_file": dst_file,
                         "dst_name": dst,
+                        "confidence": confidence,
+                        "reason": reason_str,
                     }
                 )
 
         for s, d in calls_rels:
-            await _run_edge("calls", f"Calls_{s}_{d}", s, d)
+            await _run_edge("calls", f"Calls_{s}_{d}", s, d, has_provenance=True)
         for s, d in imports_rels:
-            await _run_edge("imports", f"Imports_{s}_{d}", s, d)
+            await _run_edge("imports", f"Imports_{s}_{d}", s, d, has_provenance=True)
         for s, d in member_rels:
-            await _run_edge("member_of", f"HasMethod_{s}_{d}", s, d)
+            await _run_edge(
+                "member_of", f"HasMethod_{s}_{d}", s, d, has_provenance=False
+            )
         return out
 
 
 
@@ -543,7 +543,10 @@ def _edges_for(node_ids: set[str]):
                 SYMBOL_COLOR_DEFAULT,
                 SYMBOL_COLORS,
             )
-            from mcp_server.core.workflow_graph_schema import NodeIdFactory
+            from mcp_server.core.workflow_graph_schema import (
+                NodeIdFactory,
+                edge_provenance_defaults,
+            )
             from mcp_server.infrastructure.ap_bridge import (
                 is_enabled as _ap_enabled,
                 resolve_graph_paths,
@@ -807,13 +810,17 @@ async def _load_with_timeout(gp_):
                     )
                     parent = file_id_by_path.get(fp)
                     if parent:
+                        # Gap 6: shared provenance defaults.
+                        di_conf, di_reason = edge_provenance_defaults("defined_in")
                         proj_edges.append(
                             {
                                 "source": sid,
                                 "target": parent,
                                 "kind": "defined_in",
                                 "type": "defined_in",
                                 "weight": 1.0,
+                                "confidence": di_conf,
+                                "reason": di_reason,
                             }
                         )
                 for e in edgs:
@@ -833,12 +840,20 @@ async def _load_with_timeout(gp_):
                         if not sf or not sn:
                             continue
                         sid = NodeIdFactory.symbol_id(sf, sn)
+                    # Gap 6: single source-of-truth defaults.
+                    conf, reason_v = edge_provenance_defaults(
+                        kind,
+                        ap_confidence=e.get("confidence"),
+                        ap_reason=e.get("reason"),
+                    )
                     edge = {
                         "source": sid,
                         "target": did,
                         "kind": kind,
                         "type": kind,
                         "weight": 1.0,
+                        "confidence": conf,
+                        "reason": reason_v,
                     }
                     # Intra-project iff both endpoints (where they are
                     # symbols) belong to THIS project. For `imports`