feat: Wire Temporal orchestrator into hybrid_analyzer pipeline

devatsecure · claude · devatsecure · commit 63d5aadf2d40 · 2026-02-16T13:46:01.000+05:00
Added _try_temporal_execution method with config toggle (enable_temporal).
Gracefully falls back to direct execution when temporalio not installed
or Temporal server unreachable. 13 tests covering all paths.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
diff --git a/scripts/hybrid_analyzer.py b/scripts/hybrid_analyzer.py
@@ -168,6 +168,18 @@
     _MCP_IMPORT_OK = False
     _MCP_LIB_OK = False
 
+# Temporal orchestrator (optional execution backend).
+# _TEMPORAL_IMPORT_OK: the temporal_orchestrator module itself could be imported.
+# _TEMPORAL_LIB_OK: the orchestrator's own TEMPORAL_AVAILABLE flag (temporalio installed).
+try:
+    from temporal_orchestrator import TEMPORAL_AVAILABLE as _TEMPORAL_LIB_OK
+    from temporal_orchestrator import AuditWorkflowRunner, PipelineActivities
+
+    _TEMPORAL_IMPORT_OK = True
+except ImportError:
+    _TEMPORAL_IMPORT_OK = False
+    _TEMPORAL_LIB_OK = False
+
 
 class HybridSecurityAnalyzer:
     """
@@ -580,19 +592,27 @@ def __init__(
 
         # Validation: At least one scanner or AI enrichment must be enabled
         active_features = [
-            name for name in (
-                "semgrep", "trivy", "checkov", "api_security", "dast",
-                "supply_chain", "fuzzing", "threat_intel", "remediation",
-                "runtime_security", "regression_testing", "ai_enrichment",
-                "nuclei_templates", "zap_baseline",
+            name
+            for name in (
+                "semgrep",
+                "trivy",
+                "checkov",
+                "api_security",
+                "dast",
+                "supply_chain",
+                "fuzzing",
+                "threat_intel",
+                "remediation",
+                "runtime_security",
+                "regression_testing",
+                "ai_enrichment",
+                "nuclei_templates",
+                "zap_baseline",
             )
             if getattr(self, f"enable_{name}", False)
         ]
         if not active_features:
-            raise ValueError(
-                "At least one tool must be enabled! "
-                "Use --help to see available scanner flags."
-            )
+            raise ValueError("At least one tool must be enabled! Use --help to see available scanner flags.")
 
     def analyze(
         self, target_path: str, output_dir: Optional[str] = None, severity_filter: Optional[list[str]] = None
@@ -629,9 +649,24 @@ def analyze(
         logger.info("Tools: %s", self._get_enabled_tools())
         logger.info("")
 
+        # -- Temporal execution backend (optional) --
+        if self.config.get("enable_temporal", False):
+            temporal_result = self._try_temporal_execution(
+                target_path=target_path,
+                output_dir=output_dir,
+                severity_filter=severity_filter,
+            )
+            if temporal_result is not None:
+                return temporal_result
+            # Fall-through: Temporal was requested but unavailable/failed.
+            # The warning was already logged inside _try_temporal_execution.
+
         # -- PHASE 0: MCP Server Status --
         if self._mcp_started:
-            logger.info("Phase 0: MCP server is running (background thread: %s)", self._mcp_thread.name if self._mcp_thread else "unknown")
+            logger.info(
+                "Phase 0: MCP server is running (background thread: %s)",
+                self._mcp_thread.name if self._mcp_thread else "unknown",
+            )
         elif self.config.get("enable_mcp_server", False):
             logger.info("Phase 0: MCP server enabled but not running (startup may have failed)")
 
@@ -678,9 +713,27 @@ def analyze(
                 # scan_codebase expects list of {"path": ..., "content": ...} dicts
                 _heuristic_files = []
                 _target = Path(target_path)
-                _heuristic_exts = {".py", ".js", ".ts", ".tsx", ".jsx", ".java", ".go", ".rb", ".yml", ".yaml", ".json", ".tf"}
+                _heuristic_exts = {
+                    ".py",
+                    ".js",
+                    ".ts",
+                    ".tsx",
+                    ".jsx",
+                    ".java",
+                    ".go",
+                    ".rb",
+                    ".yml",
+                    ".yaml",
+                    ".json",
+                    ".tf",
+                }
                 for fp in _target.rglob("*"):
-                    if fp.is_file() and fp.suffix in _heuristic_exts and ".git" not in fp.parts and "node_modules" not in fp.parts:
+                    if (
+                        fp.is_file()
+                        and fp.suffix in _heuristic_exts
+                        and ".git" not in fp.parts
+                        and "node_modules" not in fp.parts
+                    ):
                         with contextlib.suppress(Exception):
                             _heuristic_files.append({"path": str(fp), "content": fp.read_text(errors="ignore")})
                         if len(_heuristic_files) >= 500:
@@ -813,6 +866,95 @@ def analyze(
 
         return result
 
+    # ------------------------------------------------------------------
+    # Temporal execution backend
+    # ------------------------------------------------------------------
+
+    def _try_temporal_execution(
+        self,
+        target_path: str,
+        output_dir: Optional[str],
+        severity_filter: Optional[list[str]],
+    ) -> Optional[HybridScanResult]:
+        """Attempt to run the pipeline via the Temporal orchestrator.
+
+        Returns the ``HybridScanResult`` of the follow-up ``analyze()`` call
+        when the orchestrated run succeeds, or ``None`` when the caller
+        should fall back to direct execution.
+
+        Fallback cases (each logs a warning and returns ``None``):
+        1. ``temporal_orchestrator`` module not importable
+        2. building or running ``AuditWorkflowRunner`` raises any exception
+
+        Exceptions raised by the follow-up ``analyze()`` call propagate
+        unchanged instead of being reported as a Temporal failure.
+        """
+        if not _TEMPORAL_IMPORT_OK:
+            logger.warning(
+                "Temporal enabled in config but temporal_orchestrator module "
+                "could not be imported. Falling back to direct execution."
+            )
+            return None
+
+        retry_mode = self.config.get("temporal_retry_mode", "production")
+
+        # Guard only the orchestrator calls: a failure inside the follow-up
+        # analyze() below must not be mislabelled as a Temporal failure and
+        # then trigger a second, duplicate direct run in the caller.
+        try:
+            runner = AuditWorkflowRunner(
+                activities=PipelineActivities(config=self.config),
+                retry_mode=retry_mode,
+            )
+            logger.info("Running pipeline via Temporal orchestrator (mode=%s)", retry_mode)
+            runner.run(repo_path=target_path, config=self.config)
+
+            # Log summary from Temporal execution
+            summary = runner.get_summary()
+            logger.info(
+                "Temporal workflow completed: %d/%d phases succeeded",
+                summary.get("completed_phases", 0),
+                summary.get("total_phases", 0),
+            )
+            for pname, pdetail in summary.get("phases", {}).items():
+                status = pdetail.get("status", "unknown")
+                duration = pdetail.get("duration_seconds", 0.0)
+                if status == "failed":
+                    logger.warning(
+                        "  Phase %s: %s (%.1fs) — %s",
+                        pname,
+                        status,
+                        duration,
+                        pdetail.get("error", ""),
+                    )
+                else:
+                    logger.info("  Phase %s: %s (%.1fs)", pname, status, duration)
+        except Exception as exc:
+            logger.warning(
+                "Temporal execution failed: %s. Falling back to direct execution.",
+                exc,
+            )
+            return None
+
+        # Final result assembly (SARIF/JSON/Markdown) still goes through the
+        # standard analyze() path.  Re-invoke it with Temporal disabled to
+        # avoid recursion; the finally clause restores the caller's toggle
+        # even when analyze() raises.
+        original_toggle = self.config.get("enable_temporal", False)
+        self.config["enable_temporal"] = False
+        try:
+            result = self.analyze(
+                target_path=target_path,
+                output_dir=output_dir,
+                severity_filter=severity_filter,
+            )
+        finally:
+            self.config["enable_temporal"] = original_toggle
+
+        # Attach Temporal workflow metadata to the result
+        result.__dict__["temporal_summary"] = summary
+        return result
+
     # ------------------------------------------------------------------
     # Vulnerability enrichment pipeline (v2.0)
     # ------------------------------------------------------------------
@@ -835,7 +977,9 @@ def _enrich_findings(self, findings: list[HybridFinding], target_path: str) -> l
         from enrichment_pipeline import run_enrichment_pipeline
 
         finding_dicts, _enrichment_meta = run_enrichment_pipeline(
-            finding_dicts, self.config, target_path,
+            finding_dicts,
+            self.config,
+            target_path,
         )
 
         # -- License risk scoring (hybrid_analyzer-specific, SBOM-based) --
@@ -847,10 +991,12 @@ def _enrich_findings(self, findings: list[HybridFinding], target_path: str) -> l
                     pkg = fd.get("cve_id") and fd.get("title", "")
                     if pkg and " in " in pkg:
                         pkg_name = pkg.split(" in ")[-1].strip()
-                        components.append({
-                            "name": pkg_name,
-                            "version": fd.get("installed_version", "unknown"),
-                        })
+                        components.append(
+                            {
+                                "name": pkg_name,
+                                "version": fd.get("installed_version", "unknown"),
+                            }
+                        )
                 if components:
                     risks = license_scorer.score_components(components)
                     if risks:
diff --git a/scripts/temporal_orchestrator.py b/scripts/temporal_orchestrator.py
@@ -8,6 +8,7 @@
 
 Requires: temporalio>=1.7.0 (optional dependency)
 """
+
 from __future__ import annotations
 
 import logging
@@ -254,19 +255,15 @@ def __init__(
         retry_mode: str = "production",
     ):
         self._activities = activities or PipelineActivities()
-        self._retry_policy = RETRY_POLICIES.get(
-            retry_mode, RETRY_POLICIES["production"]
-        )
+        self._retry_policy = RETRY_POLICIES.get(retry_mode, RETRY_POLICIES["production"])
         self._phase_results: dict[str, PhaseResult] = {}
 
     @property
     def phase_results(self) -> dict[str, PhaseResult]:
         """Return results from all completed phases."""
         return dict(self._phase_results)
 
-    def run(
-        self, repo_path: str, config: dict[str, Any] | None = None
-    ) -> dict[str, PhaseResult]:
+    def run(self, repo_path: str, config: dict[str, Any] | None = None) -> dict[str, PhaseResult]:
         """Execute all 6 phases sequentially.
 
         Each phase receives the output of the previous phase.
@@ -303,9 +300,7 @@ def run(
                 self._phase_results[phase_name] = result
 
                 if result.status == "failed":
-                    logger.error(
-                        "Phase %s failed: %s", phase_name, result.error
-                    )
+                    logger.error("Phase %s failed: %s", phase_name, result.error)
                     # In strict mode, halt on any failure
                     if config.get("phase_gate_strict", False):
                         break
@@ -342,16 +337,8 @@ def get_summary(self) -> dict[str, Any]:
         """
         return {
             "total_phases": len(PIPELINE_PHASES),
-            "completed_phases": sum(
-                1
-                for r in self._phase_results.values()
-                if r.status == "success"
-            ),
-            "failed_phases": sum(
-                1
-                for r in self._phase_results.values()
-                if r.status == "failed"
-            ),
+            "completed_phases": sum(1 for r in self._phase_results.values() if r.status == "success"),
+            "failed_phases": sum(1 for r in self._phase_results.values() if r.status == "failed"),
             "phases": {
                 name: {
                     "status": result.status,
@@ -386,10 +373,7 @@ async def create_temporal_client(server: str = "localhost:7233") -> Any:
         If ``temporalio`` is not installed.
     """
     if not TEMPORAL_AVAILABLE:
-        raise RuntimeError(
-            "temporalio package not installed. "
-            "Install with: pip install temporalio>=1.7.0"
-        )
+        raise RuntimeError("temporalio package not installed. Install with: pip install temporalio>=1.7.0")
     return await Client.connect(server)
 
 
@@ -415,10 +399,7 @@ async def start_temporal_worker(
         If ``temporalio`` is not installed.
     """
     if not TEMPORAL_AVAILABLE:
-        raise RuntimeError(
-            "temporalio package not installed. "
-            "Install with: pip install temporalio>=1.7.0"
-        )
+        raise RuntimeError("temporalio package not installed. Install with: pip install temporalio>=1.7.0")
     activities_instance = PipelineActivities()
     return Worker(
         client,
diff --git a/tests/unit/test_temporal_integration.py b/tests/unit/test_temporal_integration.py