Merge branch 'develop' into feature/implement-hamilton-framework-with-sdlc-11-10-26

2-Coatl · web-flow · commit d6c12438369e · 2025-11-13T05:14:16.000-06:00
diff --git a/scripts/coding/ai/examples/hamilton_llm/__init__.py b/scripts/coding/ai/examples/hamilton_llm/__init__.py
@@ -0,0 +1,12 @@
+"""Hamilton-inspired LLM pipeline example for the IACT project."""
+
+from . import dataflow
+from .driver import HamiltonDriver, MissingDependencyError
+from .llm_client import MockLLMClient
+
+# Names re-exported by the package: the dataflow module, the driver together
+# with its error type, and the mock LLM client.
+__all__ = [
+    "dataflow",
+    "HamiltonDriver",
+    "MissingDependencyError",
+    "MockLLMClient",
+]
diff --git a/scripts/coding/ai/examples/hamilton_llm/dataflow.py b/scripts/coding/ai/examples/hamilton_llm/dataflow.py
@@ -0,0 +1,128 @@
+"""Declarative dataflow modeling the Data → Prompt → LLM → $ pipeline.
+
+The module captures the pace differences between traditional ML applications
+and LLM applications, highlighting that both require strong software
+engineering skills. Each function follows the Hamilton paradigm: the name is
+the output and the arguments are the explicit dependencies.
+"""
+
+from __future__ import annotations
+
+from typing import Any, Dict, List
+
+from .llm_client import MockLLMClient
+
+# Ordered phase names for each application style. The two lists differ only in
+# their fourth entry ("Model Development" vs "Prompt / Model Development").
+PACE_OF_DEVELOPMENT: Dict[str, List[str]] = {
+    "traditional_ml": [
+        "Idea & Data/Resources",
+        "Design",
+        "Development/Prototype",
+        "Model Development",
+        "Getting to Production",
+        "Operations",
+        "Maintenance & Business Value",
+    ],
+    "llm_apps": [
+        "Idea & Data/Resources",
+        "Design",
+        "Development/Prototype",
+        "Prompt / Model Development",
+        "Getting to Production",
+        "Operations",
+        "Maintenance & Business Value",
+    ],
+}
+
+# Human-readable label for the pipeline stages modeled by this module.
+DATAFLOW_LABEL = "Data → Prompt → LLM → $"
+
+
+def pace_of_development() -> Dict[str, List[str]]:
+    """Return the canonical phase ordering for traditional ML and LLM apps.
+
+    Returns:
+        A new dictionary with the same keys and phase names as
+        ``PACE_OF_DEVELOPMENT``. Both the dictionary and its lists are fresh
+        objects on every call.
+    """
+
+    # Copy both levels: the result is handed to other nodes and to callers, and
+    # an in-place edit there must not corrupt the shared module constant.
+    return {style: list(phases) for style, phases in PACE_OF_DEVELOPMENT.items()}
+
+
+def prompt_template(
+    idea: str,
+    domain_data: Dict[str, str],
+    pace_of_development: Dict[str, List[str]],
+) -> str:
+    """Build the base prompt contrasting both development paces.
+
+    Args:
+        idea: Goal inserted into the final "Goal: deliver ..." line.
+        domain_data: Must contain the keys ``"business_process"``, ``"ui"``
+            and ``"data"``.
+        pace_of_development: Must contain the keys ``"traditional_ml"`` and
+            ``"llm_apps"``, each mapping to an ordered list of phase names.
+
+    Returns:
+        A multi-line prompt in which every line ends with a newline.
+
+    Raises:
+        KeyError: If any of the required keys is missing.
+    """
+
+    arrow = " → "
+    traditional_pace = arrow.join(pace_of_development["traditional_ml"])
+    llm_pace = arrow.join(pace_of_development["llm_apps"])
+
+    # Assemble the prompt line by line; each entry carries its own newline.
+    lines = [
+        "You are designing a Hamilton micro-orchestration experiment.\n",
+        f"Traditional ML pace: {traditional_pace}.\n",
+        f"LLM app pace: {llm_pace}.\n",
+        "Explain how strong SWE practices (testing, modularity, reuse, portability)\n",
+        "keep the system resilient while iterating quickly.\n",
+        f"Business domain: {domain_data['business_process']} with UI {domain_data['ui']}.\n",
+        f"Primary data assets: {domain_data['data']}.\n",
+        f"Goal: deliver {idea} using Hamilton declarative functions.\n",
+    ]
+    return "".join(lines)
+
+
+def llm_prompt(prompt_template: str, edge_cases: List[str]) -> str:
+    """Combine the template with guardrails for edge cases and prompt injection.
+
+    Args:
+        prompt_template: Base prompt text; it is used verbatim as the prefix.
+        edge_cases: Edge cases to name explicitly. May be empty.
+
+    Returns:
+        The template, followed by an edge-case sentence when ``edge_cases`` is
+        non-empty, followed by the closing pipeline instruction.
+    """
+
+    sections = [prompt_template]
+    # With nothing to list, the sentence would degenerate into
+    # "... explicitly: ." so it is left out entirely.
+    if edge_cases:
+        formatted_edge_cases = ", ".join(edge_cases)
+        sections.append(
+            f"Consider the following edge cases explicitly: {formatted_edge_cases}.\n"
+        )
+    sections.append(
+        "Detail the pipeline as Data → Prompt → LLM → $, highlighting how guardrails\n"
+        "prevent prompt injection and balance evaluation with GPU cost awareness."
+    )
+    return "".join(sections)
+
+
+def llm_response(llm_prompt: str, llm_client: MockLLMClient) -> str:
+    """Return the client's completion for the given prompt.
+
+    Args:
+        llm_prompt: Prompt text passed to the client unchanged.
+        llm_client: Object whose ``complete`` method is called with the prompt.
+
+    Returns:
+        Whatever ``llm_client.complete`` returns.
+    """
+
+    completion = llm_client.complete(llm_prompt)
+    return completion
+
+
+def prompt_token_estimate(llm_prompt: str, edge_cases: List[str]) -> int:
+    """Estimate the token count, with a buffer for edge-case coverage.
+
+    The estimate is the whitespace-separated word count scaled by 0.75 and
+    rounded, plus 3 per edge case, and never less than 120.
+
+    Args:
+        llm_prompt: Prompt text whose words are counted.
+        edge_cases: Edge cases; only their number is used.
+
+    Returns:
+        The estimated token count, at least 120.
+    """
+
+    # NOTE(review): a common rule of thumb is ~0.75 words per token, which
+    # would mean dividing by 0.75 rather than multiplying - confirm the intent.
+    word_count = len(llm_prompt.split())
+    estimate = round(word_count * 0.75) + 3 * len(edge_cases)
+    if estimate < 120:
+        return 120
+    return estimate
+
+
+def business_value(
+    llm_response: str,
+    pace_of_development: Dict[str, List[str]],
+) -> Dict[str, Any]:
+    """Package the action plan together with the development-pace context.
+
+    Args:
+        llm_response: Text stored under ``"llm_plan"``.
+        pace_of_development: Mapping stored under ``"pace"`` (not copied).
+
+    Returns:
+        A dictionary with the keys ``"llm_plan"``, ``"pace"`` and
+        ``"next_step"``, in that order.
+    """
+
+    summary: Dict[str, Any] = {}
+    summary["llm_plan"] = llm_response
+    summary["pace"] = pace_of_development
+    summary["next_step"] = "Prototype with guarded prompts"
+    return summary
+
+
+def cost_estimate(
+    prompt_token_estimate: int,
+    pricing_policy: Dict[str, float],
+) -> float:
+    """Compute the expected cost from a per-1K-token price and a safety factor.
+
+    Args:
+        prompt_token_estimate: Estimated number of tokens.
+        pricing_policy: Must contain ``"price_per_1k_tokens"``; may contain
+            ``"safety_multiplier"``, which defaults to 1.0.
+
+    Returns:
+        The cost rounded to six decimal places.
+
+    Raises:
+        KeyError: If ``"price_per_1k_tokens"`` is missing.
+    """
+
+    unit_price = pricing_policy["price_per_1k_tokens"]
+    multiplier = pricing_policy.get("safety_multiplier", 1.0)
+    thousands_of_tokens = prompt_token_estimate / 1000
+    raw_cost = thousands_of_tokens * unit_price * multiplier
+    return round(raw_cost, 6)
+
+
+# Public names of the dataflow module: the two constants plus the node functions.
+__all__ = [
+    "PACE_OF_DEVELOPMENT",
+    "DATAFLOW_LABEL",
+    "pace_of_development",
+    "prompt_template",
+    "llm_prompt",
+    "llm_response",
+    "prompt_token_estimate",
+    "business_value",
+    "cost_estimate",
+]
diff --git a/scripts/coding/ai/examples/hamilton_llm/driver.py b/scripts/coding/ai/examples/hamilton_llm/driver.py
@@ -0,0 +1,79 @@
+"""Minimal Hamilton-like driver for executing declarative dataflows.
+
+The real Hamilton framework provides a rich micro-orchestration engine. For the
+purposes of the repository we build a tiny subset that resolves dependencies by
+function name and executes only the nodes required to produce requested targets.
+"""
+
+from __future__ import annotations
+
+import inspect
+from types import ModuleType
+from typing import Any, Dict, Iterable, Mapping, Sequence
+
+
+class MissingDependencyError(RuntimeError):
+    """Raised when a dependency required by a node is not available.
+
+    ``HamiltonDriver.execute`` raises it when a requested name is neither a
+    provided input nor a registered function, and when a registered function
+    declares ``*args`` or ``**kwargs`` parameters.
+    """
+
+
+class HamiltonDriver:
+    """Execute declarative functions registered from one or more modules.
+
+    Functions are registered by name and resolved lazily. Inputs provided via
+    ``execute`` act as seed values, mirroring Hamilton's configuration
+    dictionary, and take precedence over a registered function of the same
+    name. Each execution starts from a fresh set of computed values and
+    produces a log of executed nodes so tests can assert on evaluation order.
+    """
+
+    def __init__(self, modules: Iterable[ModuleType]):
+        """Register the functions of every module in ``modules``."""
+        self._functions: Dict[str, Any] = {}
+        # Names of the nodes computed by the most recent ``execute`` call, in
+        # the order they finished.
+        self.execution_log: list[str] = []
+        for module in modules:
+            self._register_module(module)
+
+    def _register_module(self, module: ModuleType) -> None:
+        """Register each plain function in ``module`` under its attribute name.
+
+        A later registration with the same name replaces the earlier one.
+        """
+        for name, candidate in vars(module).items():
+            if inspect.isfunction(candidate):
+                self._functions[name] = candidate
+
+    def execute(self, targets: Sequence[str], inputs: Mapping[str, Any]) -> Dict[str, Any]:
+        """Compute the requested targets and return them keyed by name.
+
+        Args:
+            targets: Names of the values to produce.
+            inputs: Seed values available to every node; the mapping itself is
+                not modified.
+
+        Returns:
+            A dictionary mapping each target to its value.
+
+        Raises:
+            MissingDependencyError: If a name is neither an input nor a
+                registered function, if a function declares ``*args`` or
+                ``**kwargs``, or if the functions depend on each other in a
+                cycle. Errors raised below the first level are re-raised with
+                the requiring function named and the original as ``__cause__``.
+        """
+        # ``context`` holds the seed inputs and every value computed so far.
+        context: Dict[str, Any] = dict(inputs)
+        # Stack of nodes currently being resolved, used to detect cycles.
+        in_progress: list[str] = []
+        self.execution_log = []
+
+        def resolve(name: str) -> Any:
+            if name in context:
+                return context[name]
+
+            func = self._functions.get(name)
+            if func is None:
+                raise MissingDependencyError(f"No data or function available for '{name}'")
+
+            # Without this guard a cycle would recurse until RecursionError.
+            if name in in_progress:
+                cycle = " -> ".join([*in_progress[in_progress.index(name):], name])
+                raise MissingDependencyError(f"Circular dependency detected: {cycle}")
+
+            in_progress.append(name)
+            try:
+                kwargs: Dict[str, Any] = {}
+                for parameter in inspect.signature(func).parameters.values():
+                    if parameter.kind in (
+                        inspect.Parameter.VAR_POSITIONAL,
+                        inspect.Parameter.VAR_KEYWORD,
+                    ):
+                        raise MissingDependencyError(
+                            f"Unsupported parameter kind for '{func.__name__}': {parameter.kind}"
+                        )
+                    dependency_name = parameter.name
+                    # A parameter with a default is optional: when nothing can
+                    # supply it, omit it so the function uses its own default.
+                    if (
+                        parameter.default is not inspect.Parameter.empty
+                        and dependency_name not in context
+                        and dependency_name not in self._functions
+                    ):
+                        continue
+                    try:
+                        kwargs[dependency_name] = resolve(dependency_name)
+                    except MissingDependencyError as exc:  # pragma: no cover - rephrase message
+                        raise MissingDependencyError(
+                            f"Function '{func.__name__}' requires missing dependency '{dependency_name}'"
+                        ) from exc
+                value = func(**kwargs)
+            finally:
+                in_progress.pop()
+
+            context[name] = value
+            self.execution_log.append(name)
+            return value
+
+        return {target: resolve(target) for target in targets}
+
+
+# Public names of the driver module.
+__all__ = ["HamiltonDriver", "MissingDependencyError"]
diff --git a/scripts/coding/ai/examples/hamilton_llm/llm_client.py b/scripts/coding/ai/examples/hamilton_llm/llm_client.py
@@ -0,0 +1,28 @@
+"""Deterministic mock client emulating an LLM completion API."""
+
+from __future__ import annotations
+
+from typing import Mapping
+
+
+class MockLLMClient:
+    """Return canned responses and expose a price for cost estimation.
+
+    The catalog maps identifiers to responses. The reserved key
+    ``"__default__"`` holds the response used when no identifier matches.
+    """
+
+    _DEFAULT_KEY = "__default__"
+    _FALLBACK_RESPONSE = (
+        "Document modular functions, validate with pytest and guard against prompt injection."
+    )
+
+    def __init__(self, price_per_1k_tokens: float, response_catalog: Mapping[str, str]):
+        """Store the price and a private copy of the response catalog."""
+        self.price_per_1k_tokens = price_per_1k_tokens
+        # Copy so that later changes to the caller's mapping do not leak in.
+        self._response_catalog = dict(response_catalog)
+
+    def complete(self, prompt: str) -> str:
+        """Return the first response whose identifier is contained in the prompt.
+
+        Matching is case-insensitive and follows the catalog's iteration
+        order.
+
+        Args:
+            prompt: Prompt text to search for identifiers.
+
+        Returns:
+            The matching response; otherwise the ``"__default__"`` entry if
+            present, otherwise a built-in fallback sentence.
+        """
+
+        lower_prompt = prompt.lower()
+        for key, response in self._response_catalog.items():
+            # The sentinel only marks the fallback entry. Matching it as a
+            # substring would let a prompt containing "__default__" shadow
+            # real identifiers listed after it.
+            if key == self._DEFAULT_KEY:
+                continue
+            if key.lower() in lower_prompt:
+                return response
+        return self._response_catalog.get(self._DEFAULT_KEY, self._FALLBACK_RESPONSE)
+
+
+# Public names of the mock client module.
+__all__ = ["MockLLMClient"]