|
"""Declarative dataflow modeling the Data → Prompt → LLM → $ pipeline.

The module captures the pace differences between traditional ML applications
and LLM applications, highlighting that both require strong software
engineering skills. Each function follows the Hamilton paradigm: the function
name is the output and its arguments are the explicit dependencies.
"""
| 8 | + |
| 9 | +from __future__ import annotations |
| 10 | + |
| 11 | +from typing import Any, Dict, List |
| 12 | + |
| 13 | +from .llm_client import MockLLMClient |
| 14 | + |
# Stages shared by both tracks; only the modeling stage in the middle differs.
_STAGES_BEFORE_MODELING = (
    "Idea & Data/Resources",
    "Design",
    "Development/Prototype",
)
_STAGES_AFTER_MODELING = (
    "Getting to Production",
    "Operations",
    "Maintenance & Business Value",
)

# Ordered lifecycle stages per application type. Each track gets its own list
# object, built from the shared stages around its track-specific modeling
# stage.
PACE_OF_DEVELOPMENT: Dict[str, List[str]] = {
    "traditional_ml": [
        *_STAGES_BEFORE_MODELING,
        "Model Development",
        *_STAGES_AFTER_MODELING,
    ],
    "llm_apps": [
        *_STAGES_BEFORE_MODELING,
        "Prompt / Model Development",
        *_STAGES_AFTER_MODELING,
    ],
}

# Human-readable label for the pipeline modeled by this module.
DATAFLOW_LABEL = "Data → Prompt → LLM → $"
| 37 | + |
| 38 | + |
def pace_of_development() -> Dict[str, List[str]]:
    """Return the canonical ordering of phases for traditional ML and LLM apps.

    Returns:
        A new dictionary mapping each track name in ``PACE_OF_DEVELOPMENT``
        to a fresh list of that track's stage names, in their original order.
    """
    # Copy both the dict and its lists: downstream consumers receive this
    # object directly, and mutating it must not alter the module constant.
    return {track: list(stages) for track, stages in PACE_OF_DEVELOPMENT.items()}
| 43 | + |
| 44 | + |
def prompt_template(
    idea: str,
    domain_data: Dict[str, str],
    pace_of_development: Dict[str, List[str]],
) -> str:
    """Create a template that contrasts both paces and demands SWE practices.

    Args:
        idea: Text interpolated into the final "Goal: deliver ..." line.
        domain_data: Mapping that must provide the keys ``business_process``,
            ``ui`` and ``data``.
        pace_of_development: Mapping that must provide the keys
            ``traditional_ml`` and ``llm_apps``, each a list of stage names.

    Returns:
        The multi-line template; every line, including the last, ends with a
        newline.

    Raises:
        KeyError: If any of the keys listed above is missing.
    """
    arrow = " → "
    ml_track = arrow.join(pace_of_development["traditional_ml"])
    llm_track = arrow.join(pace_of_development["llm_apps"])

    lines = [
        "You are designing a Hamilton micro-orchestration experiment.",
        f"Traditional ML pace: {ml_track}.",
        f"LLM app pace: {llm_track}.",
        "Explain how strong SWE practices (testing, modularity, reuse, portability)",
        "keep the system resilient while iterating quickly.",
        f"Business domain: {domain_data['business_process']} with UI {domain_data['ui']}.",
        f"Primary data assets: {domain_data['data']}.",
        f"Goal: deliver {idea} using Hamilton declarative functions.",
    ]
    # Terminate every line so text appended by callers starts on a new line.
    return "".join(f"{line}\n" for line in lines)
| 64 | + |
| 65 | + |
def llm_prompt(prompt_template: str, edge_cases: List[str]) -> str:
    """Combine the template with guardrail text on edge cases and prompt injection.

    Args:
        prompt_template: Base template text. The guardrail text is appended
            directly after it, with no separator added here.
        edge_cases: Edge-case descriptions, listed comma-separated in the
            prompt. An empty list leaves the enumeration empty
            ("... explicitly: .").

    Returns:
        The template followed by the edge-case sentence and the pipeline
        instructions. The result does not end with a newline.
    """
    listed_cases = ", ".join(edge_cases)
    guardrail_suffix = "".join(
        [
            "Consider the following edge cases explicitly: ",
            listed_cases,
            ".\n",
            "Detail the pipeline as Data → Prompt → LLM → $, highlighting how guardrails\n",
            "prevent prompt injection and balance evaluation with GPU cost awareness.",
        ]
    )
    return f"{prompt_template}{guardrail_suffix}"
| 77 | + |
| 78 | + |
def llm_response(llm_prompt: str, llm_client: MockLLMClient) -> str:
    """Obtain the response from the LLM client for the assembled prompt.

    Args:
        llm_prompt: Prompt text passed unchanged to the client.
        llm_client: Client object whose ``complete`` method is called with
            the prompt.

    Returns:
        Whatever ``llm_client.complete`` returns for the prompt.
    """
    completion = llm_client.complete(llm_prompt)
    return completion
| 83 | + |
| 84 | + |
def prompt_token_estimate(llm_prompt: str, edge_cases: List[str]) -> int:
    """Estimate the token count with a buffer for edge-case coverage.

    The estimate is the whitespace-separated word count scaled by 0.75 and
    rounded, plus 3 tokens per edge case, with a floor of 120.

    Args:
        llm_prompt: Prompt text whose words are counted.
        edge_cases: Edge cases; each one adds a fixed 3-token allowance.

    Returns:
        The estimated token count, never lower than 120.
    """
    # NOTE(review): multiplying the word count by 0.75 yields fewer tokens
    # than words; if the intent was "about 0.75 words per token" this should
    # divide instead. Confirm the intended heuristic before changing it.
    word_count = len(llm_prompt.split())
    estimate = round(word_count * 0.75) + 3 * len(edge_cases)
    return estimate if estimate > 120 else 120
| 92 | + |
| 93 | + |
def business_value(
    llm_response: str,
    pace_of_development: Dict[str, List[str]],
) -> Dict[str, Any]:
    """Package the action plan together with the development-pace context.

    Args:
        llm_response: LLM output stored under ``llm_plan``.
        pace_of_development: Pace mapping stored, by reference, under
            ``pace``.

    Returns:
        A dict with the keys ``llm_plan``, ``pace`` and ``next_step``, in
        that order; ``next_step`` is a fixed recommendation string.
    """
    summary: Dict[str, Any] = {}
    summary["llm_plan"] = llm_response
    summary["pace"] = pace_of_development
    summary["next_step"] = "Prototype with guarded prompts"
    return summary
| 105 | + |
| 106 | + |
def cost_estimate(
    prompt_token_estimate: int,
    pricing_policy: Dict[str, float],
) -> float:
    """Compute the expected cost from a per-1K-token rate and a safety factor.

    Args:
        prompt_token_estimate: Estimated number of tokens to be billed.
        pricing_policy: Mapping that must contain ``price_per_1k_tokens``
            and may contain ``safety_multiplier`` (treated as 1.0 when
            absent).

    Returns:
        The cost rounded to 6 decimal places.

    Raises:
        KeyError: If ``price_per_1k_tokens`` is missing from the policy.
    """
    rate_per_1k = pricing_policy["price_per_1k_tokens"]
    multiplier = pricing_policy.get("safety_multiplier", 1.0)
    thousands_of_tokens = prompt_token_estimate / 1000
    raw_cost = thousands_of_tokens * rate_per_1k * multiplier
    return round(raw_cost, 6)
| 116 | + |
| 117 | + |
| 118 | +__all__ = [ |
| 119 | + "PACE_OF_DEVELOPMENT", |
| 120 | + "DATAFLOW_LABEL", |
| 121 | + "pace_of_development", |
| 122 | + "prompt_template", |
| 123 | + "llm_prompt", |
| 124 | + "llm_response", |
| 125 | + "prompt_token_estimate", |
| 126 | + "business_value", |
| 127 | + "cost_estimate", |
| 128 | +] |
0 commit comments